# 2026-rff_mp/BolonkinNM/Task 1/Task 1.py
# 2026-05-23 18:45:47 +03:00
#
# 82 lines
# 2.3 KiB
# Python

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Labels assigned to the seven columns kept from the input file
# (presumably temperature, dew point, humidity, wind, gust and two
# pressure readings -- inferred from the labels, confirm against the data).
col_names = [
    'T(C)',
    'Td(C)',
    'HR%',
    'ff(kmh)',
    'Gust(kmh)',
    'P_0(HPa)',
    'P_sea(HPa)',
]

# Whitespace-separated table: the first line is skipped and only the
# columns at positions 2..8 are read, under the labels above.
data = pd.read_csv(
    "data_meteo.txt",
    names=col_names,
    usecols=list(range(2, 9)),
    skiprows=1,
    sep=r'\s+',
    engine='python',
)
print("загружено записей (строк):", len(data))

# Anything that does not parse as a number becomes NaN, and every row
# holding at least one NaN is then discarded.
data = data.apply(pd.to_numeric, errors='coerce')
n_before = data.shape[0]
data = data.dropna(subset=col_names)
n_after = data.shape[0]
print(f"после приведения к числам и dropna: {n_after} строк (удалено {n_before - n_after})")
def correlation(vec1, vec2, center=False):
    """Return the normalized dot product of two equally shaped vectors.

    With ``center=False`` the result is the cosine similarity of the raw
    values.  With ``center=True`` the mean of each vector is subtracted
    first, which makes the result the Pearson correlation coefficient.

    Args:
        vec1: First sequence of numbers (anything ``np.asarray`` accepts).
        vec2: Second sequence of numbers, same shape as ``vec1``.
        center: Subtract each vector's own mean before comparing.

    Returns:
        A float in ``[-1, 1]``, or ``np.nan`` when the inputs are empty or
        when either vector has zero norm (after optional centering).

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    v1 = np.asarray(vec1, dtype=float)
    v2 = np.asarray(vec2, dtype=float)

    # Without this check numpy would silently broadcast e.g. a length-1
    # vector against a length-n one and return a meaningless number.
    if v1.shape != v2.shape:
        raise ValueError(
            f"shape mismatch: {v1.shape} vs {v2.shape}"
        )

    # Nothing to compare; also avoids numpy's "mean of empty slice" warning.
    if v1.size == 0:
        return np.nan

    if center:
        v1 = v1 - np.mean(v1)
        v2 = v2 - np.mean(v2)

    dot_product = np.sum(v1 * v2)
    norm1 = np.sqrt(np.sum(v1 ** 2))
    norm2 = np.sqrt(np.sum(v2 ** 2))
    if norm1 == 0 or norm2 == 0:
        # A constant (or all-zero) vector has no defined direction.
        return np.nan

    # Floating-point rounding can push the ratio marginally past +/-1
    # (e.g. 1.0000000000000002); clamp it back into the valid range.
    return np.clip(dot_product / (norm1 * norm2), -1.0, 1.0)
# Pairwise similarity tables over all columns: one on the raw values and
# one on mean-centered values, each entry rounded to three decimals.
n = len(col_names)
raw_matrix = np.zeros((n, n))
center_matrix = np.zeros((n, n))

# np.ndindex walks every (row, column) pair in row-major order.
for i, j in np.ndindex(n, n):
    left, right = data.iloc[:, i], data.iloc[:, j]
    r_raw = correlation(left, right, center=False)
    r_center = correlation(left, right, center=True)
    # Undefined correlations are stored as NaN rather than rounded.
    raw_matrix[i, j] = np.nan if np.isnan(r_raw) else np.round(r_raw, 3)
    center_matrix[i, j] = np.nan if np.isnan(r_center) else np.round(r_center, 3)

# Wrap the arrays with column labels on both axes for readable output.
df_raw = pd.DataFrame(raw_matrix, index=col_names, columns=col_names)
df_center = pd.DataFrame(center_matrix, index=col_names, columns=col_names)
# Print both tables and their element-wise difference, each under its
# own heading.
reports = (
    ("\nкорреляция (raw):", df_raw),
    ("\nкорреляция (centered):", df_center),
    ("\nразница (centered - raw):", (df_center - df_raw).round(3)),
)
for heading, table in reports:
    print(heading)
    print(table.to_string())
# Draw every column as its deviation from the column mean on one shared
# axis, save the figure to disk, then display it.
fig, ax = plt.subplots(figsize=(14, 8))

for col in col_names:
    series = data[col]
    centered_values = series - series.mean()
    ax.plot(
        centered_values.to_numpy(),
        label=col,
        marker='.',
        linewidth=1,
        markersize=4,
    )

# Dashed reference line at zero, i.e. at each column's own mean.
ax.axhline(0, linestyle='--')
ax.set_title("центрированные метеоданные")
ax.set_xlabel("номер измерения")
ax.set_ylabel("отклонение от среднего")
ax.legend()
ax.grid()

fig.tight_layout()
fig.savefig("meteo_analysis.png", dpi=150)
plt.show()