2026-rff_mp/ProninVV/aufgabe-1-data-structures/graphiki.py

132 lines
5.6 KiB
Python

import pandas as pd
import glob
import re
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import numpy as np
from scipy.interpolate import interp1d, CubicSpline
from scipy.optimize import curve_fit
from numpy.polynomial import Polynomial
# One-off preprocessing step, kept for reference (disabled): for every dataset
# size it averages the per-epoch files results/timedata_<size>_epochs_*.csv
# into a single results/average_timedata_<size>.csv.
# folder_path = 'results'
# # Dataset sizes (500, 1000, 2000, 5000, 10000)
# sizes = ['500', '1000', '2000', '5000', '10000']
# for size in sizes:
#     files = glob.glob(os.path.join(folder_path, f'timedata_{size}_epochs_*.csv'))
#     if not files:
#         continue
#     # Read the files
#     dfs = [pd.read_csv(f) for f in files]
#     # Find the text (non-numeric) columns;
#     # they are assumed to be the same in every file
#     text_cols = dfs[0].select_dtypes(exclude=['number']).columns.tolist()
#     # Concatenate and take the mean;
#     # group by the text columns so that they survive into the result
#     if text_cols:
#         combined = pd.concat(dfs)
#         mean_df = combined.groupby(text_cols).mean().reset_index()
#     else:
#         # No text columns: plain row-wise mean
#         mean_df = pd.concat(dfs).groupby(level=0).mean()
#     output_name = f'average_timedata_{size}.csv'
#     mean_df.to_csv(os.path.join(folder_path, output_name), index=False)
#     print(f"Файл {output_name} успешно создан")
# Averaged timing tables, one per dataset size N, loaded at import time.
# The plotting functions below read these module-level names directly.
df500, df1000, df2000, df5000, df10000 = map(
    pd.read_csv,
    (
        "results/average_timedata_500.csv",
        "results/average_timedata_1000.csv",
        "results/average_timedata_2000.csv",
        "results/average_timedata_5000.csv",
        "results/average_timedata_10000.csv",
    ),
)
def select_data_list(ax):
    """Plot linked-list timings against dataset size on a pair of axes.

    Reads the module-level tables ``df500`` ... ``df10000``, keeps the rows
    whose ``Структура`` column equals ``"linklist"`` and draws one curve per
    operation: ``sorted``-mode timings go to ``ax[0]``, ``shuffled``-mode
    timings go to ``ax[1]``.  Only the ``ax[0]`` curves are given legend
    labels.

    Rows are matched to operations purely by their position inside the
    filtered slice: 0 is treated as delete, 1 as find, 2 as insert.  That
    order comes from the CSV files themselves - TODO confirm.

    Args:
        ax: Indexable pair of objects exposing ``plot`` (for example the
            array returned by ``plt.subplots(ncols=2)``).
    """
    sizes = [500, 1000, 2000, 5000, 10000]
    tables = [df500, df1000, df2000, df5000, df10000]

    def timings(mode):
        # One list of "Время (сек)" values per table, limited to this
        # structure and the requested input mode.
        per_size = []
        for table in tables:
            wanted = (table['Структура'] == "linklist") & (table['Режим'] == mode)
            per_size.append(list(table[wanted]["Время (сек)"]))
        return per_size

    sorted_times = timings("sorted")
    shuffled_times = timings("shuffled")

    # (legend label, colour) in the positional order of the filtered rows.
    operations = (("delete", 'red'), ("find", 'blue'), ("insert", 'green'))
    for position, (operation, colour) in enumerate(operations):
        ax[0].plot(
            sizes,
            [times[position] for times in sorted_times],
            label=operation,
            color=colour,
        )
        ax[1].plot(
            sizes,
            [times[position] for times in shuffled_times],
            color=colour,
        )
def select_data_hasht(ax):
    """Plot hash-table timings against dataset size on a pair of axes.

    Reads the module-level tables ``df500`` ... ``df10000``, keeps the rows
    whose ``Структура`` column equals ``"hashtable"`` and draws one curve per
    operation: ``sorted``-mode timings go to ``ax[0]``, ``shuffled``-mode
    timings go to ``ax[1]``.  Only the ``ax[0]`` curves are given legend
    labels.

    Rows are matched to operations purely by their position inside the
    filtered slice: 0 is treated as delete, 1 as find, 2 as insert.  That
    order comes from the CSV files themselves - TODO confirm.

    Args:
        ax: Indexable pair of objects exposing ``plot`` (for example the
            array returned by ``plt.subplots(ncols=2)``).
    """
    sizes = [500, 1000, 2000, 5000, 10000]
    tables = [df500, df1000, df2000, df5000, df10000]

    def timings(mode):
        # One list of "Время (сек)" values per table, limited to this
        # structure and the requested input mode.
        per_size = []
        for table in tables:
            wanted = (table['Структура'] == "hashtable") & (table['Режим'] == mode)
            per_size.append(list(table[wanted]["Время (сек)"]))
        return per_size

    sorted_times = timings("sorted")
    shuffled_times = timings("shuffled")

    # (legend label, colour) in the positional order of the filtered rows.
    operations = (("delete", 'red'), ("find", 'blue'), ("insert", 'green'))
    for position, (operation, colour) in enumerate(operations):
        ax[0].plot(
            sizes,
            [times[position] for times in sorted_times],
            label=operation,
            color=colour,
        )
        ax[1].plot(
            sizes,
            [times[position] for times in shuffled_times],
            color=colour,
        )
def select_data_tree(ax):
    """Plot binary-tree timings against dataset size on a pair of axes.

    Reads the module-level tables ``df500`` ... ``df10000``, keeps the rows
    whose ``Структура`` column equals ``"bintree"`` and draws one curve per
    operation: ``sorted``-mode timings go to ``ax[0]``, ``shuffled``-mode
    timings go to ``ax[1]``.  Only the ``ax[0]`` curves are given legend
    labels.

    Rows are matched to operations purely by their position inside the
    filtered slice: 0 is treated as delete, 1 as find, 2 as insert.  That
    order comes from the CSV files themselves - TODO confirm.

    Args:
        ax: Indexable pair of objects exposing ``plot`` (for example the
            array returned by ``plt.subplots(ncols=2)``).
    """
    sizes = [500, 1000, 2000, 5000, 10000]
    tables = [df500, df1000, df2000, df5000, df10000]

    def timings(mode):
        # One list of "Время (сек)" values per table, limited to this
        # structure and the requested input mode.
        per_size = []
        for table in tables:
            wanted = (table['Структура'] == "bintree") & (table['Режим'] == mode)
            per_size.append(list(table[wanted]["Время (сек)"]))
        return per_size

    sorted_times = timings("sorted")
    shuffled_times = timings("shuffled")

    # (legend label, colour) in the positional order of the filtered rows.
    operations = (("delete", 'red'), ("find", 'blue'), ("insert", 'green'))
    for position, (operation, colour) in enumerate(operations):
        ax[0].plot(
            sizes,
            [times[position] for times in sorted_times],
            label=operation,
            color=colour,
        )
        ax[1].plot(
            sizes,
            [times[position] for times in shuffled_times],
            color=colour,
        )
# Debugging snippet: inspect one structure/mode slice of a timings table.
# list(df500[(df500['Структура'] == "linklist") & (df500['Режим'] == "shuffled")]["Время (сек)"])
# Build the figure.
# Two-panel figure: timings on sorted input (left) and shuffled input (right)
# for one data structure, saved as PNG and EPS and then shown on screen.
fig, ax = plt.subplots(figsize=(10, 5), nrows=1, ncols=2)

# Draw the curves exactly once: the selector plots onto both panels itself,
# so calling it inside the per-panel loop would duplicate every line.
# Swap in one of the commented alternatives to chart another structure.
# select_data_list(ax)
# select_data_hasht(ax)
select_data_tree(ax)

ax[0].set_title("График сложностей для дерева (sort)")
ax[1].set_title("График сложностей для дерева (shuff)")

# The selectors label only the left-panel curves.  Both panels use the same
# colour per operation, so the left panel's legend entries are reused for the
# right panel instead of calling legend() on an axes with no labelled artists.
legend_handles, legend_labels = ax[0].get_legend_handles_labels()

# Per-panel cosmetics shared by both subplots.
for panel in ax:
    panel.set_xlabel("N")
    panel.set_ylabel("сек * ")
    panel.grid(which="major", linewidth=1.5)
    panel.grid(which="minor", color="gray", linewidth=0.5)
    panel.xaxis.set_minor_locator(AutoMinorLocator())
    panel.yaxis.set_minor_locator(AutoMinorLocator())
    panel.legend(legend_handles, legend_labels)
    panel.set_ylim(0, 0.1)

# Build the output paths with os.path.join: the former 'graphics\T...'
# literals relied on the invalid escape sequence "\T" and on a Windows-only
# separator.  Create the directory first so savefig cannot fail on a fresh
# checkout.
os.makedirs("graphics", exist_ok=True)
plt.savefig(os.path.join("graphics", "Tree1.png"), dpi=200)
# NOTE(review): "Tre1.eps" differs from the PNG's "Tree1" stem - possibly a
# typo; the file name is kept unchanged here.
plt.savefig(os.path.join("graphics", "Tre1.eps"), dpi=200)
plt.show()