2026-rff_mp/groshevava/docs/data/experiments.py

198 lines
6.9 KiB
Python

# Runs the experiments and measures performance.
import time
import random
import csv
from linked_list import ll_insert, ll_find, ll_delete, ll_list_all
from hash_table import ht_create, ht_insert, ht_find, ht_delete, ht_list_all
from bst import bst_insert, bst_find, bst_delete, bst_list_all
# Generates the directory records: "shuffled" is in random order, "sorted" is ordered by name.
def generate_test_data(n=10000, seed=42):
    """Build the (name, phone) records used by the benchmarks.

    Seeds the module-level ``random`` generator with ``seed`` before
    shuffling, so the shuffled order is reproducible for a given seed.

    Args:
        n: Number of records to generate.
        seed: Value passed to ``random.seed``.

    Returns:
        A ``(records_shuffled, records_sorted)`` tuple holding the same
        records, first in pseudo-random order and then ordered by name.
    """
    random.seed(seed)
    base = [(f"User_{idx:05d}", f"+7-999-{idx:07d}") for idx in range(n)]

    shuffled = list(base)
    random.shuffle(shuffled)

    by_name = list(base)
    by_name.sort(key=lambda record: record[0])
    return shuffled, by_name
# Timing helpers.
def measure_insert(ll_structure, ht_structure, bst_structure, records, mode_name):
    """Time inserting every record into a linked list, a hash table and a BST.

    Each structure is built from scratch inside this function; the
    ``ll_structure``, ``ht_structure`` and ``bst_structure`` arguments are
    accepted but not used.

    Args:
        ll_structure: Unused.
        ht_structure: Unused.
        bst_structure: Unused.
        records: Iterable of ``(name, phone)`` pairs to insert.
        mode_name: Label stored in the second column of every result row.

    Returns:
        ``(results, head, buckets, root)`` where ``results`` contains one
        ``[structure, mode_name, "вставка", seconds]`` row per structure and
        the remaining items are the populated linked list, hash table and BST.
    """
    operation = "вставка"
    timings = []

    # Linked list: the value returned by ll_insert is fed into the next call.
    began = time.perf_counter()
    head = None
    for name, phone in records:
        head = ll_insert(head, name, phone)
    elapsed = time.perf_counter() - began
    timings.append(["LinkedList", mode_name, operation, elapsed])

    # Hash table: creating the table is part of the timed region.
    began = time.perf_counter()
    buckets = ht_create(256)  # presumably the bucket count - confirm in hash_table
    for name, phone in records:
        ht_insert(buckets, name, phone)
    elapsed = time.perf_counter() - began
    timings.append(["HashTable", mode_name, operation, elapsed])

    # Binary search tree: the value returned by bst_insert becomes the root.
    began = time.perf_counter()
    root = None
    for name, phone in records:
        root = bst_insert(root, name, phone)
    elapsed = time.perf_counter() - began
    timings.append(["BST", mode_name, operation, elapsed])

    return timings, head, buckets, root
def measure_find(head, buckets, root, all_names, mode_name, *,
                 existing_count=100, missing_count=10):
    """Time a batch of lookups in the linked list, hash table and BST.

    The same list of names is searched in all three structures: a random
    sample taken from ``all_names`` followed by generated ``None_<i>`` names.

    Args:
        head: Linked list passed to ``ll_find``.
        buckets: Hash table passed to ``ht_find``.
        root: BST passed to ``bst_find``.
        all_names: Sequence of names to sample the lookups from.
        mode_name: Label stored in the second column of every result row.
        existing_count: How many names to sample from ``all_names``. Capped
            at ``len(all_names)`` so small data sets do not fail.
        missing_count: How many ``None_<i>`` names to append.

    Returns:
        A list with one ``[structure, mode_name, "поиск", seconds]`` row per
        structure, where ``seconds`` is the total time for the whole batch.
    """
    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more names than are available.
    sample_size = max(0, min(existing_count, len(all_names)))
    existing_names = random.sample(all_names, sample_size)
    non_existing_names = [f"None_{i}" for i in range(missing_count)]
    search_names = existing_names + non_existing_names

    results = []

    # Linked list
    start = time.perf_counter()
    for name in search_names:
        ll_find(head, name)
    end = time.perf_counter()
    results.append(["LinkedList", mode_name, "поиск", end - start])

    # Hash table
    start = time.perf_counter()
    for name in search_names:
        ht_find(buckets, name)
    end = time.perf_counter()
    results.append(["HashTable", mode_name, "поиск", end - start])

    # Binary search tree
    start = time.perf_counter()
    for name in search_names:
        bst_find(root, name)
    end = time.perf_counter()
    results.append(["BST", mode_name, "поиск", end - start])

    return results
def measure_delete(head, buckets, root, all_names, mode_name, *, delete_count=50):
    """Time deleting a random batch of names from all three structures.

    The same randomly sampled names are deleted from the linked list, the
    hash table and the BST. The updated ``head`` and ``root`` values are only
    used locally and are not returned.

    Args:
        head: Linked list passed to ``ll_delete``.
        buckets: Hash table passed to ``ht_delete``.
        root: BST passed to ``bst_delete``.
        all_names: Sequence of names to sample the deletions from.
        mode_name: Label stored in the second column of every result row.
        delete_count: How many names to delete. Capped at
            ``len(all_names)`` so small data sets do not fail.

    Returns:
        A list with one ``[structure, mode_name, "удаление", seconds]`` row
        per structure, where ``seconds`` is the total time for the batch.
    """
    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more names than are available.
    sample_size = max(0, min(delete_count, len(all_names)))
    delete_names = random.sample(all_names, sample_size)

    results = []

    # Linked list: the value returned by ll_delete is fed into the next call.
    start = time.perf_counter()
    for name in delete_names:
        head = ll_delete(head, name)
    end = time.perf_counter()
    results.append(["LinkedList", mode_name, "удаление", end - start])

    # Hash table
    start = time.perf_counter()
    for name in delete_names:
        ht_delete(buckets, name)
    end = time.perf_counter()
    results.append(["HashTable", mode_name, "удаление", end - start])

    # Binary search tree: the value returned by bst_delete becomes the root.
    start = time.perf_counter()
    for name in delete_names:
        root = bst_delete(root, name)
    end = time.perf_counter()
    results.append(["BST", mode_name, "удаление", end - start])

    return results
# Runs the experiments.
def run_experiments(records_shuffled, records_sorted, repetitions=5):
    """Run the insert, find and delete benchmarks for both input orders.

    For every repetition the shuffled records are measured first and the
    sorted records second; each pass rebuilds the structures with
    ``measure_insert`` and then runs ``measure_find`` and ``measure_delete``
    on them. Names for lookups and deletions are always taken from
    ``records_shuffled``.

    Args:
        records_shuffled: Records in random order.
        records_sorted: Records ordered by name.
        repetitions: Number of times the whole sequence is repeated.

    Returns:
        A list of rows that starts with the header row and is followed by
        every measurement row in the order it was produced.
    """
    table = [["Структура", "Режим", "Операция", "Время (сек)"]]
    names = [rec[0] for rec in records_shuffled]
    datasets = (
        ("случайный", records_shuffled),
        ("отсортированный", records_sorted),
    )

    for attempt in range(1, repetitions + 1):
        print(f"Повторение {attempt}/{repetitions}")
        for mode, records in datasets:
            inserted, head, buckets, root = measure_insert(
                None, None, None, records, mode
            )
            table += inserted
            table += measure_find(head, buckets, root, names, mode)
            table += measure_delete(head, buckets, root, names, mode)

    return table
def save_results(results, filename="results.csv"):
    """Write the result rows to a UTF-8 CSV file and print a confirmation.

    Args:
        results: Iterable of rows; each row is written as one CSV record.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, "w", newline="", encoding="utf-8") as out:
        write_row = csv.writer(out).writerow
        for row in results:
            write_row(row)
    print(f"Результаты сохранены в {filename}")
def analyze_results(filename="results.csv"):
    """Print and return the mean time per (structure, mode, operation).

    Reads the CSV file, skips its first row as a header, groups the timings
    by their first three columns and prints one averaged line per group in
    sorted key order.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A dict mapping ``(structure, mode, operation)`` to the mean time in
        seconds, in the same order as the printed table. Empty when the file
        has no data rows.

    Raises:
        ValueError: If a non-blank row does not have exactly four columns or
            its last column is not a number.
    """
    from collections import defaultdict

    stats = defaultdict(list)
    # The csv module expects files to be opened with newline="" so that it
    # can handle line endings itself.
    with open(filename, "r", newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        # Skip the header; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            structure, mode, operation, time_str = row
            stats[(structure, mode, operation)].append(float(time_str))

    averages = {
        key: sum(times) / len(times) for key, times in sorted(stats.items())
    }

    print("\nСредние времена выполнения (сек):")
    print("-" * 60)
    print(f"{'Структура':<15} {'Режим':<20} {'Операция':<10} {'Время':<10}")
    print("-" * 60)
    for (structure, mode, operation), avg_time in averages.items():
        print(f"{structure:<15} {mode:<20} {operation:<10} {avg_time:<10.6f}")

    return averages
if __name__ == "__main__":
    # Script entry point: generate the data, run the benchmarks, store the
    # measurements in the default CSV file and print the averaged summary.
    print("Генерация тестовых данных...")
    datasets = generate_test_data(10000)
    print("Запуск экспериментов...")
    save_results(run_experiments(*datasets, repetitions=5))
    analyze_results()