2026-rff_mp/SobolevNS/docs/data/task1_data_structures/experiment.py
2026-05-22 12:20:40 +03:00

195 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
experiment.py
Замеры производительности трёх структур данных на одних и тех же данных:
LinkedList, HashTable, BST
в двух режимах:
случайный порядок (shuffled), отсортированный порядок (sorted)
для трёх операций:
insert N записей, find 110 раз, delete 50 раз
Каждый эксперимент повторяется TRIALS раз. Сохраняем все замеры + средние
в CSV. Для BST на отсортированных данных снижаем N - иначе эксперимент
длится десятки минут (вырожденное дерево, O(N^2) вставка).
"""
import csv
import os
import random
import time
import phonebook as pb
# ---------- experiment parameters ----------
N = 10_000  # number of records in the main experiment
N_BST_SORTED = 2_000  # smaller N for the BST on sorted input (O(N^2) insertion)
TRIALS = 5  # number of repetitions of each measurement
N_FIND_EXISTING = 100  # lookups per trial for names sampled from the records
N_FIND_MISSING = 10  # lookups per trial for generated "None_<i>" names
N_DELETE = 50  # names sampled from the records and deleted per trial
HT_SIZE = 2048  # hash table size (passed as size= to pb.ht_create)
RNG_SEED = 42  # base seed; run_one adds the trial index to it
OUT_CSV = os.path.join("docs", "data", "results.csv")  # file written by main()
# --------------------------------------------
def gen_records(n):
    """Build n synthetic phone-book records as a list of (name, phone) tuples.

    Record number i (counting from 0) is
    ("User_<i zero-padded to 5 digits>", "555-<i zero-padded to 7 digits>"),
    for example ("User_00001", "555-0000001") for i == 1.
    A non-positive n produces an empty list.
    """
    names = (f"User_{idx:05d}" for idx in range(n))
    phones = (f"555-{idx:07d}" for idx in range(n))
    return list(zip(names, phones))
def pick_keys(records, k_exist, k_miss, rng):
    """Return the list of names to look up: sampled ones first, then generated ones.

    records -- sequence of (name, phone) pairs
    k_exist -- how many names to draw from records via rng.sample (no repeats)
    k_miss  -- how many names of the form "None_<i>" (i = 0..k_miss-1) to append;
               the caller relies on these not being present in records
    rng     -- object providing sample(), e.g. a random.Random instance

    rng.sample raises ValueError when k_exist exceeds len(records).
    """
    sampled = rng.sample(records, k_exist)
    keys = [name for name, _phone in sampled]
    keys.extend(f"None_{idx}" for idx in range(k_miss))
    return keys
# ---------- замеры по структурам ----------
def measure_linked_list(records, find_keys, delete_keys):
    """Time bulk insert, lookups and deletions on the phonebook linked list.

    records     -- iterable of (name, phone) pairs, inserted in the given order
    find_keys   -- names passed to pb.ll_find one by one
    delete_keys -- names passed to pb.ll_delete one by one

    Returns a tuple (t_insert, t_find, t_delete) of elapsed times in seconds,
    measured with time.perf_counter.
    """
    clock = time.perf_counter

    # Insertion: creating the empty list is part of the timed region.
    started = clock()
    node = pb.ll_create()
    for name, phone in records:
        # the value returned by ll_insert becomes the current list handle
        node = pb.ll_insert(node, name, phone)
    insert_time = clock() - started

    # Lookups: results are discarded, only the elapsed time matters.
    started = clock()
    for key in find_keys:
        pb.ll_find(node, key)
    find_time = clock() - started

    # Deletions: ll_delete's return value replaces the list handle.
    started = clock()
    for key in delete_keys:
        node = pb.ll_delete(node, key)
    delete_time = clock() - started

    return insert_time, find_time, delete_time
def measure_hash_table(records, find_keys, delete_keys):
    """Time bulk insert, lookups and deletions on the phonebook hash table.

    records     -- iterable of (name, phone) pairs, inserted in the given order
    find_keys   -- names passed to pb.ht_find one by one
    delete_keys -- names passed to pb.ht_delete one by one

    The table is created with size=HT_SIZE. Returns a tuple
    (t_insert, t_find, t_delete) of elapsed times in seconds, measured with
    time.perf_counter.
    """
    clock = time.perf_counter

    # Insertion: table creation is part of the timed region.
    started = clock()
    table = pb.ht_create(size=HT_SIZE)
    for name, phone in records:
        pb.ht_insert(table, name, phone)
    insert_time = clock() - started

    # Lookups: results are discarded, only the elapsed time matters.
    started = clock()
    for key in find_keys:
        pb.ht_find(table, key)
    find_time = clock() - started

    # Deletions: return values of ht_delete are ignored.
    started = clock()
    for key in delete_keys:
        pb.ht_delete(table, key)
    delete_time = clock() - started

    return insert_time, find_time, delete_time
def measure_bst(records, find_keys, delete_keys):
    """Time bulk insert, lookups and deletions on the phonebook binary search tree.

    records     -- iterable of (name, phone) pairs, inserted in the given order
    find_keys   -- names passed to pb.bst_find one by one
    delete_keys -- names passed to pb.bst_delete one by one

    Returns a tuple (t_insert, t_find, t_delete) of elapsed times in seconds,
    measured with time.perf_counter.
    """
    clock = time.perf_counter

    # Insertion: creating the empty tree is part of the timed region.
    started = clock()
    tree = pb.bst_create()
    for name, phone in records:
        # the value returned by bst_insert becomes the current tree handle
        tree = pb.bst_insert(tree, name, phone)
    insert_time = clock() - started

    # Lookups: results are discarded, only the elapsed time matters.
    started = clock()
    for key in find_keys:
        pb.bst_find(tree, key)
    find_time = clock() - started

    # Deletions: bst_delete's return value replaces the tree handle.
    started = clock()
    for key in delete_keys:
        tree = pb.bst_delete(tree, key)
    delete_time = clock() - started

    return insert_time, find_time, delete_time
# ---------- запуск ----------
def run_one(structure_name, mode, n, rng_seed):
    """Benchmark one structure in one input mode, repeated TRIALS times.

    structure_name -- "LinkedList", "HashTable" or "BST"
    mode           -- "shuffled" (records shuffled per trial) or "sorted"
                      (records ordered by name)
    n              -- number of records generated with gen_records
    rng_seed       -- base seed; trial t uses random.Random(rng_seed + t)

    Returns a list with one (t_insert, t_find, t_delete) tuple per trial.
    Raises ValueError for an unknown mode or structure_name; both checks run
    inside the trial loop, mode first.
    """
    source_rows = gen_records(n)
    results = []
    for attempt in range(TRIALS):
        # a dedicated generator per trial keeps trials reproducible and independent
        trial_rng = random.Random(rng_seed + attempt)

        if mode == "sorted":
            records = sorted(source_rows, key=lambda rec: rec[0])
        elif mode == "shuffled":
            records = list(source_rows)
            trial_rng.shuffle(records)
        else:
            raise ValueError(mode)

        # lookup keys are drawn before delete keys, from the same generator
        find_keys = pick_keys(records, N_FIND_EXISTING, N_FIND_MISSING, trial_rng)
        doomed = trial_rng.sample(records, N_DELETE)
        delete_keys = [name for name, _phone in doomed]

        if structure_name == "LinkedList":
            measure = measure_linked_list
        elif structure_name == "HashTable":
            measure = measure_hash_table
        elif structure_name == "BST":
            measure = measure_bst
        else:
            raise ValueError(structure_name)

        results.append(measure(records, find_keys, delete_keys))
    return results
def main():
    """Run every benchmark configuration and write all timings to OUT_CSV.

    For each (structure, mode, N) configuration the experiment is executed via
    run_one, per-operation means are printed, and the results are collected.
    The CSV contains, in order: a header row, one row per
    (structure, mode, operation, trial), an empty row, a marker row, a summary
    header, and one summary row per (structure, mode, operation) holding the
    mean and all individual measurements joined with ";".
    """
    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so create the directory only when there is one.
    out_dir = os.path.dirname(OUT_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    rows = [["Структура", "Режим", "Операция", "N", "Trial", "Время (сек)"]]
    summary = []  # (structure, mode, op, n, mean, all_trials)
    configs = [
        ("LinkedList", "shuffled", N),
        ("LinkedList", "sorted", N),
        ("HashTable", "shuffled", N),
        ("HashTable", "sorted", N),
        ("BST", "shuffled", N),
        ("BST", "sorted", N_BST_SORTED),  # degenerate case
    ]
    # position of each operation inside the tuples returned by run_one
    ops = ["insert", "find", "delete"]

    for structure, mode, n in configs:
        print(f"==> {structure:10s} | {mode:9s} | N={n}")
        runs = run_one(structure, mode, n, RNG_SEED)
        # runs = [(insert, find, delete), ...]
        for op_idx, op in enumerate(ops):
            vals = [r[op_idx] for r in runs]
            mean = sum(vals) / len(vals)
            for trial_idx, v in enumerate(vals):
                rows.append([structure, mode, op, n, trial_idx + 1, f"{v:.6f}"])
            summary.append((structure, mode, op, n, mean, vals))
            print(f" {op:7s}: mean={mean*1000:.3f} ms "
                  f"runs={[f'{v*1000:.3f}' for v in vals]}")

    # summary section with the means
    rows.append([])
    rows.append(["--- СРЕДНИЕ ---"])
    rows.append(["Структура", "Режим", "Операция", "N",
                 "Среднее (сек)", "Все замеры (сек)"])
    for s, mode, op, n, mean, vals in summary:
        rows.append([s, mode, op, n, f"{mean:.6f}",
                     ";".join(f"{v:.6f}" for v in vals)])

    # newline="" lets the csv module control line endings itself
    with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"\nГотово. Результаты записаны в {OUT_CSV}")
# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()