""" experiment.py Замеры производительности трёх структур данных на одних и тех же данных: LinkedList, HashTable, BST в двух режимах: случайный порядок (shuffled), отсортированный порядок (sorted) для трёх операций: insert N записей, find 110 раз, delete 50 раз Каждый эксперимент повторяется TRIALS раз. Сохраняем все замеры + средние в CSV. Для BST на отсортированных данных снижаем N - иначе эксперимент длится десятки минут (вырожденное дерево, O(N^2) вставка). """ import csv import os import random import time import phonebook as pb # ---------- параметры эксперимента ---------- N = 10_000 # число записей в основном эксперименте N_BST_SORTED = 2_000 # для BST на отсортированных данных - меньше (O(N^2)) TRIALS = 5 # количество повторов каждого замера N_FIND_EXISTING = 100 N_FIND_MISSING = 10 N_DELETE = 50 HT_SIZE = 2048 # размер хеш-таблицы RNG_SEED = 42 OUT_CSV = os.path.join("docs", "data", "results.csv") # -------------------------------------------- def gen_records(n): """Генерирует n записей вида ('User_00001', '555-0001-...').""" return [(f"User_{i:05d}", f"555-{i:07d}") for i in range(n)] def pick_keys(records, k_exist, k_miss, rng): """Выбирает k_exist существующих имён и k_miss отсутствующих.""" existing = [name for name, _ in rng.sample(records, k_exist)] missing = [f"None_{i}" for i in range(k_miss)] return existing + missing # ---------- замеры по структурам ---------- def measure_linked_list(records, find_keys, delete_keys): # вставка t0 = time.perf_counter() head = pb.ll_create() for name, phone in records: head = pb.ll_insert(head, name, phone) t_insert = time.perf_counter() - t0 # поиск t0 = time.perf_counter() for name in find_keys: pb.ll_find(head, name) t_find = time.perf_counter() - t0 # удаление t0 = time.perf_counter() for name in delete_keys: head = pb.ll_delete(head, name) t_delete = time.perf_counter() - t0 return t_insert, t_find, t_delete def measure_hash_table(records, find_keys, delete_keys): t0 = time.perf_counter() ht = pb.ht_create(size=HT_SIZE) for name, phone in records: pb.ht_insert(ht, name, phone) t_insert = time.perf_counter() - t0 t0 = time.perf_counter() for name in find_keys: pb.ht_find(ht, name) t_find = time.perf_counter() - t0 t0 = time.perf_counter() for name in delete_keys: pb.ht_delete(ht, name) t_delete = time.perf_counter() - t0 return t_insert, t_find, t_delete def measure_bst(records, find_keys, delete_keys): t0 = time.perf_counter() root = pb.bst_create() for name, phone in records: root = pb.bst_insert(root, name, phone) t_insert = time.perf_counter() - t0 t0 = time.perf_counter() for name in find_keys: pb.bst_find(root, name) t_find = time.perf_counter() - t0 t0 = time.perf_counter() for name in delete_keys: root = pb.bst_delete(root, name) t_delete = time.perf_counter() - t0 return t_insert, t_find, t_delete # ---------- запуск ---------- def run_one(structure_name, mode, n, rng_seed): """Готовит данные, прогоняет TRIALS раз и возвращает список (insert, find, delete).""" base_records = gen_records(n) runs = [] for trial in range(TRIALS): # отдельный rng для воспроизводимости и независимости попыток rng = random.Random(rng_seed + trial) if mode == "shuffled": records = base_records[:] rng.shuffle(records) elif mode == "sorted": records = sorted(base_records, key=lambda x: x[0]) else: raise ValueError(mode) find_keys = pick_keys(records, N_FIND_EXISTING, N_FIND_MISSING, rng) delete_keys = [name for name, _ in rng.sample(records, N_DELETE)] if structure_name == "LinkedList": r = measure_linked_list(records, find_keys, delete_keys) elif structure_name == "HashTable": r = measure_hash_table(records, find_keys, delete_keys) elif structure_name == "BST": r = measure_bst(records, find_keys, delete_keys) else: raise ValueError(structure_name) runs.append(r) return runs def main(): os.makedirs(os.path.dirname(OUT_CSV), exist_ok=True) rows = [["Структура", "Режим", "Операция", "N", "Trial", "Время (сек)"]] summary = [] # (structure, mode, op, n, mean, all_trials) configs = [ ("LinkedList", "shuffled", N), ("LinkedList", "sorted", N), ("HashTable", "shuffled", N), ("HashTable", "sorted", N), ("BST", "shuffled", N), ("BST", "sorted", N_BST_SORTED), # вырожденный случай ] for structure, mode, n in configs: print(f"==> {structure:10s} | {mode:9s} | N={n}") runs = run_one(structure, mode, n, RNG_SEED) # runs = [(insert, find, delete), ...] ops = ["insert", "find", "delete"] for op_idx, op in enumerate(ops): vals = [r[op_idx] for r in runs] mean = sum(vals) / len(vals) for trial_idx, v in enumerate(vals): rows.append([structure, mode, op, n, trial_idx + 1, f"{v:.6f}"]) summary.append((structure, mode, op, n, mean, vals)) print(f" {op:7s}: mean={mean*1000:.3f} ms " f"runs={[f'{v*1000:.3f}' for v in vals]}") # сводная строка со средними rows.append([]) rows.append(["--- СРЕДНИЕ ---"]) rows.append(["Структура", "Режим", "Операция", "N", "Среднее (сек)", "Все замеры (сек)"]) for s, mode, op, n, mean, vals in summary: rows.append([s, mode, op, n, f"{mean:.6f}", ";".join(f"{v:.6f}" for v in vals)]) with open(OUT_CSV, "w", newline="", encoding="utf-8") as f: writer = csv.writer(f) writer.writerows(rows) print(f"\nГотово. Результаты записаны в {OUT_CSV}") if __name__ == "__main__": main()