diff --git a/SobolevNS/docs/data/task1_data_structures/experiment.py b/SobolevNS/docs/data/task1_data_structures/experiment.py new file mode 100644 index 0000000..1e4ce39 --- /dev/null +++ b/SobolevNS/docs/data/task1_data_structures/experiment.py @@ -0,0 +1,194 @@ +""" +experiment.py + +Замеры производительности трёх структур данных на одних и тех же данных: + LinkedList, HashTable, BST +в двух режимах: + случайный порядок (shuffled), отсортированный порядок (sorted) +для трёх операций: + insert N записей, find 110 раз, delete 50 раз +Каждый эксперимент повторяется TRIALS раз. Сохраняем все замеры + средние +в CSV. Для BST на отсортированных данных снижаем N - иначе эксперимент +длится десятки минут (вырожденное дерево, O(N^2) вставка). +""" + +import csv +import os +import random +import time + +import phonebook as pb + + +# ---------- параметры эксперимента ---------- +N = 10_000 # число записей в основном эксперименте +N_BST_SORTED = 2_000 # для BST на отсортированных данных - меньше (O(N^2)) +TRIALS = 5 # количество повторов каждого замера +N_FIND_EXISTING = 100 +N_FIND_MISSING = 10 +N_DELETE = 50 +HT_SIZE = 2048 # размер хеш-таблицы +RNG_SEED = 42 +OUT_CSV = os.path.join("docs", "data", "results.csv") +# -------------------------------------------- + + +def gen_records(n): + """Генерирует n записей вида ('User_00001', '555-0001-...').""" + return [(f"User_{i:05d}", f"555-{i:07d}") for i in range(n)] + + +def pick_keys(records, k_exist, k_miss, rng): + """Выбирает k_exist существующих имён и k_miss отсутствующих.""" + existing = [name for name, _ in rng.sample(records, k_exist)] + missing = [f"None_{i}" for i in range(k_miss)] + return existing + missing + + +# ---------- замеры по структурам ---------- + +def measure_linked_list(records, find_keys, delete_keys): + # вставка + t0 = time.perf_counter() + head = pb.ll_create() + for name, phone in records: + head = pb.ll_insert(head, name, phone) + t_insert = time.perf_counter() - t0 + + # поиск + t0 = time.perf_counter() + for name in find_keys: + pb.ll_find(head, name) + t_find = time.perf_counter() - t0 + + # удаление + t0 = time.perf_counter() + for name in delete_keys: + head = pb.ll_delete(head, name) + t_delete = time.perf_counter() - t0 + + return t_insert, t_find, t_delete + + +def measure_hash_table(records, find_keys, delete_keys): + t0 = time.perf_counter() + ht = pb.ht_create(size=HT_SIZE) + for name, phone in records: + pb.ht_insert(ht, name, phone) + t_insert = time.perf_counter() - t0 + + t0 = time.perf_counter() + for name in find_keys: + pb.ht_find(ht, name) + t_find = time.perf_counter() - t0 + + t0 = time.perf_counter() + for name in delete_keys: + pb.ht_delete(ht, name) + t_delete = time.perf_counter() - t0 + + return t_insert, t_find, t_delete + + +def measure_bst(records, find_keys, delete_keys): + t0 = time.perf_counter() + root = pb.bst_create() + for name, phone in records: + root = pb.bst_insert(root, name, phone) + t_insert = time.perf_counter() - t0 + + t0 = time.perf_counter() + for name in find_keys: + pb.bst_find(root, name) + t_find = time.perf_counter() - t0 + + t0 = time.perf_counter() + for name in delete_keys: + root = pb.bst_delete(root, name) + t_delete = time.perf_counter() - t0 + + return t_insert, t_find, t_delete + + +# ---------- запуск ---------- + +def run_one(structure_name, mode, n, rng_seed): + """Готовит данные, прогоняет TRIALS раз и возвращает список (insert, find, delete).""" + base_records = gen_records(n) + + runs = [] + for trial in range(TRIALS): + # отдельный rng для воспроизводимости и независимости попыток + rng = random.Random(rng_seed + trial) + + if mode == "shuffled": + records = base_records[:] + rng.shuffle(records) + elif mode == "sorted": + records = sorted(base_records, key=lambda x: x[0]) + else: + raise ValueError(mode) + + find_keys = pick_keys(records, N_FIND_EXISTING, N_FIND_MISSING, rng) + delete_keys = [name for name, _ in rng.sample(records, N_DELETE)] + + if structure_name == "LinkedList": + r = measure_linked_list(records, find_keys, delete_keys) + elif structure_name == "HashTable": + r = measure_hash_table(records, find_keys, delete_keys) + elif structure_name == "BST": + r = measure_bst(records, find_keys, delete_keys) + else: + raise ValueError(structure_name) + + runs.append(r) + return runs + + +def main(): + os.makedirs(os.path.dirname(OUT_CSV), exist_ok=True) + + rows = [["Структура", "Режим", "Операция", "N", "Trial", "Время (сек)"]] + summary = [] # (structure, mode, op, n, mean, all_trials) + + configs = [ + ("LinkedList", "shuffled", N), + ("LinkedList", "sorted", N), + ("HashTable", "shuffled", N), + ("HashTable", "sorted", N), + ("BST", "shuffled", N), + ("BST", "sorted", N_BST_SORTED), # вырожденный случай + ] + + for structure, mode, n in configs: + print(f"==> {structure:10s} | {mode:9s} | N={n}") + runs = run_one(structure, mode, n, RNG_SEED) + # runs = [(insert, find, delete), ...] + ops = ["insert", "find", "delete"] + for op_idx, op in enumerate(ops): + vals = [r[op_idx] for r in runs] + mean = sum(vals) / len(vals) + for trial_idx, v in enumerate(vals): + rows.append([structure, mode, op, n, trial_idx + 1, f"{v:.6f}"]) + summary.append((structure, mode, op, n, mean, vals)) + print(f" {op:7s}: mean={mean*1000:.3f} ms " + f"runs={[f'{v*1000:.3f}' for v in vals]}") + + # сводная строка со средними + rows.append([]) + rows.append(["--- СРЕДНИЕ ---"]) + rows.append(["Структура", "Режим", "Операция", "N", + "Среднее (сек)", "Все замеры (сек)"]) + for s, mode, op, n, mean, vals in summary: + rows.append([s, mode, op, n, f"{mean:.6f}", + ";".join(f"{v:.6f}" for v in vals)]) + + with open(OUT_CSV, "w", newline="", encoding="utf-8") as f: + writer = csv.writer(f) + writer.writerows(rows) + + print(f"\nГотово. Результаты записаны в {OUT_CSV}") + + +if __name__ == "__main__": + main()