add experiment.py

This commit is contained in:
SobolevNS 2026-05-22 12:20:40 +03:00
parent 666bc0af37
commit 2ff608b88f

View File

@@ -0,0 +1,194 @@
"""
experiment.py
Замеры производительности трёх структур данных на одних и тех же данных:
LinkedList, HashTable, BST
в двух режимах:
случайный порядок (shuffled), отсортированный порядок (sorted)
для трёх операций:
insert N записей, find 110 раз, delete 50 раз
Каждый эксперимент повторяется TRIALS раз. Сохраняем все замеры + средние
в CSV. Для BST на отсортированных данных снижаем N - иначе эксперимент
длится десятки минут (вырожденное дерево, O(N^2) вставка).
"""
import csv
import os
import random
import time
import phonebook as pb
# ---------- experiment parameters ----------
N = 10_000 # number of records in the main experiment
N_BST_SORTED = 2_000 # smaller N for the BST on sorted input (degenerate tree, O(N^2) inserts)
TRIALS = 5 # how many times each measurement is repeated
# number of lookups per trial for names that are present in the data
N_FIND_EXISTING = 100
# number of lookups per trial for names that are absent from the data
N_FIND_MISSING = 10
# number of names deleted per trial
N_DELETE = 50
HT_SIZE = 2048 # hash table size (passed as size= to pb.ht_create)
# base seed; trial number t uses random.Random(RNG_SEED + t)
RNG_SEED = 42
# path of the CSV file that main() writes
OUT_CSV = os.path.join("docs", "data", "results.csv")
# --------------------------------------------
def gen_records(n):
    """Build ``n`` synthetic phone-book records.

    Record number ``i`` (counting from 0) is the tuple
    ``(f"User_{i:05d}", f"555-{i:07d}")``, for example
    ``("User_00001", "555-0000001")``.

    Returns:
        A list of ``(name, phone)`` tuples in ascending ``i`` order; the list
        is empty when ``n`` is not positive.
    """
    generated = []
    for i in range(n):
        name = f"User_{i:05d}"
        phone = f"555-{i:07d}"
        generated.append((name, phone))
    return generated
def pick_keys(records, k_exist, k_miss, rng):
    """Assemble the list of names to look up during one trial.

    Args:
        records: sequence of ``(name, phone)`` pairs to draw from.
        k_exist: how many distinct records to draw with ``rng.sample``.
        k_miss: how many synthetic ``"None_<i>"`` names to append.
        rng: object providing ``sample(population, k)``, e.g. ``random.Random``.

    Returns:
        A new list holding the ``k_exist`` sampled names followed by
        ``"None_0" ... "None_<k_miss-1>"``.
    """
    sampled = rng.sample(records, k_exist)
    keys = []
    for name, _ in sampled:
        keys.append(name)
    # Synthetic names; they are not checked against ``records``.
    keys.extend(f"None_{i}" for i in range(k_miss))
    return keys
# ---------- замеры по структурам ----------
def measure_linked_list(records, find_keys, delete_keys):
    """Time the insert, find and delete phases on the phonebook linked list.

    Args:
        records: ``(name, phone)`` pairs, inserted in the order given.
        find_keys: names passed to ``pb.ll_find``; the results are discarded.
        delete_keys: names passed to ``pb.ll_delete``.

    Returns:
        ``(insert_s, find_s, delete_s)``: elapsed seconds of each phase,
        measured with ``time.perf_counter``.
    """
    # Phase 1: build the list. Creating the empty list is inside the timing.
    started = time.perf_counter()
    lst = pb.ll_create()
    for entry_name, entry_phone in records:
        lst = pb.ll_insert(lst, entry_name, entry_phone)
    insert_s = time.perf_counter() - started

    # Phase 2: lookups.
    started = time.perf_counter()
    for key in find_keys:
        pb.ll_find(lst, key)
    find_s = time.perf_counter() - started

    # Phase 3: deletions; keep whatever ll_delete returns as the current list.
    started = time.perf_counter()
    for key in delete_keys:
        lst = pb.ll_delete(lst, key)
    delete_s = time.perf_counter() - started

    return (insert_s, find_s, delete_s)
def measure_hash_table(records, find_keys, delete_keys):
    """Time the insert, find and delete phases on the phonebook hash table.

    The table is created with ``size=HT_SIZE``. The return values of the
    ``pb.ht_*`` calls are ignored; the same table object is used throughout.

    Args:
        records: ``(name, phone)`` pairs, inserted in the order given.
        find_keys: names passed to ``pb.ht_find``.
        delete_keys: names passed to ``pb.ht_delete``.

    Returns:
        ``(insert_s, find_s, delete_s)``: elapsed seconds of each phase,
        measured with ``time.perf_counter``.
    """
    # Phase 1: build the table. Creating the table is inside the timing.
    started = time.perf_counter()
    table = pb.ht_create(size=HT_SIZE)
    for entry_name, entry_phone in records:
        pb.ht_insert(table, entry_name, entry_phone)
    insert_s = time.perf_counter() - started

    # Phase 2: lookups.
    started = time.perf_counter()
    for key in find_keys:
        pb.ht_find(table, key)
    find_s = time.perf_counter() - started

    # Phase 3: deletions.
    started = time.perf_counter()
    for key in delete_keys:
        pb.ht_delete(table, key)
    delete_s = time.perf_counter() - started

    return (insert_s, find_s, delete_s)
def measure_bst(records, find_keys, delete_keys):
    """Time the insert, find and delete phases on the phonebook BST.

    Args:
        records: ``(name, phone)`` pairs, inserted in the order given.
        find_keys: names passed to ``pb.bst_find``; the results are discarded.
        delete_keys: names passed to ``pb.bst_delete``.

    Returns:
        ``(insert_s, find_s, delete_s)``: elapsed seconds of each phase,
        measured with ``time.perf_counter``.
    """
    # Phase 1: build the tree. Creating the empty tree is inside the timing.
    started = time.perf_counter()
    tree = pb.bst_create()
    for entry_name, entry_phone in records:
        tree = pb.bst_insert(tree, entry_name, entry_phone)
    insert_s = time.perf_counter() - started

    # Phase 2: lookups.
    started = time.perf_counter()
    for key in find_keys:
        pb.bst_find(tree, key)
    find_s = time.perf_counter() - started

    # Phase 3: deletions; keep whatever bst_delete returns as the current root.
    started = time.perf_counter()
    for key in delete_keys:
        tree = pb.bst_delete(tree, key)
    delete_s = time.perf_counter() - started

    return (insert_s, find_s, delete_s)
# ---------- запуск ----------
def run_one(structure_name, mode, n, rng_seed):
    """Benchmark one structure in one input-order mode, ``TRIALS`` times.

    Args:
        structure_name: ``"LinkedList"``, ``"HashTable"`` or ``"BST"``.
        mode: ``"shuffled"`` (records shuffled per trial) or ``"sorted"``
            (records ordered by name).
        n: number of records generated with ``gen_records``.
        rng_seed: base seed; trial ``t`` uses ``random.Random(rng_seed + t)``.

    Returns:
        A list with one ``(insert, find, delete)`` timing tuple per trial.

    Raises:
        ValueError: if ``mode`` or ``structure_name`` is not one of the
            values listed above.
    """
    base_records = gen_records(n)
    timings = []
    for trial in range(TRIALS):
        # A dedicated generator per trial keeps trials reproducible and
        # independent of one another.
        rng = random.Random(rng_seed + trial)

        if mode not in ("shuffled", "sorted"):
            raise ValueError(mode)
        if mode == "sorted":
            records = sorted(base_records, key=lambda rec: rec[0])
        else:
            records = list(base_records)
            rng.shuffle(records)

        find_keys = pick_keys(records, N_FIND_EXISTING, N_FIND_MISSING, rng)
        # Drawn separately from find_keys, so the two sets may overlap.
        delete_keys = []
        for name, _ in rng.sample(records, N_DELETE):
            delete_keys.append(name)

        if structure_name == "LinkedList":
            measure = measure_linked_list
        elif structure_name == "HashTable":
            measure = measure_hash_table
        elif structure_name == "BST":
            measure = measure_bst
        else:
            raise ValueError(structure_name)

        timings.append(measure(records, find_keys, delete_keys))
    return timings
def main():
    """Run every benchmark configuration and write the results to ``OUT_CSV``.

    For each ``(structure, mode, n)`` configuration the per-trial timings are
    printed and collected. The CSV file then contains, in order:

    1. a header row followed by one row per (configuration, operation, trial);
    2. an empty row and a one-cell marker row;
    3. a second header row followed by one row per (configuration, operation)
       holding the mean and all individual timings joined with ``;``.

    The output directory is created if it does not exist.
    """
    out_dir = os.path.dirname(OUT_CSV)
    # dirname is "" when OUT_CSV is a bare filename, and os.makedirs("")
    # raises FileNotFoundError - only create a directory when there is one.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    rows = [["Структура", "Режим", "Операция", "N", "Trial", "Время (сек)"]]
    summary = []  # (structure, mode, op, n, mean, all_trials)
    configs = [
        ("LinkedList", "shuffled", N),
        ("LinkedList", "sorted", N),
        ("HashTable", "shuffled", N),
        ("HashTable", "sorted", N),
        ("BST", "shuffled", N),
        ("BST", "sorted", N_BST_SORTED),  # degenerate case
    ]
    # Position of each operation inside the tuples returned by run_one.
    ops = ("insert", "find", "delete")

    for structure, mode, n in configs:
        print(f"==> {structure:10s} | {mode:9s} | N={n}")
        runs = run_one(structure, mode, n, RNG_SEED)
        # runs = [(insert, find, delete), ...]
        for op_idx, op in enumerate(ops):
            vals = [r[op_idx] for r in runs]
            mean = sum(vals) / len(vals)
            for trial_idx, v in enumerate(vals):
                rows.append([structure, mode, op, n, trial_idx + 1, f"{v:.6f}"])
            summary.append((structure, mode, op, n, mean, vals))
            print(f" {op:7s}: mean={mean*1000:.3f} ms "
                  f"runs={[f'{v*1000:.3f}' for v in vals]}")

    # Summary section with the means, separated from the raw rows.
    rows.append([])
    rows.append(["--- СРЕДНИЕ ---"])
    rows.append(["Структура", "Режим", "Операция", "N",
                 "Среднее (сек)", "Все замеры (сек)"])
    for s, mode, op, n, mean, vals in summary:
        rows.append([s, mode, op, n, f"{mean:.6f}",
                     ";".join(f"{v:.6f}" for v in vals)])

    with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"\nГотово. Результаты записаны в {OUT_CSV}")
# Run the experiment only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()