195 lines
6.7 KiB
Python
195 lines
6.7 KiB
Python
"""
|
||
experiment.py

Performance measurements of three data structures on the same data:
    LinkedList, HashTable, BST
in two modes:
    random order (shuffled), sorted order (sorted)
for three operations:
    insert N records, find 110 times, delete 50 times

Each experiment is repeated TRIALS times. All measurements plus their means
are saved to a CSV file. For the BST on sorted data N is reduced; otherwise
the experiment takes tens of minutes (degenerate tree, O(N^2) insertion).
|
||
"""
|
||
|
||
import csv
|
||
import os
|
||
import random
|
||
import time
|
||
|
||
import phonebook as pb
|
||
|
||
|
||
# ---------- experiment parameters ----------
N = 10_000  # number of records in the main experiment
N_BST_SORTED = 2_000  # smaller N for the BST on sorted data (O(N^2))
TRIALS = 5  # number of repetitions of each measurement
N_FIND_EXISTING = 100  # names sampled from the records for the find phase
N_FIND_MISSING = 10  # generated "None_<i>" names added to the find phase
N_DELETE = 50  # names sampled from the records for the delete phase
HT_SIZE = 2048  # hash table size
RNG_SEED = 42  # base seed; run_one adds the trial index to it
OUT_CSV = os.path.join("docs", "data", "results.csv")  # output CSV path
# --------------------------------------------
|
||
|
||
|
||
def gen_records(n):
    """Build the synthetic phone book used by the experiments.

    Returns a list with one ``(name, phone)`` tuple for every ``i`` in
    ``range(n)``. The name is ``User_`` followed by ``i`` zero-padded to
    five digits and the phone is ``555-`` followed by ``i`` zero-padded to
    seven digits, e.g. ``('User_00001', '555-0000001')``.
    """
    records = []
    for idx in range(n):
        name = f"User_{idx:05d}"
        phone = f"555-{idx:07d}"
        records.append((name, phone))
    return records
|
||
|
||
|
||
def pick_keys(records, k_exist, k_miss, rng):
    """Assemble the list of names to look up.

    ``k_exist`` records are drawn with ``rng.sample`` and their names are
    kept in the sampled order. They are followed by ``k_miss`` generated
    names ``None_0``, ``None_1``, ... which do not follow the ``User_``
    naming produced by ``gen_records``.

    ``rng.sample`` raises ``ValueError`` when ``k_exist`` exceeds
    ``len(records)``.
    """
    sampled = rng.sample(records, k_exist)

    keys = []
    for name, _phone in sampled:
        keys.append(name)
    keys.extend(f"None_{idx}" for idx in range(k_miss))
    return keys
|
||
|
||
|
||
# ---------- замеры по структурам ----------
|
||
|
||
def measure_linked_list(records, find_keys, delete_keys):
    """Time insert, find and delete on the phonebook linked list.

    Every ``(name, phone)`` pair in ``records`` is inserted, then
    ``pb.ll_find`` is called once per name in ``find_keys`` (results are
    discarded), then ``pb.ll_delete`` once per name in ``delete_keys``.

    Returns ``(t_insert, t_find, t_delete)``: elapsed
    ``time.perf_counter`` seconds for each phase. The insert phase also
    includes the ``pb.ll_create()`` call.
    """
    clock = time.perf_counter

    # Insert phase: list creation happens inside the timed region.
    started = clock()
    lst = pb.ll_create()
    for entry_name, entry_phone in records:
        # ll_insert returns the (possibly new) head, so keep rebinding it.
        lst = pb.ll_insert(lst, entry_name, entry_phone)
    insert_elapsed = clock() - started

    # Find phase.
    started = clock()
    for key in find_keys:
        pb.ll_find(lst, key)
    find_elapsed = clock() - started

    # Delete phase.
    started = clock()
    for key in delete_keys:
        lst = pb.ll_delete(lst, key)
    delete_elapsed = clock() - started

    return insert_elapsed, find_elapsed, delete_elapsed
|
||
|
||
|
||
def measure_hash_table(records, find_keys, delete_keys):
    """Time insert, find and delete on the phonebook hash table.

    The table is created with ``size=HT_SIZE``. Every ``(name, phone)``
    pair in ``records`` is inserted, then ``pb.ht_find`` is called once per
    name in ``find_keys`` (results are discarded), then ``pb.ht_delete``
    once per name in ``delete_keys``.

    Returns ``(t_insert, t_find, t_delete)``: elapsed
    ``time.perf_counter`` seconds for each phase. The insert phase also
    includes the ``pb.ht_create`` call.
    """
    now = time.perf_counter

    # Insert phase: table creation happens inside the timed region.
    begin = now()
    table = pb.ht_create(size=HT_SIZE)
    for entry_name, entry_phone in records:
        pb.ht_insert(table, entry_name, entry_phone)
    insert_elapsed = now() - begin

    # Find phase.
    begin = now()
    for key in find_keys:
        pb.ht_find(table, key)
    find_elapsed = now() - begin

    # Delete phase.
    begin = now()
    for key in delete_keys:
        pb.ht_delete(table, key)
    delete_elapsed = now() - begin

    return insert_elapsed, find_elapsed, delete_elapsed
|
||
|
||
|
||
def measure_bst(records, find_keys, delete_keys):
    """Time insert, find and delete on the phonebook binary search tree.

    Every ``(name, phone)`` pair in ``records`` is inserted, then
    ``pb.bst_find`` is called once per name in ``find_keys`` (results are
    discarded), then ``pb.bst_delete`` once per name in ``delete_keys``.

    Returns ``(t_insert, t_find, t_delete)``: elapsed
    ``time.perf_counter`` seconds for each phase. The insert phase also
    includes the ``pb.bst_create()`` call.
    """
    tick = time.perf_counter

    # Insert phase: tree creation happens inside the timed region.
    mark = tick()
    tree = pb.bst_create()
    for entry_name, entry_phone in records:
        # bst_insert returns the (possibly new) root, so keep rebinding it.
        tree = pb.bst_insert(tree, entry_name, entry_phone)
    insert_elapsed = tick() - mark

    # Find phase.
    mark = tick()
    for key in find_keys:
        pb.bst_find(tree, key)
    find_elapsed = tick() - mark

    # Delete phase.
    mark = tick()
    for key in delete_keys:
        tree = pb.bst_delete(tree, key)
    delete_elapsed = tick() - mark

    return insert_elapsed, find_elapsed, delete_elapsed
|
||
|
||
|
||
# ---------- запуск ----------
|
||
|
||
def run_one(structure_name, mode, n, rng_seed):
    """Run TRIALS timed trials of one structure in one ordering mode.

    Args:
        structure_name: "LinkedList", "HashTable" or "BST".
        mode: "shuffled" (records are shuffled anew for every trial) or
            "sorted" (records are ordered by name).
        n: number of records produced by ``gen_records``.
        rng_seed: base seed; trial ``t`` uses ``random.Random(rng_seed + t)``
            so that trials are reproducible yet differ from each other.

    Returns:
        A list with one ``(insert, find, delete)`` timing tuple per trial.

    Raises:
        ValueError: if ``mode`` or ``structure_name`` is not recognised.
    """
    measurers = {
        "LinkedList": measure_linked_list,
        "HashTable": measure_hash_table,
        "BST": measure_bst,
    }

    # Validate both arguments up front so a typo fails immediately rather
    # than after the data set has been generated and sampled.
    if mode not in ("shuffled", "sorted"):
        raise ValueError(mode)
    if structure_name not in measurers:
        raise ValueError(structure_name)
    measure = measurers[structure_name]

    base_records = gen_records(n)

    # The sorted order does not depend on the trial, so compute it once.
    # The measure_* functions only iterate over the list, so it can be
    # shared between trials.
    sorted_records = None
    if mode == "sorted":
        sorted_records = sorted(base_records, key=lambda x: x[0])

    runs = []
    for trial in range(TRIALS):
        # A separate rng per trial keeps trials reproducible and independent.
        rng = random.Random(rng_seed + trial)

        if mode == "shuffled":
            records = base_records[:]
            rng.shuffle(records)
        else:
            records = sorted_records

        # Keep this call order: it fixes the rng stream and therefore the
        # sampled keys for a given seed.
        find_keys = pick_keys(records, N_FIND_EXISTING, N_FIND_MISSING, rng)
        delete_keys = [name for name, _ in rng.sample(records, N_DELETE)]

        runs.append(measure(records, find_keys, delete_keys))
    return runs
|
||
|
||
|
||
def main():
    """Run every configured experiment and write the timings to OUT_CSV.

    For each ``(structure, mode, n)`` configuration the per-trial timings of
    the insert, find and delete phases are printed and collected. The CSV
    gets a header row, one row per individual measurement, an empty row, a
    marker row, a second header row and one row per
    (structure, mode, operation) holding the mean and all trial values
    joined with ``;``.
    """
    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(OUT_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    rows = [["Структура", "Режим", "Операция", "N", "Trial", "Время (сек)"]]
    summary = []  # (structure, mode, op, n, mean, all_trials)

    configs = [
        ("LinkedList", "shuffled", N),
        ("LinkedList", "sorted", N),
        ("HashTable", "shuffled", N),
        ("HashTable", "sorted", N),
        ("BST", "shuffled", N),
        ("BST", "sorted", N_BST_SORTED),  # degenerate case
    ]

    # Position of each operation inside the tuples returned by run_one.
    ops = ("insert", "find", "delete")

    for structure, mode, n in configs:
        print(f"==> {structure:10s} | {mode:9s} | N={n}")
        runs = run_one(structure, mode, n, RNG_SEED)
        # runs = [(insert, find, delete), ...]
        for op_idx, op in enumerate(ops):
            vals = [r[op_idx] for r in runs]
            mean = sum(vals) / len(vals)
            for trial_idx, v in enumerate(vals):
                rows.append([structure, mode, op, n, trial_idx + 1, f"{v:.6f}"])
            summary.append((structure, mode, op, n, mean, vals))
            print(f" {op:7s}: mean={mean*1000:.3f} ms "
                  f"runs={[f'{v*1000:.3f}' for v in vals]}")

    # Summary section with the means.
    rows.append([])
    rows.append(["--- СРЕДНИЕ ---"])
    rows.append(["Структура", "Режим", "Операция", "N",
                 "Среднее (сек)", "Все замеры (сек)"])
    for s, mode, op, n, mean, vals in summary:
        rows.append([s, mode, op, n, f"{mean:.6f}",
                     ";".join(f"{v:.6f}" for v in vals)])

    with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)

    print(f"\nГотово. Результаты записаны в {OUT_CSV}")
|
||
|
||
|
||
# Script entry point: run the experiments only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|