add experiment.py
This commit is contained in:
parent
666bc0af37
commit
2ff608b88f
194
SobolevNS/docs/data/task1_data_structures/experiment.py
Normal file
194
SobolevNS/docs/data/task1_data_structures/experiment.py
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
"""
|
||||
experiment.py

Performance measurements for three data structures on the same data:
    LinkedList, HashTable, BST
in two modes:
    random order (shuffled), sorted order (sorted)
for three operations:
    insert N records, find 110 times, delete 50 times

Each experiment is repeated TRIALS times. All measurements plus their means
are saved to CSV. For the BST on sorted data N is reduced - otherwise the
experiment takes tens of minutes (degenerate tree, O(N^2) insertion).
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
import phonebook as pb
|
||||
|
||||
|
||||
# ---------- experiment parameters ----------
# Number of records in the main experiment.
N = 10_000
# Smaller record count for the BST on sorted data (O(N^2) insertion).
N_BST_SORTED = 2_000
# How many times each measurement is repeated.
TRIALS = 5
# Lookups per trial: names that are present / names that are absent.
N_FIND_EXISTING = 100
N_FIND_MISSING = 10
# Deletions per trial.
N_DELETE = 50
# Size handed to the hash table constructor.
HT_SIZE = 2048
# Base seed for the per-trial random generators.
RNG_SEED = 42
# Destination of the results table.
OUT_CSV = os.path.join("docs", "data", "results.csv")
# -------------------------------------------
|
||||
|
||||
|
||||
def gen_records(n):
    """Build n synthetic phone-book entries.

    Entry i (counting from 0) is a (name, phone) tuple in which the index is
    zero-padded to 5 digits in the name and to 7 digits in the phone, e.g.
    ('User_00001', '555-0000001') for i == 1.
    """
    entries = []
    for idx in range(n):
        name = "User_{:05d}".format(idx)
        phone = "555-{:07d}".format(idx)
        entries.append((name, phone))
    return entries
|
||||
|
||||
|
||||
def pick_keys(records, k_exist, k_miss, rng):
    """Return lookup keys: k_exist names drawn from records, then k_miss absent ones.

    Args:
        records: sequence of (name, phone) pairs to sample from.
        k_exist: how many existing names to draw with rng.sample.
        k_miss: how many 'None_<i>' placeholder names to append.
        rng: object providing sample(population, k), e.g. random.Random.

    Returns:
        A list with the sampled names first and the placeholder names last.
    """
    sampled = rng.sample(records, k_exist)
    keys = [name for name, _phone in sampled]
    # Placeholder names follow a pattern gen_records never produces.
    keys.extend(f"None_{idx}" for idx in range(k_miss))
    return keys
|
||||
|
||||
|
||||
# ---------- замеры по структурам ----------
|
||||
|
||||
def measure_linked_list(records, find_keys, delete_keys):
    """Time the insert, find and delete phases on the phonebook linked list.

    Args:
        records: iterable of (name, phone) pairs, inserted in the given order.
        find_keys: names passed one by one to pb.ll_find.
        delete_keys: names passed one by one to pb.ll_delete.

    Returns:
        Tuple (t_insert, t_find, t_delete) of elapsed seconds as measured by
        time.perf_counter.
    """
    clock = time.perf_counter

    # Insert phase; creating the list is part of the timed region.
    started = clock()
    head = pb.ll_create()
    for person, number in records:
        head = pb.ll_insert(head, person, number)
    insert_elapsed = clock() - started

    # Find phase; results are discarded, only the time matters.
    started = clock()
    for key in find_keys:
        pb.ll_find(head, key)
    find_elapsed = clock() - started

    # Delete phase; the returned head replaces the current one each time.
    started = clock()
    for key in delete_keys:
        head = pb.ll_delete(head, key)
    delete_elapsed = clock() - started

    return insert_elapsed, find_elapsed, delete_elapsed
|
||||
|
||||
|
||||
def measure_hash_table(records, find_keys, delete_keys):
    """Time the insert, find and delete phases on the phonebook hash table.

    Args:
        records: iterable of (name, phone) pairs, inserted in the given order.
        find_keys: names passed one by one to pb.ht_find.
        delete_keys: names passed one by one to pb.ht_delete.

    Returns:
        Tuple (t_insert, t_find, t_delete) of elapsed seconds as measured by
        time.perf_counter.
    """
    clock = time.perf_counter

    # Insert phase; creating the table (size HT_SIZE) is part of the timed region.
    started = clock()
    table = pb.ht_create(size=HT_SIZE)
    for person, number in records:
        pb.ht_insert(table, person, number)
    insert_elapsed = clock() - started

    # Find phase; results are discarded, only the time matters.
    started = clock()
    for key in find_keys:
        pb.ht_find(table, key)
    find_elapsed = clock() - started

    # Delete phase.
    started = clock()
    for key in delete_keys:
        pb.ht_delete(table, key)
    delete_elapsed = clock() - started

    return insert_elapsed, find_elapsed, delete_elapsed
|
||||
|
||||
|
||||
def measure_bst(records, find_keys, delete_keys):
    """Time the insert, find and delete phases on the phonebook BST.

    Args:
        records: iterable of (name, phone) pairs, inserted in the given order.
        find_keys: names passed one by one to pb.bst_find.
        delete_keys: names passed one by one to pb.bst_delete.

    Returns:
        Tuple (t_insert, t_find, t_delete) of elapsed seconds as measured by
        time.perf_counter.
    """
    clock = time.perf_counter

    # Insert phase; creating the tree is part of the timed region.
    started = clock()
    root = pb.bst_create()
    for person, number in records:
        root = pb.bst_insert(root, person, number)
    insert_elapsed = clock() - started

    # Find phase; results are discarded, only the time matters.
    started = clock()
    for key in find_keys:
        pb.bst_find(root, key)
    find_elapsed = clock() - started

    # Delete phase; the returned root replaces the current one each time.
    started = clock()
    for key in delete_keys:
        root = pb.bst_delete(root, key)
    delete_elapsed = clock() - started

    return insert_elapsed, find_elapsed, delete_elapsed
|
||||
|
||||
|
||||
# ---------- запуск ----------
|
||||
|
||||
def run_one(structure_name, mode, n, rng_seed):
    """Run TRIALS timed trials for one structure in one data-order mode.

    Args:
        structure_name: "LinkedList", "HashTable" or "BST".
        mode: "shuffled" (records shuffled per trial) or "sorted"
            (records ordered by name).
        n: number of records to generate with gen_records.
        rng_seed: base seed; trial t uses random.Random(rng_seed + t).

    Returns:
        A list with one (insert, find, delete) timing tuple per trial.

    Raises:
        ValueError: if mode or structure_name is not recognised; the
            offending value is the exception argument. mode is checked first.
    """
    # Validate both selectors up front so a typo fails immediately rather
    # than after the data has been generated, shuffled and sampled (and so
    # it is still reported when TRIALS is 0).
    if mode not in ("shuffled", "sorted"):
        raise ValueError(mode)

    if structure_name == "LinkedList":
        measure = measure_linked_list
    elif structure_name == "HashTable":
        measure = measure_hash_table
    elif structure_name == "BST":
        measure = measure_bst
    else:
        raise ValueError(structure_name)

    base_records = gen_records(n)

    # The sorted order does not depend on the trial, so compute it once.
    # The measure_* functions only iterate over the records, which makes it
    # safe to share this list between trials.
    sorted_records = None
    if mode == "sorted":
        sorted_records = sorted(base_records, key=lambda rec: rec[0])

    runs = []
    for trial in range(TRIALS):
        # A separate rng per trial keeps trials reproducible and independent.
        rng = random.Random(rng_seed + trial)

        if mode == "shuffled":
            records = base_records[:]
            rng.shuffle(records)
        else:
            records = sorted_records

        find_keys = pick_keys(records, N_FIND_EXISTING, N_FIND_MISSING, rng)
        delete_keys = [name for name, _ in rng.sample(records, N_DELETE)]

        runs.append(measure(records, find_keys, delete_keys))
    return runs
|
||||
|
||||
|
||||
def main():
    """Run every benchmark configuration and write all timings to OUT_CSV.

    For each (structure, mode, N) configuration the per-trial timings of
    insert/find/delete are printed and collected. The CSV gets one row per
    individual measurement, followed by an empty row, a marker row and a
    table of means with all trial values joined by ';'.
    """
    out_dir = os.path.dirname(OUT_CSV)
    os.makedirs(out_dir, exist_ok=True)

    header = ["Структура", "Режим", "Операция", "N", "Trial", "Время (сек)"]
    rows = [header]
    summary = []  # entries: (structure, mode, op, n, mean, all_trials)

    # Position of each operation inside the tuples returned by run_one.
    operations = ("insert", "find", "delete")

    configs = [
        ("LinkedList", "shuffled", N),
        ("LinkedList", "sorted", N),
        ("HashTable", "shuffled", N),
        ("HashTable", "sorted", N),
        ("BST", "shuffled", N),
        ("BST", "sorted", N_BST_SORTED),  # degenerate case
    ]

    for structure, mode, n in configs:
        print(f"==> {structure:10s} | {mode:9s} | N={n}")
        runs = run_one(structure, mode, n, RNG_SEED)

        for op_idx, op in enumerate(operations):
            vals = [timing[op_idx] for timing in runs]
            mean = sum(vals) / len(vals)

            rows.extend(
                [structure, mode, op, n, trial_no, f"{v:.6f}"]
                for trial_no, v in enumerate(vals, start=1)
            )
            summary.append((structure, mode, op, n, mean, vals))

            runs_ms = [f'{v*1000:.3f}' for v in vals]
            print(f" {op:7s}: mean={mean*1000:.3f} ms "
                  f"runs={runs_ms}")

    # Summary section with the means.
    rows += [
        [],
        ["--- СРЕДНИЕ ---"],
        ["Структура", "Режим", "Операция", "N",
         "Среднее (сек)", "Все замеры (сек)"],
    ]
    for structure, mode, op, n, mean, vals in summary:
        all_trials = ";".join(f"{v:.6f}" for v in vals)
        rows.append([structure, mode, op, n, f"{mean:.6f}", all_trials])

    with open(OUT_CSV, "w", newline="", encoding="utf-8") as out_file:
        csv.writer(out_file).writerows(rows)

    print(f"\nГотово. Результаты записаны в {OUT_CSV}")
|
||||
|
||||
|
||||
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue
Block a user