2026-rff_mp/BolonkinNM/Task 1/experiments.py
2026-05-23 18:45:47 +03:00

173 lines
5.7 KiB
Python

from __future__ import annotations
import csv
import random
import time
from pathlib import Path
from typing import Dict, List, Tuple
from linked_list import ll_insert, ll_find, ll_delete
from hash_table import ht_insert, ht_find, ht_delete
from bst import bst_insert, bst_find, bst_delete
from utils import generate_records, prepare_records_variants
# A single record: a (name, phone) pair of strings.
Record = Tuple[str, str]
def make_missing_names(count: int = 10) -> List[str]:
    """Build ``count`` placeholder names of the form ``None_<index>``.

    ``measure_once`` uses these as the "missing" keys of the find benchmark;
    presumably they never occur in the generated data (verify against the
    record generator).

    Args:
        count: How many names to produce; indices run from 0 to ``count - 1``.

    Returns:
        The names in ascending index order (empty when ``count <= 0``).
    """
    names: List[str] = []
    for i in range(count):
        names.append(f"None_{i}")
    return names
def _sample_unique_names(records: List[Record], count: int, seed: int) -> List[str]:
    """Draw ``count`` distinct names from ``records`` with a seeded RNG.

    Shared implementation of ``pick_existing_names`` and ``pick_delete_names``.

    Raises:
        ValueError: If ``records`` holds fewer than ``count`` distinct names.
    """
    # dict.fromkeys drops duplicate names while keeping first-occurrence order,
    # so the sample depends only on the seed and on the order of the records.
    unique_names = list(dict.fromkeys(name for name, _ in records))
    if len(unique_names) < count:
        raise ValueError(f"Not enough unique names: need {count}, got {len(unique_names)}")
    return random.Random(seed).sample(unique_names, count)


def pick_existing_names(records: List[Record], count: int, seed: int = 42) -> List[str]:
    """Pick ``count`` distinct names that are present in ``records``.

    Args:
        records: ``(name, phone)`` pairs to choose names from.
        count: Number of names to return.
        seed: Seed of the private RNG; the same seed and records give the
            same selection.

    Returns:
        ``count`` distinct names in the order the RNG sampled them.

    Raises:
        ValueError: If ``records`` holds fewer than ``count`` distinct names.
    """
    return _sample_unique_names(records, count, seed)


def pick_delete_names(records: List[Record], count: int = 50, seed: int = 43) -> List[str]:
    """Pick ``count`` distinct names from ``records`` to use as deletion targets.

    Args:
        records: ``(name, phone)`` pairs to choose names from.
        count: Number of names to return.
        seed: Seed of the private RNG; the same seed and records give the
            same selection.

    Returns:
        ``count`` distinct names in the order the RNG sampled them.

    Raises:
        ValueError: If ``records`` holds fewer than ``count`` distinct names.
    """
    return _sample_unique_names(records, count, seed)
def build_structure(structure_name: str, records: List[Record], buckets_count: int = 2048):
    """Insert every record, in order, into a fresh structure of the given kind.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        records: ``(name, phone)`` pairs to insert.
        buckets_count: Length of the initial bucket list; only used for
            ``"hash_table"``.

    Returns:
        The value returned by the last insert call, or the empty starting
        value (``None``, or the list of ``None`` buckets) when ``records`` is
        empty.

    Raises:
        ValueError: If ``structure_name`` is not one of the three known kinds.
    """
    # Choose the insert function and the empty starting value once, then run a
    # single insertion loop for all kinds.
    if structure_name == "linked_list":
        insert, container = ll_insert, None
    elif structure_name == "hash_table":
        insert, container = ht_insert, [None] * buckets_count
    elif structure_name == "bst":
        insert, container = bst_insert, None
    else:
        raise ValueError(f"Unknown structure: {structure_name}")

    # Each insert returns the (possibly new) structure to use for the next call.
    for name, phone in records:
        container = insert(container, name, phone)
    return container
def do_find(structure_name: str, structure: object, existing_names: List[str], missing_names: List[str]) -> None:
    """Look up every given name in ``structure``, discarding the results.

    All of ``existing_names`` are searched first, then all of
    ``missing_names``.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        structure: The structure to search, as built by ``build_structure``.
        existing_names: Names searched first.
        missing_names: Names searched afterwards.

    Raises:
        ValueError: If ``structure_name`` is not one of the three known kinds.
    """
    if structure_name == "linked_list":
        find = ll_find
    elif structure_name == "hash_table":
        find = ht_find
    elif structure_name == "bst":
        find = bst_find
    else:
        raise ValueError(f"Unknown structure: {structure_name}")

    for batch in (existing_names, missing_names):
        for name in batch:
            find(structure, name)
def do_delete(structure_name: str, structure: object, delete_names: List[str]):
    """Delete every name in ``delete_names`` from ``structure``, in order.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        structure: The structure to delete from.
        delete_names: Names to remove, processed front to back.

    Returns:
        The value returned by the last delete call, or ``structure`` itself
        when ``delete_names`` is empty.

    Raises:
        ValueError: If ``structure_name`` is not one of the three known kinds.
    """
    if structure_name == "linked_list":
        remove = ll_delete
    elif structure_name == "hash_table":
        remove = ht_delete
    elif structure_name == "bst":
        remove = bst_delete
    else:
        raise ValueError(f"Unknown structure: {structure_name}")

    # Each delete returns the structure to use for the following call.
    current = structure
    for name in delete_names:
        current = remove(current, name)
    return current
def measure_once(
    structure_name: str,
    records: List[Record],
    buckets_count: int = 2048,
    *,
    find_count: int = 100,
    missing_count: int = 10,
    delete_count: int = 50,
) -> Dict[str, float]:
    """Time one build / find / delete cycle for a single structure.

    The names to search and delete are chosen before any timer starts, with
    fixed seeds (42 and 43), so repeated calls on the same records exercise
    the same keys.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        records: ``(name, phone)`` pairs to insert.
        buckets_count: Initial bucket count, forwarded to ``build_structure``.
        find_count: Number of names sampled from ``records`` for the find phase.
        missing_count: Number of generated ``None_<i>`` names that are looked
            up after the sampled ones.
        delete_count: Number of names sampled from ``records`` for the delete
            phase.

    Returns:
        A dict with keys ``"insert"``, ``"find"`` and ``"delete"`` holding the
        elapsed wall-clock seconds of each phase.

    Raises:
        ValueError: If ``records`` has fewer distinct names than ``find_count``
            or ``delete_count``, or if ``structure_name`` is unknown.
    """
    existing_names = pick_existing_names(records, find_count, seed=42)
    missing_names = make_missing_names(missing_count)
    delete_names = pick_delete_names(records, delete_count, seed=43)

    start = time.perf_counter()
    structure = build_structure(structure_name, records, buckets_count=buckets_count)
    insert_time = time.perf_counter() - start

    start = time.perf_counter()
    do_find(structure_name, structure, existing_names, missing_names)
    find_time = time.perf_counter() - start

    start = time.perf_counter()
    structure = do_delete(structure_name, structure, delete_names)
    delete_time = time.perf_counter() - start

    return {"insert": insert_time, "find": find_time, "delete": delete_time}
def run_experiments(n: int = 10000, buckets_count: int = 2048, repeats: int = 5):
    """Benchmark all three structures on two orderings of the same records.

    For every (dataset, structure) pair ``measure_once`` is called ``repeats``
    times. Each call contributes one row per operation; after the last
    attempt one average row per operation is appended.

    Args:
        n: Number of records requested from ``generate_records``.
        buckets_count: Initial bucket count for the hash table.
        repeats: Measurements per (dataset, structure) pair; must be >= 1.

    Returns:
        A list of rows ready for ``csv.writer``. The first row is the header;
        every other row is ``[structure, mode, operation, attempt, seconds]``,
        where ``attempt`` is the 1-based attempt number or the average marker
        and ``seconds`` is formatted with 10 decimal places.

    Raises:
        ValueError: If ``repeats`` is less than 1. Without this check the
            averaging step would fail with ``ZeroDivisionError`` after all the
            setup work had already been done.
    """
    if repeats < 1:
        raise ValueError(f"repeats must be at least 1, got {repeats}")

    records = generate_records(n, repeat_names=False)
    # NOTE(review): presumably returns a shuffled and a sorted copy of the
    # records, in that order -- confirm against utils.prepare_records_variants.
    records_shuffled, records_sorted = prepare_records_variants(records)

    # Dataset labels are runtime output ("random" / "sorted" in Russian).
    datasets = [
        ("случайный", records_shuffled),
        ("отсортированный", records_sorted),
    ]
    # (label written to the CSV, key understood by build_structure & co.)
    structures = [
        ("LinkedList", "linked_list"),
        ("HashTable", "hash_table"),
        ("BST", "bst"),
    ]
    operations = ("insert", "find", "delete")

    # Header: structure, mode, operation, attempt, time in seconds.
    rows = [["Структура", "Режим", "Операция", "Замер", "Время (сек)"]]
    for mode_name, dataset_records in datasets:
        for human_name, structure_name in structures:
            times_by_op = {op: [] for op in operations}
            for attempt in range(1, repeats + 1):
                result = measure_once(structure_name, dataset_records, buckets_count=buckets_count)
                for op_name in operations:
                    elapsed = result[op_name]
                    times_by_op[op_name].append(elapsed)
                    rows.append([human_name, mode_name, op_name, attempt, f"{elapsed:.10f}"])
            # One summary row per operation; "среднее" means "average".
            for op_name in operations:
                avg_time = sum(times_by_op[op_name]) / len(times_by_op[op_name])
                rows.append([human_name, mode_name, op_name, "среднее", f"{avg_time:.10f}"])
    return rows
def save_results_csv(rows, filename: str = "results.csv"):
    """Write ``rows`` to ``filename`` as UTF-8 encoded CSV.

    The file is opened in ``"w"`` mode, so an existing file is replaced.

    Args:
        rows: Iterable of rows, each an iterable of cell values.
        filename: Destination path.
    """
    # newline="" leaves line endings to the csv writer, as the csv module
    # documentation requires for files passed to csv.writer.
    with open(filename, mode="w", encoding="utf-8", newline="") as out_file:
        csv_writer = csv.writer(out_file)
        for row in rows:
            csv_writer.writerow(row)
def main():
    """Run the default benchmark and store the measurements in ``results.csv``."""
    save_results_csv(
        run_experiments(n=10000, buckets_count=2048, repeats=5),
        "results.csv",
    )
    print("Saved results.csv")


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()