"""Run performance experiments for the procedural phone book structures."""

import argparse
import csv
import html
import math
import random
import time
from pathlib import Path

from phonebook import (
    bst_delete,
    bst_find,
    bst_insert,
    create_hash_table,
    ht_delete,
    ht_find,
    ht_insert,
    ll_delete,
    ll_find,
    ll_insert,
)

STRUCTURES = ("LinkedList", "HashTable", "BST")
MODES = ("shuffled", "sorted")
OPERATIONS = ("insert", "find", "delete")

# Bar colour per operation; shared by the matplotlib and the SVG chart writers
# so both outputs use the same palette.
OPERATION_COLORS = {
    "insert": "#4C78A8",
    "find": "#F58518",
    "delete": "#54A24B",
}

# Column order of the two CSV reports written by main().
RESULT_FIELDS = [
    "structure",
    "mode",
    "operation",
    "repeat",
    "time_sec",
    "n",
    "bucket_count",
]
SUMMARY_FIELDS = [
    "structure",
    "mode",
    "operation",
    "average_time_sec",
    "measurements_sec",
]

# User-facing chart texts (kept in the original language of the report).
CHART_TITLE = "Сравнение операций телефонного справочника"
CHART_Y_LABEL = "Среднее время, секунд (логарифмическая шкала)"

# Smallest time value that is plotted; keeps log10() defined for zero timings.
_MIN_PLOTTED_TIME = 1e-12


def generate_records(count):
    """Return ``count`` synthetic ``(name, phone)`` tuples in ascending order.

    Names look like ``User_00042`` and phones like ``+7-900-00042``; the index
    is zero-padded to at least five digits.
    """
    return [
        (f"User_{index:05d}", f"+7-900-{index:05d}")
        for index in range(count)
    ]


def prepare_records(count, seed):
    """Build the sorted and the shuffled variant of the same record set.

    Args:
        count: Number of records to generate.
        seed: Seed of the private ``random.Random`` used for shuffling, so the
            shuffled order is reproducible.

    Returns:
        A dict with the keys ``"sorted"`` and ``"shuffled"``, each mapping to
        a list of ``(name, phone)`` tuples.
    """
    records_sorted = generate_records(count)
    records_shuffled = records_sorted[:]
    random.Random(seed).shuffle(records_shuffled)
    return {
        "sorted": records_sorted,
        "shuffled": records_shuffled,
    }


def _insert_all(structure_name, records, bucket_count):
    """Build a fresh structure of the given kind and insert every record.

    ``bucket_count`` is only used for ``"HashTable"``. For the linked list and
    the BST the value returned by each insert call is carried forward as the
    new head/root.

    Raises:
        ValueError: If ``structure_name`` is not one of ``STRUCTURES``.
    """
    if structure_name == "LinkedList":
        head = None
        for name, phone in records:
            head = ll_insert(head, name, phone)
        return head
    if structure_name == "HashTable":
        buckets = create_hash_table(bucket_count)
        for name, phone in records:
            ht_insert(buckets, name, phone)
        return buckets
    if structure_name == "BST":
        root = None
        for name, phone in records:
            root = bst_insert(root, name, phone)
        return root
    raise ValueError(f"Unknown structure: {structure_name}")


def _find_all(structure_name, structure, names):
    """Look up every name in ``structure`` and return the structure unchanged.

    The lookup results are discarded on purpose: only the elapsed time of the
    calls is of interest to the caller.

    Raises:
        ValueError: If ``structure_name`` is not one of ``STRUCTURES``.
    """
    if structure_name == "LinkedList":
        for name in names:
            ll_find(structure, name)
        return structure
    if structure_name == "HashTable":
        for name in names:
            ht_find(structure, name)
        return structure
    if structure_name == "BST":
        for name in names:
            bst_find(structure, name)
        return structure
    raise ValueError(f"Unknown structure: {structure_name}")


def _delete_all(structure_name, structure, names):
    """Delete every name from ``structure`` and return the resulting structure.

    For the linked list and the BST the value returned by each delete call is
    carried forward as the new head/root; the hash table is passed to
    ``ht_delete`` as-is and returned.

    Raises:
        ValueError: If ``structure_name`` is not one of ``STRUCTURES``.
    """
    if structure_name == "LinkedList":
        head = structure
        for name in names:
            head = ll_delete(head, name)
        return head
    if structure_name == "HashTable":
        for name in names:
            ht_delete(structure, name)
        return structure
    if structure_name == "BST":
        root = structure
        for name in names:
            root = bst_delete(root, name)
        return root
    raise ValueError(f"Unknown structure: {structure_name}")


def _elapsed(action, *args):
    """Call ``action(*args)`` and return ``(result, seconds_elapsed)``.

    Timing uses ``time.perf_counter``. Passing the arguments positionally
    avoids wrapping the call in a lambda that captures loop variables.
    """
    start = time.perf_counter()
    result = action(*args)
    end = time.perf_counter()
    return result, end - start


def _result_row(structure_name, mode, operation, repeat, elapsed, count,
                bucket_count):
    """Return one detailed measurement row (keys match ``RESULT_FIELDS``)."""
    return {
        "structure": structure_name,
        "mode": mode,
        "operation": operation,
        "repeat": repeat,
        "time_sec": elapsed,
        "n": count,
        # The bucket count is only meaningful for the hash table.
        "bucket_count": bucket_count if structure_name == "HashTable" else "",
    }


def run_experiment(count=10000, repeats=5, seed=42, bucket_count=20011):
    """Time insert, find and delete for every structure and input order.

    For each structure in ``STRUCTURES``, each mode in ``MODES`` and each
    repeat, a fresh structure is filled with ``count`` records, then up to 100
    existing plus 10 missing names are looked up, and finally up to 50 names
    are deleted.

    Args:
        count: Number of generated records.
        repeats: Number of repeated measurements per structure and mode.
        seed: Base seed for shuffling and for choosing lookup/delete names.
        bucket_count: Bucket count passed to ``create_hash_table``.

    Returns:
        A list of row dicts (see ``RESULT_FIELDS``), three per repeat in the
        order insert, find, delete. Empty when ``repeats`` is less than 1.
    """
    record_sets = prepare_records(count, seed)
    all_names = [name for name, _phone in record_sets["sorted"]]
    results = []

    for structure_name in STRUCTURES:
        for mode in MODES:
            records = record_sets[mode]
            names_for_sampling = [name for name, _phone in records]

            for repeat in range(1, repeats + 1):
                # A dedicated generator per (structure, mode, repeat) keeps
                # every run reproducible independently of the others.
                rng = random.Random(
                    seed + repeat * 1000 + len(structure_name) + len(mode)
                )
                find_existing = rng.sample(names_for_sampling, min(100, count))
                find_missing = [
                    f"None_{repeat}_{index}" for index in range(10)
                ]
                find_names = find_existing + find_missing
                delete_names = rng.sample(all_names, min(50, count))

                structure, insert_time = _elapsed(
                    _insert_all, structure_name, records, bucket_count
                )
                structure, find_time = _elapsed(
                    _find_all, structure_name, structure, find_names
                )
                structure, delete_time = _elapsed(
                    _delete_all, structure_name, structure, delete_names
                )

                timings = (
                    ("insert", insert_time),
                    ("find", find_time),
                    ("delete", delete_time),
                )
                results.extend(
                    _result_row(
                        structure_name,
                        mode,
                        operation,
                        repeat,
                        elapsed,
                        count,
                        bucket_count,
                    )
                    for operation, elapsed in timings
                )

    return results


def summarize(results):
    """Average the measured times per (structure, mode, operation).

    Args:
        results: Row dicts as produced by ``run_experiment``.

    Returns:
        A list of summary dicts (see ``SUMMARY_FIELDS``) ordered by
        ``STRUCTURES``, then ``MODES``, then ``OPERATIONS``. Combinations
        without any measurement are skipped instead of raising ``KeyError``.
    """
    grouped = {}
    for row in results:
        key = (row["structure"], row["mode"], row["operation"])
        grouped.setdefault(key, []).append(row["time_sec"])

    summary = []
    for structure_name in STRUCTURES:
        for mode in MODES:
            for operation in OPERATIONS:
                values = grouped.get((structure_name, mode, operation))
                if not values:
                    # Nothing was measured for this combination (for example
                    # when the experiment ran with zero repeats).
                    continue
                summary.append(
                    {
                        "structure": structure_name,
                        "mode": mode,
                        "operation": operation,
                        "average_time_sec": sum(values) / len(values),
                        "measurements_sec": ";".join(
                            f"{value:.9f}" for value in values
                        ),
                    }
                )
    return summary


def write_csv(path, rows, fieldnames):
    """Write ``rows`` (dicts) to ``path`` as UTF-8 CSV with a header line.

    Missing parent directories are created.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def write_chart(path, summary):
    """Save a log-scale bar chart of the average times to ``path``.

    matplotlib is used when it can be imported; otherwise the chart is drawn
    by ``write_svg_chart`` using only the standard library.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # Covers a missing package (ModuleNotFoundError) as well as an
        # installation that fails while importing.
        write_svg_chart(path, summary)
        return

    labels = [
        f"{row['structure']}\n{row['mode']}\n{row['operation']}"
        for row in summary
    ]
    values = [row["average_time_sec"] for row in summary]
    colors = [OPERATION_COLORS[row["operation"]] for row in summary]

    path.parent.mkdir(parents=True, exist_ok=True)
    figure = plt.figure(figsize=(14, 7))
    try:
        plt.bar(range(len(values)), values, color=colors)
        plt.yscale("log")
        plt.ylabel(CHART_Y_LABEL)
        plt.title(CHART_TITLE)
        plt.xticks(
            range(len(labels)), labels, rotation=45, ha="right", fontsize=8
        )
        plt.tight_layout()
        plt.savefig(path, dpi=160)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(figure)


def write_svg_chart(path, summary):
    """Write a log-scale bar chart as a hand-built SVG file.

    This is the dependency-free fallback of ``write_chart``. One bar is drawn
    per summary row; the y axis spans whole decades around the data.

    Args:
        path: Output file (``pathlib.Path``); parent directories are created.
        summary: Summary dicts as produced by ``summarize``. An empty list
            yields a chart with axes and legend but no bars.
    """
    width = 1500
    height = 760
    margin_left = 90
    margin_right = 40
    margin_top = 70
    margin_bottom = 210
    plot_width = width - margin_left - margin_right
    plot_height = height - margin_top - margin_bottom
    baseline = margin_top + plot_height
    right_edge = width - margin_right

    values = [
        max(row["average_time_sec"], _MIN_PLOTTED_TIME) for row in summary
    ]
    if values:
        log_min = math.floor(math.log10(min(values)))
        log_max = math.ceil(math.log10(max(values)))
    else:
        # No data: fall back to an arbitrary decade range so the axis can
        # still be drawn.
        log_min, log_max = -6, 0
    if log_min == log_max:
        # All values sit exactly on one power of ten; widen the range so the
        # scale below does not divide by zero.
        log_min -= 1
        log_max += 1

    def y_for(value):
        """Map a time value to its y pixel coordinate on the log scale."""
        log_value = math.log10(max(value, _MIN_PLOTTED_TIME))
        return (
            margin_top
            + (log_max - log_value) / (log_max - log_min) * plot_height
        )

    # max(..., 1) keeps the slot width defined for an empty summary.
    slot_width = plot_width / max(len(summary), 1)
    bar_width = slot_width * 0.62

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" '
        f'height="{height}" viewBox="0 0 {width} {height}">',
        '<style>text { font-family: Arial, sans-serif; fill: #222; }</style>',
        '<rect width="100%" height="100%" fill="white"/>',
        f'<text x="{width / 2}" y="36" text-anchor="middle" '
        f'font-size="22">Сравнение операций телефонного справочника</text>',
        f'<line x1="{margin_left}" y1="{margin_top}" x2="{margin_left}" '
        f'y2="{baseline}" stroke="#333"/>',
        f'<line x1="{margin_left}" y1="{baseline}" x2="{right_edge}" '
        f'y2="{baseline}" stroke="#333"/>',
    ]

    # Horizontal grid line and tick label for every power of ten.
    for exponent in range(log_min, log_max + 1):
        value = 10 ** exponent
        y = y_for(value)
        lines.append(
            f'<line x1="{margin_left}" y1="{y:.2f}" x2="{right_edge}" '
            f'y2="{y:.2f}" stroke="#ddd"/>'
        )
        lines.append(
            f'<text x="{margin_left - 10}" y="{y + 4:.2f}" '
            f'text-anchor="end" font-size="12">1e{exponent}</text>'
        )

    for index, row in enumerate(summary):
        average = row["average_time_sec"]
        x = margin_left + index * slot_width + (slot_width - bar_width) / 2
        y = y_for(average)
        bar_height = baseline - y
        color = OPERATION_COLORS[row["operation"]]
        label = f"{row['structure']} / {row['mode']} / {row['operation']}"
        center_x = x + bar_width / 2
        label_y = baseline + 16
        lines.append(
            f'<rect x="{x:.2f}" y="{y:.2f}" width="{bar_width:.2f}" '
            f'height="{bar_height:.2f}" fill="{color}"/>'
        )
        lines.append(
            f'<text x="{center_x:.2f}" y="{y - 6:.2f}" text-anchor="middle" '
            f'font-size="10">{average:.3g}</text>'
        )
        # Category labels are rotated so that neighbouring slots do not
        # overlap; the text is escaped because it is embedded in XML.
        lines.append(
            f'<text x="{center_x:.2f}" y="{label_y}" text-anchor="end" '
            f'font-size="11" '
            f'transform="rotate(-45 {center_x:.2f} {label_y})">'
            f'{html.escape(label)}</text>'
        )

    legend_x = margin_left
    legend_y = height - 30
    for offset, (operation, color) in enumerate(OPERATION_COLORS.items()):
        x = legend_x + offset * 130
        lines.append(
            f'<rect x="{x}" y="{legend_y - 12}" width="14" height="14" '
            f'fill="{color}"/>'
        )
        lines.append(
            f'<text x="{x + 20}" y="{legend_y}" '
            f'font-size="13">{operation}</text>'
        )

    axis_label_y = margin_top + plot_height / 2
    lines.append(
        f'<text x="24" y="{axis_label_y:.2f}" text-anchor="middle" '
        f'font-size="14" transform="rotate(-90 24 {axis_label_y:.2f})">'
        f'Среднее время, секунд (логарифмическая шкала)</text>'
    )
    lines.append("</svg>")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")


def _int_at_least(minimum):
    """Return an argparse ``type=`` callable accepting integers >= minimum."""

    def parse(text):
        try:
            value = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"invalid integer value: {text!r}"
            ) from None
        if value < minimum:
            raise argparse.ArgumentTypeError(
                f"must be at least {minimum}, got {value}"
            )
        return value

    return parse


def main():
    """Parse the command line, run the experiment and write all reports.

    Writes ``results.csv``, ``summary.csv`` and ``performance.svg`` into the
    chosen output directory and prints the resulting paths.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # A negative record count would fail inside random.sample, and zero
    # repeats would leave nothing to summarize, so both are rejected up front
    # with a regular usage error.
    parser.add_argument(
        "--n",
        type=_int_at_least(0),
        default=10000,
        help="number of generated records",
    )
    parser.add_argument(
        "--repeats",
        type=_int_at_least(1),
        default=5,
        help="number of repeated measurements",
    )
    parser.add_argument("--seed", type=int, default=42, help="random seed")
    parser.add_argument(
        "--bucket-count",
        type=int,
        default=20011,
        help="hash-table bucket count",
    )
    parser.add_argument("--output-dir", type=Path, default=Path("docs/data"))
    args = parser.parse_args()

    results = run_experiment(
        count=args.n,
        repeats=args.repeats,
        seed=args.seed,
        bucket_count=args.bucket_count,
    )
    summary = summarize(results)

    results_path = args.output_dir / "results.csv"
    summary_path = args.output_dir / "summary.csv"
    chart_path = args.output_dir / "performance.svg"

    write_csv(results_path, results, RESULT_FIELDS)
    write_csv(summary_path, summary, SUMMARY_FIELDS)
    write_chart(chart_path, summary)

    print(f"Saved detailed results to {results_path}")
    print(f"Saved summary to {summary_path}")
    print(f"Saved chart to {chart_path}")


if __name__ == "__main__":
    main()