From e10b075b065cb1b8d465154241448d906d6403c0 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 19 May 2026 22:11:31 +0300 Subject: [PATCH] [1] Task 1 --- shahovaa/zadanie1/.gitignore | 3 + shahovaa/zadanie1/README.md | 24 + shahovaa/zadanie1/benchmark.py | 359 +++ shahovaa/zadanie1/docs/data/performance.svg | 2431 +++++++++++++++++++ shahovaa/zadanie1/docs/data/results.csv | 91 + shahovaa/zadanie1/docs/data/summary.csv | 19 + shahovaa/zadanie1/docs/report.md | 112 + shahovaa/zadanie1/phonebook.py | 255 ++ shahovaa/zadanie1/requirements.txt | 1 + 9 files changed, 3295 insertions(+) create mode 100644 shahovaa/zadanie1/.gitignore create mode 100644 shahovaa/zadanie1/README.md create mode 100644 shahovaa/zadanie1/benchmark.py create mode 100644 shahovaa/zadanie1/docs/data/performance.svg create mode 100644 shahovaa/zadanie1/docs/data/results.csv create mode 100644 shahovaa/zadanie1/docs/data/summary.csv create mode 100644 shahovaa/zadanie1/docs/report.md create mode 100644 shahovaa/zadanie1/phonebook.py create mode 100644 shahovaa/zadanie1/requirements.txt diff --git a/shahovaa/zadanie1/.gitignore b/shahovaa/zadanie1/.gitignore new file mode 100644 index 0000000..4a5bb25 --- /dev/null +++ b/shahovaa/zadanie1/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.py[cod] +.DS_Store diff --git a/shahovaa/zadanie1/README.md b/shahovaa/zadanie1/README.md new file mode 100644 index 0000000..3a1b5cf --- /dev/null +++ b/shahovaa/zadanie1/README.md @@ -0,0 +1,24 @@ +# Задание 1: структуры данных + +Реализация телефонного справочника на трех структурах данных без классов: + +- связный список; +- хеш-таблица с цепочками; +- двоичное дерево поиска. + +## Запуск + +Проверка базовых операций: + +```bash +python3 phonebook.py +``` + +Экспериментальные замеры и построение графика: + +```bash +python3 benchmark.py +``` + +По умолчанию используется `N = 10000`, `5` повторов, результаты сохраняются в +`docs/data/results.csv`, `docs/data/summary.csv` и `docs/data/performance.svg`. 
diff --git a/shahovaa/zadanie1/benchmark.py b/shahovaa/zadanie1/benchmark.py new file mode 100644 index 0000000..c1f1069 --- /dev/null +++ b/shahovaa/zadanie1/benchmark.py @@ -0,0 +1,359 @@ +"""Run performance experiments for the procedural phone book structures.""" + +import argparse +import csv +import html +import math +import random +import time +from pathlib import Path + +from phonebook import ( + bst_delete, + bst_find, + bst_insert, + create_hash_table, + ht_delete, + ht_find, + ht_insert, + ll_delete, + ll_find, + ll_insert, +) + + +STRUCTURES = ("LinkedList", "HashTable", "BST") +MODES = ("shuffled", "sorted") +OPERATIONS = ("insert", "find", "delete") + + +def generate_records(count): + return [(f"User_{index:05d}", f"+7-900-{index:05d}") for index in range(count)] + + +def prepare_records(count, seed): + records_sorted = generate_records(count) + records_shuffled = records_sorted[:] + random.Random(seed).shuffle(records_shuffled) + return { + "sorted": records_sorted, + "shuffled": records_shuffled, + } + + +def _insert_all(structure_name, records, bucket_count): + if structure_name == "LinkedList": + head = None + for name, phone in records: + head = ll_insert(head, name, phone) + return head + + if structure_name == "HashTable": + buckets = create_hash_table(bucket_count) + for name, phone in records: + ht_insert(buckets, name, phone) + return buckets + + if structure_name == "BST": + root = None + for name, phone in records: + root = bst_insert(root, name, phone) + return root + + raise ValueError(f"Unknown structure: {structure_name}") + + +def _find_all(structure_name, structure, names): + if structure_name == "LinkedList": + for name in names: + ll_find(structure, name) + return structure + + if structure_name == "HashTable": + for name in names: + ht_find(structure, name) + return structure + + if structure_name == "BST": + for name in names: + bst_find(structure, name) + return structure + + raise ValueError(f"Unknown structure: 
{structure_name}") + + +def _delete_all(structure_name, structure, names): + if structure_name == "LinkedList": + head = structure + for name in names: + head = ll_delete(head, name) + return head + + if structure_name == "HashTable": + for name in names: + ht_delete(structure, name) + return structure + + if structure_name == "BST": + root = structure + for name in names: + root = bst_delete(root, name) + return root + + raise ValueError(f"Unknown structure: {structure_name}") + + +def _elapsed(action): + start = time.perf_counter() + result = action() + end = time.perf_counter() + return result, end - start + + +def run_experiment(count=10000, repeats=5, seed=42, bucket_count=20011): + record_sets = prepare_records(count, seed) + all_names = [name for name, _phone in record_sets["sorted"]] + results = [] + + for structure_name in STRUCTURES: + for mode in MODES: + records = record_sets[mode] + names_for_sampling = [name for name, _phone in records] + + for repeat in range(1, repeats + 1): + rng = random.Random(seed + repeat * 1000 + len(structure_name) + len(mode)) + find_existing = rng.sample(names_for_sampling, min(100, count)) + find_missing = [f"None_{repeat}_{index}" for index in range(10)] + find_names = find_existing + find_missing + delete_names = rng.sample(all_names, min(50, count)) + + structure, insert_time = _elapsed( + lambda: _insert_all(structure_name, records, bucket_count) + ) + results.append( + { + "structure": structure_name, + "mode": mode, + "operation": "insert", + "repeat": repeat, + "time_sec": insert_time, + "n": count, + "bucket_count": bucket_count if structure_name == "HashTable" else "", + } + ) + + structure, find_time = _elapsed( + lambda: _find_all(structure_name, structure, find_names) + ) + results.append( + { + "structure": structure_name, + "mode": mode, + "operation": "find", + "repeat": repeat, + "time_sec": find_time, + "n": count, + "bucket_count": bucket_count if structure_name == "HashTable" else "", + } + ) + + 
structure, delete_time = _elapsed( + lambda: _delete_all(structure_name, structure, delete_names) + ) + results.append( + { + "structure": structure_name, + "mode": mode, + "operation": "delete", + "repeat": repeat, + "time_sec": delete_time, + "n": count, + "bucket_count": bucket_count if structure_name == "HashTable" else "", + } + ) + + return results + + +def summarize(results): + grouped = {} + for row in results: + key = (row["structure"], row["mode"], row["operation"]) + grouped.setdefault(key, []).append(row["time_sec"]) + + summary = [] + for structure_name in STRUCTURES: + for mode in MODES: + for operation in OPERATIONS: + values = grouped[(structure_name, mode, operation)] + summary.append( + { + "structure": structure_name, + "mode": mode, + "operation": operation, + "average_time_sec": sum(values) / len(values), + "measurements_sec": ";".join(f"{value:.9f}" for value in values), + } + ) + return summary + + +def write_csv(path, rows, fieldnames): + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8", newline="") as file: + writer = csv.DictWriter(file, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + + +def write_chart(path, summary): + try: + import matplotlib.pyplot as plt + except ModuleNotFoundError: + write_svg_chart(path, summary) + return + + labels = [ + f"{row['structure']}\n{row['mode']}\n{row['operation']}" + for row in summary + ] + values = [row["average_time_sec"] for row in summary] + colors_by_operation = { + "insert": "#4C78A8", + "find": "#F58518", + "delete": "#54A24B", + } + colors = [colors_by_operation[row["operation"]] for row in summary] + + path.parent.mkdir(parents=True, exist_ok=True) + plt.figure(figsize=(14, 7)) + plt.bar(range(len(values)), values, color=colors) + plt.yscale("log") + plt.ylabel("Среднее время, секунд (логарифмическая шкала)") + plt.title("Сравнение операций телефонного справочника") + plt.xticks(range(len(labels)), labels, rotation=45, ha="right", 
fontsize=8) + plt.tight_layout() + plt.savefig(path, dpi=160) + plt.close() + + +def write_svg_chart(path, summary): + width = 1500 + height = 760 + margin_left = 90 + margin_right = 40 + margin_top = 70 + margin_bottom = 210 + plot_width = width - margin_left - margin_right + plot_height = height - margin_top - margin_bottom + baseline = margin_top + plot_height + + values = [max(row["average_time_sec"], 1e-12) for row in summary] + log_min = math.floor(math.log10(min(values))) + log_max = math.ceil(math.log10(max(values))) + if log_min == log_max: + log_min -= 1 + log_max += 1 + + def y_for(value): + log_value = math.log10(max(value, 1e-12)) + return margin_top + (log_max - log_value) / (log_max - log_min) * plot_height + + colors_by_operation = { + "insert": "#4C78A8", + "find": "#F58518", + "delete": "#54A24B", + } + slot_width = plot_width / len(summary) + bar_width = slot_width * 0.62 + + lines = [ + '', + f'', + '', + '', + f'Сравнение операций телефонного справочника', + f'', + f'', + ] + + for exponent in range(log_min, log_max + 1): + value = 10 ** exponent + y = y_for(value) + lines.append( + f'' + ) + lines.append( + f'1e{exponent}' + ) + + for index, row in enumerate(summary): + x = margin_left + index * slot_width + (slot_width - bar_width) / 2 + y = y_for(row["average_time_sec"]) + bar_height = baseline - y + color = colors_by_operation[row["operation"]] + label = f"{row['structure']} / {row['mode']} / {row['operation']}" + + lines.append( + f'' + ) + lines.append( + f'{row["average_time_sec"]:.3g}' + ) + lines.append( + f'{html.escape(label)}' + ) + + legend_x = margin_left + legend_y = height - 30 + for offset, (operation, color) in enumerate(colors_by_operation.items()): + x = legend_x + offset * 130 + lines.append(f'') + lines.append(f'{operation}') + + lines.append( + f'Среднее время, секунд (логарифмическая шкала)' + ) + lines.append("") + + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("\n".join(lines), encoding="utf-8") + + 
+def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--n", type=int, default=10000, help="number of generated records") + parser.add_argument("--repeats", type=int, default=5, help="number of repeated measurements") + parser.add_argument("--seed", type=int, default=42, help="random seed") + parser.add_argument("--bucket-count", type=int, default=20011, help="hash-table bucket count") + parser.add_argument("--output-dir", type=Path, default=Path("docs/data")) + args = parser.parse_args() + + results = run_experiment( + count=args.n, + repeats=args.repeats, + seed=args.seed, + bucket_count=args.bucket_count, + ) + summary = summarize(results) + + write_csv( + args.output_dir / "results.csv", + results, + ["structure", "mode", "operation", "repeat", "time_sec", "n", "bucket_count"], + ) + write_csv( + args.output_dir / "summary.csv", + summary, + ["structure", "mode", "operation", "average_time_sec", "measurements_sec"], + ) + chart_path = args.output_dir / "performance.svg" + write_chart(chart_path, summary) + + print(f"Saved detailed results to {args.output_dir / 'results.csv'}") + print(f"Saved summary to {args.output_dir / 'summary.csv'}") + print(f"Saved chart to {chart_path}") + + +if __name__ == "__main__": + main() diff --git a/shahovaa/zadanie1/docs/data/performance.svg b/shahovaa/zadanie1/docs/data/performance.svg new file mode 100644 index 0000000..5f3cc72 --- /dev/null +++ b/shahovaa/zadanie1/docs/data/performance.svg @@ -0,0 +1,2431 @@ + + + + + + + + 2026-05-19T21:32:18.823317 + image/svg+xml + + + Matplotlib v3.10.9, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/shahovaa/zadanie1/docs/data/results.csv b/shahovaa/zadanie1/docs/data/results.csv new file mode 100644 index 0000000..38ba829 --- /dev/null +++ b/shahovaa/zadanie1/docs/data/results.csv @@ -0,0 +1,91 @@ +structure,mode,operation,repeat,time_sec,n,bucket_count +LinkedList,shuffled,insert,1,1.5487497089998215,10000, +LinkedList,shuffled,find,1,0.013355207998756669,10000, +LinkedList,shuffled,delete,1,0.006138000000646571,10000, +LinkedList,shuffled,insert,2,1.6062446670002828,10000, +LinkedList,shuffled,find,2,0.014175791999150533,10000, +LinkedList,shuffled,delete,2,0.007367083000644925,10000, +LinkedList,shuffled,insert,3,1.5470056670001213,10000, +LinkedList,shuffled,find,3,0.014115500000116299,10000, +LinkedList,shuffled,delete,3,0.006011666999256704,10000, +LinkedList,shuffled,insert,4,1.5362317910003185,10000, +LinkedList,shuffled,find,4,0.01460650000080932,10000, +LinkedList,shuffled,delete,4,0.006377084000632749,10000, +LinkedList,shuffled,insert,5,1.541476624999632,10000, +LinkedList,shuffled,find,5,0.014646625000750646,10000, +LinkedList,shuffled,delete,5,0.005829540999911842,10000, +LinkedList,sorted,insert,1,1.4639895000000251,10000, +LinkedList,sorted,find,1,0.012882999999419553,10000, +LinkedList,sorted,delete,1,0.005734124999435153,10000, +LinkedList,sorted,insert,2,1.4757493329998397,10000, +LinkedList,sorted,find,2,0.013435208000373677,10000, +LinkedList,sorted,delete,2,0.006567624999661348,10000, +LinkedList,sorted,insert,3,1.474924916999953,10000, +LinkedList,sorted,find,3,0.012946166998517583,10000, +LinkedList,sorted,delete,3,0.005636875001073349,10000, +LinkedList,sorted,insert,4,1.6074728750008944,10000, +LinkedList,sorted,find,4,0.012849667000409681,10000, 
+LinkedList,sorted,delete,4,0.006610207999983686,10000, +LinkedList,sorted,insert,5,1.5465652919992863,10000, +LinkedList,sorted,find,5,0.012851292000050307,10000, +LinkedList,sorted,delete,5,0.005656833000102779,10000, +HashTable,shuffled,insert,1,0.005485583000336192,10000,20011 +HashTable,shuffled,find,1,5.770799907622859e-05,10000,20011 +HashTable,shuffled,delete,1,3.570800072338898e-05,10000,20011 +HashTable,shuffled,insert,2,0.006064958999559167,10000,20011 +HashTable,shuffled,find,2,5.854200026078615e-05,10000,20011 +HashTable,shuffled,delete,2,3.495800046948716e-05,10000,20011 +HashTable,shuffled,insert,3,0.005850707999343285,10000,20011 +HashTable,shuffled,find,3,5.441699977382086e-05,10000,20011 +HashTable,shuffled,delete,3,2.7292000595480204e-05,10000,20011 +HashTable,shuffled,insert,4,0.005818375000671949,10000,20011 +HashTable,shuffled,find,4,5.387499913922511e-05,10000,20011 +HashTable,shuffled,delete,4,2.683300044736825e-05,10000,20011 +HashTable,shuffled,insert,5,0.006451041999753215,10000,20011 +HashTable,shuffled,find,5,5.6000000768108293e-05,10000,20011 +HashTable,shuffled,delete,5,2.937499994004611e-05,10000,20011 +HashTable,sorted,insert,1,0.005557000000408152,10000,20011 +HashTable,sorted,find,1,5.608300125459209e-05,10000,20011 +HashTable,sorted,delete,1,2.8624999686144292e-05,10000,20011 +HashTable,sorted,insert,2,0.005895457999940845,10000,20011 +HashTable,sorted,find,2,6.0874999689986e-05,10000,20011 +HashTable,sorted,delete,2,3.199999991920777e-05,10000,20011 +HashTable,sorted,insert,3,0.005766083999333205,10000,20011 +HashTable,sorted,find,3,5.500000042957254e-05,10000,20011 +HashTable,sorted,delete,3,2.7874999432242475e-05,10000,20011 +HashTable,sorted,insert,4,0.005590124999798718,10000,20011 +HashTable,sorted,find,4,5.337499896995723e-05,10000,20011 +HashTable,sorted,delete,4,2.6959000024362467e-05,10000,20011 +HashTable,sorted,insert,5,0.007889499998782412,10000,20011 +HashTable,sorted,find,5,5.549999877985101e-05,10000,20011 
+HashTable,sorted,delete,5,2.7749998480430804e-05,10000,20011 +BST,shuffled,insert,1,0.011201125000297907,10000, +BST,shuffled,find,1,9.245900037058163e-05,10000, +BST,shuffled,delete,1,6.958300036785658e-05,10000, +BST,shuffled,insert,2,0.011337707999700797,10000, +BST,shuffled,find,2,9.545799912302755e-05,10000, +BST,shuffled,delete,2,7.141599962778855e-05,10000, +BST,shuffled,insert,3,0.01119999999900756,10000, +BST,shuffled,find,3,9.308299922849983e-05,10000, +BST,shuffled,delete,3,6.779199975426309e-05,10000, +BST,shuffled,insert,4,0.011189917000592686,10000, +BST,shuffled,find,4,9.675000001152512e-05,10000, +BST,shuffled,delete,4,6.624999878113158e-05,10000, +BST,shuffled,insert,5,0.01118529100131127,10000, +BST,shuffled,find,5,8.670799979881849e-05,10000, +BST,shuffled,delete,5,6.904200017743278e-05,10000, +BST,sorted,insert,1,2.2425066659998265,10000, +BST,sorted,find,1,0.018234625000332016,10000, +BST,sorted,delete,1,0.010230416999547742,10000, +BST,sorted,insert,2,2.26542979199985,10000, +BST,sorted,find,2,0.021546082998611382,10000, +BST,sorted,delete,2,0.011778292000599322,10000, +BST,sorted,insert,3,2.246992708000107,10000, +BST,sorted,find,3,0.01936033300080453,10000, +BST,sorted,delete,3,0.010003166000387864,10000, +BST,sorted,insert,4,2.2515108749994397,10000, +BST,sorted,find,4,0.021122417001606664,10000, +BST,sorted,delete,4,0.01173120800012839,10000, +BST,sorted,insert,5,2.2457697090012516,10000, +BST,sorted,find,5,0.01902170900029887,10000, +BST,sorted,delete,5,0.010273834001054638,10000, diff --git a/shahovaa/zadanie1/docs/data/summary.csv b/shahovaa/zadanie1/docs/data/summary.csv new file mode 100644 index 0000000..91ac219 --- /dev/null +++ b/shahovaa/zadanie1/docs/data/summary.csv @@ -0,0 +1,19 @@ +structure,mode,operation,average_time_sec,measurements_sec +LinkedList,shuffled,insert,1.5559416918000353,1.548749709;1.606244667;1.547005667;1.536231791;1.541476625 
+LinkedList,shuffled,find,0.014179924999916693,0.013355208;0.014175792;0.014115500;0.014606500;0.014646625 +LinkedList,shuffled,delete,0.006344675000218558,0.006138000;0.007367083;0.006011667;0.006377084;0.005829541 +LinkedList,sorted,insert,1.5137403833999996,1.463989500;1.475749333;1.474924917;1.607472875;1.546565292 +LinkedList,sorted,find,0.01299306679975416,0.012883000;0.013435208;0.012946167;0.012849667;0.012851292 +LinkedList,sorted,delete,0.006041133200051263,0.005734125;0.006567625;0.005636875;0.006610208;0.005656833 +HashTable,shuffled,insert,0.005934133399932762,0.005485583;0.006064959;0.005850708;0.005818375;0.006451042 +HashTable,shuffled,find,5.61083998036338e-05,0.000057708;0.000058542;0.000054417;0.000053875;0.000056000 +HashTable,shuffled,delete,3.083320043515414e-05,0.000035708;0.000034958;0.000027292;0.000026833;0.000029375 +HashTable,sorted,insert,0.006139633399652666,0.005557000;0.005895458;0.005766084;0.005590125;0.007889500 +HashTable,sorted,find,5.6166599824791776e-05,0.000056083;0.000060875;0.000055000;0.000053375;0.000055500 +HashTable,sorted,delete,2.8641799508477563e-05,0.000028625;0.000032000;0.000027875;0.000026959;0.000027750 +BST,shuffled,insert,0.011222808200182044,0.011201125;0.011337708;0.011200000;0.011189917;0.011185291 +BST,shuffled,find,9.289159970649052e-05,0.000092459;0.000095458;0.000093083;0.000096750;0.000086708 +BST,shuffled,delete,6.881659974169451e-05,0.000069583;0.000071416;0.000067792;0.000066250;0.000069042 +BST,sorted,insert,2.250441950000095,2.242506666;2.265429792;2.246992708;2.251510875;2.245769709 +BST,sorted,find,0.019857033400330692,0.018234625;0.021546083;0.019360333;0.021122417;0.019021709 +BST,sorted,delete,0.010803383400343591,0.010230417;0.011778292;0.010003166;0.011731208;0.010273834 diff --git a/shahovaa/zadanie1/docs/report.md b/shahovaa/zadanie1/docs/report.md new file mode 100644 index 0000000..d8fb3fe --- /dev/null +++ b/shahovaa/zadanie1/docs/report.md @@ -0,0 +1,112 @@ +# Отчет по заданию 1: 
структуры данных + +## Цель + +Реализовать три структуры данных с нуля в процедурной парадигме и сравнить +скорость основных операций телефонного справочника: + +- `insert(name, phone)` - добавить или обновить запись; +- `find(name)` - найти телефон по имени; +- `delete(name)` - удалить запись; +- `list_all()` - получить все записи, отсортированные по имени. + +Классы не использовались. Узлы связного списка и дерева представлены +словарями, хеш-таблица представлена списком бакетов. + +## Реализация + +Код находится в файле `phonebook.py`. + +Реализованы функции: + +- связный список: `ll_insert`, `ll_find`, `ll_delete`, `ll_list_all`; +- хеш-таблица: `create_hash_table`, `ht_insert`, `ht_find`, `ht_delete`, `ht_list_all`; +- двоичное дерево поиска: `bst_insert`, `bst_find`, `bst_delete`, `bst_list_all`. + +Для хеш-таблицы используется метод цепочек: каждый бакет хранит голову +связного списка. Хеш-функция написана вручную, чтобы результат не зависел от +рандомизации встроенной функции `hash()` в Python. + +Для BST вставка, поиск, удаление и обход написаны без классов. Обход +`bst_list_all` реализован итеративно, чтобы отсортированный вход на 10000 +элементов не приводил к переполнению стека рекурсии. + +## Методика эксперимента + +Скрипт эксперимента находится в файле `benchmark.py`. + +Параметры запуска: + +- количество записей: `N = 10000`; +- число повторов каждого эксперимента: `5`; +- имена: `User_00000`, `User_00001`, ..., `User_09999`; +- два режима входных данных: `shuffled` и `sorted`; +- поиск: 100 существующих имен и 10 отсутствующих; +- удаление: 50 случайных существующих имен; +- размер хеш-таблицы: `20011` бакетов. + +После вставки структура не пересоздается: поиск и удаление выполняются на той +же заполненной структуре. Для каждой структуры, каждого режима и каждого повтора +структура строится заново. 
+ +Файлы с результатами: + +- `docs/data/results.csv` - все отдельные замеры; +- `docs/data/summary.csv` - среднее время и список всех пяти замеров; +- `docs/data/performance.svg` - столбчатая диаграмма средних значений. + +![График производительности](data/performance.svg) + +## Средние результаты + +Время указано в секундах. + +| Структура | Режим | Вставка | Поиск | Удаление | +|---|---:|---:|---:|---:| +| LinkedList | shuffled | 1.555942 | 0.014180 | 0.006345 | +| LinkedList | sorted | 1.513740 | 0.012993 | 0.006041 | +| HashTable | shuffled | 0.005934 | 0.000056 | 0.000031 | +| HashTable | sorted | 0.006140 | 0.000056 | 0.000029 | +| BST | shuffled | 0.011223 | 0.000093 | 0.000069 | +| BST | sorted | 2.250442 | 0.019857 | 0.010803 | + +## Анализ + +Связный список оказался одним из самых медленных на вставке и поиске: медленнее него только BST на отсортированном входе. Причина в том, что +для корректной операции `insert` нужно проверить, есть ли уже запись с таким +именем. При уникальных именах почти каждая вставка проходит по всему текущему +списку, поэтому суммарная сложность вставки всех записей становится `O(n^2)`. +Порядок входных данных почти не влияет на результат, потому что структура не +использует порядок ключей. + +Хеш-таблица показала лучшие результаты почти во всех операциях. При хорошем +распределении по бакетам вставка, поиск и удаление близки к `O(1)`. Порядок +входных данных почти не влияет на время, так как индекс бакета определяется +хешем имени, а не расположением записи во входном списке. + +BST хорошо работает на перемешанных данных: дерево получается сравнительно +сбалансированным, поэтому операции близки к `O(log n)`. На отсортированном +входе обычное двоичное дерево поиска вырождается в цепочку: каждый новый ключ +становится правым потомком предыдущего. Из-за этого вставка всех записей +становится `O(n^2)`, а поиск и удаление приближаются к поведению связного +списка. 
+ +Удаление у хеш-таблицы быстрое по той же причине, что и поиск: сначала +вычисляется бакет, затем просматривается короткая цепочка. В BST удаление +быстрое на перемешанном дереве, но на вырожденном дереве оно замедляется. +В связном списке удаление требует линейного поиска удаляемого элемента. + +## Вывод + +Для частого поиска, обновления и удаления по точному имени лучше выбирать +хеш-таблицу. Она быстрее всего в эксперименте и почти не зависит от порядка +вставки. + +Если нужно часто получать данные в отсортированном порядке, дерево поиска дает +удобный `in-order` обход без отдельной сортировки. Но обычный BST чувствителен +к порядку входных данных, поэтому на практике лучше использовать +самобалансирующееся дерево или готовую структуру из библиотеки. + +Связный список подходит только для маленьких наборов данных или учебных задач. +Для телефонного справочника с частым поиском он неудачен, потому что каждая +операция поиска требует последовательного прохода по элементам. diff --git a/shahovaa/zadanie1/phonebook.py b/shahovaa/zadanie1/phonebook.py new file mode 100644 index 0000000..1cfbe59 --- /dev/null +++ b/shahovaa/zadanie1/phonebook.py @@ -0,0 +1,255 @@ +"""Procedural phone book data structures for assignment 1. + +The task explicitly asks to avoid classes, so every structure is represented +with plain dictionaries, lists and functions. 
+""" + + +def _make_ll_node(name, phone, next_node=None): + return {"name": name, "phone": phone, "next": next_node} + + +def ll_insert(head, name, phone): + """Insert or update a record in a linked list, returning the head.""" + if head is None: + return _make_ll_node(name, phone) + + current = head + while current is not None: + if current["name"] == name: + current["phone"] = phone + return head + if current["next"] is None: + break + current = current["next"] + + current["next"] = _make_ll_node(name, phone) + return head + + +def ll_find(head, name): + """Return a phone by name or None if there is no such record.""" + current = head + while current is not None: + if current["name"] == name: + return current["phone"] + current = current["next"] + return None + + +def ll_delete(head, name): + """Delete a record by name, returning the possibly changed head.""" + previous = None + current = head + + while current is not None: + if current["name"] == name: + if previous is None: + return current["next"] + previous["next"] = current["next"] + return head + + previous = current + current = current["next"] + + return head + + +def ll_list_all(head): + """Return all linked-list records sorted by name.""" + records = [] + current = head + while current is not None: + records.append((current["name"], current["phone"])) + current = current["next"] + return sorted(records, key=lambda item: item[0]) + + +def create_hash_table(size=20011): + """Create a fixed-size hash table with separate chaining.""" + return [None for _ in range(size)] + + +def _hash_name(name, bucket_count): + """Stable polynomial hash, unlike Python's randomized built-in hash().""" + value = 0 + for char in name: + value = (value * 31 + ord(char)) % bucket_count + return value + + +def ht_insert(buckets, name, phone): + """Insert or update a record in the hash table.""" + index = _hash_name(name, len(buckets)) + buckets[index] = ll_insert(buckets[index], name, phone) + + +def ht_find(buckets, name): + 
"""Return a phone by name or None if there is no such record.""" + index = _hash_name(name, len(buckets)) + return ll_find(buckets[index], name) + + +def ht_delete(buckets, name): + """Delete a record by name if it exists.""" + index = _hash_name(name, len(buckets)) + buckets[index] = ll_delete(buckets[index], name) + + +def ht_list_all(buckets): + """Return all hash-table records sorted by name.""" + records = [] + for head in buckets: + current = head + while current is not None: + records.append((current["name"], current["phone"])) + current = current["next"] + return sorted(records, key=lambda item: item[0]) + + +def _make_bst_node(name, phone): + return {"name": name, "phone": phone, "left": None, "right": None} + + +def bst_insert(root, name, phone): + """Insert or update a record in a binary search tree.""" + if root is None: + return _make_bst_node(name, phone) + + current = root + while True: + if name == current["name"]: + current["phone"] = phone + return root + + if name < current["name"]: + if current["left"] is None: + current["left"] = _make_bst_node(name, phone) + return root + current = current["left"] + else: + if current["right"] is None: + current["right"] = _make_bst_node(name, phone) + return root + current = current["right"] + + +def bst_find(root, name): + """Return a phone by name or None if there is no such record.""" + current = root + while current is not None: + if name == current["name"]: + return current["phone"] + if name < current["name"]: + current = current["left"] + else: + current = current["right"] + return None + + +def _detach_min(node): + """Detach the minimal node from a subtree and return (new_subtree, min).""" + parent = None + current = node + + while current["left"] is not None: + parent = current + current = current["left"] + + if parent is None: + return current["right"], current + + parent["left"] = current["right"] + current["right"] = None + return node, current + + +def bst_delete(root, name): + """Delete a record 
from the tree, returning the possibly changed root.""" + parent = None + current = root + + while current is not None and current["name"] != name: + parent = current + if name < current["name"]: + current = current["left"] + else: + current = current["right"] + + if current is None: + return root + + if current["left"] is None: + replacement = current["right"] + elif current["right"] is None: + replacement = current["left"] + else: + new_right, successor = _detach_min(current["right"]) + successor["left"] = current["left"] + successor["right"] = new_right + replacement = successor + + if parent is None: + return replacement + + if parent["left"] is current: + parent["left"] = replacement + else: + parent["right"] = replacement + + return root + + +def bst_list_all(root): + """Return all BST records sorted by name using in-order traversal.""" + records = [] + stack = [] + current = root + + while current is not None or stack: + while current is not None: + stack.append(current) + current = current["left"] + + current = stack.pop() + records.append((current["name"], current["phone"])) + current = current["right"] + + return records + + +def _assert_basic_operations(): + records = [("Boris", "222"), ("Anna", "111"), ("Denis", "444")] + expected_sorted = [("Anna", "111"), ("Boris", "222"), ("Denis", "444")] + + head = None + for name, phone in records: + head = ll_insert(head, name, phone) + assert ll_find(head, "Anna") == "111" + head = ll_insert(head, "Anna", "333") + assert ll_find(head, "Anna") == "333" + head = ll_delete(head, "Anna") + assert ll_find(head, "Anna") is None + assert ll_list_all(head) == [("Boris", "222"), ("Denis", "444")] + + table = create_hash_table(17) + for name, phone in records: + ht_insert(table, name, phone) + assert ht_find(table, "Denis") == "444" + ht_insert(table, "Denis", "555") + assert ht_find(table, "Denis") == "555" + ht_delete(table, "Missing") + assert ("Anna", "111") in ht_list_all(table) + + root = None + for name, phone in 
records: + root = bst_insert(root, name, phone) + assert bst_list_all(root) == expected_sorted + root = bst_delete(root, "Boris") + assert bst_find(root, "Boris") is None + assert bst_list_all(root) == [("Anna", "111"), ("Denis", "444")] + + +if __name__ == "__main__": + _assert_basic_operations() + print("All phonebook checks passed.") diff --git a/shahovaa/zadanie1/requirements.txt b/shahovaa/zadanie1/requirements.txt new file mode 100644 index 0000000..a9006fd --- /dev/null +++ b/shahovaa/zadanie1/requirements.txt @@ -0,0 +1 @@ +matplotlib>=3.8