[1] Task 1

This commit is contained in:
Alex 2026-05-19 22:11:31 +03:00
parent 57c8ef048f
commit e10b075b06
9 changed files with 3295 additions and 0 deletions

3
shahovaa/zadanie1/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
__pycache__/
*.py[cod]
.DS_Store

View File

@ -0,0 +1,24 @@
# Задание 1: структуры данных
Реализация телефонного справочника на трех структурах данных без классов:
- связный список;
- хеш-таблица с цепочками;
- двоичное дерево поиска.
## Запуск
Проверка базовых операций:
```bash
python3 phonebook.py
```
Экспериментальные замеры и построение графика:
```bash
python3 benchmark.py
```
По умолчанию используется `N = 10000`, `5` повторов, результаты сохраняются в
`docs/data/results.csv`, `docs/data/summary.csv` и `docs/data/performance.svg`.

View File

@ -0,0 +1,359 @@
"""Run performance experiments for the procedural phone book structures."""
import argparse
import csv
import html
import math
import random
import time
from pathlib import Path
from phonebook import (
bst_delete,
bst_find,
bst_insert,
create_hash_table,
ht_delete,
ht_find,
ht_insert,
ll_delete,
ll_find,
ll_insert,
)
# Structure identifiers; each one selects a branch in _insert_all, _find_all
# and _delete_all.
STRUCTURES = ("LinkedList", "HashTable", "BST")
# Input orderings; these match the keys of the dict built by prepare_records.
MODES = ("shuffled", "sorted")
# Operations measured for every structure/mode pair, in execution order.
OPERATIONS = ("insert", "find", "delete")
def generate_records(count):
    """Build ``count`` synthetic ``(name, phone)`` pairs in index order.

    Names look like ``User_00042`` and phones like ``+7-900-00042``; the index
    is zero-padded to at least five digits.
    """
    records = []
    for number in range(count):
        suffix = f"{number:05d}"
        records.append(("User_" + suffix, "+7-900-" + suffix))
    return records
def prepare_records(count, seed):
    """Return the generated records in both input orders used by the benchmark.

    The result maps ``"sorted"`` to the records in generation order and
    ``"shuffled"`` to an independent copy shuffled with ``random.Random(seed)``,
    so the same seed always yields the same permutation.
    """
    ordered = generate_records(count)
    scrambled = list(ordered)
    shuffler = random.Random(seed)
    shuffler.shuffle(scrambled)
    return {"sorted": ordered, "shuffled": scrambled}
def _insert_all(structure_name, records, bucket_count):
if structure_name == "LinkedList":
head = None
for name, phone in records:
head = ll_insert(head, name, phone)
return head
if structure_name == "HashTable":
buckets = create_hash_table(bucket_count)
for name, phone in records:
ht_insert(buckets, name, phone)
return buckets
if structure_name == "BST":
root = None
for name, phone in records:
root = bst_insert(root, name, phone)
return root
raise ValueError(f"Unknown structure: {structure_name}")
def _find_all(structure_name, structure, names):
if structure_name == "LinkedList":
for name in names:
ll_find(structure, name)
return structure
if structure_name == "HashTable":
for name in names:
ht_find(structure, name)
return structure
if structure_name == "BST":
for name in names:
bst_find(structure, name)
return structure
raise ValueError(f"Unknown structure: {structure_name}")
def _delete_all(structure_name, structure, names):
if structure_name == "LinkedList":
head = structure
for name in names:
head = ll_delete(head, name)
return head
if structure_name == "HashTable":
for name in names:
ht_delete(structure, name)
return structure
if structure_name == "BST":
root = structure
for name in names:
root = bst_delete(root, name)
return root
raise ValueError(f"Unknown structure: {structure_name}")
def _elapsed(action):
start = time.perf_counter()
result = action()
end = time.perf_counter()
return result, end - start
def run_experiment(count=10000, repeats=5, seed=42, bucket_count=20011):
    """Time insert, find and delete for every structure and input order.

    For each structure in ``STRUCTURES``, each mode in ``MODES`` and each
    repeat, a new structure is filled with all records, then queried and then
    partially emptied; the three phases are timed separately with ``_elapsed``.

    Per repeat the workload is:
      * find: up to 100 names sampled from the current mode's records plus
        10 names of the form ``None_<repeat>_<index>``;
      * delete: up to 50 names sampled from the generated records.

    Returns a list of row dicts with the keys ``structure``, ``mode``,
    ``operation``, ``repeat``, ``time_sec``, ``n`` and ``bucket_count``
    (blank for everything except the hash table), three rows per repeat in
    insert/find/delete order.
    """
    datasets = prepare_records(count, seed)
    sorted_names = [entry[0] for entry in datasets["sorted"]]
    measurements = []

    for structure_name in STRUCTURES:
        # Only hash-table rows carry a bucket count; other rows leave it blank.
        bucket_column = bucket_count if structure_name == "HashTable" else ""
        for mode in MODES:
            records = datasets[mode]
            mode_names = [entry[0] for entry in records]
            for repeat in range(1, repeats + 1):
                rng = random.Random(seed + repeat * 1000 + len(structure_name) + len(mode))
                # The draw order (find sample, then delete sample) determines
                # which names are picked, so it must stay as is.
                find_names = rng.sample(mode_names, min(100, count))
                find_names += [f"None_{repeat}_{index}" for index in range(10)]
                delete_names = rng.sample(sorted_names, min(50, count))

                structure, insert_seconds = _elapsed(
                    lambda: _insert_all(structure_name, records, bucket_count)
                )
                structure, find_seconds = _elapsed(
                    lambda: _find_all(structure_name, structure, find_names)
                )
                structure, delete_seconds = _elapsed(
                    lambda: _delete_all(structure_name, structure, delete_names)
                )

                timings = (
                    ("insert", insert_seconds),
                    ("find", find_seconds),
                    ("delete", delete_seconds),
                )
                for operation, seconds in timings:
                    measurements.append(
                        {
                            "structure": structure_name,
                            "mode": mode,
                            "operation": operation,
                            "repeat": repeat,
                            "time_sec": seconds,
                            "n": count,
                            "bucket_count": bucket_column,
                        }
                    )
    return measurements
def summarize(results):
    """Aggregate raw measurement rows into one summary row per group.

    Rows are grouped by ``(structure, mode, operation)``. The summary is
    emitted in the fixed ``STRUCTURES`` x ``MODES`` x ``OPERATIONS`` order and
    each row contains the arithmetic mean (``average_time_sec``) and the
    individual timings joined with ``;`` (``measurements_sec``).

    Groups with no measurements (for example an empty ``results`` list or a
    partial result set) are skipped instead of raising ``KeyError``.
    """
    grouped = {}
    for row in results:
        key = (row["structure"], row["mode"], row["operation"])
        grouped.setdefault(key, []).append(row["time_sec"])

    summary = []
    for structure_name in STRUCTURES:
        for mode in MODES:
            for operation in OPERATIONS:
                values = grouped.get((structure_name, mode, operation))
                if not values:
                    # Nothing was measured for this combination; there is no
                    # meaningful average to report.
                    continue
                summary.append(
                    {
                        "structure": structure_name,
                        "mode": mode,
                        "operation": operation,
                        "average_time_sec": sum(values) / len(values),
                        "measurements_sec": ";".join(f"{value:.9f}" for value in values),
                    }
                )
    return summary
def write_csv(path, rows, fieldnames):
    """Write ``rows`` (dicts) to ``path`` as UTF-8 CSV with a header line.

    Missing parent directories are created first. ``fieldnames`` fixes both
    the header and the column order.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with path.open("w", encoding="utf-8", newline="") as handle:
        table = csv.DictWriter(handle, fieldnames=fieldnames)
        table.writeheader()
        for row in rows:
            table.writerow(row)
def write_chart(path, summary):
    """Draw the summary as a log-scale bar chart and save it to ``path``.

    matplotlib is used when it can be imported; otherwise the chart is
    produced by ``write_svg_chart``, which needs only the standard library.
    Each bar is one summary row, coloured by its operation.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # ImportError also covers ModuleNotFoundError, so the fallback is used
        # both when matplotlib is absent and when it is installed but fails
        # to import.
        write_svg_chart(path, summary)
        return
    labels = [
        f"{row['structure']}\n{row['mode']}\n{row['operation']}"
        for row in summary
    ]
    values = [row["average_time_sec"] for row in summary]
    colors_by_operation = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    colors = [colors_by_operation[row["operation"]] for row in summary]
    path.parent.mkdir(parents=True, exist_ok=True)
    plt.figure(figsize=(14, 7))
    plt.bar(range(len(values)), values, color=colors)
    # Timings span several orders of magnitude, hence the logarithmic axis.
    plt.yscale("log")
    plt.ylabel("Среднее время, секунд (логарифмическая шкала)")
    plt.title("Сравнение операций телефонного справочника")
    plt.xticks(range(len(labels)), labels, rotation=45, ha="right", fontsize=8)
    plt.tight_layout()
    plt.savefig(path, dpi=160)
    plt.close()
def write_svg_chart(path, summary):
    """Render the summary as a log-scale bar chart in a hand-written SVG file.

    Each summary row becomes one bar coloured by its operation, annotated
    with its average time and a rotated ``structure / mode / operation``
    label. Horizontal grid lines mark the powers of ten covering the data.
    Missing parent directories of ``path`` are created.

    Raises:
        ValueError: if ``summary`` is empty, since there is nothing to scale
            the axes against.
    """
    if not summary:
        raise ValueError("cannot draw a chart from an empty summary")

    width = 1500
    height = 760
    margin_left = 90
    margin_right = 40
    margin_top = 70
    margin_bottom = 210
    plot_width = width - margin_left - margin_right
    plot_height = height - margin_top - margin_bottom
    baseline = margin_top + plot_height

    # Clamp to a tiny positive value so log10 is defined for zero timings.
    values = [max(row["average_time_sec"], 1e-12) for row in summary]
    log_min = math.floor(math.log10(min(values)))
    log_max = math.ceil(math.log10(max(values)))
    if log_min == log_max:
        # All values sit on one power of ten; widen the range to avoid a
        # zero-height axis (and a division by zero in y_for).
        log_min -= 1
        log_max += 1

    def y_for(value):
        """Map a time value to its vertical pixel position on the log axis."""
        log_value = math.log10(max(value, 1e-12))
        return margin_top + (log_max - log_value) / (log_max - log_min) * plot_height

    colors_by_operation = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    slot_width = plot_width / len(summary)
    bar_width = slot_width * 0.62
    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#ffffff"/>',
        '<style>text{font-family:Arial,Helvetica,sans-serif;fill:#222}.axis{stroke:#222;stroke-width:1}.grid{stroke:#ddd;stroke-width:1}.label{font-size:13px}.tick{font-size:12px}.title{font-size:24px;font-weight:700}.legend{font-size:14px}</style>',
        f'<text class="title" x="{width / 2}" y="35" text-anchor="middle">Сравнение операций телефонного справочника</text>',
        f'<line class="axis" x1="{margin_left}" y1="{baseline}" x2="{width - margin_right}" y2="{baseline}"/>',
        f'<line class="axis" x1="{margin_left}" y1="{margin_top}" x2="{margin_left}" y2="{baseline}"/>',
    ]

    # Grid lines and tick labels, one per power of ten.
    for exponent in range(log_min, log_max + 1):
        value = 10 ** exponent
        y = y_for(value)
        lines.append(
            f'<line class="grid" x1="{margin_left}" y1="{y:.2f}" x2="{width - margin_right}" y2="{y:.2f}"/>'
        )
        lines.append(
            f'<text class="tick" x="{margin_left - 10}" y="{y + 4:.2f}" text-anchor="end">1e{exponent}</text>'
        )

    # Bars, value annotations and rotated category labels.
    for index, row in enumerate(summary):
        x = margin_left + index * slot_width + (slot_width - bar_width) / 2
        y = y_for(row["average_time_sec"])
        bar_height = baseline - y
        color = colors_by_operation[row["operation"]]
        label = f"{row['structure']} / {row['mode']} / {row['operation']}"
        lines.append(
            f'<rect x="{x:.2f}" y="{y:.2f}" width="{bar_width:.2f}" height="{bar_height:.2f}" fill="{color}"/>'
        )
        lines.append(
            f'<text class="tick" x="{x + bar_width / 2:.2f}" y="{y - 5:.2f}" text-anchor="middle">{row["average_time_sec"]:.3g}</text>'
        )
        # The label is built from row data, so it is escaped before being
        # embedded in the markup.
        lines.append(
            f'<text class="label" transform="translate({x + bar_width / 2:.2f} {baseline + 18:.2f}) rotate(55)" text-anchor="start">{html.escape(label)}</text>'
        )

    # Legend: one colour swatch per operation.
    legend_x = margin_left
    legend_y = height - 30
    for offset, (operation, color) in enumerate(colors_by_operation.items()):
        x = legend_x + offset * 130
        lines.append(f'<rect x="{x}" y="{legend_y - 12}" width="18" height="18" fill="{color}"/>')
        lines.append(f'<text class="legend" x="{x + 26}" y="{legend_y + 2}">{operation}</text>')

    lines.append(
        f'<text class="label" transform="translate(24 {margin_top + plot_height / 2}) rotate(-90)" text-anchor="middle">Среднее время, секунд (логарифмическая шкала)</text>'
    )
    lines.append("</svg>")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
def main():
    """Parse CLI options, run the benchmark and write the CSV files and chart.

    Outputs ``results.csv``, ``summary.csv`` and ``performance.svg`` inside
    ``--output-dir``. Invalid numeric options are rejected up front with a
    usage error instead of failing later inside the benchmark.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--n", type=int, default=10000, help="number of generated records")
    parser.add_argument("--repeats", type=int, default=5, help="number of repeated measurements")
    parser.add_argument("--seed", type=int, default=42, help="random seed")
    parser.add_argument("--bucket-count", type=int, default=20011, help="hash-table bucket count")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("docs/data"),
        help="directory for the CSV files and the chart",
    )
    args = parser.parse_args()

    # A negative record count makes the sampling sizes negative.
    if args.n < 0:
        parser.error("--n must be non-negative")
    # Without at least one repeat there are no measurements to average.
    if args.repeats < 1:
        parser.error("--repeats must be at least 1")
    # The hash function reduces modulo the bucket count, so it must be positive.
    if args.bucket_count < 1:
        parser.error("--bucket-count must be at least 1")

    results = run_experiment(
        count=args.n,
        repeats=args.repeats,
        seed=args.seed,
        bucket_count=args.bucket_count,
    )
    summary = summarize(results)
    write_csv(
        args.output_dir / "results.csv",
        results,
        ["structure", "mode", "operation", "repeat", "time_sec", "n", "bucket_count"],
    )
    write_csv(
        args.output_dir / "summary.csv",
        summary,
        ["structure", "mode", "operation", "average_time_sec", "measurements_sec"],
    )
    chart_path = args.output_dir / "performance.svg"
    write_chart(chart_path, summary)
    print(f"Saved detailed results to {args.output_dir / 'results.csv'}")
    print(f"Saved summary to {args.output_dir / 'summary.csv'}")
    print(f"Saved chart to {chart_path}")


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 78 KiB

View File

@ -0,0 +1,91 @@
structure,mode,operation,repeat,time_sec,n,bucket_count
LinkedList,shuffled,insert,1,1.5487497089998215,10000,
LinkedList,shuffled,find,1,0.013355207998756669,10000,
LinkedList,shuffled,delete,1,0.006138000000646571,10000,
LinkedList,shuffled,insert,2,1.6062446670002828,10000,
LinkedList,shuffled,find,2,0.014175791999150533,10000,
LinkedList,shuffled,delete,2,0.007367083000644925,10000,
LinkedList,shuffled,insert,3,1.5470056670001213,10000,
LinkedList,shuffled,find,3,0.014115500000116299,10000,
LinkedList,shuffled,delete,3,0.006011666999256704,10000,
LinkedList,shuffled,insert,4,1.5362317910003185,10000,
LinkedList,shuffled,find,4,0.01460650000080932,10000,
LinkedList,shuffled,delete,4,0.006377084000632749,10000,
LinkedList,shuffled,insert,5,1.541476624999632,10000,
LinkedList,shuffled,find,5,0.014646625000750646,10000,
LinkedList,shuffled,delete,5,0.005829540999911842,10000,
LinkedList,sorted,insert,1,1.4639895000000251,10000,
LinkedList,sorted,find,1,0.012882999999419553,10000,
LinkedList,sorted,delete,1,0.005734124999435153,10000,
LinkedList,sorted,insert,2,1.4757493329998397,10000,
LinkedList,sorted,find,2,0.013435208000373677,10000,
LinkedList,sorted,delete,2,0.006567624999661348,10000,
LinkedList,sorted,insert,3,1.474924916999953,10000,
LinkedList,sorted,find,3,0.012946166998517583,10000,
LinkedList,sorted,delete,3,0.005636875001073349,10000,
LinkedList,sorted,insert,4,1.6074728750008944,10000,
LinkedList,sorted,find,4,0.012849667000409681,10000,
LinkedList,sorted,delete,4,0.006610207999983686,10000,
LinkedList,sorted,insert,5,1.5465652919992863,10000,
LinkedList,sorted,find,5,0.012851292000050307,10000,
LinkedList,sorted,delete,5,0.005656833000102779,10000,
HashTable,shuffled,insert,1,0.005485583000336192,10000,20011
HashTable,shuffled,find,1,5.770799907622859e-05,10000,20011
HashTable,shuffled,delete,1,3.570800072338898e-05,10000,20011
HashTable,shuffled,insert,2,0.006064958999559167,10000,20011
HashTable,shuffled,find,2,5.854200026078615e-05,10000,20011
HashTable,shuffled,delete,2,3.495800046948716e-05,10000,20011
HashTable,shuffled,insert,3,0.005850707999343285,10000,20011
HashTable,shuffled,find,3,5.441699977382086e-05,10000,20011
HashTable,shuffled,delete,3,2.7292000595480204e-05,10000,20011
HashTable,shuffled,insert,4,0.005818375000671949,10000,20011
HashTable,shuffled,find,4,5.387499913922511e-05,10000,20011
HashTable,shuffled,delete,4,2.683300044736825e-05,10000,20011
HashTable,shuffled,insert,5,0.006451041999753215,10000,20011
HashTable,shuffled,find,5,5.6000000768108293e-05,10000,20011
HashTable,shuffled,delete,5,2.937499994004611e-05,10000,20011
HashTable,sorted,insert,1,0.005557000000408152,10000,20011
HashTable,sorted,find,1,5.608300125459209e-05,10000,20011
HashTable,sorted,delete,1,2.8624999686144292e-05,10000,20011
HashTable,sorted,insert,2,0.005895457999940845,10000,20011
HashTable,sorted,find,2,6.0874999689986e-05,10000,20011
HashTable,sorted,delete,2,3.199999991920777e-05,10000,20011
HashTable,sorted,insert,3,0.005766083999333205,10000,20011
HashTable,sorted,find,3,5.500000042957254e-05,10000,20011
HashTable,sorted,delete,3,2.7874999432242475e-05,10000,20011
HashTable,sorted,insert,4,0.005590124999798718,10000,20011
HashTable,sorted,find,4,5.337499896995723e-05,10000,20011
HashTable,sorted,delete,4,2.6959000024362467e-05,10000,20011
HashTable,sorted,insert,5,0.007889499998782412,10000,20011
HashTable,sorted,find,5,5.549999877985101e-05,10000,20011
HashTable,sorted,delete,5,2.7749998480430804e-05,10000,20011
BST,shuffled,insert,1,0.011201125000297907,10000,
BST,shuffled,find,1,9.245900037058163e-05,10000,
BST,shuffled,delete,1,6.958300036785658e-05,10000,
BST,shuffled,insert,2,0.011337707999700797,10000,
BST,shuffled,find,2,9.545799912302755e-05,10000,
BST,shuffled,delete,2,7.141599962778855e-05,10000,
BST,shuffled,insert,3,0.01119999999900756,10000,
BST,shuffled,find,3,9.308299922849983e-05,10000,
BST,shuffled,delete,3,6.779199975426309e-05,10000,
BST,shuffled,insert,4,0.011189917000592686,10000,
BST,shuffled,find,4,9.675000001152512e-05,10000,
BST,shuffled,delete,4,6.624999878113158e-05,10000,
BST,shuffled,insert,5,0.01118529100131127,10000,
BST,shuffled,find,5,8.670799979881849e-05,10000,
BST,shuffled,delete,5,6.904200017743278e-05,10000,
BST,sorted,insert,1,2.2425066659998265,10000,
BST,sorted,find,1,0.018234625000332016,10000,
BST,sorted,delete,1,0.010230416999547742,10000,
BST,sorted,insert,2,2.26542979199985,10000,
BST,sorted,find,2,0.021546082998611382,10000,
BST,sorted,delete,2,0.011778292000599322,10000,
BST,sorted,insert,3,2.246992708000107,10000,
BST,sorted,find,3,0.01936033300080453,10000,
BST,sorted,delete,3,0.010003166000387864,10000,
BST,sorted,insert,4,2.2515108749994397,10000,
BST,sorted,find,4,0.021122417001606664,10000,
BST,sorted,delete,4,0.01173120800012839,10000,
BST,sorted,insert,5,2.2457697090012516,10000,
BST,sorted,find,5,0.01902170900029887,10000,
BST,sorted,delete,5,0.010273834001054638,10000,
1 structure mode operation repeat time_sec n bucket_count
2 LinkedList shuffled insert 1 1.5487497089998215 10000
3 LinkedList shuffled find 1 0.013355207998756669 10000
4 LinkedList shuffled delete 1 0.006138000000646571 10000
5 LinkedList shuffled insert 2 1.6062446670002828 10000
6 LinkedList shuffled find 2 0.014175791999150533 10000
7 LinkedList shuffled delete 2 0.007367083000644925 10000
8 LinkedList shuffled insert 3 1.5470056670001213 10000
9 LinkedList shuffled find 3 0.014115500000116299 10000
10 LinkedList shuffled delete 3 0.006011666999256704 10000
11 LinkedList shuffled insert 4 1.5362317910003185 10000
12 LinkedList shuffled find 4 0.01460650000080932 10000
13 LinkedList shuffled delete 4 0.006377084000632749 10000
14 LinkedList shuffled insert 5 1.541476624999632 10000
15 LinkedList shuffled find 5 0.014646625000750646 10000
16 LinkedList shuffled delete 5 0.005829540999911842 10000
17 LinkedList sorted insert 1 1.4639895000000251 10000
18 LinkedList sorted find 1 0.012882999999419553 10000
19 LinkedList sorted delete 1 0.005734124999435153 10000
20 LinkedList sorted insert 2 1.4757493329998397 10000
21 LinkedList sorted find 2 0.013435208000373677 10000
22 LinkedList sorted delete 2 0.006567624999661348 10000
23 LinkedList sorted insert 3 1.474924916999953 10000
24 LinkedList sorted find 3 0.012946166998517583 10000
25 LinkedList sorted delete 3 0.005636875001073349 10000
26 LinkedList sorted insert 4 1.6074728750008944 10000
27 LinkedList sorted find 4 0.012849667000409681 10000
28 LinkedList sorted delete 4 0.006610207999983686 10000
29 LinkedList sorted insert 5 1.5465652919992863 10000
30 LinkedList sorted find 5 0.012851292000050307 10000
31 LinkedList sorted delete 5 0.005656833000102779 10000
32 HashTable shuffled insert 1 0.005485583000336192 10000 20011
33 HashTable shuffled find 1 5.770799907622859e-05 10000 20011
34 HashTable shuffled delete 1 3.570800072338898e-05 10000 20011
35 HashTable shuffled insert 2 0.006064958999559167 10000 20011
36 HashTable shuffled find 2 5.854200026078615e-05 10000 20011
37 HashTable shuffled delete 2 3.495800046948716e-05 10000 20011
38 HashTable shuffled insert 3 0.005850707999343285 10000 20011
39 HashTable shuffled find 3 5.441699977382086e-05 10000 20011
40 HashTable shuffled delete 3 2.7292000595480204e-05 10000 20011
41 HashTable shuffled insert 4 0.005818375000671949 10000 20011
42 HashTable shuffled find 4 5.387499913922511e-05 10000 20011
43 HashTable shuffled delete 4 2.683300044736825e-05 10000 20011
44 HashTable shuffled insert 5 0.006451041999753215 10000 20011
45 HashTable shuffled find 5 5.6000000768108293e-05 10000 20011
46 HashTable shuffled delete 5 2.937499994004611e-05 10000 20011
47 HashTable sorted insert 1 0.005557000000408152 10000 20011
48 HashTable sorted find 1 5.608300125459209e-05 10000 20011
49 HashTable sorted delete 1 2.8624999686144292e-05 10000 20011
50 HashTable sorted insert 2 0.005895457999940845 10000 20011
51 HashTable sorted find 2 6.0874999689986e-05 10000 20011
52 HashTable sorted delete 2 3.199999991920777e-05 10000 20011
53 HashTable sorted insert 3 0.005766083999333205 10000 20011
54 HashTable sorted find 3 5.500000042957254e-05 10000 20011
55 HashTable sorted delete 3 2.7874999432242475e-05 10000 20011
56 HashTable sorted insert 4 0.005590124999798718 10000 20011
57 HashTable sorted find 4 5.337499896995723e-05 10000 20011
58 HashTable sorted delete 4 2.6959000024362467e-05 10000 20011
59 HashTable sorted insert 5 0.007889499998782412 10000 20011
60 HashTable sorted find 5 5.549999877985101e-05 10000 20011
61 HashTable sorted delete 5 2.7749998480430804e-05 10000 20011
62 BST shuffled insert 1 0.011201125000297907 10000
63 BST shuffled find 1 9.245900037058163e-05 10000
64 BST shuffled delete 1 6.958300036785658e-05 10000
65 BST shuffled insert 2 0.011337707999700797 10000
66 BST shuffled find 2 9.545799912302755e-05 10000
67 BST shuffled delete 2 7.141599962778855e-05 10000
68 BST shuffled insert 3 0.01119999999900756 10000
69 BST shuffled find 3 9.308299922849983e-05 10000
70 BST shuffled delete 3 6.779199975426309e-05 10000
71 BST shuffled insert 4 0.011189917000592686 10000
72 BST shuffled find 4 9.675000001152512e-05 10000
73 BST shuffled delete 4 6.624999878113158e-05 10000
74 BST shuffled insert 5 0.01118529100131127 10000
75 BST shuffled find 5 8.670799979881849e-05 10000
76 BST shuffled delete 5 6.904200017743278e-05 10000
77 BST sorted insert 1 2.2425066659998265 10000
78 BST sorted find 1 0.018234625000332016 10000
79 BST sorted delete 1 0.010230416999547742 10000
80 BST sorted insert 2 2.26542979199985 10000
81 BST sorted find 2 0.021546082998611382 10000
82 BST sorted delete 2 0.011778292000599322 10000
83 BST sorted insert 3 2.246992708000107 10000
84 BST sorted find 3 0.01936033300080453 10000
85 BST sorted delete 3 0.010003166000387864 10000
86 BST sorted insert 4 2.2515108749994397 10000
87 BST sorted find 4 0.021122417001606664 10000
88 BST sorted delete 4 0.01173120800012839 10000
89 BST sorted insert 5 2.2457697090012516 10000
90 BST sorted find 5 0.01902170900029887 10000
91 BST sorted delete 5 0.010273834001054638 10000

View File

@ -0,0 +1,19 @@
structure,mode,operation,average_time_sec,measurements_sec
LinkedList,shuffled,insert,1.5559416918000353,1.548749709;1.606244667;1.547005667;1.536231791;1.541476625
LinkedList,shuffled,find,0.014179924999916693,0.013355208;0.014175792;0.014115500;0.014606500;0.014646625
LinkedList,shuffled,delete,0.006344675000218558,0.006138000;0.007367083;0.006011667;0.006377084;0.005829541
LinkedList,sorted,insert,1.5137403833999996,1.463989500;1.475749333;1.474924917;1.607472875;1.546565292
LinkedList,sorted,find,0.01299306679975416,0.012883000;0.013435208;0.012946167;0.012849667;0.012851292
LinkedList,sorted,delete,0.006041133200051263,0.005734125;0.006567625;0.005636875;0.006610208;0.005656833
HashTable,shuffled,insert,0.005934133399932762,0.005485583;0.006064959;0.005850708;0.005818375;0.006451042
HashTable,shuffled,find,5.61083998036338e-05,0.000057708;0.000058542;0.000054417;0.000053875;0.000056000
HashTable,shuffled,delete,3.083320043515414e-05,0.000035708;0.000034958;0.000027292;0.000026833;0.000029375
HashTable,sorted,insert,0.006139633399652666,0.005557000;0.005895458;0.005766084;0.005590125;0.007889500
HashTable,sorted,find,5.6166599824791776e-05,0.000056083;0.000060875;0.000055000;0.000053375;0.000055500
HashTable,sorted,delete,2.8641799508477563e-05,0.000028625;0.000032000;0.000027875;0.000026959;0.000027750
BST,shuffled,insert,0.011222808200182044,0.011201125;0.011337708;0.011200000;0.011189917;0.011185291
BST,shuffled,find,9.289159970649052e-05,0.000092459;0.000095458;0.000093083;0.000096750;0.000086708
BST,shuffled,delete,6.881659974169451e-05,0.000069583;0.000071416;0.000067792;0.000066250;0.000069042
BST,sorted,insert,2.250441950000095,2.242506666;2.265429792;2.246992708;2.251510875;2.245769709
BST,sorted,find,0.019857033400330692,0.018234625;0.021546083;0.019360333;0.021122417;0.019021709
BST,sorted,delete,0.010803383400343591,0.010230417;0.011778292;0.010003166;0.011731208;0.010273834
1 structure mode operation average_time_sec measurements_sec
2 LinkedList shuffled insert 1.5559416918000353 1.548749709;1.606244667;1.547005667;1.536231791;1.541476625
3 LinkedList shuffled find 0.014179924999916693 0.013355208;0.014175792;0.014115500;0.014606500;0.014646625
4 LinkedList shuffled delete 0.006344675000218558 0.006138000;0.007367083;0.006011667;0.006377084;0.005829541
5 LinkedList sorted insert 1.5137403833999996 1.463989500;1.475749333;1.474924917;1.607472875;1.546565292
6 LinkedList sorted find 0.01299306679975416 0.012883000;0.013435208;0.012946167;0.012849667;0.012851292
7 LinkedList sorted delete 0.006041133200051263 0.005734125;0.006567625;0.005636875;0.006610208;0.005656833
8 HashTable shuffled insert 0.005934133399932762 0.005485583;0.006064959;0.005850708;0.005818375;0.006451042
9 HashTable shuffled find 5.61083998036338e-05 0.000057708;0.000058542;0.000054417;0.000053875;0.000056000
10 HashTable shuffled delete 3.083320043515414e-05 0.000035708;0.000034958;0.000027292;0.000026833;0.000029375
11 HashTable sorted insert 0.006139633399652666 0.005557000;0.005895458;0.005766084;0.005590125;0.007889500
12 HashTable sorted find 5.6166599824791776e-05 0.000056083;0.000060875;0.000055000;0.000053375;0.000055500
13 HashTable sorted delete 2.8641799508477563e-05 0.000028625;0.000032000;0.000027875;0.000026959;0.000027750
14 BST shuffled insert 0.011222808200182044 0.011201125;0.011337708;0.011200000;0.011189917;0.011185291
15 BST shuffled find 9.289159970649052e-05 0.000092459;0.000095458;0.000093083;0.000096750;0.000086708
16 BST shuffled delete 6.881659974169451e-05 0.000069583;0.000071416;0.000067792;0.000066250;0.000069042
17 BST sorted insert 2.250441950000095 2.242506666;2.265429792;2.246992708;2.251510875;2.245769709
18 BST sorted find 0.019857033400330692 0.018234625;0.021546083;0.019360333;0.021122417;0.019021709
19 BST sorted delete 0.010803383400343591 0.010230417;0.011778292;0.010003166;0.011731208;0.010273834

View File

@ -0,0 +1,112 @@
# Отчет по заданию 1: структуры данных
## Цель
Реализовать три структуры данных с нуля в процедурной парадигме и сравнить
скорость основных операций телефонного справочника:
- `insert(name, phone)` - добавить или обновить запись;
- `find(name)` - найти телефон по имени;
- `delete(name)` - удалить запись;
- `list_all()` - получить все записи, отсортированные по имени.
Классы не использовались. Узлы связного списка и дерева представлены
словарями, хеш-таблица представлена списком бакетов.
## Реализация
Код находится в файле `phonebook.py`.
Реализованы функции:
- связный список: `ll_insert`, `ll_find`, `ll_delete`, `ll_list_all`;
- хеш-таблица: `create_hash_table`, `ht_insert`, `ht_find`, `ht_delete`, `ht_list_all`;
- двоичное дерево поиска: `bst_insert`, `bst_find`, `bst_delete`, `bst_list_all`.
Для хеш-таблицы используется метод цепочек: каждый бакет хранит голову
связного списка. Хеш-функция написана вручную, чтобы результат не зависел от
рандомизации встроенной функции `hash()` в Python.
Для BST вставка, поиск, удаление и обход написаны без классов. Обход
`bst_list_all` реализован итеративно, чтобы отсортированный вход на 10000
элементов не приводил к переполнению стека рекурсии.
## Методика эксперимента
Скрипт эксперимента находится в файле `benchmark.py`.
Параметры запуска:
- количество записей: `N = 10000`;
- число повторов каждого эксперимента: `5`;
- имена: `User_00000`, `User_00001`, ..., `User_09999`;
- два режима входных данных: `shuffled` и `sorted`;
- поиск: 100 существующих имен и 10 отсутствующих;
- удаление: 50 случайных существующих имен;
- размер хеш-таблицы: `20011` бакетов.
После вставки структура не пересоздается: поиск и удаление выполняются на той
же заполненной структуре. Для каждой структуры, каждого режима и каждого повтора
заново создается и заполняется новая структура.
Файлы с результатами:
- `docs/data/results.csv` - все отдельные замеры;
- `docs/data/summary.csv` - среднее время и список всех пяти замеров;
- `docs/data/performance.svg` - столбчатая диаграмма средних значений.
![График производительности](data/performance.svg)
## Средние результаты
Время указано в секундах.
| Структура | Режим | Вставка | Поиск | Удаление |
|---|---|---:|---:|---:|
| LinkedList | shuffled | 1.555942 | 0.014180 | 0.006345 |
| LinkedList | sorted | 1.513740 | 0.012993 | 0.006041 |
| HashTable | shuffled | 0.005934 | 0.000056 | 0.000031 |
| HashTable | sorted | 0.006140 | 0.000056 | 0.000029 |
| BST | shuffled | 0.011223 | 0.000093 | 0.000069 |
| BST | sorted | 2.250442 | 0.019857 | 0.010803 |
## Анализ
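Связный список оказался самым медленным на вставке и поиске среди структур на
перемешанных данных; на отсортированном входе еще медленнее работает только
вырожденный BST. Причина медлительности списка в том, что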
для корректной операции `insert` нужно проверить, есть ли уже запись с таким
именем. При уникальных именах почти каждая вставка проходит по всему текущему
списку, поэтому суммарная сложность вставки всех записей становится `O(n^2)`.
Порядок входных данных почти не влияет на результат, потому что структура не
использует порядок ключей.
Хеш-таблица показала лучшие результаты почти во всех операциях. При хорошем
распределении по бакетам вставка, поиск и удаление близки к `O(1)`. Порядок
входных данных почти не влияет на время, так как индекс бакета определяется
хешем имени, а не расположением записи во входном списке.
BST хорошо работает на перемешанных данных: дерево получается сравнительно
сбалансированным, поэтому операции близки к `O(log n)`. На отсортированном
входе обычное двоичное дерево поиска вырождается в цепочку: каждый новый ключ
становится правым потомком предыдущего. Из-за этого вставка всех записей
становится `O(n^2)`, а поиск и удаление приближаются к поведению связного
списка.
Удаление у хеш-таблицы быстрое по той же причине, что и поиск: сначала
вычисляется бакет, затем просматривается короткая цепочка. В BST удаление
быстрое на перемешанном дереве, но на вырожденном дереве оно замедляется.
В связном списке удаление требует линейного поиска удаляемого элемента.
## Вывод
Для частого поиска, обновления и удаления по точному имени лучше выбирать
хеш-таблицу. Она быстрее всего в эксперименте и почти не зависит от порядка
вставки.
Если нужно часто получать данные в отсортированном порядке, дерево поиска дает
удобный `in-order` обход без отдельной сортировки. Но обычный BST чувствителен
к порядку входных данных, поэтому на практике лучше использовать
самобалансирующееся дерево или готовую структуру из библиотеки.
Связный список подходит только для маленьких наборов данных или учебных задач.
Для телефонного справочника с частым поиском он неудачен, потому что каждая
операция поиска требует последовательного прохода по элементам.

View File

@ -0,0 +1,255 @@
"""Procedural phone book data structures for assignment 1.
The task explicitly asks to avoid classes, so every structure is represented
with plain dictionaries, lists and functions.
"""
def _make_ll_node(name, phone, next_node=None):
return {"name": name, "phone": phone, "next": next_node}
def ll_insert(head, name, phone):
    """Add a record to the linked list or update the phone of an existing name.

    The list is scanned from the head; a new node is appended at the tail
    only when no node with ``name`` exists. Returns the head of the list
    (a new node when the list was empty).
    """
    if head is None:
        return _make_ll_node(name, phone)
    node = head
    while True:
        if node["name"] == name:
            node["phone"] = phone
            return head
        following = node["next"]
        if following is None:
            # Reached the tail without a match: append here.
            node["next"] = _make_ll_node(name, phone)
            return head
        node = following
def ll_find(head, name):
    """Return the phone stored under ``name``, or ``None`` when it is absent."""
    node = head
    # Advance until the name matches or the list is exhausted.
    while node is not None and node["name"] != name:
        node = node["next"]
    return None if node is None else node["phone"]
def ll_delete(head, name):
    """Unlink the first node whose name equals ``name``.

    Returns the head of the list after the deletion: the second node when the
    head itself was removed, otherwise the original head. A missing name
    leaves the list untouched.
    """
    if head is None:
        return None
    if head["name"] == name:
        return head["next"]
    before = head
    node = head["next"]
    while node is not None:
        if node["name"] == name:
            # Bypass the matching node.
            before["next"] = node["next"]
            break
        before, node = node, node["next"]
    return head
def ll_list_all(head):
    """Collect every ``(name, phone)`` pair of the list, sorted by name."""
    pairs = []
    node = head
    while node is not None:
        pairs.append((node["name"], node["phone"]))
        node = node["next"]
    pairs.sort(key=lambda pair: pair[0])
    return pairs
def create_hash_table(size=20011):
    """Create a fixed-size hash table with separate chaining.

    The table is a list of ``size`` buckets, each initially ``None`` (an
    empty chain).

    Raises:
        ValueError: if ``size`` is smaller than 1. A table without buckets
            cannot be indexed, because the hash is reduced modulo the number
            of buckets.
    """
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    return [None] * size
def _hash_name(name, bucket_count):
"""Stable polynomial hash, unlike Python's randomized built-in hash()."""
value = 0
for char in name:
value = (value * 31 + ord(char)) % bucket_count
return value
def ht_insert(buckets, name, phone):
    """Add or update a record in the chain of the bucket chosen by the hash."""
    slot = _hash_name(name, len(buckets))
    chain_head = buckets[slot]
    # ll_insert returns the (possibly new) head, which is stored back.
    buckets[slot] = ll_insert(chain_head, name, phone)
def ht_find(buckets, name):
    """Return the phone stored under ``name``, or ``None`` when it is absent."""
    chain_head = buckets[_hash_name(name, len(buckets))]
    return ll_find(chain_head, name)
def ht_delete(buckets, name):
    """Remove the record stored under ``name``; a missing name is ignored."""
    slot = _hash_name(name, len(buckets))
    chain_head = buckets[slot]
    # The chain head may change when its first node is the one removed.
    buckets[slot] = ll_delete(chain_head, name)
def ht_list_all(buckets):
    """Collect the ``(name, phone)`` pairs from every chain, sorted by name."""
    collected = []
    for node in buckets:
        # Walk the chain of this bucket; empty buckets hold None.
        while node is not None:
            collected.append((node["name"], node["phone"]))
            node = node["next"]
    collected.sort(key=lambda pair: pair[0])
    return collected
def _make_bst_node(name, phone):
return {"name": name, "phone": phone, "left": None, "right": None}
def bst_insert(root, name, phone):
    """Add a record to the tree or update the phone of an existing name.

    The descent is iterative. Smaller names go to the left subtree, larger
    names to the right. Returns the root (a new node when the tree was empty).
    """
    if root is None:
        return _make_bst_node(name, phone)
    node = root
    while node["name"] != name:
        side = "left" if name < node["name"] else "right"
        child = node[side]
        if child is None:
            # Free slot found: attach the new leaf here.
            node[side] = _make_bst_node(name, phone)
            return root
        node = child
    # Loop ended on a node with the same name: update in place.
    node["phone"] = phone
    return root
def bst_find(root, name):
    """Return the phone stored under ``name``, or ``None`` when it is absent."""
    node = root
    while node is not None:
        key = node["name"]
        if name == key:
            return node["phone"]
        # Smaller names live in the left subtree, larger ones in the right.
        node = node["left"] if name < key else node["right"]
    return None
def _detach_min(node):
"""Detach the minimal node from a subtree and return (new_subtree, min)."""
parent = None
current = node
while current["left"] is not None:
parent = current
current = current["left"]
if parent is None:
return current["right"], current
parent["left"] = current["right"]
current["right"] = None
return node, current
def bst_delete(root, name):
    """Remove the node named ``name`` and return the root of the resulting tree.

    A node with at most one child is replaced by that child (or ``None``).
    A node with two children is replaced by the minimum of its right subtree,
    which is detached with ``_detach_min``. A missing name leaves the tree
    untouched.
    """
    above = None
    node = root
    while node is not None and node["name"] != name:
        above = node
        node = node["left"] if name < node["name"] else node["right"]
    if node is None:
        return root

    left_child, right_child = node["left"], node["right"]
    if left_child is None:
        substitute = right_child
    elif right_child is None:
        substitute = left_child
    else:
        # Two children: the in-order successor takes over both subtrees.
        rest, substitute = _detach_min(right_child)
        substitute["left"] = left_child
        substitute["right"] = rest

    if above is None:
        # The root itself was removed.
        return substitute
    side = "left" if above["left"] is node else "right"
    above[side] = substitute
    return root
def bst_list_all(root):
    """Return every ``(name, phone)`` pair via an iterative in-order traversal.

    An explicit stack is used instead of recursion, so a deep (degenerate)
    tree does not exhaust the call stack.
    """
    ordered = []
    pending = []
    node = root
    while True:
        if node is not None:
            # Keep descending left, remembering the path.
            pending.append(node)
            node = node["left"]
        elif pending:
            node = pending.pop()
            ordered.append((node["name"], node["phone"]))
            node = node["right"]
        else:
            return ordered
def _assert_basic_operations():
    """Smoke-test insert, update, find, delete and listing on all three structures."""
    sample = [("Boris", "222"), ("Anna", "111"), ("Denis", "444")]
    sample_sorted = [("Anna", "111"), ("Boris", "222"), ("Denis", "444")]

    # Linked list: insert, update an existing name, then delete it.
    chain = None
    for entry in sample:
        chain = ll_insert(chain, *entry)
    assert ll_find(chain, "Anna") == "111"
    chain = ll_insert(chain, "Anna", "333")
    assert ll_find(chain, "Anna") == "333"
    chain = ll_delete(chain, "Anna")
    assert ll_find(chain, "Anna") is None
    assert ll_list_all(chain) == [("Boris", "222"), ("Denis", "444")]

    # Hash table: a small table, an update, and deletion of a missing name.
    buckets = create_hash_table(17)
    for entry in sample:
        ht_insert(buckets, *entry)
    assert ht_find(buckets, "Denis") == "444"
    ht_insert(buckets, "Denis", "555")
    assert ht_find(buckets, "Denis") == "555"
    ht_delete(buckets, "Missing")
    assert ("Anna", "111") in ht_list_all(buckets)

    # BST: sorted listing, then deletion of the root, which has two children.
    tree = None
    for entry in sample:
        tree = bst_insert(tree, *entry)
    assert bst_list_all(tree) == sample_sorted
    tree = bst_delete(tree, "Boris")
    assert bst_find(tree, "Boris") is None
    assert bst_list_all(tree) == [("Anna", "111"), ("Denis", "444")]


if __name__ == "__main__":
    _assert_basic_operations()
    print("All phonebook checks passed.")

View File

@ -0,0 +1 @@
matplotlib>=3.8