[1] Task 1
This commit is contained in:
parent
57c8ef048f
commit
e10b075b06
3
shahovaa/zadanie1/.gitignore
vendored
Normal file
3
shahovaa/zadanie1/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
__pycache__/
|
||||
*.py[cod]
|
||||
.DS_Store
|
||||
24
shahovaa/zadanie1/README.md
Normal file
24
shahovaa/zadanie1/README.md
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# Задание 1: структуры данных
|
||||
|
||||
Реализация телефонного справочника на трех структурах данных без классов:
|
||||
|
||||
- связный список;
|
||||
- хеш-таблица с цепочками;
|
||||
- двоичное дерево поиска.
|
||||
|
||||
## Запуск
|
||||
|
||||
Проверка базовых операций:
|
||||
|
||||
```bash
|
||||
python3 phonebook.py
|
||||
```
|
||||
|
||||
Экспериментальные замеры и построение графика:
|
||||
|
||||
```bash
|
||||
python3 benchmark.py
|
||||
```
|
||||
|
||||
По умолчанию используется `N = 10000`, `5` повторов, результаты сохраняются в
|
||||
`docs/data/results.csv`, `docs/data/summary.csv` и `docs/data/performance.svg`.
|
||||
359
shahovaa/zadanie1/benchmark.py
Normal file
359
shahovaa/zadanie1/benchmark.py
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
"""Run performance experiments for the procedural phone book structures."""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import html
|
||||
import math
|
||||
import random
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from phonebook import (
|
||||
bst_delete,
|
||||
bst_find,
|
||||
bst_insert,
|
||||
create_hash_table,
|
||||
ht_delete,
|
||||
ht_find,
|
||||
ht_insert,
|
||||
ll_delete,
|
||||
ll_find,
|
||||
ll_insert,
|
||||
)
|
||||
|
||||
|
||||
STRUCTURES = ("LinkedList", "HashTable", "BST")
|
||||
MODES = ("shuffled", "sorted")
|
||||
OPERATIONS = ("insert", "find", "delete")
|
||||
|
||||
|
||||
def generate_records(count):
    """Build ``count`` synthetic phone book entries in ascending index order.

    Returns a list of ``(name, phone)`` tuples. Both fields embed the record
    index zero-padded to five digits, e.g. ``("User_00007", "+7-900-00007")``.
    """
    records = []
    for number in range(count):
        records.append((f"User_{number:05d}", f"+7-900-{number:05d}"))
    return records
|
||||
|
||||
|
||||
def prepare_records(count, seed):
    """Return the two input orderings used by the benchmark.

    The result is a dict with two lists of ``(name, phone)`` tuples:
    ``"sorted"`` holds the records exactly as produced by
    ``generate_records`` and ``"shuffled"`` holds a copy of them permuted by
    a ``random.Random`` instance seeded with ``seed``.
    """
    ordered = generate_records(count)
    # Shuffle a copy so the generated order stays intact for the "sorted" mode.
    permuted = ordered[:]
    shuffler = random.Random(seed)
    shuffler.shuffle(permuted)
    return {"sorted": ordered, "shuffled": permuted}
|
||||
|
||||
|
||||
def _insert_all(structure_name, records, bucket_count):
    """Build a fresh structure of the requested kind from ``records``.

    ``records`` is an iterable of ``(name, phone)`` pairs. ``bucket_count``
    is only used for ``"HashTable"``. Returns the list head, the bucket list
    or the tree root respectively.

    Raises:
        ValueError: if ``structure_name`` is not one of the known structures.
    """
    if structure_name == "HashTable":
        table = create_hash_table(bucket_count)
        for name, phone in records:
            ht_insert(table, name, phone)
        return table

    # The linked list and the BST share the same "insert returns the new
    # head/root" calling convention, so only the insert function differs.
    if structure_name == "LinkedList":
        insert = ll_insert
    elif structure_name == "BST":
        insert = bst_insert
    else:
        raise ValueError(f"Unknown structure: {structure_name}")

    top = None
    for name, phone in records:
        top = insert(top, name, phone)
    return top
|
||||
|
||||
|
||||
def _find_all(structure_name, structure, names):
    """Look up every name in ``names`` and discard the results.

    Returns ``structure`` unchanged so the caller can treat insert, find and
    delete phases uniformly.

    Raises:
        ValueError: if ``structure_name`` is not one of the known structures.
    """
    finders = {
        "LinkedList": ll_find,
        "HashTable": ht_find,
        "BST": bst_find,
    }
    if structure_name not in finders:
        raise ValueError(f"Unknown structure: {structure_name}")

    lookup = finders[structure_name]
    for name in names:
        lookup(structure, name)
    return structure
|
||||
|
||||
|
||||
def _delete_all(structure_name, structure, names):
    """Delete every name in ``names`` from ``structure``.

    Returns the structure after the deletions: the same bucket list for the
    hash table (it is modified in place), or the possibly changed head/root
    for the linked list and the BST.

    Raises:
        ValueError: if ``structure_name`` is not one of the known structures.
    """
    if structure_name == "HashTable":
        for name in names:
            ht_delete(structure, name)
        return structure

    if structure_name == "LinkedList":
        remove = ll_delete
    elif structure_name == "BST":
        remove = bst_delete
    else:
        raise ValueError(f"Unknown structure: {structure_name}")

    # Deleting the first node / the root yields a new entry point, so the
    # returned value has to be threaded through every call.
    top = structure
    for name in names:
        top = remove(top, name)
    return top
|
||||
|
||||
|
||||
def _elapsed(action):
    """Call ``action()`` and measure it with ``time.perf_counter``.

    Returns a ``(result, seconds)`` tuple where ``result`` is whatever
    ``action`` returned and ``seconds`` is the elapsed time as a float.
    """
    started_at = time.perf_counter()
    outcome = action()
    finished_at = time.perf_counter()
    duration = finished_at - started_at
    return outcome, duration
|
||||
|
||||
|
||||
def _result_row(structure_name, mode, operation, repeat, time_sec, count, bucket_count):
    """Build one row of the detailed results table for a single measurement."""
    return {
        "structure": structure_name,
        "mode": mode,
        "operation": operation,
        "repeat": repeat,
        "time_sec": time_sec,
        "n": count,
        # The bucket count is only meaningful for the hash table.
        "bucket_count": bucket_count if structure_name == "HashTable" else "",
    }


def run_experiment(count=10000, repeats=5, seed=42, bucket_count=20011):
    """Time insert, find and delete for every structure and input ordering.

    For each structure in ``STRUCTURES`` and each mode in ``MODES`` the
    experiment is repeated ``repeats`` times. Every repeat builds a fresh
    structure from all ``count`` records, then runs the find and delete
    phases on that same populated structure.

    Args:
        count: number of generated records.
        repeats: number of measurements per structure/mode pair.
        seed: base seed for shuffling the input and for sampling names.
        bucket_count: bucket count passed on when building the hash table.

    Returns:
        A list of dicts with the keys ``structure``, ``mode``, ``operation``,
        ``repeat``, ``time_sec``, ``n`` and ``bucket_count``; three rows
        (insert, find, delete) per repeat.
    """
    record_sets = prepare_records(count, seed)
    all_names = [name for name, _phone in record_sets["sorted"]]
    results = []

    for structure_name in STRUCTURES:
        for mode in MODES:
            records = record_sets[mode]
            names_for_sampling = [name for name, _phone in records]

            for repeat in range(1, repeats + 1):
                # NOTE(review): the seed mixes in the lengths of the structure
                # and mode names, so different structure/mode pairs sample
                # different names - confirm this is intended before comparing
                # find/delete timings across structures.
                rng = random.Random(seed + repeat * 1000 + len(structure_name) + len(mode))
                find_existing = rng.sample(names_for_sampling, min(100, count))
                find_missing = [f"None_{repeat}_{index}" for index in range(10)]
                find_names = find_existing + find_missing
                delete_names = rng.sample(all_names, min(50, count))

                structure, insert_time = _elapsed(
                    lambda: _insert_all(structure_name, records, bucket_count)
                )
                results.append(
                    _result_row(structure_name, mode, "insert", repeat, insert_time, count, bucket_count)
                )

                structure, find_time = _elapsed(
                    lambda: _find_all(structure_name, structure, find_names)
                )
                results.append(
                    _result_row(structure_name, mode, "find", repeat, find_time, count, bucket_count)
                )

                structure, delete_time = _elapsed(
                    lambda: _delete_all(structure_name, structure, delete_names)
                )
                results.append(
                    _result_row(structure_name, mode, "delete", repeat, delete_time, count, bucket_count)
                )

    return results
|
||||
|
||||
|
||||
def summarize(results):
    """Aggregate detailed measurements into one row per combination.

    ``results`` is a list of row dicts carrying at least ``structure``,
    ``mode``, ``operation`` and ``time_sec``. The summary has one dict for
    every (structure, mode, operation) combination from ``STRUCTURES``,
    ``MODES`` and ``OPERATIONS``, in that nesting order, with the mean time
    and all individual timings joined by ``;`` (nine decimal places each).

    Raises:
        KeyError: if ``results`` has no measurement for some combination.
    """
    timings = {}
    for row in results:
        combination = (row["structure"], row["mode"], row["operation"])
        if combination not in timings:
            timings[combination] = []
        timings[combination].append(row["time_sec"])

    summary = []
    for structure_name in STRUCTURES:
        for mode in MODES:
            for operation in OPERATIONS:
                samples = timings[(structure_name, mode, operation)]
                mean_time = sum(samples) / len(samples)
                formatted = [f"{value:.9f}" for value in samples]
                entry = {
                    "structure": structure_name,
                    "mode": mode,
                    "operation": operation,
                    "average_time_sec": mean_time,
                    "measurements_sec": ";".join(formatted),
                }
                summary.append(entry)
    return summary
|
||||
|
||||
|
||||
def write_csv(path, rows, fieldnames):
    """Write ``rows`` (dicts) to ``path`` as UTF-8 CSV with a header line.

    Missing parent directories of ``path`` are created. ``fieldnames``
    fixes the column order.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    # newline="" lets the csv module control line endings itself.
    with path.open("w", encoding="utf-8", newline="") as handle:
        table = csv.DictWriter(handle, fieldnames=fieldnames)
        table.writeheader()
        for row in rows:
            table.writerow(row)
|
||||
|
||||
|
||||
def write_chart(path, summary):
    """Save a log-scale bar chart of the average timings to ``path``.

    matplotlib is used when it can be imported; otherwise the drawing is
    delegated to ``write_svg_chart``. ``summary`` is a list of dicts with
    the keys ``structure``, ``mode``, ``operation`` and ``average_time_sec``.
    """
    try:
        import matplotlib.pyplot as plt
    except ModuleNotFoundError:
        # matplotlib is optional: fall back to the hand-written SVG renderer.
        write_svg_chart(path, summary)
        return

    palette = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    tick_labels = []
    heights = []
    bar_colors = []
    for row in summary:
        tick_labels.append(f"{row['structure']}\n{row['mode']}\n{row['operation']}")
        heights.append(row["average_time_sec"])
        bar_colors.append(palette[row["operation"]])

    path.parent.mkdir(parents=True, exist_ok=True)

    plt.figure(figsize=(14, 7))
    plt.bar(range(len(heights)), heights, color=bar_colors)
    plt.yscale("log")
    plt.ylabel("Среднее время, секунд (логарифмическая шкала)")
    plt.title("Сравнение операций телефонного справочника")
    plt.xticks(range(len(tick_labels)), tick_labels, rotation=45, ha="right", fontsize=8)
    plt.tight_layout()
    plt.savefig(path, dpi=160)
    plt.close()
|
||||
|
||||
|
||||
def write_svg_chart(path, summary):
    """Render the summary as a log-scale bar chart in plain SVG.

    This renderer needs no third-party packages. ``summary`` is a list of
    dicts with the keys ``structure``, ``mode``, ``operation`` and
    ``average_time_sec``. Missing parent directories of ``path`` are
    created and the file is written as UTF-8.

    An empty ``summary`` produces a chart frame without bars instead of
    failing on ``min()`` of an empty sequence or dividing by zero.
    """
    width = 1500
    height = 760
    margin_left = 90
    margin_right = 40
    margin_top = 70
    margin_bottom = 210
    plot_width = width - margin_left - margin_right
    plot_height = height - margin_top - margin_bottom
    baseline = margin_top + plot_height

    # Clamp to a tiny positive value so log10 is always defined.
    values = [max(row["average_time_sec"], 1e-12) for row in summary]
    if values:
        log_min = math.floor(math.log10(min(values)))
        log_max = math.ceil(math.log10(max(values)))
    else:
        # Nothing to plot: use an arbitrary single decade for the grid.
        log_min, log_max = -1, 0
    if log_min == log_max:
        # All values sit on one power of ten; widen the axis by a decade
        # each way so the scale has a non-zero span.
        log_min -= 1
        log_max += 1

    def y_for(value):
        log_value = math.log10(max(value, 1e-12))
        return margin_top + (log_max - log_value) / (log_max - log_min) * plot_height

    colors_by_operation = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    # max(..., 1) keeps the division defined when there are no bars.
    slot_width = plot_width / max(len(summary), 1)
    bar_width = slot_width * 0.62

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#ffffff"/>',
        '<style>text{font-family:Arial,Helvetica,sans-serif;fill:#222}.axis{stroke:#222;stroke-width:1}.grid{stroke:#ddd;stroke-width:1}.label{font-size:13px}.tick{font-size:12px}.title{font-size:24px;font-weight:700}.legend{font-size:14px}</style>',
        f'<text class="title" x="{width / 2}" y="35" text-anchor="middle">Сравнение операций телефонного справочника</text>',
        f'<line class="axis" x1="{margin_left}" y1="{baseline}" x2="{width - margin_right}" y2="{baseline}"/>',
        f'<line class="axis" x1="{margin_left}" y1="{margin_top}" x2="{margin_left}" y2="{baseline}"/>',
    ]

    # Horizontal grid line and tick label for every power of ten in range.
    for exponent in range(log_min, log_max + 1):
        value = 10 ** exponent
        y = y_for(value)
        lines.append(
            f'<line class="grid" x1="{margin_left}" y1="{y:.2f}" x2="{width - margin_right}" y2="{y:.2f}"/>'
        )
        lines.append(
            f'<text class="tick" x="{margin_left - 10}" y="{y + 4:.2f}" text-anchor="end">1e{exponent}</text>'
        )

    # One bar per summary row, with its value above and a rotated label below.
    for index, row in enumerate(summary):
        x = margin_left + index * slot_width + (slot_width - bar_width) / 2
        y = y_for(row["average_time_sec"])
        bar_height = baseline - y
        color = colors_by_operation[row["operation"]]
        label = f"{row['structure']} / {row['mode']} / {row['operation']}"

        lines.append(
            f'<rect x="{x:.2f}" y="{y:.2f}" width="{bar_width:.2f}" height="{bar_height:.2f}" fill="{color}"/>'
        )
        lines.append(
            f'<text class="tick" x="{x + bar_width / 2:.2f}" y="{y - 5:.2f}" text-anchor="middle">{row["average_time_sec"]:.3g}</text>'
        )
        lines.append(
            f'<text class="label" transform="translate({x + bar_width / 2:.2f} {baseline + 18:.2f}) rotate(55)" text-anchor="start">{html.escape(label)}</text>'
        )

    legend_x = margin_left
    legend_y = height - 30
    for offset, (operation, color) in enumerate(colors_by_operation.items()):
        x = legend_x + offset * 130
        lines.append(f'<rect x="{x}" y="{legend_y - 12}" width="18" height="18" fill="{color}"/>')
        lines.append(f'<text class="legend" x="{x + 26}" y="{legend_y + 2}">{operation}</text>')

    lines.append(
        f'<text class="label" transform="translate(24 {margin_top + plot_height / 2}) rotate(-90)" text-anchor="middle">Среднее время, секунд (логарифмическая шкала)</text>'
    )
    lines.append("</svg>")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
|
||||
|
||||
|
||||
def main():
    """Parse command-line options, run the experiment and save its outputs.

    Writes ``results.csv``, ``summary.csv`` and ``performance.svg`` into the
    directory given by ``--output-dir`` and prints where each file went.
    Invalid numeric options are reported through ``parser.error``, which
    prints a usage message and exits.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--n", type=int, default=10000, help="number of generated records")
    parser.add_argument("--repeats", type=int, default=5, help="number of repeated measurements")
    parser.add_argument("--seed", type=int, default=42, help="random seed")
    parser.add_argument("--bucket-count", type=int, default=20011, help="hash-table bucket count")
    parser.add_argument("--output-dir", type=Path, default=Path("docs/data"))
    args = parser.parse_args()

    # Reject values that would otherwise fail deep inside the run:
    # a negative sample size, no measurements to average, or a modulo by
    # zero when computing bucket indexes.
    if args.n < 0:
        parser.error("--n must be non-negative")
    if args.repeats < 1:
        parser.error("--repeats must be at least 1")
    if args.bucket_count < 1:
        parser.error("--bucket-count must be at least 1")

    results = run_experiment(
        count=args.n,
        repeats=args.repeats,
        seed=args.seed,
        bucket_count=args.bucket_count,
    )
    summary = summarize(results)

    results_path = args.output_dir / "results.csv"
    summary_path = args.output_dir / "summary.csv"
    chart_path = args.output_dir / "performance.svg"

    write_csv(
        results_path,
        results,
        ["structure", "mode", "operation", "repeat", "time_sec", "n", "bucket_count"],
    )
    write_csv(
        summary_path,
        summary,
        ["structure", "mode", "operation", "average_time_sec", "measurements_sec"],
    )
    write_chart(chart_path, summary)

    print(f"Saved detailed results to {results_path}")
    print(f"Saved summary to {summary_path}")
    print(f"Saved chart to {chart_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
2431
shahovaa/zadanie1/docs/data/performance.svg
Normal file
2431
shahovaa/zadanie1/docs/data/performance.svg
Normal file
File diff suppressed because it is too large
Load Diff
|
After Width: | Height: | Size: 78 KiB |
91
shahovaa/zadanie1/docs/data/results.csv
Normal file
91
shahovaa/zadanie1/docs/data/results.csv
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
structure,mode,operation,repeat,time_sec,n,bucket_count
|
||||
LinkedList,shuffled,insert,1,1.5487497089998215,10000,
|
||||
LinkedList,shuffled,find,1,0.013355207998756669,10000,
|
||||
LinkedList,shuffled,delete,1,0.006138000000646571,10000,
|
||||
LinkedList,shuffled,insert,2,1.6062446670002828,10000,
|
||||
LinkedList,shuffled,find,2,0.014175791999150533,10000,
|
||||
LinkedList,shuffled,delete,2,0.007367083000644925,10000,
|
||||
LinkedList,shuffled,insert,3,1.5470056670001213,10000,
|
||||
LinkedList,shuffled,find,3,0.014115500000116299,10000,
|
||||
LinkedList,shuffled,delete,3,0.006011666999256704,10000,
|
||||
LinkedList,shuffled,insert,4,1.5362317910003185,10000,
|
||||
LinkedList,shuffled,find,4,0.01460650000080932,10000,
|
||||
LinkedList,shuffled,delete,4,0.006377084000632749,10000,
|
||||
LinkedList,shuffled,insert,5,1.541476624999632,10000,
|
||||
LinkedList,shuffled,find,5,0.014646625000750646,10000,
|
||||
LinkedList,shuffled,delete,5,0.005829540999911842,10000,
|
||||
LinkedList,sorted,insert,1,1.4639895000000251,10000,
|
||||
LinkedList,sorted,find,1,0.012882999999419553,10000,
|
||||
LinkedList,sorted,delete,1,0.005734124999435153,10000,
|
||||
LinkedList,sorted,insert,2,1.4757493329998397,10000,
|
||||
LinkedList,sorted,find,2,0.013435208000373677,10000,
|
||||
LinkedList,sorted,delete,2,0.006567624999661348,10000,
|
||||
LinkedList,sorted,insert,3,1.474924916999953,10000,
|
||||
LinkedList,sorted,find,3,0.012946166998517583,10000,
|
||||
LinkedList,sorted,delete,3,0.005636875001073349,10000,
|
||||
LinkedList,sorted,insert,4,1.6074728750008944,10000,
|
||||
LinkedList,sorted,find,4,0.012849667000409681,10000,
|
||||
LinkedList,sorted,delete,4,0.006610207999983686,10000,
|
||||
LinkedList,sorted,insert,5,1.5465652919992863,10000,
|
||||
LinkedList,sorted,find,5,0.012851292000050307,10000,
|
||||
LinkedList,sorted,delete,5,0.005656833000102779,10000,
|
||||
HashTable,shuffled,insert,1,0.005485583000336192,10000,20011
|
||||
HashTable,shuffled,find,1,5.770799907622859e-05,10000,20011
|
||||
HashTable,shuffled,delete,1,3.570800072338898e-05,10000,20011
|
||||
HashTable,shuffled,insert,2,0.006064958999559167,10000,20011
|
||||
HashTable,shuffled,find,2,5.854200026078615e-05,10000,20011
|
||||
HashTable,shuffled,delete,2,3.495800046948716e-05,10000,20011
|
||||
HashTable,shuffled,insert,3,0.005850707999343285,10000,20011
|
||||
HashTable,shuffled,find,3,5.441699977382086e-05,10000,20011
|
||||
HashTable,shuffled,delete,3,2.7292000595480204e-05,10000,20011
|
||||
HashTable,shuffled,insert,4,0.005818375000671949,10000,20011
|
||||
HashTable,shuffled,find,4,5.387499913922511e-05,10000,20011
|
||||
HashTable,shuffled,delete,4,2.683300044736825e-05,10000,20011
|
||||
HashTable,shuffled,insert,5,0.006451041999753215,10000,20011
|
||||
HashTable,shuffled,find,5,5.6000000768108293e-05,10000,20011
|
||||
HashTable,shuffled,delete,5,2.937499994004611e-05,10000,20011
|
||||
HashTable,sorted,insert,1,0.005557000000408152,10000,20011
|
||||
HashTable,sorted,find,1,5.608300125459209e-05,10000,20011
|
||||
HashTable,sorted,delete,1,2.8624999686144292e-05,10000,20011
|
||||
HashTable,sorted,insert,2,0.005895457999940845,10000,20011
|
||||
HashTable,sorted,find,2,6.0874999689986e-05,10000,20011
|
||||
HashTable,sorted,delete,2,3.199999991920777e-05,10000,20011
|
||||
HashTable,sorted,insert,3,0.005766083999333205,10000,20011
|
||||
HashTable,sorted,find,3,5.500000042957254e-05,10000,20011
|
||||
HashTable,sorted,delete,3,2.7874999432242475e-05,10000,20011
|
||||
HashTable,sorted,insert,4,0.005590124999798718,10000,20011
|
||||
HashTable,sorted,find,4,5.337499896995723e-05,10000,20011
|
||||
HashTable,sorted,delete,4,2.6959000024362467e-05,10000,20011
|
||||
HashTable,sorted,insert,5,0.007889499998782412,10000,20011
|
||||
HashTable,sorted,find,5,5.549999877985101e-05,10000,20011
|
||||
HashTable,sorted,delete,5,2.7749998480430804e-05,10000,20011
|
||||
BST,shuffled,insert,1,0.011201125000297907,10000,
|
||||
BST,shuffled,find,1,9.245900037058163e-05,10000,
|
||||
BST,shuffled,delete,1,6.958300036785658e-05,10000,
|
||||
BST,shuffled,insert,2,0.011337707999700797,10000,
|
||||
BST,shuffled,find,2,9.545799912302755e-05,10000,
|
||||
BST,shuffled,delete,2,7.141599962778855e-05,10000,
|
||||
BST,shuffled,insert,3,0.01119999999900756,10000,
|
||||
BST,shuffled,find,3,9.308299922849983e-05,10000,
|
||||
BST,shuffled,delete,3,6.779199975426309e-05,10000,
|
||||
BST,shuffled,insert,4,0.011189917000592686,10000,
|
||||
BST,shuffled,find,4,9.675000001152512e-05,10000,
|
||||
BST,shuffled,delete,4,6.624999878113158e-05,10000,
|
||||
BST,shuffled,insert,5,0.01118529100131127,10000,
|
||||
BST,shuffled,find,5,8.670799979881849e-05,10000,
|
||||
BST,shuffled,delete,5,6.904200017743278e-05,10000,
|
||||
BST,sorted,insert,1,2.2425066659998265,10000,
|
||||
BST,sorted,find,1,0.018234625000332016,10000,
|
||||
BST,sorted,delete,1,0.010230416999547742,10000,
|
||||
BST,sorted,insert,2,2.26542979199985,10000,
|
||||
BST,sorted,find,2,0.021546082998611382,10000,
|
||||
BST,sorted,delete,2,0.011778292000599322,10000,
|
||||
BST,sorted,insert,3,2.246992708000107,10000,
|
||||
BST,sorted,find,3,0.01936033300080453,10000,
|
||||
BST,sorted,delete,3,0.010003166000387864,10000,
|
||||
BST,sorted,insert,4,2.2515108749994397,10000,
|
||||
BST,sorted,find,4,0.021122417001606664,10000,
|
||||
BST,sorted,delete,4,0.01173120800012839,10000,
|
||||
BST,sorted,insert,5,2.2457697090012516,10000,
|
||||
BST,sorted,find,5,0.01902170900029887,10000,
|
||||
BST,sorted,delete,5,0.010273834001054638,10000,
|
||||
|
19
shahovaa/zadanie1/docs/data/summary.csv
Normal file
19
shahovaa/zadanie1/docs/data/summary.csv
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
structure,mode,operation,average_time_sec,measurements_sec
|
||||
LinkedList,shuffled,insert,1.5559416918000353,1.548749709;1.606244667;1.547005667;1.536231791;1.541476625
|
||||
LinkedList,shuffled,find,0.014179924999916693,0.013355208;0.014175792;0.014115500;0.014606500;0.014646625
|
||||
LinkedList,shuffled,delete,0.006344675000218558,0.006138000;0.007367083;0.006011667;0.006377084;0.005829541
|
||||
LinkedList,sorted,insert,1.5137403833999996,1.463989500;1.475749333;1.474924917;1.607472875;1.546565292
|
||||
LinkedList,sorted,find,0.01299306679975416,0.012883000;0.013435208;0.012946167;0.012849667;0.012851292
|
||||
LinkedList,sorted,delete,0.006041133200051263,0.005734125;0.006567625;0.005636875;0.006610208;0.005656833
|
||||
HashTable,shuffled,insert,0.005934133399932762,0.005485583;0.006064959;0.005850708;0.005818375;0.006451042
|
||||
HashTable,shuffled,find,5.61083998036338e-05,0.000057708;0.000058542;0.000054417;0.000053875;0.000056000
|
||||
HashTable,shuffled,delete,3.083320043515414e-05,0.000035708;0.000034958;0.000027292;0.000026833;0.000029375
|
||||
HashTable,sorted,insert,0.006139633399652666,0.005557000;0.005895458;0.005766084;0.005590125;0.007889500
|
||||
HashTable,sorted,find,5.6166599824791776e-05,0.000056083;0.000060875;0.000055000;0.000053375;0.000055500
|
||||
HashTable,sorted,delete,2.8641799508477563e-05,0.000028625;0.000032000;0.000027875;0.000026959;0.000027750
|
||||
BST,shuffled,insert,0.011222808200182044,0.011201125;0.011337708;0.011200000;0.011189917;0.011185291
|
||||
BST,shuffled,find,9.289159970649052e-05,0.000092459;0.000095458;0.000093083;0.000096750;0.000086708
|
||||
BST,shuffled,delete,6.881659974169451e-05,0.000069583;0.000071416;0.000067792;0.000066250;0.000069042
|
||||
BST,sorted,insert,2.250441950000095,2.242506666;2.265429792;2.246992708;2.251510875;2.245769709
|
||||
BST,sorted,find,0.019857033400330692,0.018234625;0.021546083;0.019360333;0.021122417;0.019021709
|
||||
BST,sorted,delete,0.010803383400343591,0.010230417;0.011778292;0.010003166;0.011731208;0.010273834
|
||||
|
112
shahovaa/zadanie1/docs/report.md
Normal file
112
shahovaa/zadanie1/docs/report.md
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
# Отчет по заданию 1: структуры данных
|
||||
|
||||
## Цель
|
||||
|
||||
Реализовать три структуры данных с нуля в процедурной парадигме и сравнить
|
||||
скорость основных операций телефонного справочника:
|
||||
|
||||
- `insert(name, phone)` - добавить или обновить запись;
|
||||
- `find(name)` - найти телефон по имени;
|
||||
- `delete(name)` - удалить запись;
|
||||
- `list_all()` - получить все записи, отсортированные по имени.
|
||||
|
||||
Классы не использовались. Узлы связного списка и дерева представлены
|
||||
словарями, хеш-таблица представлена списком бакетов.
|
||||
|
||||
## Реализация
|
||||
|
||||
Код находится в файле `phonebook.py`.
|
||||
|
||||
Реализованы функции:
|
||||
|
||||
- связный список: `ll_insert`, `ll_find`, `ll_delete`, `ll_list_all`;
|
||||
- хеш-таблица: `create_hash_table`, `ht_insert`, `ht_find`, `ht_delete`, `ht_list_all`;
|
||||
- двоичное дерево поиска: `bst_insert`, `bst_find`, `bst_delete`, `bst_list_all`.
|
||||
|
||||
Для хеш-таблицы используется метод цепочек: каждый бакет хранит голову
|
||||
связного списка. Хеш-функция написана вручную, чтобы результат не зависел от
|
||||
рандомизации встроенной функции `hash()` в Python.
|
||||
|
||||
Для BST вставка, поиск, удаление и обход написаны без классов. Обход
|
||||
`bst_list_all` реализован итеративно, чтобы отсортированный вход на 10000
|
||||
элементов не приводил к переполнению стека рекурсии.
|
||||
|
||||
## Методика эксперимента
|
||||
|
||||
Скрипт эксперимента находится в файле `benchmark.py`.
|
||||
|
||||
Параметры запуска:
|
||||
|
||||
- количество записей: `N = 10000`;
|
||||
- число повторов каждого эксперимента: `5`;
|
||||
- имена: `User_00000`, `User_00001`, ..., `User_09999`;
|
||||
- два режима входных данных: `shuffled` и `sorted`;
|
||||
- поиск: 100 существующих имен и 10 отсутствующих;
|
||||
- удаление: 50 случайных существующих имен;
|
||||
- размер хеш-таблицы: `20011` бакетов.
|
||||
|
||||
После вставки структура не пересоздается: поиск и удаление выполняются на той
|
||||
же заполненной структуре. Для каждого режима и каждой структуры создается новая
|
||||
структура.
|
||||
|
||||
Файлы с результатами:
|
||||
|
||||
- `docs/data/results.csv` - все отдельные замеры;
|
||||
- `docs/data/summary.csv` - среднее время и список всех пяти замеров;
|
||||
- `docs/data/performance.svg` - столбчатая диаграмма средних значений.
|
||||
|
||||

|
||||
|
||||
## Средние результаты
|
||||
|
||||
Время указано в секундах.
|
||||
|
||||
| Структура | Режим | Вставка | Поиск | Удаление |
|
||||
|---|---:|---:|---:|---:|
|
||||
| LinkedList | shuffled | 1.555942 | 0.014180 | 0.006345 |
|
||||
| LinkedList | sorted | 1.513740 | 0.012993 | 0.006041 |
|
||||
| HashTable | shuffled | 0.005934 | 0.000056 | 0.000031 |
|
||||
| HashTable | sorted | 0.006140 | 0.000056 | 0.000029 |
|
||||
| BST | shuffled | 0.011223 | 0.000093 | 0.000069 |
|
||||
| BST | sorted | 2.250442 | 0.019857 | 0.010803 |
|
||||
|
||||
## Анализ
|
||||
|
||||
Связный список оказался самым медленным на перемешанных данных; на отсортированном входе медленнее него только вырожденное BST. Причина в том, что
|
||||
для корректной операции `insert` нужно проверить, есть ли уже запись с таким
|
||||
именем. При уникальных именах почти каждая вставка проходит по всему текущему
|
||||
списку, поэтому суммарная сложность вставки всех записей становится `O(n^2)`.
|
||||
Порядок входных данных почти не влияет на результат, потому что структура не
|
||||
использует порядок ключей.
|
||||
|
||||
Хеш-таблица показала лучшие результаты во всех измеренных операциях. При хорошем
|
||||
распределении по бакетам вставка, поиск и удаление близки к `O(1)`. Порядок
|
||||
входных данных почти не влияет на время, так как индекс бакета определяется
|
||||
хешем имени, а не расположением записи во входном списке.
|
||||
|
||||
BST хорошо работает на перемешанных данных: дерево получается сравнительно
|
||||
сбалансированным, поэтому операции близки к `O(log n)`. На отсортированном
|
||||
входе обычное двоичное дерево поиска вырождается в цепочку: каждый новый ключ
|
||||
становится правым потомком предыдущего. Из-за этого вставка всех записей
|
||||
становится `O(n^2)`, а поиск и удаление приближаются к поведению связного
|
||||
списка.
|
||||
|
||||
Удаление у хеш-таблицы быстрое по той же причине, что и поиск: сначала
|
||||
вычисляется бакет, затем просматривается короткая цепочка. В BST удаление
|
||||
быстрое на перемешанном дереве, но на вырожденном дереве оно замедляется.
|
||||
В связном списке удаление требует линейного поиска удаляемого элемента.
|
||||
|
||||
## Вывод
|
||||
|
||||
Для частого поиска, обновления и удаления по точному имени лучше выбирать
|
||||
хеш-таблицу. Она быстрее всего в эксперименте и почти не зависит от порядка
|
||||
вставки.
|
||||
|
||||
Если нужно часто получать данные в отсортированном порядке, дерево поиска дает
|
||||
удобный `in-order` обход без отдельной сортировки. Но обычный BST чувствителен
|
||||
к порядку входных данных, поэтому на практике лучше использовать
|
||||
самобалансирующееся дерево или готовую структуру из библиотеки.
|
||||
|
||||
Связный список подходит только для маленьких наборов данных или учебных задач.
|
||||
Для телефонного справочника с частым поиском он неудачен, потому что каждая
|
||||
операция поиска требует последовательного прохода по элементам.
|
||||
255
shahovaa/zadanie1/phonebook.py
Normal file
255
shahovaa/zadanie1/phonebook.py
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
"""Procedural phone book data structures for assignment 1.
|
||||
|
||||
The task explicitly asks to avoid classes, so every structure is represented
|
||||
with plain dictionaries, lists and functions.
|
||||
"""
|
||||
|
||||
|
||||
def _make_ll_node(name, phone, next_node=None):
    """Create a linked-list node: a dict with ``name``, ``phone`` and ``next``."""
    node = {"name": name, "phone": phone}
    node["next"] = next_node
    return node
|
||||
|
||||
|
||||
def ll_insert(head, name, phone):
    """Add a record to the list or update the phone of an existing name.

    The list is scanned from ``head``; if ``name`` is found its phone is
    overwritten, otherwise a new node is appended after the last node.
    Returns the head of the list (a new node when ``head`` is ``None``).
    """
    if head is None:
        return _make_ll_node(name, phone)

    node = head
    while True:
        if node["name"] == name:
            node["phone"] = phone
            return head
        following = node["next"]
        if following is None:
            # Reached the tail without a match: append here.
            node["next"] = _make_ll_node(name, phone)
            return head
        node = following
|
||||
|
||||
|
||||
def ll_find(head, name):
    """Look up ``name`` in the list starting at ``head``.

    Returns the stored phone, or ``None`` when no node has that name.
    """
    node = head
    while node is not None and node["name"] != name:
        node = node["next"]
    if node is None:
        return None
    return node["phone"]
|
||||
|
||||
|
||||
def ll_delete(head, name):
    """Unlink the first node named ``name`` from the list.

    Returns the head of the list after the deletion: the second node when
    the head itself was removed, otherwise the original ``head`` (also when
    the name is not present).
    """
    if head is None:
        return head
    if head["name"] == name:
        return head["next"]

    before = head
    node = head["next"]
    while node is not None:
        if node["name"] == name:
            # Bypass the matching node.
            before["next"] = node["next"]
            return head
        before, node = node, node["next"]
    return head
|
||||
|
||||
|
||||
def ll_list_all(head):
    """Collect every record of the list as ``(name, phone)`` tuples.

    The returned list is sorted by name.
    """
    collected = []
    node = head
    while node is not None:
        collected.append((node["name"], node["phone"]))
        node = node["next"]
    collected.sort(key=lambda record: record[0])
    return collected
|
||||
|
||||
|
||||
def create_hash_table(size=20011):
    """Create a fixed-size hash table with separate chaining.

    The table is a list of ``size`` buckets, each initially ``None`` (an
    empty chain).

    Raises:
        ValueError: if ``size`` is less than 1. A table without buckets
            cannot store anything, because bucket indexes are computed
            modulo the table length.
    """
    if size < 1:
        raise ValueError(f"Hash table size must be at least 1, got {size!r}")
    return [None] * size
|
||||
|
||||
|
||||
def _hash_name(name, bucket_count):
    """Map ``name`` to a bucket index with a base-31 polynomial hash.

    The hash is computed from character code points only, so it does not
    depend on Python's built-in ``hash()``. The running value is reduced
    modulo ``bucket_count`` after every character.
    """
    bucket = 0
    for code_point in map(ord, name):
        bucket = (31 * bucket + code_point) % bucket_count
    return bucket
|
||||
|
||||
|
||||
def ht_insert(buckets, name, phone):
    """Add a record to the hash table or update an existing name.

    The bucket is chosen by hashing ``name`` over ``len(buckets)``; the
    record is then inserted into that bucket's linked list. The table is
    modified in place.
    """
    slot = _hash_name(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
|
||||
|
||||
|
||||
def ht_find(buckets, name):
    """Look up ``name`` in the hash table.

    Returns the stored phone, or ``None`` when the name is not present.
    """
    slot = _hash_name(name, len(buckets))
    chain = buckets[slot]
    return ll_find(chain, name)
|
||||
|
||||
|
||||
def ht_delete(buckets, name):
    """Remove ``name`` from the hash table; do nothing if it is absent.

    The table is modified in place.
    """
    slot = _hash_name(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
|
||||
|
||||
|
||||
def ht_list_all(buckets):
    """Collect every record of the hash table as ``(name, phone)`` tuples.

    All bucket chains are walked and the combined list is sorted by name.
    """
    collected = []
    for chain in buckets:
        node = chain
        while node is not None:
            collected.append((node["name"], node["phone"]))
            node = node["next"]
    collected.sort(key=lambda record: record[0])
    return collected
|
||||
|
||||
|
||||
def _make_bst_node(name, phone):
    """Create a childless tree node: a dict with ``name``, ``phone``, ``left``, ``right``."""
    return dict(name=name, phone=phone, left=None, right=None)
|
||||
|
||||
|
||||
def bst_insert(root, name, phone):
    """Add a record to the tree or update the phone of an existing name.

    Names smaller than a node's name go to its left subtree, all others to
    the right. The descent is iterative. Returns the root of the tree (a
    new node when ``root`` is ``None``).
    """
    if root is None:
        return _make_bst_node(name, phone)

    node = root
    while node["name"] != name:
        side = "left" if name < node["name"] else "right"
        child = node[side]
        if child is None:
            # Free slot found: attach the new leaf here.
            node[side] = _make_bst_node(name, phone)
            return root
        node = child

    # The loop ended on a node with the same name: update it.
    node["phone"] = phone
    return root
|
||||
|
||||
|
||||
def bst_find(root, name):
    """Look up ``name`` in the tree rooted at ``root``.

    Returns the stored phone, or ``None`` when the name is not present.
    """
    node = root
    while node is not None and node["name"] != name:
        node = node["left"] if name < node["name"] else node["right"]
    return None if node is None else node["phone"]
|
||||
|
||||
|
||||
def _detach_min(node):
    """Remove the leftmost node from the subtree rooted at ``node``.

    Returns ``(new_subtree, min_node)``. When ``node`` itself is the
    minimum, the new subtree is its right child and the node is returned
    untouched; otherwise the minimum's right child takes its place under
    its parent and the minimum's ``right`` link is cleared.
    """
    if node["left"] is None:
        return node["right"], node

    parent = node
    smallest = node["left"]
    while smallest["left"] is not None:
        parent = smallest
        smallest = smallest["left"]

    parent["left"] = smallest["right"]
    smallest["right"] = None
    return node, smallest
|
||||
|
||||
|
||||
def bst_delete(root, name):
    """Remove the node named ``name`` from the tree.

    A node with at most one child is replaced by that child (or by
    nothing). A node with two children is replaced by the minimum of its
    right subtree. Returns the root of the tree after the deletion, which
    differs from ``root`` only when the root itself was removed; a missing
    name leaves the tree unchanged.
    """
    parent = None
    target = root
    while target is not None:
        if target["name"] == name:
            break
        parent = target
        target = target["left"] if name < target["name"] else target["right"]

    if target is None:
        return root

    left_child = target["left"]
    right_child = target["right"]
    if left_child is not None and right_child is not None:
        # Two children: the in-order successor takes over both subtrees.
        remaining_right, replacement = _detach_min(right_child)
        replacement["left"] = left_child
        replacement["right"] = remaining_right
    elif left_child is None:
        replacement = right_child
    else:
        replacement = left_child

    if parent is None:
        return replacement

    # Identity check: find which link of the parent pointed at the target.
    side = "left" if parent["left"] is target else "right"
    parent[side] = replacement
    return root
|
||||
|
||||
|
||||
def bst_list_all(root):
    """Return every record of the tree as ``(name, phone)`` tuples in name order.

    Uses an in-order traversal with an explicit stack instead of
    recursion.
    """
    ordered = []
    pending = []
    node = root
    while True:
        if node is not None:
            # Keep descending left, remembering the path.
            pending.append(node)
            node = node["left"]
        elif pending:
            node = pending.pop()
            ordered.append((node["name"], node["phone"]))
            node = node["right"]
        else:
            return ordered
|
||||
|
||||
|
||||
def _assert_basic_operations():
    """Smoke-check insert, update, find, delete and listing on all three structures.

    Raises ``AssertionError`` on the first check that fails.
    """
    sample = [("Boris", "222"), ("Anna", "111"), ("Denis", "444")]
    sample_in_order = [("Anna", "111"), ("Boris", "222"), ("Denis", "444")]

    # Linked list: insert, update, delete, list.
    ll_head = None
    for entry_name, entry_phone in sample:
        ll_head = ll_insert(ll_head, entry_name, entry_phone)
    assert ll_find(ll_head, "Anna") == "111"
    ll_head = ll_insert(ll_head, "Anna", "333")
    assert ll_find(ll_head, "Anna") == "333"
    ll_head = ll_delete(ll_head, "Anna")
    assert ll_find(ll_head, "Anna") is None
    assert ll_list_all(ll_head) == [("Boris", "222"), ("Denis", "444")]

    # Hash table: insert, update, delete of a missing name, list.
    small_table = create_hash_table(17)
    for entry_name, entry_phone in sample:
        ht_insert(small_table, entry_name, entry_phone)
    assert ht_find(small_table, "Denis") == "444"
    ht_insert(small_table, "Denis", "555")
    assert ht_find(small_table, "Denis") == "555"
    ht_delete(small_table, "Missing")
    assert ("Anna", "111") in ht_list_all(small_table)

    # Binary search tree: insert, ordered listing, delete of the root.
    tree_root = None
    for entry_name, entry_phone in sample:
        tree_root = bst_insert(tree_root, entry_name, entry_phone)
    assert bst_list_all(tree_root) == sample_in_order
    tree_root = bst_delete(tree_root, "Boris")
    assert bst_find(tree_root, "Boris") is None
    assert bst_list_all(tree_root) == [("Anna", "111"), ("Denis", "444")]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_assert_basic_operations()
|
||||
print("All phonebook checks passed.")
|
||||
1
shahovaa/zadanie1/requirements.txt
Normal file
1
shahovaa/zadanie1/requirements.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
matplotlib>=3.8
|
||||
Loading…
Reference in New Issue
Block a user