2026-rff_mp/shahovaa/zadanie1/benchmark.py
2026-05-19 22:11:31 +03:00

360 lines
12 KiB
Python

"""Run performance experiments for the procedural phone book structures."""
import argparse
import csv
import html
import math
import random
import time
from pathlib import Path
from phonebook import (
bst_delete,
bst_find,
bst_insert,
create_hash_table,
ht_delete,
ht_find,
ht_insert,
ll_delete,
ll_find,
ll_insert,
)
# Names of the benchmarked data structures; each name selects a branch in the
# _insert_all / _find_all / _delete_all dispatch helpers.
STRUCTURES = ("LinkedList", "HashTable", "BST")
# Input orderings: records in a seeded random order or in generation order.
MODES = ("shuffled", "sorted")
# Operations timed for every structure/mode pair, in the order they are run.
OPERATIONS = ("insert", "find", "delete")
def generate_records(count):
    """Build ``count`` synthetic phone book entries.

    Returns a list of ``(name, phone)`` tuples for indices ``0 .. count - 1``.
    The index is zero-padded to at least five digits in both fields, so the
    list is already in ascending name order for counts up to 100000.
    """
    records = []
    for number in range(count):
        suffix = f"{number:05d}"
        records.append((f"User_{suffix}", f"+7-900-{suffix}"))
    return records
def prepare_records(count, seed):
    """Create the two input orderings used by the benchmark.

    Args:
        count: Number of records to generate.
        seed: Seed for the private random generator that shuffles the copy.

    Returns:
        A dict with the keys ``"sorted"`` (records in generation order) and
        ``"shuffled"`` (the same records in a seeded random order). The two
        lists are independent objects.
    """
    in_order = generate_records(count)
    # Shuffle a copy so the generation-order list stays intact.
    mixed = list(in_order)
    shuffler = random.Random(seed)
    shuffler.shuffle(mixed)
    return {"sorted": in_order, "shuffled": mixed}
def _insert_all(structure_name, records, bucket_count):
if structure_name == "LinkedList":
head = None
for name, phone in records:
head = ll_insert(head, name, phone)
return head
if structure_name == "HashTable":
buckets = create_hash_table(bucket_count)
for name, phone in records:
ht_insert(buckets, name, phone)
return buckets
if structure_name == "BST":
root = None
for name, phone in records:
root = bst_insert(root, name, phone)
return root
raise ValueError(f"Unknown structure: {structure_name}")
def _find_all(structure_name, structure, names):
if structure_name == "LinkedList":
for name in names:
ll_find(structure, name)
return structure
if structure_name == "HashTable":
for name in names:
ht_find(structure, name)
return structure
if structure_name == "BST":
for name in names:
bst_find(structure, name)
return structure
raise ValueError(f"Unknown structure: {structure_name}")
def _delete_all(structure_name, structure, names):
if structure_name == "LinkedList":
head = structure
for name in names:
head = ll_delete(head, name)
return head
if structure_name == "HashTable":
for name in names:
ht_delete(structure, name)
return structure
if structure_name == "BST":
root = structure
for name in names:
root = bst_delete(root, name)
return root
raise ValueError(f"Unknown structure: {structure_name}")
def _elapsed(action):
start = time.perf_counter()
result = action()
end = time.perf_counter()
return result, end - start
def run_experiment(count=10000, repeats=5, seed=42, bucket_count=20011):
    """Time insert, find and delete for every structure and input ordering.

    For each structure in ``STRUCTURES`` and each ordering in ``MODES`` the
    measurement is repeated ``repeats`` times. Every repetition builds a new
    structure from all records, looks up a sample of names and then deletes
    a sample of names, timing each of the three phases as a whole.

    Args:
        count: Number of generated records.
        repeats: Number of repetitions per structure/mode pair.
        seed: Base seed for record shuffling and for workload sampling.
        bucket_count: Bucket count handed to the hash table constructor.

    Returns:
        A list of dicts, one per timed phase, with the keys ``structure``,
        ``mode``, ``operation``, ``repeat``, ``time_sec``, ``n`` and
        ``bucket_count`` (an empty string for anything but the hash table).
    """
    record_sets = prepare_records(count, seed)
    all_names = [entry[0] for entry in record_sets["sorted"]]
    existing_sample_size = min(100, count)
    delete_sample_size = min(50, count)
    results = []

    for structure_name in STRUCTURES:
        bucket_field = bucket_count if structure_name == "HashTable" else ""
        for mode in MODES:
            records = record_sets[mode]
            names_for_sampling = [entry[0] for entry in records]
            for repeat in range(1, repeats + 1):
                # The seed mixes the repeat number with the lengths of the
                # structure and mode names, so the workload is reproducible
                # for a given combination.
                rng = random.Random(seed + repeat * 1000 + len(structure_name) + len(mode))
                # Lookup workload: sampled existing names followed by ten
                # "None_..." names, which generate_records never produces.
                find_names = rng.sample(names_for_sampling, existing_sample_size)
                find_names += [f"None_{repeat}_{index}" for index in range(10)]
                delete_names = rng.sample(all_names, delete_sample_size)

                # Each phase receives the structure returned by the previous
                # one; the dict keeps the phases in execution order.
                timings = {}
                structure, timings["insert"] = _elapsed(
                    lambda: _insert_all(structure_name, records, bucket_count)
                )
                structure, timings["find"] = _elapsed(
                    lambda: _find_all(structure_name, structure, find_names)
                )
                structure, timings["delete"] = _elapsed(
                    lambda: _delete_all(structure_name, structure, delete_names)
                )

                for operation, seconds in timings.items():
                    results.append(
                        {
                            "structure": structure_name,
                            "mode": mode,
                            "operation": operation,
                            "repeat": repeat,
                            "time_sec": seconds,
                            "n": count,
                            "bucket_count": bucket_field,
                        }
                    )
    return results
def summarize(results):
    """Aggregate raw measurements into one row per structure/mode/operation.

    Args:
        results: Iterable of measurement dicts containing at least the keys
            ``structure``, ``mode``, ``operation`` and ``time_sec``.

    Returns:
        A list of dicts with the keys ``structure``, ``mode``, ``operation``,
        ``average_time_sec`` (arithmetic mean of the measurements) and
        ``measurements_sec`` (the individual timings with nine decimals,
        joined by ``;``). Rows follow the order of ``STRUCTURES``, ``MODES``
        and ``OPERATIONS``. Combinations without any measurement are left
        out, so an empty ``results`` yields an empty summary.
    """
    grouped = {}
    for row in results:
        key = (row["structure"], row["mode"], row["operation"])
        grouped.setdefault(key, []).append(row["time_sec"])

    summary = []
    for structure_name in STRUCTURES:
        for mode in MODES:
            for operation in OPERATIONS:
                values = grouped.get((structure_name, mode, operation))
                if not values:
                    # Nothing was measured for this combination (for example
                    # zero repeats or partial results); skip it instead of
                    # failing with KeyError.
                    continue
                summary.append(
                    {
                        "structure": structure_name,
                        "mode": mode,
                        "operation": operation,
                        "average_time_sec": sum(values) / len(values),
                        "measurements_sec": ";".join(f"{value:.9f}" for value in values),
                    }
                )
    return summary
def write_csv(path, rows, fieldnames):
    """Write ``rows`` to ``path`` as a UTF-8 CSV file with a header line.

    Args:
        path: ``pathlib.Path`` of the output file; missing parent
            directories are created.
        rows: Iterable of dicts, one per CSV line.
        fieldnames: Column names, in output order.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" leaves line-ending handling to the csv module.
    with path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
def write_chart(path, summary):
    """Draw a bar chart of the average timings and save it to ``path``.

    Uses matplotlib when it can be imported; otherwise delegates to
    ``write_svg_chart``, which needs only the standard library.

    Args:
        path: ``pathlib.Path`` of the output image; missing parent
            directories are created.
        summary: List of dicts with the keys ``structure``, ``mode``,
            ``operation`` and ``average_time_sec``.
    """
    try:
        import matplotlib.pyplot as plt
    except ModuleNotFoundError:
        write_svg_chart(path, summary)
        return

    labels = [
        f"{row['structure']}\n{row['mode']}\n{row['operation']}"
        for row in summary
    ]
    values = [row["average_time_sec"] for row in summary]
    colors_by_operation = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    colors = [colors_by_operation[row["operation"]] for row in summary]

    path.parent.mkdir(parents=True, exist_ok=True)
    figure = plt.figure(figsize=(14, 7))
    try:
        plt.bar(range(len(values)), values, color=colors)
        # Timings span several orders of magnitude, hence the log axis.
        plt.yscale("log")
        plt.ylabel("Среднее время, секунд (логарифмическая шкала)")
        plt.title("Сравнение операций телефонного справочника")
        plt.xticks(range(len(labels)), labels, rotation=45, ha="right", fontsize=8)
        plt.tight_layout()
        plt.savefig(path, dpi=160)
    finally:
        # pyplot keeps figures registered until they are closed; release this
        # one even when drawing or saving raises.
        plt.close(figure)
def write_svg_chart(path, summary):
    """Render the average timings as a hand-built SVG bar chart.

    The chart has a logarithmic y axis with one grid line per power of ten,
    one bar per summary row coloured by operation, a rotated label under each
    bar and a legend. An empty ``summary`` produces a chart with axes, grid
    and legend but no bars.

    Args:
        path: ``pathlib.Path`` of the output file; missing parent
            directories are created.
        summary: List of dicts with the keys ``structure``, ``mode``,
            ``operation`` and ``average_time_sec``.
    """
    width = 1500
    height = 760
    margin_left = 90
    margin_right = 40
    margin_top = 70
    margin_bottom = 210
    plot_width = width - margin_left - margin_right
    plot_height = height - margin_top - margin_bottom
    baseline = margin_top + plot_height

    # Clamp to a tiny positive value so log10 is defined for zero timings.
    values = [max(row["average_time_sec"], 1e-12) for row in summary]
    if values:
        log_min = math.floor(math.log10(min(values)))
        log_max = math.ceil(math.log10(max(values)))
    else:
        # No data: start from a single decade, widened just below.
        log_min = log_max = 0
    if log_min == log_max:
        # A zero-height range would divide by zero in y_for.
        log_min -= 1
        log_max += 1

    def y_for(value):
        """Map a timing to its y pixel coordinate on the log axis."""
        log_value = math.log10(max(value, 1e-12))
        return margin_top + (log_max - log_value) / (log_max - log_min) * plot_height

    colors_by_operation = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    # Avoid dividing by zero when there are no bars to lay out.
    slot_width = plot_width / max(len(summary), 1)
    bar_width = slot_width * 0.62

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#ffffff"/>',
        '<style>text{font-family:Arial,Helvetica,sans-serif;fill:#222}.axis{stroke:#222;stroke-width:1}.grid{stroke:#ddd;stroke-width:1}.label{font-size:13px}.tick{font-size:12px}.title{font-size:24px;font-weight:700}.legend{font-size:14px}</style>',
        f'<text class="title" x="{width / 2}" y="35" text-anchor="middle">Сравнение операций телефонного справочника</text>',
        f'<line class="axis" x1="{margin_left}" y1="{baseline}" x2="{width - margin_right}" y2="{baseline}"/>',
        f'<line class="axis" x1="{margin_left}" y1="{margin_top}" x2="{margin_left}" y2="{baseline}"/>',
    ]

    # Horizontal grid line and tick label for every power of ten in range.
    for exponent in range(log_min, log_max + 1):
        value = 10 ** exponent
        y = y_for(value)
        lines.append(
            f'<line class="grid" x1="{margin_left}" y1="{y:.2f}" x2="{width - margin_right}" y2="{y:.2f}"/>'
        )
        lines.append(
            f'<text class="tick" x="{margin_left - 10}" y="{y + 4:.2f}" text-anchor="end">1e{exponent}</text>'
        )

    # One bar per row, with its value above and a rotated label below.
    for index, row in enumerate(summary):
        x = margin_left + index * slot_width + (slot_width - bar_width) / 2
        y = y_for(row["average_time_sec"])
        bar_height = baseline - y
        color = colors_by_operation[row["operation"]]
        label = f"{row['structure']} / {row['mode']} / {row['operation']}"
        lines.append(
            f'<rect x="{x:.2f}" y="{y:.2f}" width="{bar_width:.2f}" height="{bar_height:.2f}" fill="{color}"/>'
        )
        lines.append(
            f'<text class="tick" x="{x + bar_width / 2:.2f}" y="{y - 5:.2f}" text-anchor="middle">{row["average_time_sec"]:.3g}</text>'
        )
        lines.append(
            f'<text class="label" transform="translate({x + bar_width / 2:.2f} {baseline + 18:.2f}) rotate(55)" text-anchor="start">{html.escape(label)}</text>'
        )

    # Legend: one colour swatch per operation along the bottom edge.
    legend_x = margin_left
    legend_y = height - 30
    for offset, (operation, color) in enumerate(colors_by_operation.items()):
        x = legend_x + offset * 130
        lines.append(f'<rect x="{x}" y="{legend_y - 12}" width="18" height="18" fill="{color}"/>')
        lines.append(f'<text class="legend" x="{x + 26}" y="{legend_y + 2}">{operation}</text>')

    # Y axis caption, rotated to read bottom-to-top.
    lines.append(
        f'<text class="label" transform="translate(24 {margin_top + plot_height / 2}) rotate(-90)" text-anchor="middle">Среднее время, секунд (логарифмическая шкала)</text>'
    )
    lines.append("</svg>")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
def main():
    """Parse command-line options, run the benchmark and save its outputs.

    Writes ``results.csv`` (every measurement), ``summary.csv`` (averages)
    and ``performance.svg`` (chart) into the output directory, then prints
    where each file was saved. Invalid ``--n`` or ``--repeats`` values end
    the program with an argparse usage error.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--n", type=int, default=10000, help="number of generated records")
    parser.add_argument("--repeats", type=int, default=5, help="number of repeated measurements")
    parser.add_argument("--seed", type=int, default=42, help="random seed")
    parser.add_argument("--bucket-count", type=int, default=20011, help="hash-table bucket count")
    parser.add_argument("--output-dir", type=Path, default=Path("docs/data"))
    args = parser.parse_args()

    # A negative count becomes a negative sample size, which random.sample
    # rejects with a bare ValueError; report it as a usage error instead.
    if args.n < 0:
        parser.error("--n must be non-negative")
    # Without at least one repetition nothing is measured and the summary
    # step has no data to average.
    if args.repeats < 1:
        parser.error("--repeats must be at least 1")

    results = run_experiment(
        count=args.n,
        repeats=args.repeats,
        seed=args.seed,
        bucket_count=args.bucket_count,
    )
    summary = summarize(results)

    results_path = args.output_dir / "results.csv"
    summary_path = args.output_dir / "summary.csv"
    chart_path = args.output_dir / "performance.svg"

    write_csv(
        results_path,
        results,
        ["structure", "mode", "operation", "repeat", "time_sec", "n", "bucket_count"],
    )
    write_csv(
        summary_path,
        summary,
        ["structure", "mode", "operation", "average_time_sec", "measurements_sec"],
    )
    write_chart(chart_path, summary)

    print(f"Saved detailed results to {results_path}")
    print(f"Saved summary to {summary_path}")
    print(f"Saved chart to {chart_path}")
# Run the benchmark only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()