360 lines
12 KiB
Python
360 lines
12 KiB
Python
"""Run performance experiments for the procedural phone book structures."""
|
|
|
|
import argparse
|
|
import csv
|
|
import html
|
|
import math
|
|
import random
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from phonebook import (
|
|
bst_delete,
|
|
bst_find,
|
|
bst_insert,
|
|
create_hash_table,
|
|
ht_delete,
|
|
ht_find,
|
|
ht_insert,
|
|
ll_delete,
|
|
ll_find,
|
|
ll_insert,
|
|
)
|
|
|
|
|
|
# Names of the benchmarked containers. The same strings select the matching
# phonebook functions in _insert_all / _find_all / _delete_all and fix the
# iteration order of the experiment and of the summary.
STRUCTURES = ("LinkedList", "HashTable", "BST")
# Input orderings; these are the keys of the dict returned by prepare_records.
MODES = ("shuffled", "sorted")
# Measured phases, in the order in which they are listed in the summary.
OPERATIONS = ("insert", "find", "delete")
|
|
|
|
|
|
def generate_records(count):
    """Build ``count`` synthetic ``(name, phone)`` pairs.

    Entry ``i`` is ``("User_<i>", "+7-900-<i>")`` where ``<i>`` is the index
    zero-padded to at least five digits. Entries are produced in ascending
    index order; a ``count`` of zero (or less) yields an empty list.
    """
    records = []
    for number in range(count):
        suffix = f"{number:05d}"
        records.append(("User_" + suffix, "+7-900-" + suffix))
    return records
|
|
|
|
|
|
def prepare_records(count, seed):
    """Return the generated records in both benchmark orderings.

    The result maps ``"sorted"`` to the records exactly as produced by
    ``generate_records(count)`` and ``"shuffled"`` to an independent copy
    permuted by ``random.Random(seed)``, so the permutation is reproducible
    for a given seed and the generation-order list is left untouched.
    """
    ordered = generate_records(count)
    # Shuffle a copy so the generation-order list stays intact.
    mixed = list(ordered)
    shuffler = random.Random(seed)
    shuffler.shuffle(mixed)
    return {"sorted": ordered, "shuffled": mixed}
|
|
|
|
|
|
def _insert_all(structure_name, records, bucket_count):
|
|
if structure_name == "LinkedList":
|
|
head = None
|
|
for name, phone in records:
|
|
head = ll_insert(head, name, phone)
|
|
return head
|
|
|
|
if structure_name == "HashTable":
|
|
buckets = create_hash_table(bucket_count)
|
|
for name, phone in records:
|
|
ht_insert(buckets, name, phone)
|
|
return buckets
|
|
|
|
if structure_name == "BST":
|
|
root = None
|
|
for name, phone in records:
|
|
root = bst_insert(root, name, phone)
|
|
return root
|
|
|
|
raise ValueError(f"Unknown structure: {structure_name}")
|
|
|
|
|
|
def _find_all(structure_name, structure, names):
|
|
if structure_name == "LinkedList":
|
|
for name in names:
|
|
ll_find(structure, name)
|
|
return structure
|
|
|
|
if structure_name == "HashTable":
|
|
for name in names:
|
|
ht_find(structure, name)
|
|
return structure
|
|
|
|
if structure_name == "BST":
|
|
for name in names:
|
|
bst_find(structure, name)
|
|
return structure
|
|
|
|
raise ValueError(f"Unknown structure: {structure_name}")
|
|
|
|
|
|
def _delete_all(structure_name, structure, names):
|
|
if structure_name == "LinkedList":
|
|
head = structure
|
|
for name in names:
|
|
head = ll_delete(head, name)
|
|
return head
|
|
|
|
if structure_name == "HashTable":
|
|
for name in names:
|
|
ht_delete(structure, name)
|
|
return structure
|
|
|
|
if structure_name == "BST":
|
|
root = structure
|
|
for name in names:
|
|
root = bst_delete(root, name)
|
|
return root
|
|
|
|
raise ValueError(f"Unknown structure: {structure_name}")
|
|
|
|
|
|
def _elapsed(action):
|
|
start = time.perf_counter()
|
|
result = action()
|
|
end = time.perf_counter()
|
|
return result, end - start
|
|
|
|
|
|
def _measurement_row(structure_name, mode, operation, repeat, elapsed, count, bucket_count):
    """Build one row of the detailed results table for a single timed phase."""
    return {
        "structure": structure_name,
        "mode": mode,
        "operation": operation,
        "repeat": repeat,
        "time_sec": elapsed,
        "n": count,
        # The bucket count is only meaningful for the hash table; other
        # structures get an empty cell.
        "bucket_count": bucket_count if structure_name == "HashTable" else "",
    }


def run_experiment(count=10000, repeats=5, seed=42, bucket_count=20011):
    """Time insert, find and delete for every structure and input ordering.

    For each name in ``STRUCTURES``, each mode in ``MODES`` and each repeat
    ``1..repeats``:

    * a fresh structure is built from all records of that mode (insert phase);
    * up to 100 names sampled from those records plus 10 ``None_<repeat>_<i>``
      names (intended as misses) are looked up in it (find phase);
    * up to 50 names sampled from the generation-order name list are deleted
      from it (delete phase).

    The sampling RNG is re-created per repeat from ``seed``, the repeat number
    and the lengths of the structure and mode names, so a run is reproducible
    for a given ``seed``.

    Args:
        count: number of records to generate.
        repeats: number of measurements per structure/mode pair.
        seed: base seed for shuffling and for workload sampling.
        bucket_count: bucket count passed on when building the hash table.

    Returns:
        A list of dict rows with keys ``structure``, ``mode``, ``operation``,
        ``repeat``, ``time_sec``, ``n`` and ``bucket_count``. Within each
        repeat the rows appear in insert, find, delete order.
    """
    record_sets = prepare_records(count, seed)
    all_names = [name for name, _phone in record_sets["sorted"]]
    results = []

    for structure_name in STRUCTURES:
        for mode in MODES:
            records = record_sets[mode]
            names_for_sampling = [name for name, _phone in records]

            for repeat in range(1, repeats + 1):
                rng = random.Random(seed + repeat * 1000 + len(structure_name) + len(mode))
                find_existing = rng.sample(names_for_sampling, min(100, count))
                find_missing = [f"None_{repeat}_{index}" for index in range(10)]
                find_names = find_existing + find_missing
                delete_names = rng.sample(all_names, min(50, count))

                # Each lambda is invoked immediately by _elapsed, so it sees
                # the loop variables and `structure` as they are right now.
                structure, insert_time = _elapsed(
                    lambda: _insert_all(structure_name, records, bucket_count)
                )
                structure, find_time = _elapsed(
                    lambda: _find_all(structure_name, structure, find_names)
                )
                structure, delete_time = _elapsed(
                    lambda: _delete_all(structure_name, structure, delete_names)
                )

                timings = (
                    ("insert", insert_time),
                    ("find", find_time),
                    ("delete", delete_time),
                )
                results.extend(
                    _measurement_row(
                        structure_name, mode, operation, repeat, elapsed, count, bucket_count
                    )
                    for operation, elapsed in timings
                )

    return results
|
|
|
|
|
|
def summarize(results):
    """Average the measured times per (structure, mode, operation) group.

    Args:
        results: iterable of detailed rows; each row must provide the keys
            ``structure``, ``mode``, ``operation`` and ``time_sec``.

    Returns:
        A list of dict rows with keys ``structure``, ``mode``, ``operation``,
        ``average_time_sec`` and ``measurements_sec`` (the individual timings
        formatted with nine decimals and joined with ``;``). Rows follow the
        order of ``STRUCTURES``, ``MODES`` and ``OPERATIONS``. Combinations
        that have no measurements are omitted, so empty or partial
        ``results`` produce a shorter (possibly empty) summary instead of
        raising ``KeyError``.
    """
    grouped = {}
    for row in results:
        key = (row["structure"], row["mode"], row["operation"])
        grouped.setdefault(key, []).append(row["time_sec"])

    summary = []
    for structure_name in STRUCTURES:
        for mode in MODES:
            for operation in OPERATIONS:
                values = grouped.get((structure_name, mode, operation))
                if not values:
                    # Nothing was measured for this combination; there is no
                    # average to report.
                    continue
                summary.append(
                    {
                        "structure": structure_name,
                        "mode": mode,
                        "operation": operation,
                        "average_time_sec": sum(values) / len(values),
                        "measurements_sec": ";".join(f"{value:.9f}" for value in values),
                    }
                )
    return summary
|
|
|
|
|
|
def write_csv(path, rows, fieldnames):
    """Write ``rows`` (dicts) to ``path`` as a UTF-8 CSV file with a header.

    Missing parent directories are created first. ``fieldnames`` determines
    both the header line and the column order.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    # newline="" leaves line-ending handling to the csv module.
    with path.open("w", encoding="utf-8", newline="") as handle:
        table = csv.DictWriter(handle, fieldnames=fieldnames)
        table.writeheader()
        for record in rows:
            table.writerow(record)
|
|
|
|
|
|
def write_chart(path, summary):
    """Draw the summary as a log-scale bar chart and save it to ``path``.

    matplotlib is used when it can be imported; otherwise the chart is
    produced by ``write_svg_chart``.

    Args:
        path: ``pathlib.Path`` of the output file; parent directories are
            created as needed.
        summary: rows with ``structure``, ``mode``, ``operation`` and
            ``average_time_sec`` keys; one bar is drawn per row.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # ImportError also covers ModuleNotFoundError, so both a missing and a
        # broken matplotlib installation fall back to the dependency-free
        # renderer.
        write_svg_chart(path, summary)
        return

    labels = [
        f"{row['structure']}\n{row['mode']}\n{row['operation']}"
        for row in summary
    ]
    values = [row["average_time_sec"] for row in summary]
    colors_by_operation = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    colors = [colors_by_operation[row["operation"]] for row in summary]

    path.parent.mkdir(parents=True, exist_ok=True)
    figure = plt.figure(figsize=(14, 7))
    try:
        plt.bar(range(len(values)), values, color=colors)
        plt.yscale("log")
        plt.ylabel("Среднее время, секунд (логарифмическая шкала)")
        plt.title("Сравнение операций телефонного справочника")
        plt.xticks(range(len(labels)), labels, rotation=45, ha="right", fontsize=8)
        plt.tight_layout()
        plt.savefig(path, dpi=160)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(figure)
|
|
|
|
|
|
def write_svg_chart(path, summary):
    """Render the summary as a log-scale bar chart in a hand-built SVG file.

    This renderer needs only the standard library. Each summary row becomes
    one bar coloured by its operation, with the average printed above the bar
    and a rotated ``structure / mode / operation`` label below the axis.

    Args:
        path: ``pathlib.Path`` of the output file; parent directories are
            created as needed.
        summary: rows with ``structure``, ``mode``, ``operation`` and
            ``average_time_sec`` keys. An empty summary produces a chart
            frame (title, axes, legend) without bars.
    """
    width = 1500
    height = 760
    margin_left = 90
    margin_right = 40
    margin_top = 70
    margin_bottom = 210
    plot_width = width - margin_left - margin_right
    plot_height = height - margin_top - margin_bottom
    baseline = margin_top + plot_height

    # Clamp to a tiny positive number so log10 is defined for a 0.0 average.
    values = [max(row["average_time_sec"], 1e-12) for row in summary]
    if values:
        log_min = math.floor(math.log10(min(values)))
        log_max = math.ceil(math.log10(max(values)))
    else:
        # Nothing to plot: use one placeholder decade for the empty frame.
        log_min, log_max = -1, 0
    if log_min == log_max:
        # Every value sits on the same power of ten; widen the axis so the
        # scale below does not divide by zero.
        log_min -= 1
        log_max += 1

    def y_for(value):
        """Map a time value to its vertical pixel position on the log axis."""
        log_value = math.log10(max(value, 1e-12))
        return margin_top + (log_max - log_value) / (log_max - log_min) * plot_height

    colors_by_operation = {
        "insert": "#4C78A8",
        "find": "#F58518",
        "delete": "#54A24B",
    }
    # Guard the division: with no rows there are no slots to size.
    slot_width = plot_width / len(summary) if summary else plot_width
    bar_width = slot_width * 0.62

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#ffffff"/>',
        '<style>text{font-family:Arial,Helvetica,sans-serif;fill:#222}.axis{stroke:#222;stroke-width:1}.grid{stroke:#ddd;stroke-width:1}.label{font-size:13px}.tick{font-size:12px}.title{font-size:24px;font-weight:700}.legend{font-size:14px}</style>',
        f'<text class="title" x="{width / 2}" y="35" text-anchor="middle">Сравнение операций телефонного справочника</text>',
        f'<line class="axis" x1="{margin_left}" y1="{baseline}" x2="{width - margin_right}" y2="{baseline}"/>',
        f'<line class="axis" x1="{margin_left}" y1="{margin_top}" x2="{margin_left}" y2="{baseline}"/>',
    ]

    # One horizontal grid line and tick label per power of ten.
    for exponent in range(log_min, log_max + 1):
        value = 10 ** exponent
        y = y_for(value)
        lines.append(
            f'<line class="grid" x1="{margin_left}" y1="{y:.2f}" x2="{width - margin_right}" y2="{y:.2f}"/>'
        )
        lines.append(
            f'<text class="tick" x="{margin_left - 10}" y="{y + 4:.2f}" text-anchor="end">1e{exponent}</text>'
        )

    for index, row in enumerate(summary):
        # Centre the bar inside its slot.
        x = margin_left + index * slot_width + (slot_width - bar_width) / 2
        y = y_for(row["average_time_sec"])
        bar_height = baseline - y
        color = colors_by_operation[row["operation"]]
        label = f"{row['structure']} / {row['mode']} / {row['operation']}"

        lines.append(
            f'<rect x="{x:.2f}" y="{y:.2f}" width="{bar_width:.2f}" height="{bar_height:.2f}" fill="{color}"/>'
        )
        lines.append(
            f'<text class="tick" x="{x + bar_width / 2:.2f}" y="{y - 5:.2f}" text-anchor="middle">{row["average_time_sec"]:.3g}</text>'
        )
        lines.append(
            f'<text class="label" transform="translate({x + bar_width / 2:.2f} {baseline + 18:.2f}) rotate(55)" text-anchor="start">{html.escape(label)}</text>'
        )

    legend_x = margin_left
    legend_y = height - 30
    for offset, (operation, color) in enumerate(colors_by_operation.items()):
        x = legend_x + offset * 130
        lines.append(f'<rect x="{x}" y="{legend_y - 12}" width="18" height="18" fill="{color}"/>')
        lines.append(f'<text class="legend" x="{x + 26}" y="{legend_y + 2}">{operation}</text>')

    lines.append(
        f'<text class="label" transform="translate(24 {margin_top + plot_height / 2}) rotate(-90)" text-anchor="middle">Среднее время, секунд (логарифмическая шкала)</text>'
    )
    lines.append("</svg>")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
|
|
|
|
|
|
def main(argv=None):
    """Parse command-line options, run the experiment and save its outputs.

    Writes ``results.csv`` (one row per measurement), ``summary.csv`` (one row
    per structure/mode/operation) and ``performance.svg`` (bar chart) into the
    chosen output directory, then prints where each file was saved.

    Args:
        argv: optional list of argument strings; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--n", type=int, default=10000, help="number of generated records")
    parser.add_argument("--repeats", type=int, default=5, help="number of repeated measurements")
    parser.add_argument("--seed", type=int, default=42, help="random seed")
    parser.add_argument("--bucket-count", type=int, default=20011, help="hash-table bucket count")
    parser.add_argument("--output-dir", type=Path, default=Path("docs/data"))
    args = parser.parse_args(argv)

    # Reject values that cannot yield a meaningful run with a clear usage
    # error instead of failing deep inside the experiment.
    if args.n < 0:
        parser.error("--n must be non-negative")
    if args.repeats < 1:
        parser.error("--repeats must be at least 1")

    results = run_experiment(
        count=args.n,
        repeats=args.repeats,
        seed=args.seed,
        bucket_count=args.bucket_count,
    )
    summary = summarize(results)

    results_path = args.output_dir / "results.csv"
    summary_path = args.output_dir / "summary.csv"
    chart_path = args.output_dir / "performance.svg"

    write_csv(
        results_path,
        results,
        ["structure", "mode", "operation", "repeat", "time_sec", "n", "bucket_count"],
    )
    write_csv(
        summary_path,
        summary,
        ["structure", "mode", "operation", "average_time_sec", "measurements_sec"],
    )
    write_chart(chart_path, summary)

    print(f"Saved detailed results to {results_path}")
    print(f"Saved summary to {summary_path}")
    print(f"Saved chart to {chart_path}")


# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
|