2026-rff_mp/rybakovaa/lab1/docs/data/lab1.py

328 lines
9.6 KiB
Python
Raw Normal View History

2026-05-17 19:06:27 +00:00
import time
import random
import csv
import os
import sys
# Raise the recursion limit above the interpreter default so that any
# recursive tree traversal can go as deep as N levels (a BST built from
# sorted input degenerates into a chain of N nodes).
sys.setrecursionlimit(20000)
# Directory that contains this script.
BASE = os.path.dirname(os.path.abspath(__file__))
# Output directory for results.csv and the generated plots.
DATA_PATH = BASE
# Number of records generated for the experiments.
N = 10000
# Number of timed repetitions per (structure, ordering) combination.
REPEAT = 5
def ll_insert(head, name, phone):
    """Prepend a record to a singly linked list and return the new head.

    Nodes are plain dicts with the keys "name", "phone" and "next".  No
    duplicate check is made: the new node always becomes the first one and
    points at the previous head (which may be None for an empty list).
    """
    return {"name": name, "phone": phone, "next": head}
def ll_find(head, name):
    """Return the phone stored for *name* in the linked list, or None.

    The list is scanned from *head*; the first node whose "name" matches
    wins.
    """
    node = head
    while node:
        if node["name"] != name:
            node = node["next"]
            continue
        return node["phone"]
    return None
def ll_delete(head, name):
    """Unlink the first node named *name* and return the list head.

    Returns None for an empty list, the second node when the head itself
    matches, and the unchanged head otherwise (whether or not a later node
    was removed).  The list is modified in place.
    """
    if head is None:
        return None
    if head["name"] == name:
        return head["next"]

    # Walk with a (previous, current) pair so the match can be spliced out.
    prev, node = head, head["next"]
    while node:
        if node["name"] == name:
            prev["next"] = node["next"]
            break
        prev, node = node, node["next"]
    return head
def ll_list_all(head):
    """Return every (name, phone) pair of the linked list, sorted ascending."""
    pairs = []
    node = head
    while node:
        pairs.append((node["name"], node["phone"]))
        node = node["next"]
    return sorted(pairs)
# Number of buckets allocated when a hash table is built.
BUCKET_SIZE = 1000
def ht_insert(buckets, name, phone):
    """Insert a record into the chained hash table *buckets* (in place).

    The bucket is selected with ``hash(name) % len(buckets)`` and the record
    is prepended to that bucket's linked list.
    """
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
def ht_find(buckets, name):
    """Return the phone stored for *name* in the hash table, or None."""
    chain = buckets[hash(name) % len(buckets)]
    return ll_find(chain, name)
def ht_delete(buckets, name):
    """Remove *name* from the chained hash table *buckets* (in place).

    Only the bucket selected by ``hash(name) % len(buckets)`` is touched.
    """
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
def ht_list_all(buckets):
    """Return every (name, phone) pair stored in the hash table, sorted.

    Each bucket holds the head of a linked list (or None); all chains are
    walked and the collected pairs are returned in ascending order.
    """
    pairs = []
    for chain_head in buckets:
        node = chain_head
        while node:
            pairs.append((node["name"], node["phone"]))
            node = node["next"]
    return sorted(pairs)
def bst_insert(root, name, phone):
    """Insert or update a record in the binary search tree; return the root.

    Nodes are dicts with the keys "name", "phone", "left" and "right".  If
    *name* is already present its phone is overwritten; otherwise a new leaf
    is attached.  The tree is modified in place.

    The descent is iterative, so the function works on arbitrarily deep
    (degenerate) trees, e.g. those produced by inserting sorted keys,
    without depending on the interpreter's recursion limit.
    """
    if root is None:
        return {"name": name, "phone": phone, "left": None, "right": None}

    node = root
    while True:
        if name < node["name"]:
            side = "left"
        elif name > node["name"]:
            side = "right"
        else:
            # Existing key: update the stored phone, the shape is unchanged.
            node["phone"] = phone
            return root

        child = node[side]
        if child is None:
            node[side] = {"name": name, "phone": phone, "left": None, "right": None}
            return root
        node = child
def bst_find(root, name):
    """Return the phone stored for *name* in the BST, or None if absent.

    The lookup is iterative, so it also works on degenerate (chain-like)
    trees whose height exceeds the interpreter's recursion limit.
    """
    node = root
    while node is not None:
        key = node["name"]
        if name == key:
            return node["phone"]
        node = node["left"] if name < key else node["right"]
    return None
def bst_delete(root, name):
    """Remove *name* from the BST (in place) and return the resulting root.

    A node with at most one child is replaced by that child.  A node with
    two children takes over the name and phone of its in-order successor
    (the leftmost node of its right subtree), and the successor node is
    unlinked instead.  If *name* is not present the tree is left untouched.

    The implementation is iterative, so it works on degenerate trees whose
    height exceeds the interpreter's recursion limit.
    """
    # Locate the node to delete together with its parent.
    parent = None
    node = root
    while node is not None and node["name"] != name:
        parent = node
        node = node["left"] if name < node["name"] else node["right"]
    if node is None:
        return root

    if node["left"] is not None and node["right"] is not None:
        # Two children: copy the successor's payload here, then remove the
        # successor node, which by construction has no left child.
        succ_parent = node
        succ = node["right"]
        while succ["left"] is not None:
            succ_parent = succ
            succ = succ["left"]
        node["name"] = succ["name"]
        node["phone"] = succ["phone"]
        parent, node = succ_parent, succ

    # From here on `node` has at most one child; splice it out.
    child = node["left"] if node["left"] is not None else node["right"]
    if parent is None:
        return child
    if parent["left"] is node:
        parent["left"] = child
    else:
        parent["right"] = child
    return root
def bst_list_all(root):
    """Return all (name, phone) pairs of the BST in ascending name order.

    Performs an in-order traversal with an explicit stack rather than
    recursion, so degenerate trees deeper than the interpreter's recursion
    limit are handled as well.
    """
    result = []
    pending = []  # ancestors whose left subtree is currently being visited
    node = root
    while pending or node is not None:
        # Go as far left as possible, remembering the path.
        while node is not None:
            pending.append(node)
            node = node["left"]
        node = pending.pop()
        result.append((node["name"], node["phone"]))
        node = node["right"]
    return result
def make_records(n):
    """Generate *n* synthetic (name, phone) records.

    Record ``i`` is ``("User_<i, 5 digits>", "8-900-<i mod 10000, 4 digits>")``.
    """
    return [(f"User_{i:05d}", f"8-900-{i % 10000:04d}") for i in range(n)]
# Base data set and the two insertion orders compared by the experiments.
records_all = make_records(N)
records_shuffled = list(records_all)
random.shuffle(records_shuffled)
records_sorted = sorted(records_all)

# Lookup workload: 100 names that exist plus 10 that do not, in random order.
all_names = [record[0] for record in records_all]
find_existing = random.sample(all_names, 100)
find_missing = [f"None_{i}" for i in range(10)]
find_names = find_existing + find_missing
random.shuffle(find_names)

# Deletion workload: 50 existing names.
delete_names = random.sample(all_names, 50)

# Accumulators filled by run_one_test: raw rows for the CSV and per-test
# averages for the plots and the report.
all_results = []
summary = []
def build_structure(struct_type, records):
    """Build and return a container of the given type filled with *records*.

    "LinkedList" yields the list head, "HashTable" a list of BUCKET_SIZE
    buckets, and any other value a BST root.  Records are inserted in the
    order given.
    """
    if struct_type == "HashTable":
        table = [None] * BUCKET_SIZE
        for name, phone in records:
            ht_insert(table, name, phone)
        return table

    # Linked list and BST share the same shape: each insert returns the
    # (possibly new) head/root, starting from an empty container.
    add = ll_insert if struct_type == "LinkedList" else bst_insert
    container = None
    for name, phone in records:
        container = add(container, name, phone)
    return container
def do_find(struct_type, container, names):
    """Look up every name in *names* in the container; results are discarded.

    "LinkedList" and "HashTable" select their lookup function; any other
    *struct_type* is treated as a BST.
    """
    if struct_type == "LinkedList":
        lookup = ll_find
    elif struct_type == "HashTable":
        lookup = ht_find
    else:
        lookup = bst_find

    for name in names:
        lookup(container, name)
def do_delete(struct_type, container, names):
    """Delete every name in *names* from the container and return it.

    For "LinkedList" and the BST fallback the returned head/root may differ
    from the one passed in; a "HashTable" is modified in place and the same
    bucket list is returned.
    """
    if struct_type == "HashTable":
        for name in names:
            ht_delete(container, name)
        return container

    remove = ll_delete if struct_type == "LinkedList" else bst_delete
    for name in names:
        container = remove(container, name)
    return container
def run_one_test(struct_type, mode_name, records):
    """Time build / find / delete for one structure and one data ordering.

    The measurement is repeated REPEAT times.  Every repetition appends a
    "Run k" row to ``all_results``; afterwards an "AVERAGE" row is appended
    to ``all_results`` and a dict with the averaged timings to ``summary``.
    """
    timings = []  # one (insert, find, delete) tuple per repetition, seconds

    for run_no in range(1, REPEAT + 1):
        began = time.perf_counter()
        container = build_structure(struct_type, records)
        insert_s = time.perf_counter() - began

        began = time.perf_counter()
        do_find(struct_type, container, find_names)
        find_s = time.perf_counter() - began

        began = time.perf_counter()
        do_delete(struct_type, container, delete_names)
        delete_s = time.perf_counter() - began

        timings.append((insert_s, find_s, delete_s))
        all_results.append(
            [struct_type, mode_name, f"Run {run_no}", insert_s, find_s, delete_s]
        )

    avg_ins = sum(t[0] for t in timings) / REPEAT
    avg_find = sum(t[1] for t in timings) / REPEAT
    avg_del = sum(t[2] for t in timings) / REPEAT

    all_results.append(
        [struct_type, mode_name, "AVERAGE", avg_ins, avg_find, avg_del]
    )
    summary.append(
        {
            "name": struct_type,
            "mode": mode_name,
            "ins": avg_ins,
            "find": avg_find,
            "del": avg_del,
        }
    )
# Run every structure against both insertion orders.
print("Запуск экспериментов...")
orderings = (
    ("случайный", records_shuffled),
    ("сортированный", records_sorted),
)
for mode_name, data in orderings:
    for struct_type in ("LinkedList", "HashTable", "BST"):
        print(f" {struct_type} ({mode_name})")
        run_one_test(struct_type, mode_name, data)
# Dump the raw and averaged timings as a semicolon-separated CSV file.
csv_path = os.path.join(DATA_PATH, "results.csv")
csv_header = ["Структура", "Режим", "Итерация", "Вставка", "Поиск", "Удаление"]
with open(csv_path, "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.writer(f, delimiter=";")
    writer.writerows([csv_header, *all_results])
print("CSV сохранён:", csv_path)
# Optional plotting step: skipped with a message when the import fails.
try:
    import matplotlib.pyplot as plt

    # Font fallback order for the sans-serif family (titles are in Russian).
    plt.rcParams["font.sans-serif"] = ["Segoe UI", "Arial", "Tahoma", "DejaVu Sans"]
    plt.rcParams["axes.unicode_minus"] = False

    labels = ["insert", "find", "delete"]
    structs = ["LinkedList", "HashTable", "BST"]
    colors = ["#5dade2", "#e67e22", "#58d68d"]

    # Figure 1: one subplot per structure, random vs. sorted insertion order.
    fig1, axs = plt.subplots(1, 3, figsize=(15, 5))
    fig1.suptitle("Влияние порядка данных")
    x = [0, 1, 2]
    w = 0.35
    for ax, s_name in zip(axs, structs):
        rand_d = next(
            r for r in summary
            if r["name"] == s_name and r["mode"] == "случайный"
        )
        sort_d = next(
            r for r in summary
            if r["name"] == s_name and r["mode"] == "сортированный"
        )
        ax.bar(
            [p - w / 2 for p in x],
            [rand_d["ins"], rand_d["find"], rand_d["del"]],
            w,
            label="случайный",
        )
        ax.bar(
            [p + w / 2 for p in x],
            [sort_d["ins"], sort_d["find"], sort_d["del"]],
            w,
            label="сортированный",
        )
        ax.set_title(s_name)
        ax.set_xticks(x)
        ax.set_xticklabels(labels)
        ax.legend()
        ax.grid(axis="y", alpha=0.3)
    plt.tight_layout()
    plt.savefig(os.path.join(DATA_PATH, "order_impact.png"))
    plt.close()

    # Figure 2: one subplot per operation, all structure/order combinations.
    fig2, axs2 = plt.subplots(1, 3, figsize=(15, 5))
    fig2.suptitle(f"Сравнение структур (N={N})")
    for ax, key, title in zip(axs2, ["ins", "find", "del"], labels):
        names = [f"{r['name']}\n({r['mode'][:4]})" for r in summary]
        vals = [r[key] for r in summary]
        ax.bar(names, vals, color=colors * 2)
        ax.set_title(title)
        ax.tick_params(axis="x", rotation=20)
    plt.tight_layout()
    plt.savefig(os.path.join(DATA_PATH, "struct_comparison.png"))
    plt.close()
    print("Графики сохранены")
except ImportError:
    print("matplotlib не установлен")
# Write the Markdown report one directory above this script; the image links
# are relative paths of the form data/<file>.png.
report_path = os.path.join(os.path.dirname(BASE), "report.md")

report_lines = [
    "# Отчёт: сравнение структур данных\n\n",
    f"N = {N}, повторов = {REPEAT}\n\n",
    "| Структура | Режим | Вставка (с) | Поиск (с) | Удаление (с) |\n",
    "| --- | --- | --- | --- | --- |\n",
]
# One table row per averaged (structure, ordering) result.
report_lines.extend(
    f"| {r['name']} | {r['mode']} | {r['ins']:.6f} | {r['find']:.6f} | {r['del']:.6f} |\n"
    for r in summary
)
report_lines.extend([
    "\n## Графики\n\n",
    "![Сравнение](data/struct_comparison.png)\n\n",
    "![Порядок данных](data/order_impact.png)\n\n",
    "## Выводы\n\n",
    "- BST на отсортированных данных сильно тормозит (вырождение дерева).\n",
    "- Хеш-таблица быстра на поиске и слабо зависит от порядка вставки.\n",
    "- Связный список медленный при поиске.\n",
    "- Для частого поиска предпочтительна хеш-таблица.\n",
])

with open(report_path, "w", encoding="utf-8-sig") as f:
    f.writelines(report_lines)
print("Отчёт:", report_path)
print("Готово.")