This commit is contained in:
shekurovaa 2026-05-20 22:05:27 +03:00
parent e71fe47f8c
commit d71206b9b0

View File

@@ -0,0 +1,475 @@
import time
import random
import csv
import os
import sys
from dataclasses import dataclass, field
from typing import Optional, List, Tuple, Any
import matplotlib.pyplot as plt
# Raise the interpreter recursion limit well above N so that code recursing
# one frame per stored element cannot hit RecursionError.
sys.setrecursionlimit(20000)

# Output root: the directory that contains this script, so the benchmark runs
# on any machine instead of depending on one user's absolute Windows path.
try:
    BASE_PATH = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is undefined when the code is pasted into an interactive session.
    BASE_PATH = os.getcwd()
DOCS_PATH = os.path.join(BASE_PATH, "docs")
DATA_PATH = os.path.join(DOCS_PATH, "data")
for path in (DOCS_PATH, DATA_PATH):
    os.makedirs(path, exist_ok=True)

N = 10_000  # number of records inserted per experiment
REPEATS = 5  # repetitions per (structure, mode) pair
FOUND_SAMPLE_SIZE = 100  # existing keys looked up per repetition
NOT_FOUND_SAMPLE_SIZE = 10  # absent keys looked up per repetition
DELETE_SAMPLE_SIZE = 50  # keys deleted per repetition
@dataclass
class NodeLL:
    """Singly linked list node holding one key/value pair."""

    key: str
    value: Any
    next: Optional["NodeLL"] = None


class LinkedList:
    """Singly linked list that adds new entries at the head."""

    def __init__(self) -> None:
        self.head: Optional[NodeLL] = None

    def _nodes(self):
        """Yield the nodes from head to tail."""
        node = self.head
        while node is not None:
            yield node
            node = node.next

    def insert(self, key: str, value: Any) -> None:
        """Prepend a new node; existing nodes with the same key are kept."""
        fresh = NodeLL(key=key, value=value, next=self.head)
        self.head = fresh

    def find(self, key: str) -> Any:
        """Return the value of the first node with this key, or None."""
        for node in self._nodes():
            if node.key == key:
                return node.value
        return None

    def delete(self, key: str) -> None:
        """Unlink the first node with this key; do nothing if it is absent."""
        if self.head is None:
            return
        if self.head.key == key:
            self.head = self.head.next
            return
        before = self.head
        while before.next is not None:
            if before.next.key == key:
                before.next = before.next.next
                return
            before = before.next

    def items(self) -> List[Tuple[str, Any]]:
        """Return all (key, value) pairs as a sorted list."""
        pairs: List[Tuple[str, Any]] = [(node.key, node.value) for node in self._nodes()]
        pairs.sort()
        return pairs
@dataclass
class NodeBST:
    """Binary search tree node holding one key/value pair."""

    key: str
    value: Any
    left: Optional["NodeBST"] = None
    right: Optional["NodeBST"] = None


class BST:
    """Unbalanced binary search tree.

    Every operation walks the tree with an explicit loop rather than
    recursion, so a degenerate (list-shaped) tree built from sorted keys
    cannot raise RecursionError regardless of its depth.
    """

    def __init__(self) -> None:
        self.root: Optional[NodeBST] = None

    def insert(self, key: str, value: Any) -> None:
        """Insert a key, or overwrite the value if the key already exists."""
        if self.root is None:
            self.root = NodeBST(key, value)
            return
        node = self.root
        while True:
            if key < node.key:
                if node.left is None:
                    node.left = NodeBST(key, value)
                    return
                node = node.left
            elif key > node.key:
                if node.right is None:
                    node.right = NodeBST(key, value)
                    return
                node = node.right
            else:
                node.value = value
                return

    def find(self, key: str) -> Any:
        """Return the value stored under key, or None if the key is absent."""
        node = self.root
        while node is not None:
            if key == node.key:
                return node.value
            node = node.left if key < node.key else node.right
        return None

    def delete(self, key: str) -> None:
        """Remove key from the tree; do nothing if it is absent."""
        parent: Optional[NodeBST] = None
        node = self.root
        while node is not None and node.key != key:
            parent = node
            node = node.left if key < node.key else node.right
        if node is None:
            return

        if node.left is not None and node.right is not None:
            # Two children: copy the in-order successor (leftmost node of the
            # right subtree) into this node, then unlink the successor instead.
            succ_parent = node
            succ = node.right
            while succ.left is not None:
                succ_parent = succ
                succ = succ.left
            node.key, node.value = succ.key, succ.value
            parent, node = succ_parent, succ

        # Here node has at most one child; splice that child into its place.
        child = node.left if node.left is not None else node.right
        if parent is None:
            self.root = child
        elif parent.left is node:
            parent.left = child
        else:
            parent.right = child

    def items(self) -> List[Tuple[str, Any]]:
        """Return all (key, value) pairs in ascending key order."""
        res: List[Tuple[str, Any]] = []
        pending: List[NodeBST] = []  # ancestors whose left subtree is in progress
        node = self.root
        while pending or node is not None:
            while node is not None:
                pending.append(node)
                node = node.left
            node = pending.pop()
            res.append((node.key, node.value))
            node = node.right
        return res
class HashTable:
    """Hash table with separate chaining; each used bucket is a LinkedList."""

    def __init__(self, capacity: int = 1024) -> None:
        self.capacity = capacity
        # Buckets start as None and get a list on the first insert into them.
        self.buckets: List[Optional[LinkedList]] = [None] * capacity

    def _index(self, key: str) -> int:
        """Map a key to its bucket position."""
        return hash(key) % self.capacity

    def insert(self, key: str, value: Any) -> None:
        """Store the pair in the chain of the key's bucket."""
        slot = self._index(key)
        chain = self.buckets[slot]
        if chain is None:
            chain = LinkedList()
            self.buckets[slot] = chain
        chain.insert(key, value)

    def find(self, key: str) -> Any:
        """Return the value for key, or None if its bucket is empty or lacks it."""
        chain = self.buckets[self._index(key)]
        return None if chain is None else chain.find(key)

    def delete(self, key: str) -> None:
        """Remove key from its bucket's chain when that bucket exists."""
        chain = self.buckets[self._index(key)]
        if chain is not None:
            chain.delete(key)

    def items(self) -> List[Tuple[str, Any]]:
        """Return the pairs of all buckets as one sorted list."""
        collected: List[Tuple[str, Any]] = [
            pair
            for chain in self.buckets
            if chain is not None
            for pair in chain.items()
        ]
        return sorted(collected)
def generate_records(n: int) -> List[Tuple[str, str]]:
    """Build n (name, phone) records.

    Names are "user_" plus the zero-padded index; each phone is "8-900-"
    followed by a random integer from 100 to 999.
    """
    records: List[Tuple[str, str]] = []
    for idx in range(n):
        name = f"user_{idx:05d}"
        phone = f"8-900-{random.randint(100, 999)}"
        records.append((name, phone))
    return records
def prepare_datasets(n: int) -> dict:
    """Generate n records and return them in two orders.

    The result maps "случайный" to a shuffled copy of the records and
    "сортированный" to the records sorted by name.
    """
    base = generate_records(n)

    in_random_order = list(base)
    random.shuffle(in_random_order)

    in_key_order = sorted(base, key=lambda rec: rec[0])

    datasets = {}
    datasets["случайный"] = in_random_order
    datasets["сортированный"] = in_key_order
    return datasets
@dataclass
class RunResult:
    """Timings of one benchmark row: a single repetition or an average."""

    # Name of the measured data structure.
    struct_name: str
    # Dataset ordering the structure was filled with.
    mode: str
    # Row label: "run_<k>" for a repetition, "AVG" for the averaged row.
    run_label: str
    # Seconds spent inserting the whole dataset.
    insert_time: float
    # Seconds spent on the lookup batch.
    find_time: float
    # Seconds spent on the deletion batch.
    delete_time: float
class BenchmarkRunner:
    """Runs timed insert/find/delete experiments and stores the measurements.

    Each call to run_experiment appends one RunResult per repetition plus one
    row labelled "AVG" to self.results.
    """

    def __init__(self, repeats: int = REPEATS) -> None:
        """Create a runner.

        Args:
            repeats: number of repetitions per experiment; must be at least 1.

        Raises:
            ValueError: if repeats is less than 1 (the averages would
                otherwise divide by zero).
        """
        if repeats < 1:
            raise ValueError(f"Число повторов должно быть не меньше 1: {repeats}")
        self.repeats = repeats
        self.results: List[RunResult] = []

    @staticmethod
    def _make_container(struct_name: str) -> Any:
        """Return a fresh, empty container for the given structure name."""
        if struct_name == "LinkedList":
            return LinkedList()
        if struct_name == "HashTable":
            return HashTable(capacity=1024)
        if struct_name == "BST":
            return BST()
        raise ValueError(f"Неизвестная структура: {struct_name}")

    def run_experiment(self, struct_name: str, mode: str, data: List[Tuple[str, str]]) -> None:
        """Time insert, find and delete on one structure, self.repeats times.

        Args:
            struct_name: "LinkedList", "HashTable" or "BST".
            mode: label of the dataset ordering, stored in the results as is.
            data: (key, value) records inserted in the given order.

        Raises:
            ValueError: if struct_name is not one of the known structures.
        """
        print(f"Запуск: {struct_name} ({mode})")

        # random.sample raises ValueError when asked for more items than the
        # population holds, so clamp the sample sizes for small datasets.
        found_count = min(FOUND_SAMPLE_SIZE, len(data))
        delete_count = min(DELETE_SAMPLE_SIZE, len(data))

        insert_times: List[float] = []
        find_times: List[float] = []
        delete_times: List[float] = []

        for rep in range(self.repeats):
            container = self._make_container(struct_name)

            t0 = time.perf_counter()
            for key, val in data:
                container.insert(key, val)
            insert_times.append(time.perf_counter() - t0)

            # Look up a mix of keys that exist and keys that do not.
            found_keys = [rec[0] for rec in random.sample(data, found_count)]
            not_found_keys = [f"nonexistent_{j}" for j in range(NOT_FOUND_SAMPLE_SIZE)]
            search_keys = found_keys + not_found_keys
            t0 = time.perf_counter()
            for k in search_keys:
                container.find(k)
            find_times.append(time.perf_counter() - t0)

            delete_keys = [rec[0] for rec in random.sample(data, delete_count)]
            t0 = time.perf_counter()
            for k in delete_keys:
                container.delete(k)
            delete_times.append(time.perf_counter() - t0)

            self.results.append(
                RunResult(
                    struct_name=struct_name,
                    mode=mode,
                    run_label=f"run_{rep+1}",
                    insert_time=insert_times[-1],
                    find_time=find_times[-1],
                    delete_time=delete_times[-1],
                )
            )

        self.results.append(
            RunResult(
                struct_name=struct_name,
                mode=mode,
                run_label="AVG",
                insert_time=sum(insert_times) / self.repeats,
                find_time=sum(find_times) / self.repeats,
                delete_time=sum(delete_times) / self.repeats,
            )
        )

    def save_csv(self, path: str) -> None:
        """Write every collected row, preceded by a header row, to a CSV file."""
        with open(path, "w", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            w.writerow(["Структура", "Режим", "Итерация", "Вставка", "Поиск", "Удаление"])
            w.writerows(
                [r.struct_name, r.mode, r.run_label, r.insert_time, r.find_time, r.delete_time]
                for r in self.results
            )

    def summary(self) -> List[dict]:
        """Return one dict of averages per (structure, mode) pair.

        Only rows labelled "AVG" are used. If a pair was measured more than
        once, the latest averages replace the earlier ones.
        """
        groups: dict = {}
        for r in self.results:
            if r.run_label != "AVG":
                continue
            groups[(r.struct_name, r.mode)] = {
                "name": r.struct_name,
                "mode": r.mode,
                "ins": r.insert_time,
                "find": r.find_time,
                "del": r.delete_time,
            }
        return list(groups.values())
def build_plots(summary: List[dict], n: int, path_base: str) -> None:
    """Draw two bar-chart figures from the averaged timings and save them as PNG.

    Args:
        summary: rows with the keys "name", "mode", "ins", "find" and "del".
        n: dataset size, shown in the title of the second figure.
        path_base: directory that receives "order_impact.png" and
            "struct_comparison.png".
    """
    structs = ["LinkedList", "HashTable", "BST"]
    ops = ["insert", "find", "delete"]
    op_keys = ["ins", "find", "del"]
    colors_struct = {
        "LinkedList": "#5dade2",
        "HashTable": "#e67e22",
        "BST": "#58d68d",
    }

    # Figure 1: one subplot per structure, random vs. sorted input side by side.
    fig1, axs = plt.subplots(1, 3, figsize=(18, 6))
    fig1.suptitle("Влияние порядка данных на время операций", fontsize=16, fontweight="bold")
    width = 0.35
    x = [0, 1, 2]
    for i, s_name in enumerate(structs):
        rand_row = next(
            (r for r in summary if r["name"] == s_name and r["mode"] == "случайный"),
            None,
        )
        sort_row = next(
            (r for r in summary if r["name"] == s_name and r["mode"] == "сортированный"),
            None,
        )
        if rand_row is None or sort_row is None:
            # Without both modes there is nothing to compare; leave the axes empty.
            continue
        vals_rand = [rand_row[k] for k in op_keys]
        vals_sort = [sort_row[k] for k in op_keys]
        axs[i].bar(
            [p - width / 2 for p in x],
            vals_rand,
            width,
            label="случайный",
            color=colors_struct[s_name],
        )
        axs[i].bar(
            [p + width / 2 for p in x],
            vals_sort,
            width,
            label="сортированный",
            color="#e74c3c",
            alpha=0.85,
        )
        axs[i].set_title(s_name, fontweight="bold")
        axs[i].set_xticks(x)
        axs[i].set_xticklabels(ops)
        axs[i].set_ylabel("Время (с)")
        axs[i].legend()
        axs[i].grid(axis="y", linestyle="--", alpha=0.3)
    # Lay out and save through the figure object so the result does not depend
    # on which figure pyplot currently considers active.
    fig1.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig1.savefig(os.path.join(path_base, "order_impact.png"))
    plt.close(fig1)

    # Figure 2: one subplot per operation, one bar per summary row.
    fig2, axs2 = plt.subplots(1, 3, figsize=(18, 6))
    fig2.suptitle(f"Сравнение структур данных (N={n})", fontsize=16, fontweight="bold")
    for i, op_key in enumerate(op_keys):
        plot_labels = []
        plot_values = []
        plot_colors = []
        for r in summary:
            # A real newline puts the mode abbreviation on a second label line;
            # an escaped backslash would be drawn literally as "\n".
            plot_labels.append(f"{r['name']}\n({r['mode'][:4]})")
            plot_values.append(r[op_key])
            plot_colors.append(colors_struct[r["name"]])
        bars = axs2[i].bar(plot_labels, plot_values, color=plot_colors)
        axs2[i].set_title(f"Операция: {ops[i]}", fontweight="bold")
        axs2[i].set_ylabel("Время (с)")
        axs2[i].tick_params(axis="x", rotation=15)
        for bar in bars:
            # Print the measured value just above each bar.
            h = bar.get_height()
            axs2[i].text(
                bar.get_x() + bar.get_width() / 2,
                h,
                f"{h:.4f}",
                ha="center",
                va="bottom",
                fontsize=8,
            )
    fig2.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig2.savefig(os.path.join(path_base, "struct_comparison.png"))
    plt.close(fig2)
def build_report(summary: List[dict], n: int, path: str) -> None:
    """Write the Markdown report with the averaged timings and conclusions.

    Args:
        summary: rows with the keys "name", "mode", "ins", "find" and "del";
            each row becomes one line of the results table.
        n: dataset size mentioned in the introduction.
        path: destination file, written as UTF-8.
    """
    lines = [
        "# Технический отчет: Сравнительный анализ структур данных\n",
        "## 1. Вводные данные\n",
        (
            f"Цель — оценить производительность LinkedList, HashTable и BST на массиве из {n} элементов. "
            "Рассмотрены два сценария: случайный порядок ключей и заранее отсортированный по возрастанию.\n"
        ),
        "## 2. Результаты измерений (среднее)\n",
        "| Структура | Режим | Вставка (с) | Поиск (с) | Удаление (с) |\n",
        "| :--- | :--- | :---: | :---: | :---: |\n",
    ]
    for r in summary:
        lines.append(
            f"| {r['name']} | {r['mode']} | {r['ins']:.6f} | {r['find']:.6f} | {r['del']:.6f} |\n"
        )
    lines.extend(
        [
            "\n## 3. Визуализация\n",
            "### Сравнение структур по операциям\n",
            "![Сравнение структур](data/struct_comparison.png)\n",
            "### Влияние порядка данных\n",
            "![Влияние порядка](data/order_impact.png)\n",
            "## 4. Выводы\n",
            (
                "- **BST без балансировки** на отсортированных ключах вырождается в линейную цепочку, "
                "что приводит к резкому росту времени операций (практическая сложность приближается к $O(N)$).\n"
            ),
            (
                "- **HashTable** показывает стабильную производительность, практически не зависящую от порядка входных данных. "
                "Это делает её предпочтительной для задач с интенсивным поиском и вставкой.\n"
            ),
            # The stray "ónico" fragment that followed the bold marker was removed.
            "- **LinkedList** предсказуемо медленен при поиске и удалении, так как эти операции требуют линейного прохода по списку.\n",
            (
                "- **Итог:** для систем с высокой нагрузкой на поиск/вставку оптимальным выбором является хеш-таблица; "
                "BST имеет смысл использовать только при дополнительной балансировке (AVL, красно-черное дерево и т.п.).\n"
            ),
        ]
    )
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)
def main() -> None:
    """Run every structure on every dataset, then write the CSV, plots and report."""
    datasets = prepare_datasets(N)
    runner = BenchmarkRunner(repeats=REPEATS)

    struct_names = ("LinkedList", "HashTable", "BST")
    for mode_name in datasets:
        records = datasets[mode_name]
        for struct_name in struct_names:
            runner.run_experiment(struct_name, mode_name, records)

    runner.save_csv(os.path.join(DATA_PATH, "results.csv"))

    averages = runner.summary()
    build_plots(averages, N, DATA_PATH)
    report_path = os.path.join(DOCS_PATH, "report.md")
    build_report(averages, N, report_path)
    print("Готово.")


# Run the benchmark only when the file is executed as a script.
if __name__ == "__main__":
    main()