"""Benchmark of three key-value containers: linked list, hash table and BST.

The script fills each container with generated (name, phone) records in
random and in sorted order, times insert / find / delete, and writes a CSV
with raw timings, two PNG charts and a Markdown report.
"""
import time
import random
import csv
import os
import sys
from dataclasses import dataclass, field
from typing import Optional, List, Tuple, Any

import matplotlib.pyplot as plt

# Kept from the original module. The BST below no longer recurses, so the
# raised limit is not required by this file's own code any more.
sys.setrecursionlimit(20000)

# NOTE(review): absolute, machine-specific output location. Adjust before
# running on another machine; it is created on import by the loop below.
BASE_PATH = r"C:\Users\andre\2026-rff_mp\smirnovad\lab1"
DOCS_PATH = os.path.join(BASE_PATH, "docs")
DATA_PATH = os.path.join(DOCS_PATH, "data")

for path in (DOCS_PATH, DATA_PATH):
    os.makedirs(path, exist_ok=True)

N = 10_000                  # number of records per dataset
REPEATS = 5                 # timed repetitions per (structure, mode)
FOUND_SAMPLE_SIZE = 100     # existing keys looked up per repetition
NOT_FOUND_SAMPLE_SIZE = 10  # missing keys looked up per repetition
DELETE_SAMPLE_SIZE = 50     # keys deleted per repetition


@dataclass
class NodeLL:
    """Node of a singly linked list."""
    key: str
    value: Any
    next: Optional["NodeLL"] = None


class LinkedList:
    """Singly linked list that inserts new nodes at the head."""

    def __init__(self) -> None:
        self.head: Optional[NodeLL] = None

    def insert(self, key: str, value: Any) -> None:
        """Prepend a node; an existing node with the same key is not touched."""
        self.head = NodeLL(key, value, self.head)

    def find(self, key: str) -> Any:
        """Return the value of the first node with ``key``, or ``None``."""
        cur = self.head
        while cur is not None:
            if cur.key == key:
                return cur.value
            cur = cur.next
        return None

    def delete(self, key: str) -> None:
        """Unlink the first node with ``key``; do nothing if it is absent."""
        cur = self.head
        prev: Optional[NodeLL] = None

        while cur is not None:
            if cur.key == key:
                if prev is None:
                    self.head = cur.next
                else:
                    prev.next = cur.next
                return
            prev = cur
            cur = cur.next

    def items(self) -> List[Tuple[str, Any]]:
        """Return all ``(key, value)`` pairs, sorted."""
        res: List[Tuple[str, Any]] = []
        cur = self.head
        while cur is not None:
            res.append((cur.key, cur.value))
            cur = cur.next
        return sorted(res)


@dataclass
class NodeBST:
    """Node of a binary search tree."""
    key: str
    value: Any
    left: Optional["NodeBST"] = None
    right: Optional["NodeBST"] = None


class BST:
    """Unbalanced binary search tree.

    All traversals are iterative: on sorted input the tree degenerates into
    a chain as long as the input, which a recursive implementation can only
    walk with a raised recursion limit.
    """

    def __init__(self) -> None:
        self.root: Optional[NodeBST] = None

    def insert(self, key: str, value: Any) -> None:
        """Insert ``key`` or overwrite the value of an existing ``key``."""
        self.root = self._insert(self.root, key, value)

    def _insert(self, node: Optional[NodeBST], key: str, value: Any) -> NodeBST:
        """Insert into the subtree rooted at ``node`` and return its root."""
        if node is None:
            return NodeBST(key, value)

        cur = node
        while True:
            if key < cur.key:
                if cur.left is None:
                    cur.left = NodeBST(key, value)
                    break
                cur = cur.left
            elif key > cur.key:
                if cur.right is None:
                    cur.right = NodeBST(key, value)
                    break
                cur = cur.right
            else:
                cur.value = value
                break
        return node

    def find(self, key: str) -> Any:
        """Return the value stored under ``key``, or ``None``."""
        return self._find(self.root, key)

    def _find(self, node: Optional[NodeBST], key: str) -> Any:
        """Search the subtree rooted at ``node`` for ``key``."""
        while node is not None:
            if key == node.key:
                return node.value
            node = node.left if key < node.key else node.right
        return None

    def delete(self, key: str) -> None:
        """Remove ``key`` from the tree; do nothing if it is absent."""
        self.root = self._delete(self.root, key)

    def _delete(self, node: Optional[NodeBST], key: str) -> Optional[NodeBST]:
        """Delete ``key`` from the subtree at ``node`` and return its new root."""
        parent: Optional[NodeBST] = None
        cur = node
        while cur is not None:
            if key < cur.key:
                parent, cur = cur, cur.left
            elif key > cur.key:
                parent, cur = cur, cur.right
            else:
                break
        if cur is None:
            return node

        if cur.left is not None and cur.right is not None:
            # Two children: copy the in-order successor into this node, then
            # unlink the successor, which has no left child by construction.
            succ_parent = cur
            succ = cur.right
            while succ.left is not None:
                succ_parent, succ = succ, succ.left
            cur.key, cur.value = succ.key, succ.value
            parent, cur = succ_parent, succ

        # ``cur`` now has at most one child; splice that child into its place.
        child = cur.left if cur.left is not None else cur.right
        if parent is None:
            return child
        if parent.left is cur:
            parent.left = child
        else:
            parent.right = child
        return node

    def items(self) -> List[Tuple[str, Any]]:
        """Return all ``(key, value)`` pairs in ascending key order."""
        res: List[Tuple[str, Any]] = []
        self._inorder(self.root, res)
        return res

    def _inorder(self, node: Optional[NodeBST], out: List[Tuple[str, Any]]) -> None:
        """Append the in-order traversal of the subtree at ``node`` to ``out``."""
        stack: List[NodeBST] = []
        cur = node
        while stack or cur is not None:
            while cur is not None:
                stack.append(cur)
                cur = cur.left
            cur = stack.pop()
            out.append((cur.key, cur.value))
            cur = cur.right


class HashTable:
    """Hash table with separate chaining; each bucket is a ``LinkedList``."""

    def __init__(self, capacity: int = 1024) -> None:
        self.capacity = capacity
        # Buckets are created lazily on first insert into a slot.
        self.buckets: List[Optional[LinkedList]] = [None] * capacity

    def _index(self, key: str) -> int:
        """Map ``key`` to a bucket index."""
        return hash(key) % self.capacity

    def insert(self, key: str, value: Any) -> None:
        """Prepend ``(key, value)`` to the bucket chain for ``key``."""
        idx = self._index(key)
        bucket = self.buckets[idx]
        if bucket is None:
            bucket = LinkedList()
            self.buckets[idx] = bucket
        bucket.insert(key, value)

    def find(self, key: str) -> Any:
        """Return the value stored under ``key``, or ``None``."""
        bucket = self.buckets[self._index(key)]
        if bucket is None:
            return None
        return bucket.find(key)

    def delete(self, key: str) -> None:
        """Remove the first entry for ``key``; do nothing if it is absent."""
        bucket = self.buckets[self._index(key)]
        if bucket is None:
            return
        bucket.delete(key)

    def items(self) -> List[Tuple[str, Any]]:
        """Return all ``(key, value)`` pairs, sorted."""
        res: List[Tuple[str, Any]] = []
        for bucket in self.buckets:
            if bucket is not None:
                res.extend(bucket.items())
        return sorted(res)


def generate_records(n: int) -> List[Tuple[str, str]]:
    """Generate ``n`` (name, phone) records with names ``user_00000`` onwards."""
    return [(f"user_{i:05d}", f"8-900-{random.randint(100, 999)}") for i in range(n)]


def prepare_datasets(n: int) -> dict:
    """Build the two benchmark datasets: shuffled and sorted by name."""
    raw = generate_records(n)
    shuffled = raw[:]
    random.shuffle(shuffled)
    sorted_data = sorted(raw, key=lambda x: x[0])
    return {
        "случайный": shuffled,
        "сортированный": sorted_data,
    }


@dataclass
class RunResult:
    """Timings (seconds) of one repetition, or of the ``AVG`` row."""
    struct_name: str
    mode: str
    run_label: str
    insert_time: float
    find_time: float
    delete_time: float


class BenchmarkRunner:
    """Runs timed insert/find/delete experiments and collects the results."""

    def __init__(self, repeats: int = REPEATS) -> None:
        self.repeats = repeats
        self.results: List[RunResult] = []

    @staticmethod
    def _make_container(struct_name: str) -> Any:
        """Create an empty container by name; raise ``ValueError`` if unknown."""
        if struct_name == "LinkedList":
            return LinkedList()
        if struct_name == "HashTable":
            return HashTable(capacity=1024)
        if struct_name == "BST":
            return BST()
        raise ValueError(f"Неизвестная структура: {struct_name}")

    def run_experiment(self, struct_name: str, mode: str, data: List[Tuple[str, str]]) -> None:
        """Time ``self.repeats`` runs of one structure on one dataset.

        Appends one ``RunResult`` per repetition plus a final ``AVG`` row to
        ``self.results``. Sample sizes are clamped to ``len(data)`` so small
        datasets work; no ``AVG`` row is added when there were no repetitions.

        Raises:
            ValueError: if ``struct_name`` is not a known structure.
        """
        print(f"Запуск: {struct_name} ({mode})")

        insert_times: List[float] = []
        find_times: List[float] = []
        delete_times: List[float] = []

        found_count = min(FOUND_SAMPLE_SIZE, len(data))
        delete_count = min(DELETE_SAMPLE_SIZE, len(data))

        for rep in range(self.repeats):
            container = self._make_container(struct_name)

            t0 = time.perf_counter()
            for key, val in data:
                container.insert(key, val)
            insert_times.append(time.perf_counter() - t0)

            found_keys = [d[0] for d in random.sample(data, found_count)]
            not_found_keys = [f"nonexistent_{j}" for j in range(NOT_FOUND_SAMPLE_SIZE)]
            search_keys = found_keys + not_found_keys

            t0 = time.perf_counter()
            for k in search_keys:
                container.find(k)
            find_times.append(time.perf_counter() - t0)

            delete_keys = [d[0] for d in random.sample(data, delete_count)]
            t0 = time.perf_counter()
            for k in delete_keys:
                container.delete(k)
            delete_times.append(time.perf_counter() - t0)

            self.results.append(
                RunResult(
                    struct_name=struct_name,
                    mode=mode,
                    run_label=f"run_{rep+1}",
                    insert_time=insert_times[-1],
                    find_time=find_times[-1],
                    delete_time=delete_times[-1],
                )
            )

        runs = len(insert_times)
        if runs == 0:
            # Nothing was measured, so there is no meaningful average.
            return

        self.results.append(
            RunResult(
                struct_name=struct_name,
                mode=mode,
                run_label="AVG",
                insert_time=sum(insert_times) / runs,
                find_time=sum(find_times) / runs,
                delete_time=sum(delete_times) / runs,
            )
        )

    def save_csv(self, path: str) -> None:
        """Write every collected row (runs and averages) to a CSV file."""
        with open(path, "w", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            w.writerow(["Структура", "Режим", "Итерация", "Вставка", "Поиск", "Удаление"])
            for r in self.results:
                w.writerow([
                    r.struct_name,
                    r.mode,
                    r.run_label,
                    r.insert_time,
                    r.find_time,
                    r.delete_time,
                ])

    def summary(self) -> List[dict]:
        """Return the ``AVG`` rows as dicts, one per (structure, mode)."""
        groups: dict = {}
        for r in self.results:
            if r.run_label != "AVG":
                continue
            # A repeated experiment overwrites the earlier average in place.
            groups[(r.struct_name, r.mode)] = {
                "name": r.struct_name,
                "mode": r.mode,
                "ins": r.insert_time,
                "find": r.find_time,
                "del": r.delete_time,
            }
        return list(groups.values())


def build_plots(summary: List[dict], n: int, path_base: str) -> None:
    """Render ``order_impact.png`` and ``struct_comparison.png`` into ``path_base``.

    Args:
        summary: rows as produced by ``BenchmarkRunner.summary``.
        n: dataset size, shown in the second chart's title.
        path_base: directory the PNG files are written to.
    """
    structs = ["LinkedList", "HashTable", "BST"]
    ops = ["insert", "find", "delete"]
    op_keys = ["ins", "find", "del"]
    colors_struct = {
        "LinkedList": "#5dade2",
        "HashTable": "#e67e22",
        "BST": "#58d68d",
    }

    # Figure 1: one panel per structure, random vs sorted input side by side.
    fig1, axs = plt.subplots(1, 3, figsize=(18, 6))
    fig1.suptitle("Влияние порядка данных на время операций", fontsize=16, fontweight="bold")

    width = 0.35
    x = [0, 1, 2]

    for i, s_name in enumerate(structs):
        rand_row = next(
            (r for r in summary if r["name"] == s_name and r["mode"] == "случайный"),
            None,
        )
        sort_row = next(
            (r for r in summary if r["name"] == s_name and r["mode"] == "сортированный"),
            None,
        )
        if rand_row is None or sort_row is None:
            # Leave the panel empty when either mode was not measured.
            continue

        vals_rand = [rand_row[k] for k in op_keys]
        vals_sort = [sort_row[k] for k in op_keys]

        axs[i].bar(
            [p - width / 2 for p in x],
            vals_rand,
            width,
            label="случайный",
            color=colors_struct[s_name],
        )
        axs[i].bar(
            [p + width / 2 for p in x],
            vals_sort,
            width,
            label="сортированный",
            color="#e74c3c",
            alpha=0.85,
        )

        axs[i].set_title(s_name, fontweight="bold")
        axs[i].set_xticks(x)
        axs[i].set_xticklabels(ops)
        axs[i].set_ylabel("Время (с)")
        axs[i].legend()
        axs[i].grid(axis="y", linestyle="--", alpha=0.3)

    fig1.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig1.savefig(os.path.join(path_base, "order_impact.png"))
    plt.close(fig1)

    # Figure 2: one panel per operation, one bar per (structure, mode).
    fig2, axs2 = plt.subplots(1, 3, figsize=(18, 6))
    fig2.suptitle(f"Сравнение структур данных (N={n})", fontsize=16, fontweight="bold")

    for i, op_key in enumerate(op_keys):
        # A real newline puts the mode on a second label line.
        plot_labels = [f"{r['name']}\n({r['mode'][:4]})" for r in summary]
        plot_values = [r[op_key] for r in summary]
        plot_colors = [colors_struct[r["name"]] for r in summary]

        bars = axs2[i].bar(plot_labels, plot_values, color=plot_colors)
        axs2[i].set_title(f"Операция: {ops[i]}", fontweight="bold")
        axs2[i].set_ylabel("Время (с)")
        axs2[i].tick_params(axis="x", rotation=15)

        for bar in bars:
            h = bar.get_height()
            axs2[i].text(
                bar.get_x() + bar.get_width() / 2,
                h,
                f"{h:.4f}",
                ha="center",
                va="bottom",
                fontsize=8,
            )

    fig2.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig2.savefig(os.path.join(path_base, "struct_comparison.png"))
    plt.close(fig2)


def build_report(summary: List[dict], n: int, path: str) -> None:
    """Write the Markdown report with the averages table and conclusions.

    Args:
        summary: rows as produced by ``BenchmarkRunner.summary``.
        n: dataset size, mentioned in the introduction.
        path: destination file; image links are relative to its directory.
    """
    lines = []
    lines.append("# Технический отчет: Сравнительный анализ структур данных\n")
    lines.append("## 1. Вводные данные\n")
    lines.append(
        f"Цель — оценить производительность LinkedList, HashTable и BST на массиве из {n} элементов. "
        "Рассмотрены два сценария: случайный порядок ключей и заранее отсортированный по возрастанию.\n"
    )

    lines.append("## 2. Результаты измерений (среднее)\n")
    lines.append("| Структура | Режим | Вставка (с) | Поиск (с) | Удаление (с) |\n")
    lines.append("| :--- | :--- | :---: | :---: | :---: |\n")
    for r in summary:
        lines.append(
            f"| {r['name']} | {r['mode']} | {r['ins']:.6f} | {r['find']:.6f} | {r['del']:.6f} |\n"
        )

    lines.append("\n## 3. Визуализация\n")
    lines.append("### Сравнение структур по операциям\n")
    lines.append("![Сравнение структур](data/struct_comparison.png)\n")
    lines.append("### Влияние порядка данных\n")
    lines.append("![Влияние порядка](data/order_impact.png)\n")

    lines.append("## 4. Выводы\n")
    lines.append(
        "- **BST без балансировки** на отсортированных ключах вырождается в линейную цепочку, "
        "что приводит к резкому росту времени операций (практическая сложность приближается к $O(N)$).\n"
    )
    lines.append(
        "- **HashTable** показывает стабильную производительность, практически не зависящую от порядка входных данных. "
        "Это делает её предпочтительной для задач с интенсивным поиском и вставкой.\n"
    )
    lines.append(
        "- **LinkedList** предсказуемо медленен при поиске и удалении, так как эти операции требуют линейного прохода по списку.\n"
    )
    lines.append(
        "- **Итог:** для систем с высокой нагрузкой на поиск/вставку оптимальным выбором является хеш-таблица; "
        "BST имеет смысл использовать только при дополнительной балансировке (AVL, красно-черное дерево и т.п.).\n"
    )

    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)


def main() -> None:
    """Run every (dataset, structure) experiment and write CSV, plots, report."""
    datasets = prepare_datasets(N)
    runner = BenchmarkRunner(repeats=REPEATS)

    for mode_name, data in datasets.items():
        for struct_name in ["LinkedList", "HashTable", "BST"]:
            runner.run_experiment(struct_name, mode_name, data)

    csv_path = os.path.join(DATA_PATH, "results.csv")
    runner.save_csv(csv_path)

    summary = runner.summary()

    build_plots(summary, N, DATA_PATH)
    build_report(summary, N, os.path.join(DOCS_PATH, "report.md"))

    print("Готово.")


if __name__ == "__main__":
    main()