"""Benchmark of three phone-book implementations.

The module implements a singly linked list, a separate-chaining hash table
and an unbalanced binary search tree (all built from plain dicts), measures
insert / search / delete times on shuffled and sorted input, writes the raw
measurements to CSV, draws a bar chart and prints a short analysis.
"""
import csv
import os
import random
import time

import numpy as np

# NOTE: matplotlib is imported lazily inside _plot_results() so that the data
# structures and the timing code can be imported without a plotting backend.

CSV_PATH = 'docs/data/benchmark_results.csv'
GRAPH_PATH = 'docs/benchmark_graph.png'


# ===================== 1. Linked list =====================
def ll_insert(head, name, phone):
    """Append a record to the list, or update the phone if `name` exists.

    Args:
        head: first node of the list (a dict with 'name', 'phone', 'next')
            or None for an empty list.
        name: key of the record.
        phone: value stored for the key.

    Returns:
        The head of the list (a new node when the list was empty).
    """
    if head is None:
        return {'name': name, 'phone': phone, 'next': None}
    cur = head
    while True:
        if cur['name'] == name:
            cur['phone'] = phone  # existing key: update in place
            return head
        if cur['next'] is None:
            break
        cur = cur['next']
    cur['next'] = {'name': name, 'phone': phone, 'next': None}
    return head


def ll_find(head, name):
    """Return the phone stored for `name`, or None if it is not in the list."""
    cur = head
    while cur is not None:
        if cur['name'] == name:
            return cur['phone']
        cur = cur['next']
    return None


def ll_delete(head, name):
    """Unlink the first node whose key is `name` and return the new head.

    The list is returned unchanged when the key is absent.
    """
    if head is None:
        return None
    if head['name'] == name:
        return head['next']
    cur = head
    while cur['next'] is not None:
        if cur['next']['name'] == name:
            cur['next'] = cur['next']['next']
            return head
        cur = cur['next']
    return head


def ll_list_all(head):
    """Return all (name, phone) pairs of the list sorted by name."""
    result = []
    cur = head
    while cur is not None:
        result.append((cur['name'], cur['phone']))
        cur = cur['next']
    result.sort(key=lambda pair: pair[0])
    return result


# ===================== 2. Hash table =====================
def ht_hash(name, size):
    """Polynomial (base 31) hash of `name` reduced modulo `size`."""
    h = 0
    for ch in name:
        h = (h * 31 + ord(ch)) % size
    return h


def ht_insert(buckets, name, phone):
    """Insert or update a record; each bucket is a linked-list head or None."""
    idx = ht_hash(name, len(buckets))
    buckets[idx] = ll_insert(buckets[idx], name, phone)


def ht_find(buckets, name):
    """Return the phone stored for `name`, or None if it is absent."""
    idx = ht_hash(name, len(buckets))
    return ll_find(buckets[idx], name)


def ht_delete(buckets, name):
    """Remove the record with key `name` from its bucket, if present."""
    idx = ht_hash(name, len(buckets))
    buckets[idx] = ll_delete(buckets[idx], name)


def ht_list_all(buckets):
    """Return all (name, phone) pairs of the table sorted by name."""
    result = []
    for head in buckets:
        cur = head
        while cur is not None:
            result.append((cur['name'], cur['phone']))
            cur = cur['next']
    result.sort(key=lambda pair: pair[0])
    return result


# ===================== 3. BST =====================
def bst_insert(root, name, phone):
    """Iteratively insert a record (or update an existing key).

    The loop-based descent avoids recursion, so a degenerate (list-like)
    tree does not hit the interpreter's recursion limit.

    Returns:
        The root of the tree (a new node when the tree was empty).
    """
    new_node = {'name': name, 'phone': phone, 'left': None, 'right': None}
    if root is None:
        return new_node
    cur = root
    while True:
        if name < cur['name']:
            if cur['left'] is None:
                cur['left'] = new_node
                break
            cur = cur['left']
        elif name > cur['name']:
            if cur['right'] is None:
                cur['right'] = new_node
                break
            cur = cur['right']
        else:
            cur['phone'] = phone  # existing key: update
            break
    return root


def bst_find(root, name):
    """Return the phone stored for `name`, or None if it is not in the tree."""
    cur = root
    while cur is not None:
        if name == cur['name']:
            return cur['phone']
        cur = cur['left'] if name < cur['name'] else cur['right']
    return None


def bst_delete(root, name):
    """Delete the node with key `name` and return the (possibly new) root.

    The tree is returned unchanged when the key is absent. A node with two
    children is overwritten with its in-order successor (the minimum of the
    right subtree), which is then unlinked.
    """
    # Locate the node and its parent.
    parent = None
    cur = root
    while cur is not None and cur['name'] != name:
        parent = cur
        cur = cur['left'] if name < cur['name'] else cur['right']
    if cur is None:  # key not found
        return root

    if cur['left'] is not None and cur['right'] is not None:
        # Two children: find the minimum of the right subtree.
        succ_parent = cur
        succ = cur['right']
        while succ['left'] is not None:
            succ_parent = succ
            succ = succ['left']
        # Copy the successor's payload into the node being "deleted".
        cur['name'] = succ['name']
        cur['phone'] = succ['phone']
        # Unlink the successor (it has no left child). Identity, not dict
        # equality, decides which link of succ_parent points at it.
        if succ_parent['left'] is succ:
            succ_parent['left'] = succ['right']
        else:
            succ_parent['right'] = succ['right']
        return root

    # Zero or one child: splice the only child (or None) in place of cur.
    child = cur['right'] if cur['left'] is None else cur['left']
    if parent is None:
        return child
    if parent['left'] is cur:
        parent['left'] = child
    else:
        parent['right'] = child
    return root


def bst_list_all(root):
    """Return all (name, phone) pairs in key order (iterative in-order walk)."""
    result = []
    stack = []
    cur = root
    while stack or cur is not None:
        while cur is not None:
            stack.append(cur)
            cur = cur['left']
        cur = stack.pop()
        result.append((cur['name'], cur['phone']))
        cur = cur['right']
    return result


# ===================== Data generation =====================
def generate_data(n=10000):
    """Build `n` synthetic (name, phone) records.

    Returns:
        A tuple (shuffled, sorted_rec): the same records in random order
        (shuffled with the global `random` state) and sorted by name.
    """
    records = [(f"User_{i:05d}", f"8800{i:07d}") for i in range(n)]
    shuffled = records[:]
    random.shuffle(shuffled)
    sorted_rec = sorted(records, key=lambda rec: rec[0])
    return shuffled, sorted_rec


# ===================== Measurements =====================
def run_experiment(struct_type, records, n_searches=100, n_missing=10,
                   n_deletes=50, repeats=5, ht_size=512):
    """Time insert, search and delete for one structure on one data set.

    Args:
        struct_type: 'll', 'ht' or 'bst'. Any other value is treated as
            'bst' (kept for backward compatibility).
        records: list of (name, phone) pairs, inserted in the given order.
        n_searches: number of existing keys looked up per repeat (capped at
            len(records)).
        n_missing: number of absent keys looked up per repeat.
        n_deletes: number of keys deleted per repeat (capped at
            len(records)).
        repeats: how many times the whole insert/search/delete cycle runs.
        ht_size: number of buckets used when struct_type is 'ht'.

    Returns:
        A dict with 'struct', the per-phase averages ('insert_avg',
        'search_avg', 'delete_avg') and the raw per-repeat timings
        ('insert_all', 'search_all', 'delete_all'), all in seconds.

    Raises:
        ValueError: if `repeats` is less than 1.
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    all_insert_times = []
    all_search_times = []
    all_delete_times = []

    for _ in range(repeats):
        # --- fresh, empty structure for every repeat ---
        head = None
        root = None
        buckets = [None] * ht_size if struct_type == 'ht' else None

        # --- insert ---
        start = time.perf_counter()
        if struct_type == 'll':
            for name, phone in records:
                head = ll_insert(head, name, phone)
        elif struct_type == 'ht':
            for name, phone in records:
                ht_insert(buckets, name, phone)
        else:
            for name, phone in records:
                root = bst_insert(root, name, phone)
        all_insert_times.append(time.perf_counter() - start)

        # --- search: a mix of existing and missing keys in random order ---
        existing = random.sample(records, min(n_searches, len(records)))
        missing = [(f"Missing_{i}", "") for i in range(n_missing)]
        test_keys = existing + missing
        random.shuffle(test_keys)

        start = time.perf_counter()
        if struct_type == 'll':
            for name, _ in test_keys:
                ll_find(head, name)
        elif struct_type == 'ht':
            for name, _ in test_keys:
                ht_find(buckets, name)
        else:
            for name, _ in test_keys:
                bst_find(root, name)
        all_search_times.append(time.perf_counter() - start)

        # --- delete ---
        del_sample = random.sample(records, min(n_deletes, len(records)))

        start = time.perf_counter()
        if struct_type == 'll':
            for name, _ in del_sample:
                head = ll_delete(head, name)
        elif struct_type == 'ht':
            for name, _ in del_sample:
                ht_delete(buckets, name)
        else:
            for name, _ in del_sample:
                root = bst_delete(root, name)
        all_delete_times.append(time.perf_counter() - start)

    return {
        'struct': struct_type,
        'insert_avg': sum(all_insert_times) / repeats,
        'search_avg': sum(all_search_times) / repeats,
        'delete_avg': sum(all_delete_times) / repeats,
        'insert_all': all_insert_times,
        'search_all': all_search_times,
        'delete_all': all_delete_times,
    }


def _save_csv(results, path=CSV_PATH):
    """Write one CSV row per (structure, order, run) with the raw timings."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['structure', 'order', 'run', 'insert', 'search', 'delete'])
        for r in results:
            runs = zip(r['insert_all'], r['search_all'], r['delete_all'])
            for run_no, (ins, srch, dele) in enumerate(runs, start=1):
                writer.writerow([r['label'], r['order'], run_no, ins, srch, dele])
    print(f"\nCSV сохранён в {path}")


def _plot_results(results, n, graph_path=GRAPH_PATH):
    """Draw grouped bars (shuffled vs sorted) per metric, save and show them."""
    import matplotlib.pyplot as plt  # lazy: only needed when plotting

    by_key = {(r['label'], r['order']): r for r in results}

    structures = ['LinkedList', 'HashTable', 'BST']
    metrics = ['insert', 'search', 'delete']
    metric_names = {'insert': 'Вставка (сек)',
                    'search': 'Поиск (сек)',
                    'delete': 'Удаление (сек)'}
    colors = {'shuffled': '#4CAF50', 'sorted': '#FF5722'}

    fig, axes = plt.subplots(1, 3, figsize=(16, 5.5))
    x = np.arange(len(structures))
    width = 0.35

    for ax, metric in zip(axes, metrics):
        key = f'{metric}_avg'
        shuffled_vals = [by_key[(s, 'shuffled')][key] for s in structures]
        sorted_vals = [by_key[(s, 'sorted')][key] for s in structures]

        bars1 = ax.bar(x - width/2, shuffled_vals, width,
                       label='Случайный порядок', color=colors['shuffled'],
                       edgecolor='black', linewidth=0.5)
        bars2 = ax.bar(x + width/2, sorted_vals, width,
                       label='Отсортированный порядок', color=colors['sorted'],
                       edgecolor='black', linewidth=0.5)

        # Value labels just above each bar; the offset is 1% of the tallest
        # bar in the same group.
        for bars, vals in ((bars1, shuffled_vals), (bars2, sorted_vals)):
            offset = max(vals) * 0.01
            for bar in bars:
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width()/2., height + offset,
                        f'{height:.4f}', ha='center', va='bottom', fontsize=7)

        ax.set_title(metric_names[metric], fontsize=12, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(structures, fontsize=10)
        ax.legend(fontsize=9)
        ax.grid(axis='y', alpha=0.3, linestyle='--')

        # Search times differ by orders of magnitude, so use a log scale.
        if metric == 'search':
            ax.set_yscale('log')
            ax.set_ylabel('Время (сек, лог. шкала)', fontsize=9)
        else:
            ax.set_ylabel('Время (сек)', fontsize=9)

    # Thousands separated by a space, e.g. 10000 -> "10 000".
    n_text = f"{n:,}".replace(",", " ")
    plt.suptitle(
        f'Сравнение производительности структур данных (N = {n_text} записей)',
        fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()

    os.makedirs('docs', exist_ok=True)
    plt.savefig(graph_path, dpi=150, bbox_inches='tight')
    plt.show()
    print(f"График сохранён в {graph_path}")


def _print_analysis(results):
    """Print the textual comparison of the measured averages."""
    by_key = {(r['label'], r['order']): r for r in results}

    print("АНАЛИЗ РЕЗУЛЬТАТОВ")

    print("\n1. Влияние порядка данных на BST:")
    bst_shuffled_insert = by_key[('BST', 'shuffled')]['insert_avg']
    bst_sorted_insert = by_key[('BST', 'sorted')]['insert_avg']
    print(f" - Случайные данные: {bst_shuffled_insert:.6f} сек")
    print(f" - Отсортированные данные: {bst_sorted_insert:.6f} сек")
    print(f" - Замедление в {bst_sorted_insert/bst_shuffled_insert:.1f} раз")
    print(" Причина: на отсортированных данных BST вырождается в связный список (глубина = N)")

    print("\n2. Стабильность хеш-таблицы:")
    ht_shuffled = by_key[('HashTable', 'shuffled')]['insert_avg']
    ht_sorted = by_key[('HashTable', 'sorted')]['insert_avg']
    print(f" - Случайные: {ht_shuffled:.6f} сек")
    print(f" - Отсортированные: {ht_sorted:.6f} сек")
    print(" Причина: хеш-функция равномерно распределяет ключи независимо от порядка")

    print("\n3. Медленный поиск в связном списке:")
    ll_search = by_key[('LinkedList', 'shuffled')]['search_avg']
    ht_search = by_key[('HashTable', 'shuffled')]['search_avg']
    print(f" - LinkedList: {ll_search:.6f} сек")
    print(f" - HashTable: {ht_search:.6f} сек")
    print(f" - Хеш-таблица быстрее в {ll_search/ht_search:.1f} раз")
    print(" Причина: поиск в списке всегда O(n), в хеш-таблице ~O(1)")

    print("\n4. Удаление:")
    for label in ['LinkedList', 'HashTable', 'BST']:
        del_shuff = by_key[(label, 'shuffled')]['delete_avg']
        del_sort = by_key[(label, 'sorted')]['delete_avg']
        print(f" - {label:15}: случ.={del_shuff:.6f} сек, отсорт.={del_sort:.6f} сек")

    print("\n5. Рекомендации:")
    print(" - Частый поиск + вставки → Хеш-таблица")
    print(" - Нужна сортировка «из коробки» → Сбалансированное BST (AVL/Красно-чёрное)")
    print(" - Только добавление в конец → Связный список")
    print(" - Обычный BST опасен на реальных частично упорядоченных данных!")
    print("="*60)


def main():
    """Run every structure on both data orders, then save, plot and report."""
    random.seed(42)
    N = 10000
    shuffled, sorted_rec = generate_data(N)

    results = []
    for struct_name, label in [('ll', 'LinkedList'), ('ht', 'HashTable'), ('bst', 'BST')]:
        for order_name, records in [('shuffled', shuffled), ('sorted', sorted_rec)]:
            print(f"Тестирую {label} на {order_name} данных...")
            res = run_experiment(struct_name, records)
            res['order'] = order_name
            res['label'] = label
            results.append(res)
            print(f"{label:15} | {order_name:10} | insert: {res['insert_avg']:.6f}s | "
                  f"search: {res['search_avg']:.6f}s | delete: {res['delete_avg']:.6f}s")

    _save_csv(results)
    _plot_results(results, N)
    _print_analysis(results)


if __name__ == '__main__':
    main()