"""Benchmark of three hand-written phone-book containers.

The script stores ``(name, phone)`` records in a singly linked list, a
chained hash table and an unbalanced binary search tree, times insert /
search / delete on shuffled and on sorted input, writes the raw timings to
a CSV file, draws a bar chart and prints a short textual analysis.

All containers are built from plain dicts:

* list node: ``{'name', 'phone', 'next'}``
* tree node: ``{'name', 'phone', 'left', 'right'}``
* hash table: a Python list of bucket heads, each bucket being a linked list
"""
import csv
import os
import random
import time
from operator import itemgetter

# (key understood by run_experiment, label used in reports and charts)
STRUCTURES = [('ll', 'LinkedList'), ('ht', 'HashTable'), ('bst', 'BST')]
# Number of buckets used for the hash table inside run_experiment.
HASH_TABLE_SIZE = 512
CSV_PATH = 'docs/data/benchmark_results.csv'
# NOTE(review): the CSV goes under docs/data/ while the chart goes directly
# under docs/ -- confirm that this split is intended.
GRAPH_PATH = 'docs/benchmark_graph.png'


# ===================== 1. Linked list =====================
def _iter_chain(head):
    """Yield ``(name, phone)`` for every node of a linked list, head first."""
    node = head
    while node is not None:
        yield node['name'], node['phone']
        node = node['next']


def ll_insert(head, name, phone):
    """Insert ``name`` at the tail, or update its phone if already present.

    The whole list is walked to look for an existing entry, so one insert
    costs O(n).

    Returns:
        The head of the list (a new node when ``head`` was ``None``).
    """
    if head is None:
        return {'name': name, 'phone': phone, 'next': None}
    node = head
    while True:
        if node['name'] == name:
            node['phone'] = phone
            return head
        if node['next'] is None:
            # Reached the tail without a match: append. The node is only
            # allocated here, never on the update path.
            node['next'] = {'name': name, 'phone': phone, 'next': None}
            return head
        node = node['next']


def ll_find(head, name):
    """Return the phone stored for ``name``, or ``None`` if it is absent."""
    node = head
    while node is not None:
        if node['name'] == name:
            return node['phone']
        node = node['next']
    return None


def ll_delete(head, name):
    """Unlink the first node named ``name`` and return the (new) head.

    The list is returned unchanged when ``name`` is not present.
    """
    if head is None:
        return None
    if head['name'] == name:
        return head['next']
    prev = head
    while prev['next'] is not None:
        if prev['next']['name'] == name:
            prev['next'] = prev['next']['next']
            break
        prev = prev['next']
    return head


def ll_list_all(head):
    """Return all ``(name, phone)`` pairs of the list, sorted by name."""
    return sorted(_iter_chain(head), key=itemgetter(0))


# ===================== 2. Hash table =====================
def ht_hash(name, size):
    """Polynomial (base 31) hash of ``name`` reduced modulo ``size``."""
    h = 0
    for ch in name:
        h = (h * 31 + ord(ch)) % size
    return h


def ht_insert(buckets, name, phone):
    """Insert or update ``name`` in the bucket chosen by :func:`ht_hash`.

    ``buckets`` is modified in place; nothing is returned.
    """
    idx = ht_hash(name, len(buckets))
    buckets[idx] = ll_insert(buckets[idx], name, phone)


def ht_find(buckets, name):
    """Return the phone stored for ``name``, or ``None`` if it is absent."""
    idx = ht_hash(name, len(buckets))
    return ll_find(buckets[idx], name)


def ht_delete(buckets, name):
    """Remove ``name`` from its bucket (no-op when absent), in place."""
    idx = ht_hash(name, len(buckets))
    buckets[idx] = ll_delete(buckets[idx], name)


def ht_list_all(buckets):
    """Return all ``(name, phone)`` pairs of the table, sorted by name."""
    entries = [pair for head in buckets for pair in _iter_chain(head)]
    entries.sort(key=itemgetter(0))
    return entries


# ===================== 3. BST =====================
def bst_insert(root, name, phone):
    """Insert or update ``name`` in an unbalanced BST and return its root.

    Iterative on purpose: a degenerate (list-shaped) tree can be as deep as
    the number of records, which would overflow the recursion limit.
    """
    new_node = {'name': name, 'phone': phone, 'left': None, 'right': None}
    if root is None:
        return new_node

    cur = root
    while True:
        if name < cur['name']:
            if cur['left'] is None:
                cur['left'] = new_node
                break
            cur = cur['left']
        elif name > cur['name']:
            if cur['right'] is None:
                cur['right'] = new_node
                break
            cur = cur['right']
        else:
            cur['phone'] = phone  # existing key: update in place
            break
    return root


def bst_find(root, name):
    """Return the phone stored for ``name``, or ``None`` if it is absent."""
    cur = root
    while cur is not None:
        if name == cur['name']:
            return cur['phone']
        cur = cur['left'] if name < cur['name'] else cur['right']
    return None


def bst_delete(root, name):
    """Delete ``name`` from the tree and return the (possibly new) root.

    The tree is returned unchanged when ``name`` is not present.
    """
    # Locate the node together with its parent.
    parent = None
    cur = root
    while cur is not None and cur['name'] != name:
        parent = cur
        cur = cur['left'] if name < cur['name'] else cur['right']
    if cur is None:
        return root

    if cur['left'] is not None and cur['right'] is not None:
        # Two children: overwrite cur with its in-order successor (leftmost
        # node of the right subtree), then unlink that successor instead.
        succ_parent = cur
        succ = cur['right']
        while succ['left'] is not None:
            succ_parent = succ
            succ = succ['left']
        cur['name'] = succ['name']
        cur['phone'] = succ['phone']
        # Nodes are dicts, so `==` would compare contents; identity is what
        # tells us which link of succ_parent points at succ.
        if succ_parent['left'] is succ:
            succ_parent['left'] = succ['right']
        else:
            succ_parent['right'] = succ['right']
        return root

    # Zero or one child: splice that child (or None) in place of cur.
    child = cur['right'] if cur['left'] is None else cur['left']
    if parent is None:
        return child
    if parent['left'] is cur:
        parent['left'] = child
    else:
        parent['right'] = child
    return root


def bst_list_all(root):
    """Return all ``(name, phone)`` pairs in ascending name order.

    Uses an explicit stack for the in-order walk so that deep trees do not
    hit the recursion limit.
    """
    result = []
    stack = []
    cur = root
    while stack or cur is not None:
        while cur is not None:
            stack.append(cur)
            cur = cur['left']
        cur = stack.pop()
        result.append((cur['name'], cur['phone']))
        cur = cur['right']
    return result


# ===================== Data generation =====================
def generate_data(n=10000):
    """Build ``n`` unique records and return ``(shuffled, sorted_by_name)``.

    Shuffling uses the global ``random`` state, so seed it beforehand for
    reproducible output.
    """
    records = [(f"User_{i:05d}", f"8800{i:07d}") for i in range(n)]
    shuffled = records[:]
    random.shuffle(shuffled)
    sorted_rec = sorted(records, key=itemgetter(0))
    return shuffled, sorted_rec


# ===================== Measurements =====================
def run_experiment(struct_type, records, n_searches=100, n_missing=10, n_deletes=50, repeats=5):
    """Time bulk insert, lookups and deletions for one container type.

    Args:
        struct_type: ``'ll'`` (linked list) or ``'ht'`` (hash table); any
            other value is benchmarked as the BST.
        records: sequence of ``(name, phone)`` tuples to insert.
        n_searches: how many existing keys to look up (capped at
            ``len(records)``).
        n_missing: how many absent keys to look up in addition.
        n_deletes: how many existing keys to delete (capped likewise).
        repeats: number of independent runs; each run starts from an empty
            container.

    Returns:
        A dict with ``'struct'``, the per-run timings in seconds
        (``'insert_all'``, ``'search_all'``, ``'delete_all'``) and their
        means (``'insert_avg'``, ``'search_avg'``, ``'delete_avg'``).

    Raises:
        ValueError: if ``repeats`` is less than 1 (there would be nothing
            to average).
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    all_insert_times = []
    all_search_times = []
    all_delete_times = []

    for _ in range(repeats):
        # Fresh container for every run: a list of bucket heads for the
        # hash table, an empty head/root (None) for the other two.
        store = [None] * HASH_TABLE_SIZE if struct_type == 'ht' else None

        # --- insert ---
        start = time.perf_counter()
        if struct_type == 'll':
            for name, phone in records:
                store = ll_insert(store, name, phone)
        elif struct_type == 'ht':
            for name, phone in records:
                ht_insert(store, name, phone)
        else:
            for name, phone in records:
                store = bst_insert(store, name, phone)
        all_insert_times.append(time.perf_counter() - start)

        # --- search: a shuffled mix of present and absent keys ---
        existing = random.sample(records, min(n_searches, len(records)))
        missing = [(f"Missing_{i}", "") for i in range(n_missing)]
        test_keys = existing + missing
        random.shuffle(test_keys)

        start = time.perf_counter()
        if struct_type == 'll':
            for name, _ in test_keys:
                ll_find(store, name)
        elif struct_type == 'ht':
            for name, _ in test_keys:
                ht_find(store, name)
        else:
            for name, _ in test_keys:
                bst_find(store, name)
        all_search_times.append(time.perf_counter() - start)

        # --- delete ---
        del_sample = random.sample(records, min(n_deletes, len(records)))
        start = time.perf_counter()
        if struct_type == 'll':
            for name, _ in del_sample:
                store = ll_delete(store, name)
        elif struct_type == 'ht':
            for name, _ in del_sample:
                ht_delete(store, name)
        else:
            for name, _ in del_sample:
                store = bst_delete(store, name)
        all_delete_times.append(time.perf_counter() - start)

    return {
        'struct': struct_type,
        'insert_avg': sum(all_insert_times) / repeats,
        'search_avg': sum(all_search_times) / repeats,
        'delete_avg': sum(all_delete_times) / repeats,
        'insert_all': all_insert_times,
        'search_all': all_search_times,
        'delete_all': all_delete_times,
    }


# ===================== Reporting helpers =====================
def _avg(results, label, order, metric):
    """Return the ``<metric>_avg`` value of the result for (label, order)."""
    return next(r[f'{metric}_avg'] for r in results
                if r['label'] == label and r['order'] == order)


def _save_csv(results, path):
    """Write one CSV row per (structure, order, run) with the raw timings."""
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['structure', 'order', 'run', 'insert', 'search', 'delete'])
        for r in results:
            runs = zip(r['insert_all'], r['search_all'], r['delete_all'])
            for run_no, (ins, srch, dele) in enumerate(runs, start=1):
                writer.writerow([r['label'], r['order'], run_no, ins, srch, dele])


def _annotate_bars(ax, bars, values):
    """Print each bar's height just above it."""
    offset = max(values) * 0.01
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + offset,
                f'{height:.4f}', ha='center', va='bottom', fontsize=7)


def _plot_results(results, n_records, path):
    """Draw one grouped bar chart per metric, save it to ``path`` and show it."""
    # Imported here so that the containers and run_experiment stay usable
    # on machines without the plotting stack.
    import matplotlib.pyplot as plt
    import numpy as np

    structures = [label for _, label in STRUCTURES]
    metric_names = {'insert': 'Вставка (сек)', 'search': 'Поиск (сек)', 'delete': 'Удаление (сек)'}
    colors = {'shuffled': '#4CAF50', 'sorted': '#FF5722'}

    _, axes = plt.subplots(1, 3, figsize=(16, 5.5))
    x = np.arange(len(structures))
    width = 0.35

    for ax, metric in zip(axes, metric_names):
        shuffled_vals = [_avg(results, s, 'shuffled', metric) for s in structures]
        sorted_vals = [_avg(results, s, 'sorted', metric) for s in structures]

        bars1 = ax.bar(x - width / 2, shuffled_vals, width, label='Случайный порядок',
                       color=colors['shuffled'], edgecolor='black', linewidth=0.5)
        bars2 = ax.bar(x + width / 2, sorted_vals, width, label='Отсортированный порядок',
                       color=colors['sorted'], edgecolor='black', linewidth=0.5)
        _annotate_bars(ax, bars1, shuffled_vals)
        _annotate_bars(ax, bars2, sorted_vals)

        ax.set_title(metric_names[metric], fontsize=12, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(structures, fontsize=10)
        ax.legend(fontsize=9)
        ax.grid(axis='y', alpha=0.3, linestyle='--')

        # Search times differ by orders of magnitude between structures, so
        # a log scale keeps the small bars visible.
        if metric == 'search':
            ax.set_yscale('log')
            ax.set_ylabel('Время (сек, лог. шкала)', fontsize=9)
        else:
            ax.set_ylabel('Время (сек)', fontsize=9)

    # Thousands separated by a space, e.g. 10000 -> "10 000".
    n_text = f'{n_records:,}'.replace(',', ' ')
    plt.suptitle(f'Сравнение производительности структур данных (N = {n_text} записей)',
                 fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()

    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.show()


def _print_analysis(results):
    """Print the textual summary of the measured averages."""
    print("АНАЛИЗ РЕЗУЛЬТАТОВ")

    print("\n1. Влияние порядка данных на BST:")
    bst_shuffled_insert = _avg(results, 'BST', 'shuffled', 'insert')
    bst_sorted_insert = _avg(results, 'BST', 'sorted', 'insert')
    print(f" - Случайные данные: {bst_shuffled_insert:.6f} сек")
    print(f" - Отсортированные данные: {bst_sorted_insert:.6f} сек")
    print(f" - Замедление в {bst_sorted_insert/bst_shuffled_insert:.1f} раз")
    print(" Причина: на отсортированных данных BST вырождается в связный список (глубина = N)")

    print("\n2. Стабильность хеш-таблицы:")
    ht_shuffled = _avg(results, 'HashTable', 'shuffled', 'insert')
    ht_sorted = _avg(results, 'HashTable', 'sorted', 'insert')
    print(f" - Случайные: {ht_shuffled:.6f} сек")
    print(f" - Отсортированные: {ht_sorted:.6f} сек")
    print(" Причина: хеш-функция равномерно распределяет ключи независимо от порядка")

    print("\n3. Медленный поиск в связном списке:")
    ll_search = _avg(results, 'LinkedList', 'shuffled', 'search')
    ht_search = _avg(results, 'HashTable', 'shuffled', 'search')
    print(f" - LinkedList: {ll_search:.6f} сек")
    print(f" - HashTable: {ht_search:.6f} сек")
    print(f" - Хеш-таблица быстрее в {ll_search/ht_search:.1f} раз")
    print(" Причина: поиск в списке всегда O(n), в хеш-таблице ~O(1)")

    print("\n4. Удаление:")
    for _, label in STRUCTURES:
        del_shuff = _avg(results, label, 'shuffled', 'delete')
        del_sort = _avg(results, label, 'sorted', 'delete')
        print(f" - {label:15}: случ.={del_shuff:.6f} сек, отсорт.={del_sort:.6f} сек")

    print("\n5. Рекомендации:")
    print(" - Частый поиск + вставки → Хеш-таблица")
    print(" - Нужна сортировка «из коробки» → Сбалансированное BST (AVL/Красно-чёрное)")
    print(" - Только добавление в конец → Связный список")
    print(" - Обычный BST опасен на реальных частично упорядоченных данных!")
    print("=" * 60)


def main():
    """Run every benchmark, then save the CSV, draw the chart and report."""
    random.seed(42)  # fixed seed: same shuffles and samples on every run
    N = 10000
    shuffled, sorted_rec = generate_data(N)

    results = []
    for struct_name, label in STRUCTURES:
        for order_name, records in [('shuffled', shuffled), ('sorted', sorted_rec)]:
            print(f"Тестирую {label} на {order_name} данных...")
            res = run_experiment(struct_name, records)
            res['order'] = order_name
            res['label'] = label
            results.append(res)
            print(f"{label:15} | {order_name:10} | insert: {res['insert_avg']:.6f}s | "
                  f"search: {res['search_avg']:.6f}s | delete: {res['delete_avg']:.6f}s")

    _save_csv(results, CSV_PATH)
    print(f"\nCSV сохранён в {CSV_PATH}")

    _plot_results(results, N, GRAPH_PATH)
    print(f"График сохранён в {GRAPH_PATH}")

    _print_analysis(results)


if __name__ == '__main__':
    main()
+HashTable,shuffled,4,0.014399250037968159,0.00014016637578606606,8.183391764760017e-05 +HashTable,shuffled,5,0.014289166778326035,0.000143333338201046,8.44169408082962e-05 +HashTable,sorted,1,0.014408249873667955,0.0001459997147321701,7.950002327561378e-05 +HashTable,sorted,2,0.016188541892915964,0.00016799988225102425,7.862504571676254e-05 +HashTable,sorted,3,0.022037209011614323,0.00014124996960163116,8.16253013908863e-05 +HashTable,sorted,4,0.01406783377751708,0.0001532919704914093,8.27917829155922e-05 +HashTable,sorted,5,0.014112749602645636,0.0001559997908771038,9.04998742043972e-05 +BST,shuffled,1,0.012917417101562023,0.0001227916218340397,7.3291826993227e-05 +BST,shuffled,2,0.01313945883885026,0.000122124794870615,7.370905950665474e-05 +BST,shuffled,3,0.01313587510958314,0.00011783279478549957,7.433397695422173e-05 +BST,shuffled,4,0.012769625056535006,0.00012508314102888107,6.770854815840721e-05 +BST,shuffled,5,0.012868000194430351,0.0001216246746480465,7.262500002980232e-05 +BST,sorted,1,3.3953831251710653,0.023627332877367735,0.013505042064934969 +BST,sorted,2,3.3977634580805898,0.025384000036865473,0.015041666105389595 +BST,sorted,3,3.404989833943546,0.02827158337458968,0.012459500227123499 +BST,sorted,4,3.389576541259885,0.025892207864671946,0.015427417121827602 +BST,sorted,5,3.408438625279814,0.025629667099565268,0.013972874730825424