2026-rff_mp/dyachenkoas/docs/data/1laba.py
dyachenkoas d19b1788cf revert 901031fd0a
revert revert 6bed72c0f5

revert Загрузить файлы в «dyachenkoas/docs/data»
2026-05-24 12:58:21 +00:00

391 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
import random
import csv
import os
import matplotlib.pyplot as plt
import numpy as np
# ===================== 1. Связный список =====================
def ll_insert(head, name, phone):
    """Append a record to the list, or update the phone of an existing name.

    Nodes are plain dicts with the keys 'name', 'phone' and 'next'.

    Returns the head of the list: a newly created node when the list was
    empty, otherwise the same ``head`` that was passed in.
    """
    if head is None:
        return {'name': name, 'phone': phone, 'next': None}

    node = head
    while True:
        if node['name'] == name:
            # Name already present: overwrite in place, no new node needed.
            node['phone'] = phone
            return head
        following = node['next']
        if following is None:
            # Reached the tail without a match: link the new record here.
            node['next'] = {'name': name, 'phone': phone, 'next': None}
            return head
        node = following
def ll_find(head, name):
    """Return the phone stored under ``name``, or None if it is not in the list."""
    node = head
    # Walk forward until the list ends or the wanted name is reached.
    while node and node['name'] != name:
        node = node['next']
    return node['phone'] if node else None
def ll_delete(head, name):
    """Unlink the first node named ``name`` and return the resulting head.

    Returns None for an empty list, the second node when the head itself is
    removed, and the unchanged ``head`` otherwise (including when the name is
    not present).
    """
    if head is None:
        return None
    if head['name'] == name:
        return head['next']

    previous, node = head, head['next']
    while node:
        if node['name'] == name:
            # Bypass the matching node.
            previous['next'] = node['next']
            break
        previous, node = node, node['next']
    return head
def ll_list_all(head):
    """Return every (name, phone) pair in the list, sorted by name."""
    pairs = []
    node = head
    while node:
        pairs.append((node['name'], node['phone']))
        node = node['next']
    # Stable sort on the name only, so equal names keep their list order.
    return sorted(pairs, key=lambda pair: pair[0])
# ===================== 2. Хеш-таблица =====================
def ht_hash(name, size):
    """Polynomial (base 31) hash of ``name`` reduced modulo ``size``.

    The modulo is applied after every character, so the running value stays
    below ``size``.  An empty name hashes to 0.
    """
    acc = 0
    for code in map(ord, name):
        acc = (31 * acc + code) % size
    return acc
def ht_insert(buckets, name, phone):
    """Insert or update ``name`` in the chain of the bucket it hashes to.

    ``buckets`` is modified in place; nothing is returned.
    """
    slot = ht_hash(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
def ht_find(buckets, name):
    """Look up ``name`` in its bucket's chain; returns whatever ll_find returns."""
    slot = ht_hash(name, len(buckets))
    chain = buckets[slot]
    return ll_find(chain, name)
def ht_delete(buckets, name):
    """Remove ``name`` from the chain of the bucket it hashes to.

    ``buckets`` is modified in place; nothing is returned.
    """
    slot = ht_hash(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
def ht_list_all(buckets):
    """Collect (name, phone) pairs from every bucket chain, sorted by name."""
    pairs = []
    for chain in buckets:
        node = chain
        while node:
            pairs.append((node['name'], node['phone']))
            node = node['next']
    # Stable sort on the name only.
    return sorted(pairs, key=lambda pair: pair[0])
# ===================== 3. BST =====================
def bst_insert(root, name, phone):
    """Iteratively insert ``name`` into the BST, or update its phone if present.

    Nodes are plain dicts with the keys 'name', 'phone', 'left' and 'right'.
    Returns the root: the new node for an empty tree, otherwise the same
    ``root`` that was passed in.
    """
    fresh = {'name': name, 'phone': phone, 'left': None, 'right': None}
    if root is None:
        return fresh

    node = root
    while True:
        key = node['name']
        if name < key:
            side = 'left'
        elif name > key:
            side = 'right'
        else:
            # Same key: update in place.
            node['phone'] = phone
            return root
        child = node[side]
        if child is None:
            node[side] = fresh
            return root
        node = child
def bst_find(root, name):
    """Return the phone stored under ``name`` in the BST, or None if absent."""
    node = root
    while node:
        key = node['name']
        if name == key:
            return node['phone']
        # Descend towards the side where the name would have to be.
        node = node['left'] if name < key else node['right']
    return None
def bst_delete(root, name):
    """Remove the node named ``name`` from the BST and return the new root.

    The tree is modified in place.  The returned root differs from ``root``
    only when the root itself is removed and has at most one child.  If the
    name is not found, the tree is returned unchanged.

    Nodes are compared by identity (``is``): a node is a dict, and ``==``
    would perform a structural comparison of the dicts instead of answering
    "is this the same node?".
    """
    # Locate the node and remember its parent.
    parent = None
    cur = root
    while cur is not None and cur['name'] != name:
        parent = cur
        cur = cur['left'] if name < cur['name'] else cur['right']
    if cur is None:  # not found
        return root

    if cur['left'] is not None and cur['right'] is not None:
        # Two children: find the in-order successor, i.e. the minimum of the
        # right subtree.
        succ_parent = cur
        succ = cur['right']
        while succ['left'] is not None:
            succ_parent = succ
            succ = succ['left']
        # Move the successor's payload into the node being "deleted" ...
        cur['name'] = succ['name']
        cur['phone'] = succ['phone']
        # ... and unlink the successor, which has no left child by construction.
        if succ_parent['left'] is succ:
            succ_parent['left'] = succ['right']
        else:
            # Successor is the direct right child of ``cur``.
            succ_parent['right'] = succ['right']
        return root

    # Zero or one child: splice the only child (or None) into cur's place.
    child = cur['right'] if cur['left'] is None else cur['left']
    if parent is None:
        return child
    if parent['left'] is cur:
        parent['left'] = child
    else:
        parent['right'] = child
    return root
def bst_list_all(root):
    """Return all (name, phone) pairs via an iterative in-order traversal.

    An explicit stack is used instead of recursion.
    """
    ordered = []
    pending = []
    node = root
    while True:
        if node:
            # Keep descending left, remembering the path.
            pending.append(node)
            node = node['left']
        elif pending:
            # Left side exhausted: visit the node, then go right.
            node = pending.pop()
            ordered.append((node['name'], node['phone']))
            node = node['right']
        else:
            break
    return ordered
# ===================== Генерация данных =====================
def generate_data(n=10000):
    """Build ``n`` synthetic (name, phone) records in two orderings.

    Names look like ``User_00042`` and phones like ``88000000042``.

    Returns a tuple ``(shuffled, sorted_rec)``: the records in random order
    (shuffled with the module-level ``random`` generator) and the records
    sorted by name.
    """
    records = []
    for i in range(n):
        records.append((f"User_{i:05d}", f"8800{i:07d}"))

    shuffled = list(records)
    random.shuffle(shuffled)

    sorted_rec = list(records)
    sorted_rec.sort(key=lambda record: record[0])
    return shuffled, sorted_rec
# ===================== Замеры =====================
def run_experiment(struct_type, records, n_searches=100, n_missing=10,
                   n_deletes=50, repeats=5, ht_size=512):
    """Time insertion, lookup and deletion for one kind of structure.

    Each repeat builds a fresh structure from ``records``, looks up a random
    mix of existing and missing keys, then deletes a random sample of records.
    Only the loops over the structure operations are timed.

    Args:
        struct_type: 'll' for the linked list, 'ht' for the hash table; any
            other value selects the BST.
        records: sequence of (name, phone) pairs, inserted in the given order.
        n_searches: number of existing records sampled for lookups (capped at
            ``len(records)``).
        n_missing: number of absent ``Missing_<i>`` keys added to the lookups.
        n_deletes: number of records sampled for deletion (capped at
            ``len(records)``).
        repeats: number of full build / search / delete cycles.
        ht_size: number of buckets allocated when ``struct_type`` is 'ht'.

    Returns:
        A dict with 'struct', the per-phase averages 'insert_avg',
        'search_avg', 'delete_avg' (seconds) and the raw per-repeat lists
        'insert_all', 'search_all', 'delete_all'.

    Raises:
        ZeroDivisionError: if ``repeats`` is 0 (the averages divide by it).
    """
    all_insert_times = []
    all_search_times = []
    all_delete_times = []

    for _ in range(repeats):
        # --- fresh, empty structure for this repeat ---
        if struct_type == 'll':
            head = None
        elif struct_type == 'ht':
            buckets = [None] * ht_size
        else:  # bst
            root = None

        # --- insertion ---
        # The loops call the structure functions directly (no dispatch
        # wrapper) so the measurement carries no extra call overhead.
        start = time.perf_counter()
        if struct_type == 'll':
            for name, phone in records:
                head = ll_insert(head, name, phone)
        elif struct_type == 'ht':
            for name, phone in records:
                ht_insert(buckets, name, phone)
        else:
            for name, phone in records:
                root = bst_insert(root, name, phone)
        all_insert_times.append(time.perf_counter() - start)

        # --- lookup: existing keys mixed with keys that are not present ---
        existing = random.sample(records, min(n_searches, len(records)))
        missing = [(f"Missing_{i}", "") for i in range(n_missing)]
        test_keys = existing + missing
        random.shuffle(test_keys)
        start = time.perf_counter()
        if struct_type == 'll':
            for name, _phone in test_keys:
                ll_find(head, name)
        elif struct_type == 'ht':
            for name, _phone in test_keys:
                ht_find(buckets, name)
        else:
            for name, _phone in test_keys:
                bst_find(root, name)
        all_search_times.append(time.perf_counter() - start)

        # --- deletion ---
        del_sample = random.sample(records, min(n_deletes, len(records)))
        start = time.perf_counter()
        if struct_type == 'll':
            for name, _phone in del_sample:
                head = ll_delete(head, name)
        elif struct_type == 'ht':
            for name, _phone in del_sample:
                ht_delete(buckets, name)
        else:
            for name, _phone in del_sample:
                root = bst_delete(root, name)
        all_delete_times.append(time.perf_counter() - start)

    return {
        'struct': struct_type,
        'insert_avg': sum(all_insert_times) / repeats,
        'search_avg': sum(all_search_times) / repeats,
        'delete_avg': sum(all_delete_times) / repeats,
        'insert_all': all_insert_times,
        'search_all': all_search_times,
        'delete_all': all_delete_times,
    }
def _index_results(results):
    """Map (label, order) to its result dict for direct lookups."""
    return {(res['label'], res['order']): res for res in results}


def _write_csv(results, csv_path):
    """Write one CSV row per structure / order / repeat with the raw timings."""
    os.makedirs(os.path.dirname(csv_path) or '.', exist_ok=True)
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['structure', 'order', 'run', 'insert', 'search', 'delete'])
        for r in results:
            timings = zip(r['insert_all'], r['search_all'], r['delete_all'])
            for run, (ins, srch, dele) in enumerate(timings, start=1):
                writer.writerow([r['label'], r['order'], run, ins, srch, dele])


def _plot_results(results, n_records, graph_path):
    """Draw one grouped bar chart per metric, save it to graph_path and show it."""
    by_key = _index_results(results)
    structures = ['LinkedList', 'HashTable', 'BST']
    metrics = ['insert', 'search', 'delete']
    metric_names = {'insert': 'Вставка (сек)', 'search': 'Поиск (сек)', 'delete': 'Удаление (сек)'}
    colors = {'shuffled': '#4CAF50', 'sorted': '#FF5722'}
    x = np.arange(len(structures))
    width = 0.35

    _, axes = plt.subplots(1, 3, figsize=(16, 5.5))
    for ax, metric in zip(axes, metrics):
        shuffled_vals = [by_key[(s, 'shuffled')][f'{metric}_avg'] for s in structures]
        sorted_vals = [by_key[(s, 'sorted')][f'{metric}_avg'] for s in structures]
        series = [
            (x - width/2, shuffled_vals, 'Случайный порядок', colors['shuffled']),
            (x + width/2, sorted_vals, 'Отсортированный порядок', colors['sorted']),
        ]
        for positions, values, legend, color in series:
            bars = ax.bar(positions, values, width, label=legend,
                          color=color, edgecolor='black', linewidth=0.5)
            # Value labels sit slightly above each bar (1% of the series max).
            offset = max(values) * 0.01
            for bar in bars:
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width()/2., height + offset,
                        f'{height:.4f}', ha='center', va='bottom', fontsize=7)
        ax.set_title(metric_names[metric], fontsize=12, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(structures, fontsize=10)
        ax.legend(fontsize=9)
        ax.grid(axis='y', alpha=0.3, linestyle='--')
        # Log scale for search so that small and large bars are both visible.
        if metric == 'search':
            ax.set_yscale('log')
            ax.set_ylabel('Время (сек, лог. шкала)', fontsize=9)
        else:
            ax.set_ylabel('Время (сек)', fontsize=9)

    # Derive the record count shown in the title from the actual N
    # (thousands separated by a space, e.g. "10 000").
    n_text = f"{n_records:,}".replace(',', ' ')
    plt.suptitle(f'Сравнение производительности структур данных (N = {n_text} записей)',
                 fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    os.makedirs(os.path.dirname(graph_path) or '.', exist_ok=True)
    plt.savefig(graph_path, dpi=150, bbox_inches='tight')
    plt.show()


def _print_analysis(results):
    """Print the textual summary comparing the measured averages."""
    by_key = _index_results(results)

    print("АНАЛИЗ РЕЗУЛЬТАТОВ")
    print("\n1. Влияние порядка данных на BST:")
    bst_shuffled_insert = by_key[('BST', 'shuffled')]['insert_avg']
    bst_sorted_insert = by_key[('BST', 'sorted')]['insert_avg']
    print(f" - Случайные данные: {bst_shuffled_insert:.6f} сек")
    print(f" - Отсортированные данные: {bst_sorted_insert:.6f} сек")
    print(f" - Замедление в {bst_sorted_insert/bst_shuffled_insert:.1f} раз")
    print(" Причина: на отсортированных данных BST вырождается в связный список (глубина = N)")

    print("\n2. Стабильность хеш-таблицы:")
    ht_shuffled = by_key[('HashTable', 'shuffled')]['insert_avg']
    ht_sorted = by_key[('HashTable', 'sorted')]['insert_avg']
    print(f" - Случайные: {ht_shuffled:.6f} сек")
    print(f" - Отсортированные: {ht_sorted:.6f} сек")
    print(" Причина: хеш-функция равномерно распределяет ключи независимо от порядка")

    print("\n3. Медленный поиск в связном списке:")
    ll_search = by_key[('LinkedList', 'shuffled')]['search_avg']
    ht_search = by_key[('HashTable', 'shuffled')]['search_avg']
    print(f" - LinkedList: {ll_search:.6f} сек")
    print(f" - HashTable: {ht_search:.6f} сек")
    print(f" - Хеш-таблица быстрее в {ll_search/ht_search:.1f} раз")
    print(" Причина: поиск в списке всегда O(n), в хеш-таблице ~O(1)")

    print("\n4. Удаление:")
    for label in ['LinkedList', 'HashTable', 'BST']:
        del_shuff = by_key[(label, 'shuffled')]['delete_avg']
        del_sort = by_key[(label, 'sorted')]['delete_avg']
        print(f" - {label:15}: случ.={del_shuff:.6f} сек, отсорт.={del_sort:.6f} сек")

    print("\n5. Рекомендации:")
    print(" - Частый поиск + вставки → Хеш-таблица")
    print(" - Нужна сортировка «из коробки» → Сбалансированное BST (AVL/Красно-чёрное)")
    print(" - Только добавление в конец → Связный список")
    print(" - Обычный BST опасен на реальных частично упорядоченных данных!")
    print("="*60)


def main():
    """Run the whole benchmark: measure, save CSV, plot and print the analysis.

    Benchmarks the linked list, hash table and BST on shuffled and on sorted
    input, writes the raw timings to ``docs/data/benchmark_results.csv``,
    saves the chart to ``docs/benchmark_graph.png`` and prints a summary.
    Paths are relative to the current working directory.
    """
    random.seed(42)  # fixed seed so the data order and samples are reproducible
    N = 10000
    shuffled, sorted_rec = generate_data(N)

    results = []
    for struct_name, label in [('ll', 'LinkedList'), ('ht', 'HashTable'), ('bst', 'BST')]:
        for order_name, records in [('shuffled', shuffled), ('sorted', sorted_rec)]:
            print(f"Тестирую {label} на {order_name} данных...")
            res = run_experiment(struct_name, records)
            res['order'] = order_name
            res['label'] = label
            results.append(res)
            print(f"{label:15} | {order_name:10} | insert: {res['insert_avg']:.6f}s | "
                  f"search: {res['search_avg']:.6f}s | delete: {res['delete_avg']:.6f}s")

    # Save the raw timings as CSV.
    csv_path = 'docs/data/benchmark_results.csv'
    _write_csv(results, csv_path)
    print(f"\nCSV сохранён в {csv_path}")

    # Charts.
    graph_path = 'docs/benchmark_graph.png'
    _plot_results(results, N, graph_path)
    print(f"График сохранён в {graph_path}")

    _print_analysis(results)
# Run the benchmark only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()