2026-rff_mp/BorisovMI/lab_1/docs/data/structures.py

477 lines
15 KiB
Python
Raw Normal View History

import time
import random
import csv
import os
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.setrecursionlimit(20000)
# Связный список
def ll_insert(head, name, phone):
new_node = {'name': name, 'phone': phone, 'next': None}
if head is None:
return new_node
if head['name'] == name:
head['phone'] = phone
return head
current = head
while current['next'] is not None:
if current['next']['name'] == name:
current['next']['phone'] = phone
return head
current = current['next']
current['next'] = new_node
return head
def ll_find(head, name):
current = head
while current is not None:
if current['name'] == name:
return current['phone']
current = current['next']
return None
def ll_delete(head, name):
if head is None:
return None
if head['name'] == name:
return head['next']
current = head
while current['next'] is not None:
if current['next']['name'] == name:
current['next'] = current['next']['next']
return head
current = current['next']
return head
def ll_list_all(head):
records = []
current = head
while current is not None:
records.append((current['name'], current['phone']))
current = current['next']
records.sort(key=lambda x: x[0])
return records
# Хэш функции
def hash_function(name, table_size):
return sum(ord(c) for c in name) % table_size
def ht_create(size=1000):
return [None] * size
def ht_insert(buckets, name, phone):
size = len(buckets)
index = hash_function(name, size)
buckets[index] = ll_insert(buckets[index], name, phone)
def ht_find(buckets, name):
size = len(buckets)
index = hash_function(name, size)
return ll_find(buckets[index], name)
def ht_delete(buckets, name):
size = len(buckets)
index = hash_function(name, size)
buckets[index] = ll_delete(buckets[index], name)
def ht_list_all(buckets):
records = []
for bucket in buckets:
current = bucket
while current is not None:
records.append((current['name'], current['phone']))
current = current['next']
records.sort(key=lambda x: x[0])
return records
# Функции для дерева поиска
def bst_insert(root, name, phone):
if root is None:
return {'name': name, 'phone': phone, 'left': None, 'right': None}
if name < root['name']:
root['left'] = bst_insert(root['left'], name, phone)
elif name > root['name']:
root['right'] = bst_insert(root['right'], name, phone)
else:
root['phone'] = phone
return root
def bst_find(root, name):
current = root
while current is not None:
if name == current['name']:
return current['phone']
elif name < current['name']:
current = current['left']
else:
current = current['right']
return None
def bst_find_min(node):
current = node
while current['left'] is not None:
current = current['left']
return current
def bst_delete(root, name):
if root is None:
return None
if name < root['name']:
root['left'] = bst_delete(root['left'], name)
elif name > root['name']:
root['right'] = bst_delete(root['right'], name)
else:
if root['left'] is None:
return root['right']
elif root['right'] is None:
return root['left']
min_node = bst_find_min(root['right'])
root['name'] = min_node['name']
root['phone'] = min_node['phone']
root['right'] = bst_delete(root['right'], min_node['name'])
return root
def bst_list_all(root):
records = []
def inorder_traversal(node):
if node is not None:
inorder_traversal(node['left'])
records.append((node['name'], node['phone']))
inorder_traversal(node['right'])
inorder_traversal(root)
return records
# Генерация тестовых данных
def generate_records(count=10000):
records = []
for i in range(count):
name = f"User_{i:05d}"
phone = f"+7-{random.randint(100,999)}-{random.randint(100,999)}-{random.randint(1000,9999)}"
records.append((name, phone))
shuffled = records.copy()
random.shuffle(shuffled)
sorted_records = sorted(records, key=lambda x: x[0])
return shuffled, sorted_records
# Замер времени
def measure_insertion(structure_name, records):
times = []
filled_structure = None
for run in range(5):
if structure_name == "linked_list":
structure = None
elif structure_name == "hash_table":
structure = ht_create(1000)
elif structure_name == "bst":
structure = None
start = time.perf_counter()
for name, phone in records:
if structure_name == "linked_list":
structure = ll_insert(structure, name, phone)
elif structure_name == "hash_table":
ht_insert(structure, name, phone)
elif structure_name == "bst":
structure = bst_insert(structure, name, phone)
end = time.perf_counter()
times.append(end - start)
if run == 4:
filled_structure = structure
return times, filled_structure
def measure_search(structure_name, structure, search_names):
times = []
for run in range(5):
start = time.perf_counter()
for name in search_names:
if structure_name == "linked_list":
ll_find(structure, name)
elif structure_name == "hash_table":
ht_find(structure, name)
elif structure_name == "bst":
bst_find(structure, name)
end = time.perf_counter()
times.append(end - start)
return times
def measure_deletion(structure_name, original_structure, delete_names):
times = []
for run in range(5):
if structure_name == "linked_list":
all_records = ll_list_all(original_structure)
test_structure = None
for name, phone in all_records:
test_structure = ll_insert(test_structure, name, phone)
elif structure_name == "hash_table":
all_records = ht_list_all(original_structure)
test_structure = ht_create(1000)
for name, phone in all_records:
ht_insert(test_structure, name, phone)
elif structure_name == "bst":
all_records = bst_list_all(original_structure)
test_structure = None
for name, phone in all_records:
test_structure = bst_insert(test_structure, name, phone)
start = time.perf_counter()
for name in delete_names:
if structure_name == "linked_list":
test_structure = ll_delete(test_structure, name)
elif structure_name == "hash_table":
ht_delete(test_structure, name)
elif structure_name == "bst":
test_structure = bst_delete(test_structure, name)
end = time.perf_counter()
times.append(end - start)
return times
# Основная часть
def run_experiment():
current_dir = os.path.dirname(__name__)
docs_dir = os.path.dirname(current_dir)
csv_file = os.path.join(docs_dir, "experiment_results.csv")
print("=" * 70)
print("ЭКСПЕРИМЕНТАЛЬНОЕ СРАВНЕНИЕ СТРУКТУР ДАННЫХ")
print("Телефонный справочник - 10000 записей")
print("=" * 70)
print(f"\nРезультаты будут сохранены в: {csv_file}")
print("\n1. Генерация тестовых данных...")
shuffled_records, sorted_records = generate_records(10000)
print(f" Сгенерировано 10000 записей")
existing_names = [shuffled_records[i][0] for i in random.sample(range(10000), 100)]
nonexisting_names = [f"NotExist_{i}" for i in range(10)]
search_names = existing_names + nonexisting_names
delete_names = [shuffled_records[i][0] for i in random.sample(range(10000), 50)]
results = [["Структура", "Режим", "Операция",
"Замер1(с)", "Замер2(с)", "Замер3(с)", "Замер4(с)", "Замер5(с)",
"Среднее(с)"]]
for mode_name, records in [("случайный", shuffled_records),
("отсортированный", sorted_records)]:
print(f"\n2. Тестирование режима: {mode_name}")
print("-" * 50)
for struct_name in ["linked_list", "hash_table", "bst"]:
print(f"\n {struct_name.upper()}:")
print(" Вставка 10000 записей...")
insert_times, filled_struct = measure_insertion(struct_name, records)
avg_insert = sum(insert_times) / 5
print(f" Время: {avg_insert:.4f} сек (среднее)")
print(" Поиск 110 записей...")
search_times = measure_search(struct_name, filled_struct, search_names)
avg_search = sum(search_times) / 5
print(f" Время: {avg_search:.4f} сек (среднее)")
print(" Удаление 50 записей...")
delete_times = measure_deletion(struct_name, filled_struct, delete_names)
avg_delete = sum(delete_times) / 5
print(f" Время: {avg_delete:.4f} сек (среднее)")
results.append([struct_name, mode_name, "вставка"] + insert_times + [avg_insert])
results.append([struct_name, mode_name, "поиск"] + search_times + [avg_search])
results.append([struct_name, mode_name, "удаление"] + delete_times + [avg_delete])
print("\n3. Сохранение результатов...")
with open(csv_file, "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
writer.writerows(results)
print(f" Результаты сохранены в: {csv_file}")
print("\n" + "=" * 70)
print("СВОДНАЯ ТАБЛИЦА РЕЗУЛЬТАТОВ")
print("=" * 70)
print(f"{'Структура':<15} {'Режим':<12} {'Операция':<10} {'Среднее время (сек)':<20}")
print("-" * 70)
for row in results[1:]:
struct, mode, op, t1, t2, t3, t4, t5, avg = row
print(f"{struct:<15} {mode:<12} {op:<10} {avg:<20.6f}")
return results, docs_dir
# Графики
def create_graphs(results, docs_dir):
print("\n4. Построение графиков...")
data = {}
for row in results[1:]:
struct = row[0]
mode = row[1]
op = row[2]
avg = row[8]
if struct not in data:
data[struct] = {}
if mode not in data[struct]:
data[struct][mode] = {}
data[struct][mode][op] = avg
struct_labels = {
'linked_list': 'LinkedList',
'hash_table': 'HashTable',
'bst': 'BST'
}
colors = {
'linked_list': '#3498db',
'hash_table': '#2ecc71',
'bst': '#e74c3c'
}
fig, axes = plt.subplots(1, 3, figsize=(15, 6))
fig.suptitle('Сравнение производительности структур данных', fontsize=16, fontweight='bold')
operations = ['вставка', 'поиск', 'удаление']
operation_titles = ['Вставка\n(10000 записей)', 'Поиск\n(110 запросов)', 'Удаление\n(50 записей)']
modes = ['случайный', 'отсортированный']
mode_labels = ['Случайный', 'Отсортированный']
for idx, (op, op_title) in enumerate(zip(operations, operation_titles)):
ax = axes[idx]
# Позиции для групп столбцов
x = np.arange(len(modes)) # [0, 1]
width = 0.25 # ширина одного столбца
multiplier = 0
for struct in ['linked_list', 'hash_table', 'bst']:
values = [data[struct][mode][op] for mode in modes]
offset = width * multiplier
bars = ax.bar(x + offset, values, width,
label=struct_labels[struct],
color=colors[struct],
edgecolor='black', linewidth=0.5)
for bar, val in zip(bars, values):
if val < 0.01:
ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + val*0.05,
f'{val:.5f}', ha='center', va='bottom', fontsize=8, rotation=0)
else:
ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + val*0.02,
f'{val:.4f}', ha='center', va='bottom', fontsize=8, rotation=0)
multiplier += 1
ax.set_title(op_title, fontsize=12, fontweight='bold')
ax.set_ylabel('Время (секунды)', fontsize=10)
ax.set_xlabel('Режим данных', fontsize=10)
ax.set_xticks(x + width)
ax.set_xticklabels(mode_labels)
ax.legend(loc='upper left', fontsize=8)
ax.grid(True, alpha=0.3, axis='y')
all_values = [data[s][m][op] for s in ['linked_list', 'hash_table', 'bst'] for m in modes]
if max(all_values) / min(all_values) > 100:
ax.set_yscale('log')
ax.set_ylabel('Время (секунды) - логарифмическая шкала', fontsize=9)
plt.tight_layout()
graph_path = os.path.join(docs_dir, "performance_graphs.png")
plt.savefig(graph_path, dpi=150, bbox_inches='tight')
plt.close()
print(f" Графики сохранены в: {graph_path}")
return graph_path
if __name__ == "__main__":
results, docs_dir = run_experiment()
try:
graph_file = create_graphs(results, docs_dir)
print("\n" + "=" * 70)
print("ЭКСПЕРИМЕНТ ЗАВЕРШЕН УСПЕШНО!")
print("=" * 70)
print("\nСОЗДАННЫЕ ФАЙЛЫ:")
print(f" Данные: {os.path.join(docs_dir, 'experiment_results.csv')}")
print(f" Графики: {graph_file}")
except Exception as e:
print(f"\nОшибка при построении графиков: {e}")
print(" Убедитесь, что установлен matplotlib: pip install matplotlib")
print("\n" + "=" * 70)
print("ЭКСПЕРИМЕНТ ЗАВЕРШЕН (без графиков)")
print("=" * 70)
print(f"\nCSV файл сохранен: {os.path.join(docs_dir, 'experiment_results.csv')}")