# 2026-rff_mp/KolbasovPD/docs/data/1-st_exercise/main.py
#
# 395 lines
# 14 KiB
# Python
# Raw Normal View History
#
# 2026-05-29 19:18:04 +00:00
import time
import random
import csv
import sys
# Raise the interpreter's recursion limit far above the default so that
# recursive code in this module can descend through very deep trees
# without hitting RecursionError.
sys.setrecursionlimit(100000)
def ll_insert(head, name, phone):
    """Insert a record into a singly linked list, or update it if present.

    Nodes are dicts with the keys 'name', 'phone' and 'next'.  The list is
    scanned from the head: if a node with the same name exists its phone is
    overwritten, otherwise a new node is appended after the last node.

    Args:
        head: First node of the list, or None for an empty list.
        name: Key of the record.
        phone: Value stored for the key.

    Returns:
        The head of the list (a freshly created node when the list was empty).
    """
    if head is None:
        return {'name': name, 'phone': phone, 'next': None}

    node = head
    while True:
        if node['name'] == name:
            # Existing entry: only the phone changes, the list shape does not.
            node['phone'] = phone
            return head
        if not node['next']:
            break
        node = node['next']

    # `node` is now the tail; hang the new record off it.
    node['next'] = {'name': name, 'phone': phone, 'next': None}
    return head
def ll_find(head, name):
    """Look up a phone number by name in a linked list of dict nodes.

    Args:
        head: First node of the list, or None for an empty list.
        name: Name to search for.

    Returns:
        The phone stored in the first node whose 'name' equals `name`,
        or None when no such node exists.
    """
    node = head
    # Advance until we either run off the end or land on the wanted name.
    while node and node['name'] != name:
        node = node['next']
    return node['phone'] if node else None
def ll_delete(head, name):
    """Remove the first node with the given name from a linked list.

    Args:
        head: First node of the list, or None for an empty list.
        name: Name of the record to remove.

    Returns:
        The head of the list after removal.  This is the old head's successor
        when the head itself was removed, and the unchanged head when the
        name was not found.
    """
    if head is None:
        return None
    if head['name'] == name:
        return head['next']

    before = head
    node = head['next']
    while node:
        if node['name'] == name:
            # Bypass the matching node.
            before['next'] = node['next']
            break
        before, node = node, node['next']
    return head
def ll_list_all(head):
    """Return every record of a linked list as (name, phone) tuples.

    Args:
        head: First node of the list, or None for an empty list.

    Returns:
        A new list of (name, phone) tuples sorted by name.
    """
    pairs = []
    node = head
    while node:
        pairs.append((node['name'], node['phone']))
        node = node['next']
    # Stable sort on the name only, so equal names keep their list order.
    return sorted(pairs, key=lambda pair: pair[0])
def hash_function(name, table_size):
    """Map a name to a bucket index in the range [0, table_size).

    A polynomial rolling hash (multiplier 31) is used instead of a plain sum
    of code points.  A plain sum ignores character order, so keys that share
    a prefix and differ only in a few digits (e.g. "User_00012" and
    "User_00021") collapse into a handful of buckets and the table's chains
    grow long.

    Args:
        name: String key to hash.
        table_size: Number of buckets; must be a positive integer.

    Returns:
        An integer bucket index, 0 <= index < table_size.

    Raises:
        ValueError: If `table_size` is not positive.
    """
    if table_size <= 0:
        raise ValueError(f"table_size must be positive, got {table_size!r}")
    index = 0
    for char in name:
        # Reduce at every step so the intermediate value stays small.
        index = (index * 31 + ord(char)) % table_size
    return index
def ht_create(size=1000):
    """Create an empty hash table.

    Args:
        size: Number of buckets.

    Returns:
        A list of `size` buckets, each initialised to None (an empty chain).
    """
    return [None for _ in range(size)]
def ht_insert(buckets, name, phone):
    """Insert or update a record in the hash table (separate chaining).

    The bucket is chosen with hash_function and the record is stored in that
    bucket's linked list via ll_insert.

    Args:
        buckets: Bucket list as produced by ht_create; modified in place.
        name: Key of the record.
        phone: Value stored for the key.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    # ll_insert returns the (possibly new) head of the chain.
    buckets[slot] = ll_insert(chain, name, phone)
def ht_find(buckets, name):
    """Look up a phone number by name in the hash table.

    Args:
        buckets: Bucket list as produced by ht_create.
        name: Name to search for.

    Returns:
        Whatever ll_find returns for the chain selected by hash_function:
        the stored phone, or None when the name is absent.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    return ll_find(chain, name)
def ht_delete(buckets, name):
    """Remove a record from the hash table, if present.

    Args:
        buckets: Bucket list as produced by ht_create; modified in place.
        name: Name of the record to remove.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    # ll_delete returns the chain's head after removal (it may have changed).
    buckets[slot] = ll_delete(chain, name)
def ht_list_all(buckets):
    """Return every record of the hash table as (name, phone) tuples.

    Args:
        buckets: Bucket list whose entries are linked-list heads or None.

    Returns:
        A new list of (name, phone) tuples sorted by name.
    """
    def _walk_chains():
        # Visit buckets in index order and each chain from head to tail.
        for node in buckets:
            while node:
                yield node['name'], node['phone']
                node = node['next']

    return sorted(_walk_chains(), key=lambda pair: pair[0])
def bst_insert_iterative(root, name, phone):
    """Insert or update a record in an unbalanced binary search tree.

    Nodes are dicts with the keys 'name', 'phone', 'left' and 'right'.  The
    tree is descended with a loop (no recursion); an existing name has its
    phone overwritten.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Key of the record.
        phone: Value stored for the key.

    Returns:
        The root of the tree (a freshly created node when the tree was empty).
    """
    fresh = {'name': name, 'phone': phone, 'left': None, 'right': None}
    if root is None:
        return fresh

    node = root
    while True:
        key = node['name']
        if name < key:
            side = 'left'
        elif name > key:
            side = 'right'
        else:
            # Same key: update in place and stop.
            node['phone'] = phone
            return root

        child = node[side]
        if child is None:
            node[side] = fresh
            return root
        node = child
def bst_find_iterative(root, name):
    """Look up a phone number by name in a binary search tree.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Name to search for.

    Returns:
        The phone stored for `name`, or None when the name is absent.
    """
    node = root
    while node:
        key = node['name']
        if name == key:
            return node['phone']
        # Smaller keys live in the left subtree, larger ones in the right.
        node = node['left'] if name < key else node['right']
    return None
def bst_find_min(node):
    """Return the leftmost node of the subtree rooted at `node`.

    Args:
        node: Subtree root, or None.

    Returns:
        The node reached by following 'left' links until there are none;
        `node` itself is returned unchanged when it is None.
    """
    if not node:
        return node
    while node['left']:
        node = node['left']
    return node
def bst_delete_iterative(root, name):
    """Remove the record with the given name from a binary search tree.

    The search for the node is a plain loop, so the function works on
    degenerate (list-shaped) trees of any depth without relying on a raised
    recursion limit.  Nodes are compared by identity (`is`), never by dict
    equality.

    A node with two children is not unlinked: the name and phone of its
    in-order successor (leftmost node of the right subtree) are copied into
    it and the successor node is unlinked instead.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Name of the record to remove.

    Returns:
        The root of the tree after removal.  It differs from `root` only when
        the root itself had at most one child and was removed; the tree is
        returned unchanged when the name is absent.
    """
    # Locate the node and remember its parent so it can be relinked.
    parent = None
    node = root
    while node is not None and node['name'] != name:
        parent = node
        node = node['left'] if name < node['name'] else node['right']

    if node is None:
        return root

    if node['left'] is not None and node['right'] is not None:
        # Two children: pull the in-order successor's payload up.
        succ_parent = node
        successor = node['right']
        while successor['left'] is not None:
            succ_parent = successor
            successor = successor['left']
        node['name'] = successor['name']
        node['phone'] = successor['phone']
        # The successor has no left child, so its right child takes its place.
        if succ_parent is node:
            succ_parent['right'] = successor['right']
        else:
            succ_parent['left'] = successor['right']
        return root

    # Zero or one child: splice the (possibly None) child into the parent.
    child = node['right'] if node['left'] is None else node['left']
    if parent is None:
        return child
    if parent['left'] is node:
        parent['left'] = child
    else:
        parent['right'] = child
    return root
def bst_list_all(root):
    """Return every record of the tree in ascending name order.

    Performs an in-order traversal with an explicit stack instead of
    recursion.

    Args:
        root: Root node of the tree, or None for an empty tree.

    Returns:
        A list of (name, phone) tuples in in-order sequence.
    """
    ordered = []
    pending = []
    node = root
    while True:
        # Slide down the left spine, remembering the nodes passed.
        while node:
            pending.append(node)
            node = node['left']
        if not pending:
            break
        node = pending.pop()
        ordered.append((node['name'], node['phone']))
        node = node['right']
    return ordered
def generate_test_data(N=10000):
    """Generate N phone-book records in two orderings.

    Names have the form "User_00000", "User_00001", ...; phones are
    "+7-999-" followed by a random 7-digit number.

    Args:
        N: Number of records to generate.

    Returns:
        A tuple (records_shuffled, records_sorted): the same (name, phone)
        records in random order and sorted by name.
    """
    # One randint call per record, in index order.
    records = [
        (f"User_{i:05d}", f"+7-999-{random.randint(1000000, 9999999)}")
        for i in range(N)
    ]
    shuffled = list(records)
    random.shuffle(shuffled)
    ordered = sorted(records, key=lambda rec: rec[0])
    return shuffled, ordered
def measure_insertion(structure_type, records, ht_size=1000):
    """Build a structure from `records` and time the insertions.

    Only the insertion loop is timed; creating the empty structure is not.

    Args:
        structure_type: One of "LinkedList", "HashTable" or "BST".
        records: Iterable of (name, phone) pairs, inserted in iteration order.
        ht_size: Bucket count, used only for "HashTable".

    Returns:
        A tuple (structure, seconds): the populated structure (list head,
        bucket list or tree root) and the elapsed time from
        time.perf_counter.

    Raises:
        ValueError: If `structure_type` is not one of the supported names.
            Previously the function fell through and returned None, which
            surfaced later as an unrelated unpacking error in the caller.
    """
    if structure_type == "LinkedList":
        head = None
        start = time.perf_counter()
        for name, phone in records:
            head = ll_insert(head, name, phone)
        end = time.perf_counter()
        return head, (end - start)

    if structure_type == "HashTable":
        buckets = ht_create(ht_size)
        start = time.perf_counter()
        for name, phone in records:
            ht_insert(buckets, name, phone)
        end = time.perf_counter()
        return buckets, (end - start)

    if structure_type == "BST":
        root = None
        start = time.perf_counter()
        for name, phone in records:
            root = bst_insert_iterative(root, name, phone)
        end = time.perf_counter()
        return root, (end - start)

    raise ValueError(f"unknown structure_type: {structure_type!r}")
def measure_search(data_structure, structure_type, existing_names, non_existing_names):
    """Time lookups of present and absent names in one structure.

    The lookup function is resolved once, before the clock starts, so the
    measurement covers the lookups themselves rather than a per-iteration
    chain of string comparisons on `structure_type`.

    Args:
        data_structure: List head, bucket list or tree root to search.
        structure_type: One of "LinkedList", "HashTable" or "BST".
        existing_names: Names looked up first.
        non_existing_names: Names looked up afterwards.

    Returns:
        Total elapsed seconds for all lookups (time.perf_counter).

    Raises:
        ValueError: If `structure_type` is not one of the supported names.
            Previously an unknown type timed two empty loops and returned a
            meaningless near-zero duration.
    """
    if structure_type == "LinkedList":
        find = ll_find
    elif structure_type == "HashTable":
        find = ht_find
    elif structure_type == "BST":
        find = bst_find_iterative
    else:
        raise ValueError(f"unknown structure_type: {structure_type!r}")

    start = time.perf_counter()
    for name in existing_names:
        find(data_structure, name)
    for name in non_existing_names:
        find(data_structure, name)
    end = time.perf_counter()
    return end - start
def measure_deletion(data_structure, structure_type, names_to_delete):
    """Delete the given names from a structure and time the deletions.

    Args:
        data_structure: List head, bucket list or tree root to delete from.
        structure_type: One of "LinkedList", "HashTable" or "BST".
        names_to_delete: Names removed in iteration order.

    Returns:
        A tuple (structure, seconds): the structure after all deletions (the
        head/root may have changed for "LinkedList" and "BST"; the bucket
        list is modified in place) and the elapsed time from
        time.perf_counter.

    Raises:
        ValueError: If `structure_type` is not one of the supported names.
            Previously an unknown type deleted nothing and still reported a
            duration.
    """
    if structure_type not in ("LinkedList", "HashTable", "BST"):
        raise ValueError(f"unknown structure_type: {structure_type!r}")

    # Dispatch once, outside the timed region.
    if structure_type == "HashTable":
        start = time.perf_counter()
        for name in names_to_delete:
            ht_delete(data_structure, name)
        end = time.perf_counter()
        return data_structure, (end - start)

    start = time.perf_counter()
    if structure_type == "LinkedList":
        for name in names_to_delete:
            # The head can change when the first node is removed.
            data_structure = ll_delete(data_structure, name)
    else:
        for name in names_to_delete:
            # The root can change when the root node is removed.
            data_structure = bst_delete_iterative(data_structure, name)
    end = time.perf_counter()
    return data_structure, (end - start)
def run_experiment(N=5000, repeats=5):
    """Benchmark insertion, search and deletion for all three structures.

    For every structure ("LinkedList", "HashTable", "BST") and every input
    order (shuffled, sorted) the structure is rebuilt `repeats` times; each
    time insertion of all records, lookup of up to 100 existing plus 10
    missing names, and deletion of up to 50 names are timed.  Progress and
    per-combination averages are printed.

    Args:
        N: Number of records to generate.
        repeats: Number of repetitions per combination; must be at least 1.

    Returns:
        A list of dicts, one per (structure, mode) combination, with the keys
        "structure", "mode", "insertion_avg", "insertion_all", "search_avg",
        "search_all", "deletion_avg" and "deletion_all".

    Raises:
        ValueError: If `repeats` is less than 1.  Previously this ran the
            whole setup and then failed with ZeroDivisionError while
            averaging.
    """
    if repeats < 1:
        raise ValueError(f"repeats must be at least 1, got {repeats!r}")

    print(f"Генерация тестовых данных (N={N})...")
    records_shuffled, records_sorted = generate_test_data(N)

    # The same name samples are reused for every structure and mode so the
    # timings are comparable.
    existing_names = [name for name, _ in random.sample(records_shuffled, min(100, N))]
    non_existing_names = [f"None_{i}" for i in range(10)]
    delete_names = [name for name, _ in random.sample(records_shuffled, min(50, N))]

    results = []
    structures = ["LinkedList", "HashTable", "BST"]
    modes = ["случайный", "отсортированный"]
    for struct in structures:
        for mode in modes:
            records = records_shuffled if mode == "случайный" else records_sorted
            print(f"\nТестирование: {struct}, режим: {mode}")
            insertion_times = []
            search_times = []
            deletion_times = []
            for rep in range(repeats):
                print(f" Повторение {rep+1}/{repeats}...")
                # Each repetition starts from a freshly built structure.
                data_structure, insert_time = measure_insertion(struct, records)
                insertion_times.append(insert_time)
                search_time = measure_search(data_structure, struct, existing_names, non_existing_names)
                search_times.append(search_time)
                data_structure, delete_time = measure_deletion(data_structure, struct, delete_names)
                deletion_times.append(delete_time)

            avg_insert = sum(insertion_times) / repeats
            avg_search = sum(search_times) / repeats
            avg_delete = sum(deletion_times) / repeats
            results.append({
                "structure": struct,
                "mode": mode,
                "insertion_avg": avg_insert,
                "insertion_all": insertion_times,
                "search_avg": avg_search,
                "search_all": search_times,
                "deletion_avg": avg_delete,
                "deletion_all": deletion_times
            })
            print(f" Вставка: {avg_insert:.6f} сек (замеры: {[f'{t:.6f}' for t in insertion_times]})")
            print(f" Поиск: {avg_search:.6f} сек (замеры: {[f'{t:.6f}' for t in search_times]})")
            print(f" Удаление: {avg_delete:.6f} сек (замеры: {[f'{t:.6f}' for t in deletion_times]})")
    return results
def save_results_to_csv(results, filename="results.csv"):
    """Write every individual timing and every average to a CSV file.

    For each result entry and each operation, one row per repetition is
    written (numbered from 1), followed by one row holding the average.

    Args:
        results: List of result dicts with the keys "structure", "mode" and
            "<operation>_all" / "<operation>_avg" for insertion, search and
            deletion.
        filename: Path of the CSV file to create or overwrite (UTF-8).
    """
    # (label written to the file, key of the timings list, key of the average)
    operations = (
        ("вставка", "insertion_all", "insertion_avg"),
        ("поиск", "search_all", "search_avg"),
        ("удаление", "deletion_all", "deletion_avg"),
    )
    with open(filename, 'w', newline='', encoding='utf-8') as out:
        writer = csv.writer(out)
        writer.writerow(["Структура", "Режим", "Операция", "Повторение", "Время (сек)"])
        for entry in results:
            prefix = [entry["structure"], entry["mode"]]
            for label, all_key, avg_key in operations:
                for rep_no, seconds in enumerate(entry[all_key], start=1):
                    writer.writerow(prefix + [label, rep_no, seconds])
                writer.writerow(prefix + [label, "СРЕДНЕЕ", entry[avg_key]])
    print(f"\nРезультаты сохранены в {filename}")
def print_summary_table(results):
    """Print a table of average timings and the BST degradation factor.

    Args:
        results: List of result dicts with the keys "structure", "mode",
            "insertion_avg", "search_avg" and "deletion_avg".  It must
            contain BST entries for both modes; the ratio of their insertion
            averages is reported as the degradation factor.
    """
    heavy_rule = "=" * 80

    def _first_bst(mode):
        # First BST entry recorded for the given input order.
        return next(r for r in results if r['structure'] == "BST" and r['mode'] == mode)

    print("\n" + heavy_rule)
    print("СВОДНАЯ ТАБЛИЦА РЕЗУЛЬТАТОВ (среднее время в секундах)")
    print(heavy_rule)
    print(f"{'Структура':<15} {'Режим':<12} {'Вставка':<12} {'Поиск (110)':<12} {'Удаление (50)':<12}")
    print("-" * 80)
    for entry in results:
        left_part = f"{entry['structure']:<15} {entry['mode']:<12} {entry['insertion_avg']:<12.6f} "
        right_part = f"{entry['search_avg']:<12.6f} {entry['deletion_avg']:<12.6f}"
        print(left_part + right_part)

    print("\n" + heavy_rule)
    print("АНАЛИЗ ДЕГРАДАЦИИ BST")
    print(heavy_rule)
    bst_random = _first_bst("случайный")
    bst_sorted = _first_bst("отсортированный")
    degradation = bst_sorted['insertion_avg'] / bst_random['insertion_avg']
    print(f"BST: отсортированные данные в {degradation:.1f} раз медленнее случайных")
    print("Причина: вырождение дерева в линейный связный список (O(n) вместо O(log n))")
if __name__ == "__main__":
    # Script entry point: run the benchmark, persist and print the results,
    # then print the fixed conclusions text and a short extra comparison.
    banner = "=" * 80

    print(banner)
    print("ЭКСПЕРИМЕНТАЛЬНОЕ СРАВНЕНИЕ СТРУКТУР ДАННЫХ ДЛЯ ТЕЛЕФОННОГО СПРАВОЧНИКА")
    print(banner)

    results = run_experiment(N=5000, repeats=5)
    save_results_to_csv(results)
    print_summary_table(results)

    print("\n" + banner)
    print("ВЫВОДЫ И РЕКОМЕНДАЦИИ")
    print(banner)
    print("""
1. Хеш-таблица:
Лучшая производительность для операций поиска и вставки (O(1) в среднем)
Не чувствительна к порядку входных данных
Требует память под массив бакетов
Не поддерживает естественный порядок (нужна сортировка)
Идеально для справочников с частым поиском
2. Двоичное дерево поиска:
Естественная сортировка (in-order обход)
Хорошая производительность на случайных данных (O(log n))
Сильная деградация на отсортированных данных (O(n))
Рекурсивные операции требуют больше памяти
Хорошо для задач, где нужен отсортированный вывод
3. Связный список:
Простота реализации
Медленный поиск и удаление (O(n))
Неэффективен для больших объёмов данных
Применим только для очень маленьких справочников
РЕКОМЕНДАЦИИ ДЛЯ РЕАЛЬНЫХ ЗАДАЧ:
Частый поиск, редкие вставки -> ХЕШ-ТАБЛИЦА
Нужен отсортированный вывод -> ДЕРЕВО (с балансировкой)
Очень маленький справочник (<100 записей) -> СПИСОК
В реальных БД -> хеш-таблица + B-деревья
""")

    print("\n" + banner)
    print("ДОПОЛНИТЕЛЬНЫЙ АНАЛИЗ")
    print(banner)

    # Search time on shuffled input, one line per structure.
    for kind in ["LinkedList", "HashTable", "BST"]:
        shuffled_entry = next(
            r for r in results
            if r['structure'] == kind and r['mode'] == "случайный"
        )
        print(f"{kind:12} поиск 110 записей: {shuffled_entry['search_avg']:.6f} сек")

    # Ratio of sorted-input to shuffled-input insertion time for the list.
    ll_random = next(
        r for r in results
        if r['structure'] == "LinkedList" and r['mode'] == "случайный"
    )
    ll_sorted = next(
        r for r in results
        if r['structure'] == "LinkedList" and r['mode'] == "отсортированный"
    )
    ll_ratio = ll_sorted['insertion_avg'] / ll_random['insertion_avg']
    print(f"\nСвязный список: деградация {ll_ratio:.2f}х")