395 lines
14 KiB
Python
395 lines
14 KiB
Python
|
|
import time
|
|||
|
|
import random
|
|||
|
|
import csv
|
|||
|
|
import sys
|
|||
|
|
# Raise the interpreter's recursion limit far above the default so that any
# recursive walk over a deeply unbalanced structure does not hit RecursionError.
sys.setrecursionlimit(100000)
|
|||
|
|
|
|||
|
|
def ll_insert(head, name, phone):
    """Insert a contact into a singly linked list, or update it if present.

    Nodes are dicts with the keys 'name', 'phone' and 'next'.

    Args:
        head: First node of the list, or None for an empty list.
        name: Contact name used as the lookup key.
        phone: Phone number to store for that name.

    Returns:
        The head of the list: a new node when the list was empty,
        otherwise the unchanged ``head``.
    """
    fresh = {'name': name, 'phone': phone, 'next': None}
    if head is None:
        return fresh

    node = head
    while True:
        if node['name'] == name:
            # Existing contact: overwrite the number, do not add a node.
            node['phone'] = phone
            return head
        if not node['next']:
            # Reached the tail without a match: append there.
            node['next'] = fresh
            return head
        node = node['next']
|
|||
|
|
|
|||
|
|
def ll_find(head, name):
    """Look up a contact's phone number in a linked list.

    Args:
        head: First node of the list (dict with 'name', 'phone', 'next'),
            or None for an empty list.
        name: Contact name to search for.

    Returns:
        The stored phone of the first node whose name matches, or None
        when no node matches.
    """
    node = head
    while node:
        if node['name'] != name:
            node = node['next']
            continue
        return node['phone']
    return None
|
|||
|
|
|
|||
|
|
def ll_delete(head, name):
    """Remove the first node with the given name from a linked list.

    Args:
        head: First node of the list, or None for an empty list.
        name: Contact name to remove.

    Returns:
        The head of the list after removal. This is ``head['next']`` when
        the head itself matched, and the original ``head`` otherwise
        (including when the name is not present).
    """
    if head is None:
        return None
    if head['name'] == name:
        return head['next']

    previous, node = head, head['next']
    while node:
        if node['name'] == name:
            # Unlink the matching node by bypassing it.
            previous['next'] = node['next']
            break
        previous, node = node, node['next']
    return head
|
|||
|
|
|
|||
|
|
def ll_list_all(head):
    """Return every contact of a linked list as (name, phone) tuples.

    Args:
        head: First node of the list, or None for an empty list.

    Returns:
        A new list of (name, phone) tuples sorted by name. Entries with
        equal names keep their list order because the sort is stable.
    """
    def walk(node):
        # Yield the contacts in list order.
        while node:
            yield (node['name'], node['phone'])
            node = node['next']

    return sorted(walk(head), key=lambda pair: pair[0])
|
|||
|
|
|
|||
|
|
def hash_function(name, table_size):
    """Map a name to a bucket index using a polynomial string hash.

    A plain sum of character codes ignores character order, so anagrams
    always collide and keys that differ only in a few digits fall into a
    handful of buckets. Multiplying the running value by 31 before adding
    each character makes the result depend on character positions.

    Args:
        name: String key to hash.
        table_size: Number of buckets; the result is taken modulo this.

    Returns:
        The bucket index ``h % table_size``.

    Raises:
        ZeroDivisionError: If ``table_size`` is 0.
    """
    h = 0
    for ch in name:
        # Mask to 32 bits so the intermediate value stays small for long keys.
        h = (h * 31 + ord(ch)) & 0xFFFFFFFF
    return h % table_size
|
|||
|
|
|
|||
|
|
def ht_create(size=1000):
    """Create an empty hash table.

    Args:
        size: Number of buckets (default 1000).

    Returns:
        A list of ``size`` buckets, each initialised to None.
    """
    return [None for _ in range(size)]
|
|||
|
|
|
|||
|
|
def ht_insert(buckets, name, phone):
    """Insert or update a contact in a chained hash table, in place.

    Args:
        buckets: Bucket list; each bucket is a linked-list head or None.
        name: Contact name used as the key.
        phone: Phone number to store.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    # ll_insert returns the (possibly new) head of the chain.
    buckets[slot] = ll_insert(chain, name, phone)
|
|||
|
|
|
|||
|
|
def ht_find(buckets, name):
    """Look up a contact's phone number in a chained hash table.

    Args:
        buckets: Bucket list; each bucket is a linked-list head or None.
        name: Contact name to search for.

    Returns:
        Whatever ``ll_find`` returns for the bucket the name hashes to.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    return ll_find(chain, name)
|
|||
|
|
|
|||
|
|
def ht_delete(buckets, name):
    """Remove a contact from a chained hash table, in place.

    Args:
        buckets: Bucket list; each bucket is a linked-list head or None.
        name: Contact name to remove.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    # ll_delete returns the head of the chain after removal.
    buckets[slot] = ll_delete(chain, name)
|
|||
|
|
|
|||
|
|
def ht_list_all(buckets):
    """Return every contact stored in a chained hash table.

    Args:
        buckets: Bucket list; each bucket is a linked-list head or None.

    Returns:
        A new list of (name, phone) tuples sorted by name.
    """
    def entries():
        # Visit buckets in index order and each chain from head to tail.
        for chain in buckets:
            node = chain
            while node:
                yield (node['name'], node['phone'])
                node = node['next']

    return sorted(entries(), key=lambda pair: pair[0])
|
|||
|
|
|
|||
|
|
def bst_insert_iterative(root, name, phone):
    """Insert or update a contact in a binary search tree without recursion.

    Nodes are dicts with the keys 'name', 'phone', 'left' and 'right'.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Contact name used as the ordering key.
        phone: Phone number to store.

    Returns:
        The root of the tree: a new node when the tree was empty,
        otherwise the unchanged ``root``.
    """
    leaf = {'name': name, 'phone': phone, 'left': None, 'right': None}
    if root is None:
        return leaf

    node = root
    while True:
        key = node['name']
        if name < key:
            side = 'left'
        elif name > key:
            side = 'right'
        else:
            # Same key: overwrite the number and keep the tree shape.
            node['phone'] = phone
            return root

        child = node[side]
        if child is None:
            node[side] = leaf
            return root
        node = child
|
|||
|
|
|
|||
|
|
def bst_find_iterative(root, name):
    """Look up a contact's phone number in a binary search tree.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Contact name to search for.

    Returns:
        The phone stored for ``name``, or None when it is not in the tree.
    """
    node = root
    while node:
        key = node['name']
        if name == key:
            return node['phone']
        # Smaller keys live in the left subtree, larger ones in the right.
        node = node['left'] if name < key else node['right']
    return None
|
|||
|
|
|
|||
|
|
def bst_find_min(node):
    """Return the leftmost node of the subtree rooted at ``node``.

    Args:
        node: Subtree root, or None.

    Returns:
        The node reached by following 'left' links until there is none.
        A falsy ``node`` is returned unchanged.
    """
    if not node:
        return node
    leftmost = node
    while leftmost['left']:
        leftmost = leftmost['left']
    return leftmost
|
|||
|
|
|
|||
|
|
def bst_delete_iterative(root, name):
    """Delete a contact from a binary search tree without recursion.

    The search for the node runs in a loop instead of recursing once per
    level, so a degenerate (list-shaped) tree cannot exhaust the call
    stack. Nodes are compared by identity (``is``), not by dict equality.

    Args:
        root: Root node of the tree (dict with 'name', 'phone', 'left',
            'right'), or None for an empty tree.
        name: Contact name to remove.

    Returns:
        The root of the tree after removal. This is None when the tree
        becomes empty, and the unchanged ``root`` when ``name`` is absent.
    """
    parent = None
    node = root
    # Locate the node to delete and remember its parent for re-linking.
    while node is not None and node['name'] != name:
        parent = node
        node = node['left'] if name < node['name'] else node['right']

    if node is None:
        return root

    if node['left'] is not None and node['right'] is not None:
        # Two children: copy the in-order successor (leftmost node of the
        # right subtree) into this node, then unlink the successor.
        succ_parent = node
        successor = node['right']
        while successor['left'] is not None:
            succ_parent = successor
            successor = successor['left']

        node['name'] = successor['name']
        node['phone'] = successor['phone']

        if succ_parent is node:
            succ_parent['right'] = successor['right']
        else:
            succ_parent['left'] = successor['right']
        return root

    # Zero or one child: splice the only child (or None) into the parent.
    child = node['right'] if node['left'] is None else node['left']
    if parent is None:
        return child
    if parent['left'] is node:
        parent['left'] = child
    else:
        parent['right'] = child
    return root
|
|||
|
|
|
|||
|
|
def bst_list_all(root):
    """Return every contact of a binary search tree in key order.

    Performs an in-order traversal with an explicit stack instead of
    recursion.

    Args:
        root: Root node of the tree, or None for an empty tree.

    Returns:
        A list of (name, phone) tuples in in-order sequence.
    """
    ordered = []
    pending = []
    node = root
    while True:
        if node:
            # Descend left, remembering the path to come back to.
            pending.append(node)
            node = node['left']
        elif pending:
            node = pending.pop()
            ordered.append((node['name'], node['phone']))
            node = node['right']
        else:
            return ordered
|
|||
|
|
|
|||
|
|
def generate_test_data(N=10000):
    """Generate N synthetic phone-book records in two orderings.

    Names have the form ``User_00042``. Phones have the form
    ``+7-999-`` followed by a random 7-digit number.

    Args:
        N: Number of records to generate (default 10000).

    Returns:
        A tuple ``(records_shuffled, records_sorted)``. Both are lists of
        (name, phone) tuples with the same content; the first is randomly
        shuffled and the second is sorted by name.
    """
    records = []
    for i in range(N):
        phone = f"+7-999-{random.randint(1000000, 9999999)}"
        records.append((f"User_{i:05d}", phone))

    # Shuffle a copy so the originally generated order is kept for sorting.
    records_shuffled = list(records)
    random.shuffle(records_shuffled)

    records_sorted = sorted(records, key=lambda x: x[0])
    return records_shuffled, records_sorted
|
|||
|
|
|
|||
|
|
def measure_insertion(structure_type, records, ht_size=1000):
    """Build a data structure from ``records`` and time the insertions.

    Args:
        structure_type: One of "LinkedList", "HashTable" or "BST".
        records: Iterable of (name, phone) pairs to insert in order.
        ht_size: Bucket count used when ``structure_type`` is "HashTable".

    Returns:
        A tuple ``(structure, seconds)``: the populated structure (list
        head, bucket list or tree root) and the elapsed insertion time
        measured with ``time.perf_counter``.

    Raises:
        ValueError: If ``structure_type`` is not a supported name.
    """
    # Reject unknown names explicitly instead of silently returning None,
    # which would only surface later as an unpacking error in the caller.
    if structure_type not in ("LinkedList", "HashTable", "BST"):
        raise ValueError(f"Unknown structure type: {structure_type!r}")

    if structure_type == "LinkedList":
        head = None
        start = time.perf_counter()
        for name, phone in records:
            head = ll_insert(head, name, phone)
        end = time.perf_counter()
        return head, (end - start)

    if structure_type == "HashTable":
        # Allocate the bucket array before the timer starts.
        buckets = ht_create(ht_size)
        start = time.perf_counter()
        for name, phone in records:
            ht_insert(buckets, name, phone)
        end = time.perf_counter()
        return buckets, (end - start)

    root = None
    start = time.perf_counter()
    for name, phone in records:
        root = bst_insert_iterative(root, name, phone)
    end = time.perf_counter()
    return root, (end - start)
|
|||
|
|
|
|||
|
|
def measure_search(data_structure, structure_type, existing_names, non_existing_names):
    """Time a batch of lookups against one data structure.

    The lookup function is chosen once, before the timer starts, so the
    measured loops contain only the lookups themselves and not repeated
    string comparisons on ``structure_type``.

    Args:
        data_structure: List head, bucket list or tree root to search.
        structure_type: One of "LinkedList", "HashTable" or "BST".
        existing_names: Names looked up first.
        non_existing_names: Names looked up second.

    Returns:
        Total elapsed seconds for all lookups, measured with
        ``time.perf_counter``.

    Raises:
        ValueError: If ``structure_type`` is not a supported name.
    """
    if structure_type == "LinkedList":
        find = ll_find
    elif structure_type == "HashTable":
        find = ht_find
    elif structure_type == "BST":
        find = bst_find_iterative
    else:
        raise ValueError(f"Unknown structure type: {structure_type!r}")

    start = time.perf_counter()
    for name in existing_names:
        find(data_structure, name)
    for name in non_existing_names:
        find(data_structure, name)
    end = time.perf_counter()

    return end - start
|
|||
|
|
|
|||
|
|
def measure_deletion(data_structure, structure_type, names_to_delete):
    """Time a batch of deletions against one data structure.

    The structure type is resolved once, before the timer starts, so the
    measured loop contains only the deletions themselves.

    Args:
        data_structure: List head, bucket list or tree root to modify.
        structure_type: One of "LinkedList", "HashTable" or "BST".
        names_to_delete: Names to remove, in order.

    Returns:
        A tuple ``(structure, seconds)``: the structure after all
        deletions and the elapsed time measured with ``time.perf_counter``.

    Raises:
        ValueError: If ``structure_type`` is not a supported name.
    """
    if structure_type == "LinkedList":
        remove = ll_delete
    elif structure_type == "BST":
        remove = bst_delete_iterative
    elif structure_type == "HashTable":
        remove = None
    else:
        raise ValueError(f"Unknown structure type: {structure_type!r}")

    start = time.perf_counter()
    if remove is None:
        # The hash table is modified in place; the bucket list is unchanged.
        for name in names_to_delete:
            ht_delete(data_structure, name)
    else:
        # List and tree deletions return the new head/root.
        for name in names_to_delete:
            data_structure = remove(data_structure, name)
    end = time.perf_counter()

    return data_structure, (end - start)
|
|||
|
|
|
|||
|
|
def run_experiment(N=5000, repeats=5):
    """Benchmark insertion, search and deletion for every structure.

    For each structure ("LinkedList", "HashTable", "BST") and each input
    order (shuffled, sorted) the structure is rebuilt ``repeats`` times.
    Each rebuild is followed by one timed search batch and one timed
    deletion batch. Progress and averages are printed as it goes.

    Args:
        N: Number of records to generate (default 5000).
        repeats: Number of repetitions per structure/order pair.

    Returns:
        A list of dicts, one per structure/order pair, holding the
        per-repetition timings and their averages.
    """
    print(f"Генерация тестовых данных (N={N})...")
    records_shuffled, records_sorted = generate_test_data(N)

    # The lookup sample is drawn before the deletion sample.
    existing_names = [pair[0] for pair in random.sample(records_shuffled, min(100, N))]
    non_existing_names = [f"None_{i}" for i in range(10)]
    delete_names = [pair[0] for pair in random.sample(records_shuffled, min(50, N))]

    datasets = (
        ("случайный", records_shuffled),
        ("отсортированный", records_sorted),
    )

    results = []
    for struct in ("LinkedList", "HashTable", "BST"):
        for mode, records in datasets:
            print(f"\nТестирование: {struct}, режим: {mode}")

            insertion_times, search_times, deletion_times = [], [], []

            for rep in range(repeats):
                print(f" Повторение {rep+1}/{repeats}...")

                # Each repetition starts from a freshly built structure.
                data_structure, insert_time = measure_insertion(struct, records)
                search_time = measure_search(data_structure, struct, existing_names, non_existing_names)
                data_structure, delete_time = measure_deletion(data_structure, struct, delete_names)

                insertion_times.append(insert_time)
                search_times.append(search_time)
                deletion_times.append(delete_time)

            avg_insert = sum(insertion_times) / repeats
            avg_search = sum(search_times) / repeats
            avg_delete = sum(deletion_times) / repeats

            summary = {
                "structure": struct,
                "mode": mode,
                "insertion_avg": avg_insert,
                "insertion_all": insertion_times,
                "search_avg": avg_search,
                "search_all": search_times,
                "deletion_avg": avg_delete,
                "deletion_all": deletion_times
            }
            results.append(summary)

            print(f" Вставка: {avg_insert:.6f} сек (замеры: {[f'{t:.6f}' for t in insertion_times]})")
            print(f" Поиск: {avg_search:.6f} сек (замеры: {[f'{t:.6f}' for t in search_times]})")
            print(f" Удаление: {avg_delete:.6f} сек (замеры: {[f'{t:.6f}' for t in deletion_times]})")

    return results
|
|||
|
|
|
|||
|
|
def save_results_to_csv(results, filename="results.csv"):
    """Write benchmark results to a CSV file and report the file name.

    For every result and every operation the file gets one row per
    repetition (numbered from 1), followed by one row with the average.

    Args:
        results: List of result dicts with the keys "structure", "mode"
            and, for each operation, an "*_all" list and an "*_avg" value.
        filename: Output path (default "results.csv"); overwritten if it
            exists.
    """
    # (row label, key of per-repetition timings, key of the average)
    operations = (
        ("вставка", "insertion_all", "insertion_avg"),
        ("поиск", "search_all", "search_avg"),
        ("удаление", "deletion_all", "deletion_avg"),
    )

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Структура", "Режим", "Операция", "Повторение", "Время (сек)"])

        for res in results:
            struct, mode = res["structure"], res["mode"]
            for label, runs_key, avg_key in operations:
                for number, elapsed in enumerate(res[runs_key], start=1):
                    writer.writerow([struct, mode, label, number, elapsed])
                writer.writerow([struct, mode, label, "СРЕДНЕЕ", res[avg_key]])

    print(f"\nРезультаты сохранены в {filename}")
|
|||
|
|
|
|||
|
|
def print_summary_table(results):
    """Print the averaged timings as a table plus a BST degradation note.

    Args:
        results: List of result dicts with the keys "structure", "mode",
            "insertion_avg", "search_avg" and "deletion_avg". It must
            contain a "BST" entry for each of the two input modes.

    Raises:
        StopIteration: If a required BST entry is missing from ``results``.
    """
    rule = "="*80

    def first_bst(wanted_mode):
        # First BST result recorded for the given input mode.
        return next(r for r in results if r['structure'] == "BST" and r['mode'] == wanted_mode)

    print("\n" + rule)
    print("СВОДНАЯ ТАБЛИЦА РЕЗУЛЬТАТОВ (среднее время в секундах)")
    print(rule)
    print(f"{'Структура':<15} {'Режим':<12} {'Вставка':<12} {'Поиск (110)':<12} {'Удаление (50)':<12}")
    print("-"*80)

    for res in results:
        print(f"{res['structure']:<15} {res['mode']:<12} {res['insertion_avg']:<12.6f} "
              f"{res['search_avg']:<12.6f} {res['deletion_avg']:<12.6f}")

    print("\n" + rule)
    print("АНАЛИЗ ДЕГРАДАЦИИ BST")
    print(rule)

    bst_random = first_bst("случайный")
    bst_sorted = first_bst("отсортированный")

    # Ratio of average insertion time: sorted input over shuffled input.
    degradation = bst_sorted['insertion_avg'] / bst_random['insertion_avg']
    print(f"BST: отсортированные данные в {degradation:.1f} раз медленнее случайных")
    print("Причина: вырождение дерева в линейный связный список (O(n) вместо O(log n))")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Script entry point: run the benchmark, save the timings to CSV,
    # then print the summary table, the conclusions and extra comparisons.
    rule = "="*80

    print(rule)
    print("ЭКСПЕРИМЕНТАЛЬНОЕ СРАВНЕНИЕ СТРУКТУР ДАННЫХ ДЛЯ ТЕЛЕФОННОГО СПРАВОЧНИКА")
    print(rule)

    results = run_experiment(N=5000, repeats=5)
    save_results_to_csv(results)
    print_summary_table(results)

    print("\n" + rule)
    print("ВЫВОДЫ И РЕКОМЕНДАЦИИ")
    print(rule)
    print("""
1. Хеш-таблица:
Лучшая производительность для операций поиска и вставки (O(1) в среднем)
Не чувствительна к порядку входных данных
Требует память под массив бакетов
Не поддерживает естественный порядок (нужна сортировка)
Идеально для справочников с частым поиском

2. Двоичное дерево поиска:
Естественная сортировка (in-order обход)
Хорошая производительность на случайных данных (O(log n))
Сильная деградация на отсортированных данных (O(n))
Рекурсивные операции требуют больше памяти
Хорошо для задач, где нужен отсортированный вывод

3. Связный список:
Простота реализации
Медленный поиск и удаление (O(n))
Неэффективен для больших объёмов данных
Применим только для очень маленьких справочников

РЕКОМЕНДАЦИИ ДЛЯ РЕАЛЬНЫХ ЗАДАЧ:
Частый поиск, редкие вставки -> ХЕШ-ТАБЛИЦА
Нужен отсортированный вывод -> ДЕРЕВО (с балансировкой)
Очень маленький справочник (<100 записей) -> СПИСОК
В реальных БД -> хеш-таблица + B-деревья
""")

    print("\n" + rule)
    print("ДОПОЛНИТЕЛЬНЫЙ АНАЛИЗ")
    print(rule)

    def first_result(structure, wanted_mode):
        # First result recorded for the given structure and input mode.
        return next(r for r in results if r['structure'] == structure and r['mode'] == wanted_mode)

    # Search time on shuffled input, one line per structure.
    for struct in ("LinkedList", "HashTable", "BST"):
        res_random = first_result(struct, "случайный")
        print(f"{struct:12} поиск 110 записей: {res_random['search_avg']:.6f} сек")

    # Linked list: average insertion time, sorted input over shuffled input.
    ll_random = first_result("LinkedList", "случайный")
    ll_sorted = first_result("LinkedList", "отсортированный")
    print(f"\nСвязный список: деградация {ll_sorted['insertion_avg'] / ll_random['insertion_avg']:.2f}х")
|