2026-rff_mp/novikovsd/hashtab.py
novikovsd 52a5aa8fc6 FINISH
создана функция, проводящая замеры, функция сохраняющая замеры
2026-05-24 13:34:12 +00:00

306 lines
9.1 KiB
Python

import time
import random
import csv
import os
import sys
# Raise the interpreter's recursion limit well above its default so that any
# recursive walk over a degenerate (list-shaped) BST -- which is what inserting
# the benchmark's 10000 records in sorted order produces -- does not end in
# RecursionError.
sys.setrecursionlimit(30000)
def ll_insert(head, name, phone):
    """Insert or update a record in a singly linked list.

    Nodes are dicts with the keys 'name', 'phone' and 'next'.  If a node
    with the given name already exists its phone is overwritten and the
    list head is returned unchanged; otherwise a new node is prepended
    and returned as the new head.
    """
    node = head
    # Advance until we either fall off the list or find the matching name.
    while node is not None and node['name'] != name:
        node = node['next']
    if node is None:
        return {'name': name, 'phone': phone, 'next': head}
    node['phone'] = phone
    return head
def ll_find(head, name):
    """Return the phone stored under *name* in the linked list, or None.

    The list is scanned from *head* and the first node whose 'name'
    equals *name* wins.
    """
    node = head
    while node is not None and node['name'] != name:
        node = node['next']
    return None if node is None else node['phone']
def ll_delete(head, name):
    """Remove the first node named *name* from the linked list.

    Returns the (possibly new) head of the list.  An empty list yields
    None, and a list that does not contain *name* is returned untouched.
    """
    if head is None:
        return None
    if head['name'] == name:
        # Dropping the head: the rest of the list becomes the list.
        return head['next']
    prev = head
    while prev['next'] is not None:
        candidate = prev['next']
        if candidate['name'] == name:
            # Unlink the match and stop; only one node is removed.
            prev['next'] = candidate['next']
            break
        prev = candidate
    return head
def ll_list_all(head):
    """Return every (name, phone) pair of the linked list, sorted by name."""
    def _pairs(node):
        # Lazily walk the chain of 'next' links.
        while node is not None:
            yield node['name'], node['phone']
            node = node['next']

    return sorted(_pairs(head), key=lambda pair: pair[0])
def _hash(name, bucket_count):
h = 0
for ch in name:
h = (h * 31 + ord(ch)) % bucket_count
return h
def ht_create(bucket_count=2000):
    """Create an empty hash table: a list of *bucket_count* empty (None) buckets."""
    return [None for _ in range(bucket_count)]
def ht_insert(buckets, name, phone):
    """Insert or update *name* in the hash table, in place.

    The bucket is chosen with _hash and the record is stored in that
    bucket's linked list via ll_insert.
    """
    slot = _hash(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
def ht_find(buckets, name):
    """Look *name* up in the hash table; return its phone or None if absent."""
    chain = buckets[_hash(name, len(buckets))]
    return ll_find(chain, name)
def ht_delete(buckets, name):
    """Remove *name* from the hash table in place (no-op when it is absent)."""
    slot = _hash(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
def ht_list_all(buckets):
    """Return every (name, phone) pair stored in the hash table, sorted by name."""
    def _pairs():
        # Visit each bucket in order and walk its collision chain.
        for node in buckets:
            while node is not None:
                yield node['name'], node['phone']
                node = node['next']

    return sorted(_pairs(), key=lambda pair: pair[0])
def bst_insert(root, name, phone):
    """Insert or update *name* in a binary search tree keyed by name.

    Nodes are dicts with the keys 'name', 'phone', 'left' and 'right'.
    The descent is iterative.  An existing name gets its phone replaced;
    a new name is attached as a leaf.  Returns the tree root (the new
    node itself when the tree was empty).
    """
    if root is None:
        return {'name': name, 'phone': phone, 'left': None, 'right': None}

    node = root
    while True:
        key = node['name']
        if name < key:
            side = 'left'
        elif name > key:
            side = 'right'
        else:
            # Same key: update in place, tree shape is unchanged.
            node['phone'] = phone
            return root
        child = node[side]
        if child is None:
            # Reached a free slot: hang the new leaf here.
            node[side] = {'name': name, 'phone': phone, 'left': None, 'right': None}
            return root
        node = child
def bst_find(root, name):
    """Return the phone stored under *name* in the BST, or None if absent."""
    node = root
    while node is not None:
        key = node['name']
        if name == key:
            return node['phone']
        # Smaller keys live in the left subtree, larger ones in the right.
        node = node['left'] if name < key else node['right']
    return None
def _bst_min_node(node):
while node and node['left'] is not None:
node = node['left']
return node
def bst_delete(root, name):
    """Remove *name* from the BST and return the (possibly new) root.

    The implementation is iterative, so it works on arbitrarily deep
    (degenerate) trees without depending on the interpreter's recursion
    limit.  The resulting tree shape is the classic one:

    * a node with at most one child is replaced by that child;
    * a node with two children takes over the name/phone of its in-order
      successor (leftmost node of the right subtree), and the successor
      node is unlinked instead.

    An empty tree yields None; a tree that does not contain *name* is
    returned unchanged.
    """
    # Locate the node to delete together with its parent.
    parent = None
    node = root
    while node is not None and node['name'] != name:
        parent = node
        node = node['left'] if name < node['name'] else node['right']
    if node is None:
        return root

    if node['left'] is not None and node['right'] is not None:
        # Two children: copy the successor's payload here, then remove the
        # successor, which by construction has no left child.
        succ_parent = node
        succ = node['right']
        while succ['left'] is not None:
            succ_parent = succ
            succ = succ['left']
        node['name'] = succ['name']
        node['phone'] = succ['phone']
        parent, node = succ_parent, succ

    # At this point `node` has at most one child; splice it out.
    child = node['left'] if node['left'] is not None else node['right']
    if parent is None:
        return child
    if parent['left'] is node:
        parent['left'] = child
    else:
        parent['right'] = child
    return root
def bst_list_all(root):
    """Return all (name, phone) pairs of the BST in ascending name order.

    Uses an explicit stack for the in-order traversal instead of
    recursion, so a degenerate tree (e.g. one built from sorted input)
    cannot trigger RecursionError regardless of its depth.
    """
    result = []
    pending = []  # ancestors whose left subtree is currently being visited
    node = root
    while pending or node is not None:
        # Dive as far left as possible, remembering the path.
        while node is not None:
            pending.append(node)
            node = node['left']
        node = pending.pop()
        result.append((node['name'], node['phone']))
        node = node['right']
    return result
def generate_test_data(n=10000):
    """Build *n* synthetic phone-book records in two orderings.

    Returns a pair ``(ordered, shuffled)``: the records in ascending
    index order and an independently shuffled copy of the same records
    (shuffled with the module-level random generator).
    """
    ordered = []
    for i in range(n):
        ordered.append((f"User_{i:05d}", f"+7-999-{i:05d}"))
    shuffled = list(ordered)
    random.shuffle(shuffled)
    return ordered, shuffled
def measure_insert(struct_name, records):
    """Time how long it takes to build a structure from *records*.

    *struct_name* is one of "LinkedList", "HashTable" or "BST"; any
    other value raises ValueError.  *records* is an iterable of
    (name, phone) pairs.  Returns ``(elapsed_seconds, structure)``,
    where the structure is the list head, the bucket list or the tree
    root respectively.
    """
    if struct_name not in ("LinkedList", "HashTable", "BST"):
        raise ValueError(f"Unknown structure: {struct_name}")

    started = time.perf_counter()
    if struct_name == "LinkedList":
        built = None
        for name, phone in records:
            built = ll_insert(built, name, phone)
    elif struct_name == "HashTable":
        # Table creation is deliberately part of the timed section.
        built = ht_create(bucket_count=2000)
        for name, phone in records:
            ht_insert(built, name, phone)
    else:
        built = None
        for name, phone in records:
            built = bst_insert(built, name, phone)
    return time.perf_counter() - started, built
def measure_find(obj, struct_name, existing_names, nonexisting_names):
    """Time lookups of present and absent names in a built structure.

    *obj* is the structure returned by measure_insert and *struct_name*
    names its kind ("LinkedList", "HashTable" or "BST").  Every name in
    *existing_names* and then every name in *nonexisting_names* is
    looked up once.  Returns the total elapsed time in seconds.

    Raises ValueError for an unknown *struct_name*, matching
    measure_insert, instead of silently treating *obj* as a BST.
    """
    # Resolve the lookup function once, outside the timed section, so the
    # measurement covers the lookups only and not repeated string dispatch.
    if struct_name == "LinkedList":
        finder = ll_find
    elif struct_name == "HashTable":
        finder = ht_find
    elif struct_name == "BST":
        finder = bst_find
    else:
        raise ValueError(f"Unknown structure: {struct_name}")

    start = time.perf_counter()
    for name in existing_names:
        finder(obj, name)
    for name in nonexisting_names:
        finder(obj, name)
    return time.perf_counter() - start
def measure_delete(obj, struct_name, names_to_delete):
    """Time the removal of *names_to_delete* from a built structure.

    *obj* is the structure returned by measure_insert and *struct_name*
    names its kind ("LinkedList", "HashTable" or "BST").  Returns
    ``(elapsed_seconds, structure)``; for the linked list and the BST the
    returned structure is the new head/root, the hash table is modified
    in place.

    Raises ValueError for an unknown *struct_name*, matching
    measure_insert, instead of silently treating *obj* as a BST.
    """
    if struct_name not in ("LinkedList", "HashTable", "BST"):
        raise ValueError(f"Unknown structure: {struct_name}")

    start = time.perf_counter()
    if struct_name == "LinkedList":
        for name in names_to_delete:
            obj = ll_delete(obj, name)
    elif struct_name == "HashTable":
        for name in names_to_delete:
            ht_delete(obj, name)
    else:
        for name in names_to_delete:
            obj = bst_delete(obj, name)
    elapsed = time.perf_counter() - start
    return elapsed, obj
def run_experiment(n=10000, repeats=5):
    """Benchmark insert/find/delete for all three structures.

    For each structure ("LinkedList", "HashTable", "BST") and each input
    order ("shuffled", "sorted") the structure is built from *n*
    generated records, queried and partially emptied, *repeats* times.

    Returns a list of rows ``[structure, mode, operation, repeat, seconds]``
    where ``repeat`` is 1-based.

    The names to delete are drawn with a private ``random.Random(rep)``,
    so each repeat deletes the same reproducible sample without reseeding
    the global random generator.  At most 50 names are deleted; fewer when
    *n* is smaller than that.
    """
    records_sorted, records_shuffled = generate_test_data(n)
    existing_names = [name for name, _ in records_sorted[:100]]
    nonexisting_names = [f"None_{i}" for i in range(10)]
    all_names = [name for name, _ in records_sorted]
    # random.sample raises ValueError when asked for more items than exist.
    delete_count = min(50, len(all_names))
    structures = ["LinkedList", "HashTable", "BST"]
    modes = [("shuffled", records_shuffled), ("sorted", records_sorted)]
    results = []
    for struct_name in structures:
        for mode_name, records in modes:
            for rep in range(repeats):
                insert_time, obj = measure_insert(struct_name, records)
                results.append([struct_name, mode_name, "insert", rep + 1, insert_time])

                find_time = measure_find(obj, struct_name, existing_names, nonexisting_names)
                results.append([struct_name, mode_name, "find", rep + 1, find_time])

                # Seeded per repeat: identical sample for every structure/mode.
                to_delete = random.Random(rep).sample(all_names, delete_count)
                delete_time, obj = measure_delete(obj, struct_name, to_delete)
                results.append([struct_name, mode_name, "delete", rep + 1, delete_time])
    return results
def save_results_to_csv(results, filename="docs/data/results.csv"):
    """Write benchmark rows to *filename* as CSV and report the path.

    *results* is an iterable of rows
    ``[structure, mode, operation, repeat, seconds]``; a header row is
    written first.  Missing parent directories are created.  A bare file
    name without a directory part is written to the current directory.
    """
    directory = os.path.dirname(filename)
    # os.makedirs("") raises FileNotFoundError, so skip it for bare names.
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["Structure", "Mode", "Operation", "Repeat", "Time_sec"])
        writer.writerows(results)
    print(f"Результаты сохранены в {filename}")
def aggregate_results(results):
    """Average the timings per (structure, mode, operation).

    *results* is an iterable of five-item rows
    ``[structure, mode, operation, repeat, seconds]``.  Returns a dict
    mapping ``(structure, mode, operation)`` to the arithmetic mean of
    the recorded times.
    """
    samples = {}
    for struct, mode, op, _rep, elapsed in results:
        samples.setdefault((struct, mode, op), []).append(elapsed)
    return {key: sum(times) / len(times) for key, times in samples.items()}
def plot_results(means, output_dir="docs"):
    """Draw grouped bar charts of the mean timings and save them as a PNG.

    *means* maps ``(structure, mode, operation)`` to a mean time in
    seconds (missing combinations are plotted as 0).  One subplot is
    drawn per operation, with a "shuffled" and a "sorted" bar for every
    structure.  The figure is written to ``<output_dir>/comparison.png``
    (the directory is created if needed) and then shown.

    If matplotlib or numpy cannot be imported, a message is printed and
    nothing is plotted.
    """
    try:
        import matplotlib.pyplot as plt
        import numpy as np
    except ImportError:
        print("Matplotlib не установлен. Графики не построены.")
        return

    operations = ["insert", "find", "delete"]
    structures = ["LinkedList", "HashTable", "BST"]
    x = np.arange(len(structures))
    width = 0.35

    fig, axes = plt.subplots(1, len(operations), figsize=(15, 5))
    for ax, op in zip(axes, operations):
        shuffled_means = [means.get((struct, "shuffled", op), 0) for struct in structures]
        sorted_means = [means.get((struct, "sorted", op), 0) for struct in structures]
        ax.bar(x - width/2, shuffled_means, width, label='случайный порядок', color='skyblue')
        ax.bar(x + width/2, sorted_means, width, label='отсортированный порядок', color='salmon')
        ax.set_xticks(x)
        ax.set_xticklabels(structures, rotation=15)
        ax.set_ylabel('Время (сек)')
        ax.set_title(f'{op.upper()}')
        ax.legend()
        ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()

    # savefig does not create missing directories; an empty output_dir means
    # "current directory" and must not be passed to os.makedirs.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    plt.savefig(os.path.join(output_dir, "comparison.png"), dpi=150)
    plt.show()
if __name__ == "__main__":
    # Script entry point: run the benchmark, persist the raw rows, print the
    # per-(structure, mode, operation) averages and finally draw the charts.
    results = run_experiment(n=10000, repeats=5)
    save_results_to_csv(results)

    means = aggregate_results(results)
    print("\nСреднее время по операциям (сек):")
    for key, t in sorted(means.items()):
        struct, mode, op = key
        print(f"{struct:12} {mode:8} {op:6} : {t:.6f}")

    plot_results(means)