# Source: 2026-rff_mp/YaroslavtsevAS/lab1.py
# Repository viewer metadata: 616 lines, 14 KiB, Python.

import time
import random
import csv
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
# Raise the interpreter's recursion limit to 20000 frames so that deeply
# nested structures can be processed recursively without RecursionError.
sys.setrecursionlimit(20000)
def ll_insert(head, name, phone):
    """Insert or update a record in a singly linked list of dict nodes.

    The list is scanned for a node whose ``"name"`` equals *name*.  If one is
    found its ``"phone"`` is overwritten and the unchanged *head* is returned.
    Otherwise a new node is created in front of *head* and returned as the
    new head.
    """
    node = head
    # Advance until we either run off the list or hit the matching record.
    while node and node["name"] != name:
        node = node["next"]

    if node:
        node["phone"] = phone
        return head

    return {"name": name, "phone": phone, "next": head}
def ll_find(head, name):
    """Return the phone stored under *name* in the linked list, or ``None``.

    The list is walked from *head*; the first node whose ``"name"`` matches
    supplies the result.
    """
    node = head
    while node and node["name"] != name:
        node = node["next"]
    return node["phone"] if node else None
def ll_delete(head, name):
    """Remove the first node named *name* and return the resulting head.

    Returns ``None`` for an empty list, the second node when the head itself
    matches, and the original *head* in every other case (including when no
    node matches).
    """
    if not head:
        return None

    previous, node = None, head
    while node and node["name"] != name:
        previous, node = node, node["next"]

    if not node:
        # Nothing matched: list is left untouched.
        return head
    if previous is None:
        # The head itself matched.
        return node["next"]

    previous["next"] = node["next"]
    return head
def ll_list_all(head):
    """Return every ``(name, phone)`` pair in the list, sorted by name."""
    pairs = []
    node = head
    while node:
        pairs.append((node["name"], node["phone"]))
        node = node["next"]
    return sorted(pairs, key=lambda pair: pair[0])
def hash_function(name, size):
    """Map *name* to a bucket index for a table with *size* buckets.

    A polynomial rolling hash (multiplier 31) is used so that the position of
    each character matters.  A plain sum of code points gives the same index
    to every permutation of the same characters; for keys such as
    ``User_00000`` .. ``User_99999`` that leaves only 46 distinct indices
    (one per possible digit sum), which turns most of the table into a few
    very long chains.

    Raises ``ZeroDivisionError`` when *size* is 0.
    """
    value = 0
    for char in name:
        # Reduce at every step to keep the intermediate integer small.
        value = (value * 31 + ord(char)) % size
    # The final reduction also covers the empty-name case, so size == 0
    # fails regardless of the name.
    return value % size
def ht_create(size=2000):
    """Return a new bucket array: a list of *size* empty (``None``) slots."""
    return [None for _ in range(size)]
def ht_insert(buckets, name, phone):
    """Insert or update *name* in the hash table *buckets* (in place).

    The bucket is chosen with ``hash_function`` and its chain is updated
    through ``ll_insert``; the chain head returned is stored back in the
    bucket.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
def ht_find(buckets, name):
    """Look *name* up in the hash table and return what ``ll_find`` yields.

    Only the chain in the bucket selected by ``hash_function`` is searched.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    return ll_find(chain, name)
def ht_delete(buckets, name):
    """Remove *name* from the hash table *buckets* (in place).

    The chain in the bucket selected by ``hash_function`` is passed to
    ``ll_delete`` and the returned head is stored back in the bucket.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
def ht_list_all(buckets):
    """Return every ``(name, phone)`` pair in the table, sorted by name.

    Each bucket's chain is walked in turn; the combined list is sorted once
    at the end.
    """
    collected = []
    for chain_head in buckets:
        node = chain_head
        while node:
            collected.append((node["name"], node["phone"]))
            node = node["next"]
    return sorted(collected, key=lambda pair: pair[0])
def bst_insert(root, name, phone):
    """Insert or update *name* in a binary search tree of dict nodes.

    Returns the new node when *root* is ``None``; otherwise returns *root*
    after either attaching a new leaf or overwriting the phone of the node
    that already carries *name*.  The descent is iterative.
    """
    fresh = dict(name=name, phone=phone, left=None, right=None)
    if root is None:
        return fresh

    node = root
    while True:
        if name < node["name"]:
            side = "left"
        elif name > node["name"]:
            side = "right"
        else:
            # Same key: update in place, the prepared node is discarded.
            node["phone"] = phone
            return root

        if node[side] is None:
            node[side] = fresh
            return root
        node = node[side]
def bst_find(root, name):
    """Return the phone stored under *name* in the tree, or ``None``."""
    node = root
    while node:
        key = node["name"]
        if name == key:
            return node["phone"]
        # Smaller keys live in the left subtree, everything else to the right.
        node = node["left" if name < key else "right"]
    return None
def bst_find_min(node):
    """Return the leftmost node of the subtree rooted at *node*."""
    leftmost = node
    while True:
        following = leftmost["left"]
        if not following:
            return leftmost
        leftmost = following
def bst_delete(root, name):
    """Remove *name* from the binary search tree and return the new root.

    The tree is modified in place.  A node with at most one child is replaced
    by that child (or by ``None``).  A node with two children keeps its place
    in the tree but takes over the name and phone of its in-order successor
    (the leftmost node of its right subtree), and the successor node is
    unlinked instead.  If *name* is not present the tree is left unchanged.

    The implementation is iterative: a recursive descent needs one stack
    frame per tree level, and a tree built from sorted input is a chain as
    deep as the number of records, which overflows the default recursion
    limit.
    """
    parent = None
    node = root
    # Locate the node to delete, remembering its parent for relinking.
    while node is not None and node["name"] != name:
        parent = node
        node = node["left"] if name < node["name"] else node["right"]

    if node is None:
        return root

    if node["left"] is not None and node["right"] is not None:
        # Two children: copy the successor's payload into this node, then
        # remove the successor, which by construction has no left child.
        successor_parent = node
        successor = node["right"]
        while successor["left"] is not None:
            successor_parent = successor
            successor = successor["left"]
        node["name"] = successor["name"]
        node["phone"] = successor["phone"]
        parent, node = successor_parent, successor

    # Here ``node`` has at most one child; splice that child into its place.
    child = node["left"] if node["left"] is not None else node["right"]
    if parent is None:
        return child
    if parent["left"] is node:
        parent["left"] = child
    else:
        parent["right"] = child
    return root
def bst_list_all(root):
    """Return all ``(name, phone)`` pairs of the tree in ascending key order.

    Uses an explicit stack for the in-order traversal, so no recursion is
    involved.
    """
    ordered = []
    pending = []
    node = root
    while True:
        # Slide down the left spine, remembering every node passed.
        while node:
            pending.append(node)
            node = node["left"]
        if not pending:
            return ordered
        node = pending.pop()
        ordered.append((node["name"], node["phone"]))
        node = node["right"]
def copy_linked_list(head):
    """Return a node-by-node copy of the linked list starting at *head*.

    Every node is duplicated (name and phone values are shared, node dicts
    are new) and the order is preserved.  An empty list yields ``None``.
    """
    if not head:
        return None

    # A throw-away anchor node lets the loop treat the first node like the rest.
    anchor = {"next": None}
    tail = anchor
    node = head
    while node:
        tail["next"] = {
            "name": node["name"],
            "phone": node["phone"],
            "next": None,
        }
        tail = tail["next"]
        node = node["next"]
    return anchor["next"]
def copy_bst(node):
    """Return a structural copy of the tree rooted at *node*.

    Every node dict is duplicated; name and phone values are shared.  An
    empty tree yields ``None``.

    The copy is built with an explicit stack rather than recursion: a tree
    built from sorted input is a chain as deep as the number of records, and
    a recursive copy of such a tree overflows the default recursion limit.
    """
    if not node:
        return None

    root_copy = {
        "name": node["name"],
        "phone": node["phone"],
        "left": None,
        "right": None,
    }
    # Each entry pairs an original node with its already-created duplicate
    # whose children still have to be filled in.
    pending = [(node, root_copy)]
    while pending:
        source, target = pending.pop()
        for side in ("left", "right"):
            child = source[side]
            if child:
                clone = {
                    "name": child["name"],
                    "phone": child["phone"],
                    "left": None,
                    "right": None,
                }
                target[side] = clone
                pending.append((child, clone))
    return root_copy
def generate_test_data(N=10000):
    """Create *N* synthetic phone-book records in two orderings.

    Names are ``User_00000``, ``User_00001``, ...; each phone is
    ``+7-999-`` followed by a random seven-digit number.

    Returns a tuple ``(shuffled, ordered)``: the same records once in random
    order and once sorted.
    """
    base = [
        (f"User_{i:05d}", f"+7-999-{random.randint(1000000, 9999999)}")
        for i in range(N)
    ]
    shuffled = list(base)
    random.shuffle(shuffled)
    return shuffled, sorted(base)
def get_test_queries(records, existing_count=100, missing_count=10):
    """Build a shuffled list of names to look up during the search benchmark.

    Args:
        records: sequence of ``(name, phone)`` pairs to draw existing names
            from.
        existing_count: how many distinct records to sample.  Capped at
            ``len(records)`` so that small data sets do not make
            ``random.sample`` raise ``ValueError``.
        missing_count: how many names of the form ``None_00000``,
            ``None_00001``, ... to add.

    Returns:
        A list containing the sampled names followed by the ``None_*`` names,
        shuffled together.
    """
    sample_size = min(existing_count, len(records))
    existing_names = [name for name, _ in random.sample(records, sample_size)]
    missing_names = [f"None_{i:05d}" for i in range(missing_count)]
    queries = existing_names + missing_names
    random.shuffle(queries)
    return queries
def get_delete_names(records, count=50):
    """Pick the names that the deletion benchmark will remove.

    Args:
        records: sequence of ``(name, phone)`` pairs.
        count: how many distinct records to sample.  Capped at
            ``len(records)`` so that small data sets do not make
            ``random.sample`` raise ``ValueError``.

    Returns:
        The names of the sampled records, in sample order.
    """
    sample_size = min(count, len(records))
    return [name for name, _ in random.sample(records, sample_size)]
def measure_insertion(structure_type, records, repeats=5):
    """Time how long it takes to insert all *records* into an empty structure.

    Args:
        structure_type: ``"LinkedList"`` or ``"HashTable"``; any other value
            is handled as a binary search tree.
        records: iterable of ``(name, phone)`` pairs, inserted in order.
        repeats: number of independent runs, each starting from an empty
            structure.

    Returns:
        A list with one elapsed time (``time.perf_counter`` difference, in
        seconds) per run.
    """
    durations = []
    for _ in range(repeats):
        # Only the hash table needs a pre-built container; both the list and
        # the tree start out as None.
        container = ht_create() if structure_type == "HashTable" else None

        began = time.perf_counter()
        for name, phone in records:
            if structure_type == "LinkedList":
                container = ll_insert(container, name, phone)
            elif structure_type == "HashTable":
                ht_insert(container, name, phone)
            else:
                container = bst_insert(container, name, phone)
        durations.append(time.perf_counter() - began)
    return durations
def measure_search(structure_type, structure, queries, repeats=5):
    """Time how long it takes to look up every name in *queries*.

    Args:
        structure_type: ``"LinkedList"`` or ``"HashTable"``; any other value
            is handled as a binary search tree.
        structure: the already populated structure to search.
        queries: iterable of names to look up; results are discarded.
        repeats: number of timed passes over *queries*.

    Returns:
        A list with one elapsed time (``time.perf_counter`` difference, in
        seconds) per pass.
    """
    durations = []
    for _ in range(repeats):
        began = time.perf_counter()
        for name in queries:
            if structure_type == "LinkedList":
                ll_find(structure, name)
            elif structure_type == "HashTable":
                ht_find(structure, name)
            else:
                bst_find(structure, name)
        durations.append(time.perf_counter() - began)
    return durations
def measure_deletion(structure_type, structure, delete_names, repeats=5):
    """Time how long it takes to delete every name in *delete_names*.

    Each run works on a fresh copy of *structure*, made before the clock
    starts, so the structure passed in is not modified and every run deletes
    from the same starting state.

    Args:
        structure_type: ``"LinkedList"`` or ``"HashTable"``; any other value
            is handled as a binary search tree.
        structure: the populated structure to copy for each run.
        delete_names: iterable of names to remove, in order.
        repeats: number of timed runs.

    Returns:
        A list with one elapsed time (``time.perf_counter`` difference, in
        seconds) per run.
    """
    durations = []
    for _ in range(repeats):
        if structure_type == "LinkedList":
            scratch = copy_linked_list(structure)
        elif structure_type == "HashTable":
            # New bucket list; non-empty chains are duplicated node by node.
            scratch = [
                copy_linked_list(chain) if chain else chain
                for chain in structure
            ]
        else:
            scratch = copy_bst(structure)

        began = time.perf_counter()
        for name in delete_names:
            if structure_type == "LinkedList":
                scratch = ll_delete(scratch, name)
            elif structure_type == "HashTable":
                ht_delete(scratch, name)
            else:
                scratch = bst_delete(scratch, name)
        durations.append(time.perf_counter() - began)
    return durations
def build_structure(structure_type, records):
    """Build and return a structure populated with all *records*.

    Args:
        structure_type: ``"LinkedList"`` or ``"HashTable"``; any other value
            produces a binary search tree.
        records: iterable of ``(name, phone)`` pairs, inserted in order.

    Returns:
        The list head, the bucket list, or the tree root respectively
        (``None`` for an empty list or tree).
    """
    if structure_type == "HashTable":
        table = ht_create()
        for name, phone in records:
            ht_insert(table, name, phone)
        return table

    # Linked list and tree are both built by threading the head/root through
    # successive insert calls.
    top = None
    if structure_type == "LinkedList":
        for name, phone in records:
            top = ll_insert(top, name, phone)
    else:
        for name, phone in records:
            top = bst_insert(top, name, phone)
    return top
def run_experiment(N=10000):
    """Benchmark insert, search and delete for every structure and ordering.

    Generates *N* records, one set of search queries and one set of names to
    delete, then measures the linked list, hash table and BST once with the
    records in random order and once in sorted order.

    Returns:
        A list of dicts, one per (structure, mode) combination, with keys
        ``Structure``, ``Mode``, the raw timing lists ``Insert`` / ``Search``
        / ``Delete`` and their arithmetic means ``AvgInsert`` / ``AvgSearch``
        / ``AvgDelete``.
    """
    shuffled, ordered = generate_test_data(N)
    queries = get_test_queries(shuffled)
    delete_names = get_delete_names(shuffled)

    def mean(values):
        return sum(values) / len(values)

    results = []
    for structure in ("LinkedList", "HashTable", "BST"):
        for mode_name, records in (("random", shuffled), ("sorted", ordered)):
            insert_times = measure_insertion(structure, records)
            # Search and delete run against a separately built, fully
            # populated instance.
            populated = build_structure(structure, records)
            search_times = measure_search(structure, populated, queries)
            delete_times = measure_deletion(structure, populated, delete_names)

            results.append({
                "Structure": structure,
                "Mode": mode_name,
                "Insert": insert_times,
                "Search": search_times,
                "Delete": delete_times,
                "AvgInsert": mean(insert_times),
                "AvgSearch": mean(search_times),
                "AvgDelete": mean(delete_times),
            })
    return results
def save_to_csv(results):
    """Write the benchmark results to ``docs/results.csv``.

    The ``docs`` directory is created if necessary.  For every entry in
    *results* three rows are written (Insert, Search, Delete), each holding
    the structure name, the mode, the operation, the individual run times and
    the average, all times formatted with six decimals.

    The number of ``RunN`` header columns follows the longest timing list
    found in *results* instead of being fixed at five, so the header stays
    aligned when the benchmarks are run with a different repeat count.  With
    no results the header keeps the five run columns.  Shorter timing lists
    are padded with empty cells so the ``Average`` column lines up.

    Args:
        results: iterable of dicts as produced by ``run_experiment``.
    """
    results = list(results)
    operations = (
        ("Insert", "AvgInsert"),
        ("Search", "AvgSearch"),
        ("Delete", "AvgDelete"),
    )
    run_count = max(
        (len(result[op]) for result in results for op, _ in operations),
        default=5,
    )

    os.makedirs("docs", exist_ok=True)
    with open(
        "docs/results.csv",
        "w",
        newline="",
        encoding="utf-8",
    ) as file:
        writer = csv.writer(file)
        writer.writerow([
            "Structure",
            "Mode",
            "Operation",
            *[f"Run{i}" for i in range(1, run_count + 1)],
            "Average",
        ])
        for result in results:
            for op, avg_key in operations:
                runs = [f"{x:.6f}" for x in result[op]]
                runs += [""] * (run_count - len(runs))
                writer.writerow([
                    result["Structure"],
                    result["Mode"],
                    op,
                    *runs,
                    f"{result[avg_key]:.6f}",
                ])
def plot_results(results):
    """Plot average insert/search/delete times and save the figure.

    Draws three side-by-side bar charts (one per operation).  Each chart has
    one group per structure with a "Random" and a "Sorted" bar taken from the
    ``AvgInsert`` / ``AvgSearch`` / ``AvgDelete`` entries of *results*.  The
    figure is written to ``docs/performance_comparison.png`` at 300 dpi and
    then shown with ``plt.show()``.

    Args:
        results: list of dicts as produced by ``run_experiment``; it must
            contain a "random" and a "sorted" entry for each of the three
            structures.
    """
    structures = ["LinkedList", "HashTable", "BST"]
    panels = [
        ("AvgInsert", "Insert"),
        ("AvgSearch", "Search"),
        ("AvgDelete", "Delete"),
    ]

    def averages(operation, mode):
        # Values in the order of ``structures`` for the requested mode.
        return [
            result[operation]
            for structure in structures
            for result in results
            if result["Structure"] == structure and result["Mode"] == mode
        ]

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    x = np.arange(len(structures))
    width = 0.35
    for ax, (operation, title) in zip(axes, panels):
        ax.bar(
            x - width / 2,
            averages(operation, "random"),
            width,
            label="Random",
        )
        ax.bar(
            x + width / 2,
            averages(operation, "sorted"),
            width,
            label="Sorted",
        )
        ax.set_xticks(x)
        ax.set_xticklabels(structures)
        ax.set_ylabel("Time (sec)")
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    # Create the output directory here as well, so plotting does not depend
    # on save_to_csv having been called first.
    os.makedirs("docs", exist_ok=True)
    plt.savefig("docs/performance_comparison.png", dpi=300)
    plt.show()
if __name__ == "__main__":
    # Script entry point: run the benchmark, persist the table and the chart,
    # then print where the outputs went and a fixed list of conclusions.
    print("\nTesting data structures...\n")

    results = run_experiment(N=10000)
    save_to_csv(results)
    plot_results(results)

    print("\nResults saved:")
    for saved_path in ("docs/results.csv", "docs/performance_comparison.png"):
        print(saved_path)

    print("\nConclusions:\n")
    conclusions = (
        "1. LinkedList is the slowest structure for searching.",
        "2. HashTable shows the best search performance.",
        "3. BST works well on random data.",
        "4. Sorted data causes BST degradation.",
        "5. HashTable is best for frequent search.",
        "6. BST is useful for ordered data.",
    )
    for conclusion in conclusions:
        print(conclusion)