616 lines
14 KiB
Python
616 lines
14 KiB
Python
import time
|
|
import random
|
|
import csv
|
|
import os
|
|
import sys
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
|
|
# Raise the interpreter-wide recursion limit at import time.
# NOTE(review): presumably needed by the recursive tree helpers in this file
# when a BST built from sorted input degenerates into a very deep chain --
# confirm this is still required before changing or removing it.
sys.setrecursionlimit(20000)
|
|
|
|
|
|
def ll_insert(head, name, phone):
    """Insert or update a contact in a singly linked list of dict nodes.

    Each node is a dict with the keys "name", "phone" and "next".

    Args:
        head: First node of the list, or None for an empty list.
        name: Contact name used as the unique key.
        phone: Phone number to store for that name.

    Returns:
        The head of the list after the operation. When the name already
        exists its phone is overwritten in place and the same head is
        returned; otherwise a new node is prepended and becomes the head.
    """
    # Walk until we either run off the list or land on the matching node.
    node = head
    while node and node["name"] != name:
        node = node["next"]

    if node:
        node["phone"] = phone
        return head

    # Not present: prepend, which costs nothing beyond the scan above.
    return {"name": name, "phone": phone, "next": head}
|
|
|
|
def ll_find(head, name):
    """Look up a contact's phone number in a linked list.

    Args:
        head: First node of the list (dict with "name", "phone", "next"),
            or None for an empty list.
        name: Contact name to search for.

    Returns:
        The stored phone of the first node whose name matches, or None when
        no node matches.
    """
    node = head
    while node and node["name"] != name:
        node = node["next"]
    return node["phone"] if node else None
|
|
|
|
def ll_delete(head, name):
    """Remove the first node with the given name from a linked list.

    Args:
        head: First node of the list, or None for an empty list.
        name: Contact name to remove.

    Returns:
        The head of the list after removal. This is None for an empty list,
        the second node when the head itself was removed, and the unchanged
        head otherwise (including when the name is not present).
    """
    if not head:
        return None
    if head["name"] == name:
        return head["next"]

    # Track the predecessor so the matching node can be unlinked in place.
    previous = head
    candidate = previous["next"]
    while candidate:
        if candidate["name"] == name:
            previous["next"] = candidate["next"]
            break
        previous, candidate = candidate, candidate["next"]
    return head
|
|
|
|
def ll_list_all(head):
    """Return every contact of a linked list, ordered by name.

    Args:
        head: First node of the list, or None for an empty list.

    Returns:
        A new list of (name, phone) tuples sorted by name.
    """
    def walk(node):
        # Yield contacts in list order; sorting happens afterwards.
        while node:
            yield node["name"], node["phone"]
            node = node["next"]

    return sorted(walk(head), key=lambda pair: pair[0])
|
|
|
|
|
|
def hash_function(name, size):
    """Map a name to a bucket index in the range [0, size).

    Uses a position-sensitive polynomial hash (multiplier 31). A plain sum
    of code points ignores character order, so names that are permutations
    of each other (e.g. "User_00012" and "User_00021") would always land in
    the same bucket.

    Args:
        name: String key to hash.
        size: Number of buckets; must be non-zero.

    Returns:
        An integer bucket index.

    Raises:
        ZeroDivisionError: If size is 0.
    """
    digest = 0
    for ch in name:
        # Reduce at every step so the intermediate value stays small.
        digest = (digest * 31 + ord(ch)) % size
    # The final reduction also makes the empty string fail loudly for size 0.
    return digest % size
|
|
|
|
def ht_create(size=2000):
    """Create an empty hash table.

    Args:
        size: Number of buckets to allocate.

    Returns:
        A list of `size` entries, each None (an empty bucket chain).
    """
    return [None for _ in range(size)]
|
|
|
|
def ht_insert(buckets, name, phone):
    """Insert or update a contact in the hash table, in place.

    Args:
        buckets: Bucket list; each entry is the head of a linked-list chain
            or None.
        name: Contact name used as the key.
        phone: Phone number to store.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    # ll_insert may return a new head, so the bucket is always reassigned.
    buckets[slot] = ll_insert(chain, name, phone)
|
|
|
|
def ht_find(buckets, name):
    """Look up a contact's phone number in the hash table.

    Args:
        buckets: Bucket list; each entry is the head of a linked-list chain
            or None.
        name: Contact name to search for.

    Returns:
        Whatever ll_find returns for the bucket chain the name hashes to.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    return ll_find(chain, name)
|
|
|
|
def ht_delete(buckets, name):
    """Remove a contact from the hash table, in place.

    Args:
        buckets: Bucket list; each entry is the head of a linked-list chain
            or None.
        name: Contact name to remove.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    # ll_delete returns the (possibly new) head of the chain.
    buckets[slot] = ll_delete(chain, name)
|
|
|
|
def ht_list_all(buckets):
    """Return every contact stored in the hash table, ordered by name.

    Args:
        buckets: Bucket list; each entry is the head of a linked-list chain
            (dict nodes with "name", "phone", "next") or None.

    Returns:
        A new list of (name, phone) tuples sorted by name.
    """
    def chain_items(node):
        # Yield the contacts of one bucket chain in chain order.
        while node:
            yield node["name"], node["phone"]
            node = node["next"]

    pairs = [item for head in buckets for item in chain_items(head)]
    pairs.sort(key=lambda pair: pair[0])
    return pairs
|
|
|
|
|
|
def bst_insert(root, name, phone):
    """Insert or update a contact in a binary search tree of dict nodes.

    Each node is a dict with the keys "name", "phone", "left" and "right".
    The tree is ordered by name; the descent is iterative.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Contact name used as the key.
        phone: Phone number to store.

    Returns:
        The root of the tree: the new node when the tree was empty,
        otherwise the same root that was passed in.
    """
    leaf = {"name": name, "phone": phone, "left": None, "right": None}
    if root is None:
        return leaf

    node = root
    while True:
        if name < node["name"]:
            side = "left"
        elif name > node["name"]:
            side = "right"
        else:
            # Key already present: overwrite the phone, discard the new leaf.
            node["phone"] = phone
            return root

        child = node[side]
        if child is None:
            node[side] = leaf
            return root
        node = child
|
|
|
|
|
|
def bst_find(root, name):
    """Look up a contact's phone number in a binary search tree.

    Args:
        root: Root node (dict with "name", "phone", "left", "right"), or
            None for an empty tree.
        name: Contact name to search for.

    Returns:
        The stored phone when the name is found, otherwise None.
    """
    node = root
    while node:
        key = node["name"]
        if name == key:
            return node["phone"]
        # Smaller keys live in the left subtree, everything else on the right.
        node = node["left" if name < key else "right"]
    return None
|
|
|
|
|
|
def bst_find_min(node):
    """Return the node with the smallest name in the subtree rooted at node.

    Args:
        node: Root of the subtree (dict with a "left" key), or None.

    Returns:
        The leftmost node of the subtree, or None when the subtree is empty.
        (Previously an empty subtree raised TypeError.)
    """
    if node is None:
        return None
    current = node
    while current["left"]:
        current = current["left"]
    return current
|
|
|
|
|
|
def bst_delete(root, name):
    """Remove the node with the given name from a binary search tree.

    The descent is iterative, so the call works on arbitrarily deep
    (degenerate) trees without depending on the interpreter's recursion
    limit. The resulting tree has the same shape as the classic recursive
    algorithm produces: a node with two children receives the name and
    phone of its in-order successor, and that successor is then unlinked.

    Args:
        root: Root node (dict with "name", "phone", "left", "right"), or
            None for an empty tree.
        name: Contact name to remove.

    Returns:
        The root of the tree after removal: None if the tree became (or
        was) empty, the replacement child if the root itself was unlinked,
        and the original root otherwise (including when name is absent).
    """
    # Locate the target node and remember its parent.
    parent = None
    node = root
    while node is not None and node["name"] != name:
        parent = node
        node = node["left"] if name < node["name"] else node["right"]

    if node is None:
        return root  # Empty tree or name not present.

    if node["left"] is not None and node["right"] is not None:
        # Two children: copy the in-order successor's payload into this
        # node, then unlink the successor instead (it has no left child).
        successor_parent = node
        successor = node["right"]
        while successor["left"] is not None:
            successor_parent = successor
            successor = successor["left"]
        node["name"] = successor["name"]
        node["phone"] = successor["phone"]
        parent, node = successor_parent, successor

    # At this point `node` has at most one child; splice that child in.
    child = node["left"] if node["left"] is not None else node["right"]
    if parent is None:
        return child
    if parent["left"] is node:
        parent["left"] = child
    else:
        parent["right"] = child
    return root
|
|
|
|
|
|
def bst_list_all(root):
    """Return every contact of a binary search tree in in-order sequence.

    The traversal uses an explicit stack instead of recursion.

    Args:
        root: Root node (dict with "name", "phone", "left", "right"), or
            None for an empty tree.

    Returns:
        A list of (name, phone) tuples in in-order (left, node, right) order.
    """
    ordered = []
    pending = []
    node = root
    while True:
        if node:
            # Keep descending left, remembering the path.
            pending.append(node)
            node = node["left"]
        elif pending:
            # Left side exhausted: emit the node, then explore its right side.
            node = pending.pop()
            ordered.append((node["name"], node["phone"]))
            node = node["right"]
        else:
            return ordered
|
|
|
|
def copy_linked_list(head):
    """Create an independent copy of a linked list.

    New node dicts are created for every node; the name and phone values
    themselves are shared with the source list.

    Args:
        head: First node of the list (dict with "name", "phone", "next"),
            or None / an empty value for an empty list.

    Returns:
        The head of the copied list, or None when the input is empty.
    """
    if not head:
        return None

    # A throwaway sentinel removes the special case for the first node.
    sentinel = {"next": None}
    tail = sentinel
    source = head
    while source:
        clone = {
            "name": source["name"],
            "phone": source["phone"],
            "next": None,
        }
        tail["next"] = clone
        tail = clone
        source = source["next"]
    return sentinel["next"]
|
|
|
|
|
|
def copy_bst(node):
    """Create an independent copy of a binary search tree.

    The copy is built with an explicit stack rather than recursion, so it
    works on arbitrarily deep (degenerate) trees without depending on the
    interpreter's recursion limit. New node dicts are created for every
    node; the name and phone values themselves are shared with the source.

    Args:
        node: Root of the tree (dict with "name", "phone", "left",
            "right"), or None / an empty value for an empty tree.

    Returns:
        The root of the copied tree, or None when the input is empty.
    """
    if not node:
        return None

    def clone_of(source):
        # Fresh node carrying the same payload, children filled in later.
        return {
            "name": source["name"],
            "phone": source["phone"],
            "left": None,
            "right": None,
        }

    root_copy = clone_of(node)
    pending = [(node, root_copy)]
    while pending:
        source, target = pending.pop()
        for side in ("left", "right"):
            child = source[side]
            if child:
                child_copy = clone_of(child)
                target[side] = child_copy
                pending.append((child, child_copy))
    return root_copy
|
|
|
|
|
|
def generate_test_data(N=10000):
    """Generate synthetic phone-book records for the benchmark.

    Names have the form "User_00000", "User_00001", ...; phones have the
    form "+7-999-" followed by a random 7-digit number.

    Args:
        N: Number of records to generate.

    Returns:
        A tuple (records_shuffled, records_sorted): the same records once in
        random order and once sorted.
    """
    generated = [
        (f"User_{idx:05d}", f"+7-999-{random.randint(1000000, 9999999)}")
        for idx in range(N)
    ]

    shuffled = list(generated)
    random.shuffle(shuffled)

    return shuffled, sorted(generated)
|
|
|
|
|
|
def get_test_queries(records, n_existing=100, n_missing=10):
    """Build a shuffled list of names to search for.

    The list mixes names that are present in `records` with synthetic
    "None_00000"-style names that are not generated as record names.

    Args:
        records: Sequence of (name, phone) tuples to sample from.
        n_existing: How many existing names to sample. Clamped to
            len(records) so small data sets no longer raise ValueError.
        n_missing: How many missing names to add.

    Returns:
        A list of names in random order.
    """
    sample_size = min(n_existing, len(records))
    existing_names = [name for name, _ in random.sample(records, sample_size)]
    missing_names = [f"None_{i:05d}" for i in range(n_missing)]

    queries = existing_names + missing_names
    random.shuffle(queries)
    return queries
|
|
|
|
|
|
def get_delete_names(records, count=50):
    """Pick random names to delete during the benchmark.

    Args:
        records: Sequence of (name, phone) tuples to sample from.
        count: How many names to pick. Clamped to len(records) so small
            data sets no longer raise ValueError.

    Returns:
        A list of names sampled without replacement.
    """
    selected = random.sample(records, min(count, len(records)))
    return [name for name, _ in selected]
|
|
|
|
|
|
def measure_insertion(structure_type, records, repeats=5):
    """Time how long it takes to insert all records into a fresh structure.

    Args:
        structure_type: "LinkedList", "HashTable", or anything else (treated
            as a BST).
        records: Iterable of (name, phone) tuples to insert.
        repeats: Number of independent timing runs.

    Returns:
        A list with one elapsed time in seconds per run.
    """
    times = []
    for _ in range(repeats):
        # Allocate the empty container before the clock starts.
        structure = ht_create() if structure_type == "HashTable" else None

        start = time.perf_counter()
        # The structure type is resolved once per run, not once per record,
        # so the timed loop contains only the insert calls themselves.
        if structure_type == "LinkedList":
            for name, phone in records:
                structure = ll_insert(structure, name, phone)
        elif structure_type == "HashTable":
            for name, phone in records:
                ht_insert(structure, name, phone)
        else:
            for name, phone in records:
                structure = bst_insert(structure, name, phone)
        end = time.perf_counter()

        times.append(end - start)
    return times
|
|
|
|
|
|
|
|
def measure_search(
    structure_type,
    structure,
    queries,
    repeats=5
):
    """Time how long it takes to look up every query name.

    Args:
        structure_type: "LinkedList", "HashTable", or anything else (treated
            as a BST).
        structure: The populated structure matching structure_type.
        queries: Iterable of names to look up.
        repeats: Number of timing runs over the same structure.

    Returns:
        A list with one elapsed time in seconds per run.
    """
    times = []
    for _ in range(repeats):
        start = time.perf_counter()
        # The structure type is resolved once per run, not once per query,
        # so the timed loop contains only the lookups themselves.
        if structure_type == "LinkedList":
            for name in queries:
                ll_find(structure, name)
        elif structure_type == "HashTable":
            for name in queries:
                ht_find(structure, name)
        else:
            for name in queries:
                bst_find(structure, name)
        end = time.perf_counter()

        times.append(end - start)
    return times
|
|
|
|
|
|
def measure_deletion(
    structure_type,
    structure,
    delete_names,
    repeats=5
):
    """Time how long it takes to delete the given names.

    Every run works on a fresh copy, so the structure passed in is left
    untouched and each run deletes from the same starting state. Copying is
    done before the clock starts.

    Args:
        structure_type: "LinkedList", "HashTable", or anything else (treated
            as a BST).
        structure: The populated structure matching structure_type.
        delete_names: Iterable of names to delete.
        repeats: Number of independent timing runs.

    Returns:
        A list with one elapsed time in seconds per run.
    """
    times = []
    for _ in range(repeats):
        if structure_type == "LinkedList":
            temp = copy_linked_list(structure)
        elif structure_type == "HashTable":
            # Copy each non-empty chain; empty buckets are carried over as-is.
            temp = [
                copy_linked_list(bucket) if bucket else bucket
                for bucket in structure
            ]
        else:
            temp = copy_bst(structure)

        start = time.perf_counter()
        # The structure type is resolved once per run, not once per name,
        # so the timed loop contains only the delete calls themselves.
        if structure_type == "LinkedList":
            for name in delete_names:
                temp = ll_delete(temp, name)
        elif structure_type == "HashTable":
            for name in delete_names:
                ht_delete(temp, name)
        else:
            for name in delete_names:
                temp = bst_delete(temp, name)
        end = time.perf_counter()

        times.append(end - start)
    return times
|
|
|
|
def build_structure(structure_type, records):
    """Build a populated structure of the requested type.

    Args:
        structure_type: "LinkedList", "HashTable", or anything else (treated
            as a BST).
        records: Iterable of (name, phone) tuples to insert, in order.

    Returns:
        The linked-list head, the bucket list, or the BST root. For an
        empty `records` the list and tree variants return None.
    """
    if structure_type == "LinkedList":
        head = None
        for name, phone in records:
            head = ll_insert(head, name, phone)
        return head

    if structure_type == "HashTable":
        table = ht_create()
        for name, phone in records:
            ht_insert(table, name, phone)
        return table

    # Any other type name falls through to the binary search tree.
    tree = None
    for name, phone in records:
        tree = bst_insert(tree, name, phone)
    return tree
|
|
|
|
|
|
def run_experiment(N=10000):
    """Run the full insert/search/delete benchmark.

    Every structure ("LinkedList", "HashTable", "BST") is measured twice:
    once with records in random order and once with sorted records. The
    search queries and deletion names are drawn once from the shuffled
    records and reused for every combination.

    Args:
        N: Number of records to generate.

    Returns:
        A list of dicts, one per (structure, mode) pair, with the keys
        "Structure", "Mode", "Insert", "Search", "Delete" (lists of run
        times) and "AvgInsert", "AvgSearch", "AvgDelete" (their means).
    """
    records_shuffled, records_sorted = generate_test_data(N)
    queries = get_test_queries(records_shuffled)
    delete_names = get_delete_names(records_shuffled)

    def mean(samples):
        return sum(samples) / len(samples)

    datasets = {"random": records_shuffled, "sorted": records_sorted}

    results = []
    for structure in ("LinkedList", "HashTable", "BST"):
        for mode_name, records in datasets.items():
            insert_times = measure_insertion(structure, records)

            # Search and deletion are measured on a separately built instance.
            populated = build_structure(structure, records)
            search_times = measure_search(structure, populated, queries)
            delete_times = measure_deletion(structure, populated, delete_names)

            results.append({
                "Structure": structure,
                "Mode": mode_name,
                "Insert": insert_times,
                "Search": search_times,
                "Delete": delete_times,
                "AvgInsert": mean(insert_times),
                "AvgSearch": mean(search_times),
                "AvgDelete": mean(delete_times),
            })
    return results
|
|
|
|
def save_to_csv(results):
    """Write the benchmark results to docs/results.csv.

    One row is written per (structure, mode, operation) with the individual
    run times followed by their average, all formatted to six decimals. The
    number of "RunN" header columns follows the longest run list found in
    `results` (five when `results` is empty), so the header stays aligned
    when the benchmark is executed with a different number of repeats.
    Shorter run lists are padded with empty cells.

    Args:
        results: List of result dicts with the keys "Structure", "Mode",
            "Insert", "Search", "Delete", "AvgInsert", "AvgSearch" and
            "AvgDelete".
    """
    operations = (
        ("Insert", "AvgInsert"),
        ("Search", "AvgSearch"),
        ("Delete", "AvgDelete"),
    )

    # Computed before the file is opened so malformed input cannot leave a
    # truncated CSV behind.
    run_count = max(
        (len(result[runs_key]) for result in results for runs_key, _ in operations),
        default=5,
    )
    header = [
        "Structure",
        "Mode",
        "Operation",
        *[f"Run{i}" for i in range(1, run_count + 1)],
        "Average",
    ]

    os.makedirs("docs", exist_ok=True)
    with open(
        "docs/results.csv",
        "w",
        newline="",
        encoding="utf-8"
    ) as file:
        writer = csv.writer(file)
        writer.writerow(header)

        for result in results:
            for runs_key, avg_key in operations:
                runs = [f"{x:.6f}" for x in result[runs_key]]
                # Pad so the Average value always lands in its own column.
                runs += [""] * (run_count - len(runs))
                writer.writerow([
                    result["Structure"],
                    result["Mode"],
                    runs_key,
                    *runs,
                    f"{result[avg_key]:.6f}",
                ])
|
|
|
|
|
|
def plot_results(results):
    """Plot average insert/search/delete times and save the figure.

    Draws three side-by-side bar charts (one per operation) comparing the
    "random" and "sorted" modes for each structure, saves the figure to
    docs/performance_comparison.png and then shows it.

    Args:
        results: List of result dicts with the keys "Structure", "Mode",
            "AvgInsert", "AvgSearch" and "AvgDelete".
    """
    structures = ["LinkedList", "HashTable", "BST"]
    panels = [
        ("AvgInsert", "Insert"),
        ("AvgSearch", "Search"),
        ("AvgDelete", "Delete"),
    ]

    def values_for(mode, operation):
        # Collect the averages for one mode, ordered like `structures`.
        return [
            result[operation]
            for structure in structures
            for result in results
            if result["Structure"] == structure and result["Mode"] == mode
        ]

    x = np.arange(len(structures))
    width = 0.35

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    for ax, (operation, title) in zip(axes, panels):
        ax.bar(
            x - width / 2,
            values_for("random", operation),
            width,
            label="Random"
        )
        ax.bar(
            x + width / 2,
            values_for("sorted", operation),
            width,
            label="Sorted"
        )
        ax.set_xticks(x)
        ax.set_xticklabels(structures)
        ax.set_ylabel("Time (sec)")
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()

    # Create the output directory here as well, so plotting does not depend
    # on another function having created it first.
    os.makedirs("docs", exist_ok=True)
    plt.savefig(
        "docs/performance_comparison.png",
        dpi=300
    )
    plt.show()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the benchmark, persist the results, then print
    # a fixed summary. The conclusions below are static text and are not
    # derived from the measured numbers.
    print("\nTesting data structures...\n")

    results = run_experiment(N=10000)
    save_to_csv(results)
    plot_results(results)

    print("\nResults saved:")
    for artifact in ("docs/results.csv", "docs/performance_comparison.png"):
        print(artifact)

    print("\nConclusions:\n")
    conclusions = (
        "1. LinkedList is the slowest structure for searching.",
        "2. HashTable shows the best search performance.",
        "3. BST works well on random data.",
        "4. Sorted data causes BST degradation.",
        "5. HashTable is best for frequent search.",
        "6. BST is useful for ordered data.",
    )
    for conclusion in conclusions:
        print(conclusion)