diff --git a/YaroslavtsevAS/docs/ReportLab1.docx b/YaroslavtsevAS/docs/ReportLab1.docx new file mode 100644 index 0000000..ee6f2f1 Binary files /dev/null and b/YaroslavtsevAS/docs/ReportLab1.docx differ diff --git a/YaroslavtsevAS/docs/performance_comparison.png b/YaroslavtsevAS/docs/performance_comparison.png new file mode 100644 index 0000000..609cfef Binary files /dev/null and b/YaroslavtsevAS/docs/performance_comparison.png differ diff --git a/YaroslavtsevAS/docs/results.csv b/YaroslavtsevAS/docs/results.csv new file mode 100644 index 0000000..e820e6a --- /dev/null +++ b/YaroslavtsevAS/docs/results.csv @@ -0,0 +1,19 @@ +Structure,Mode,Operation,Run1,Run2,Run3,Run4,Run5,Average +LinkedList,random,Insert,7.614908,8.007141,8.790796,9.036304,9.488376,8.587505 +LinkedList,random,Search,0.084409,0.086699,0.114697,0.095945,0.220154,0.120381 +LinkedList,random,Delete,0.116604,0.110528,0.109806,0.113804,0.050557,0.100260 +LinkedList,sorted,Insert,9.123691,10.006184,8.030617,9.528127,7.143701,8.766464 +LinkedList,sorted,Search,0.119573,0.094237,0.082494,0.104148,0.043972,0.088885 +LinkedList,sorted,Delete,0.023634,0.023499,0.023649,0.023649,0.023822,0.023651 +HashTable,random,Insert,0.397523,0.339688,0.343173,0.445151,0.352442,0.375595 +HashTable,random,Search,0.009526,0.008965,0.009936,0.008976,0.008971,0.009275 +HashTable,random,Delete,0.004966,0.004936,0.004983,0.005517,0.005983,0.005277 +HashTable,sorted,Insert,0.935748,0.769911,0.394893,0.332380,0.302012,0.546989 +HashTable,sorted,Search,0.005751,0.005670,0.005420,0.005354,0.005321,0.005503 +HashTable,sorted,Delete,0.003136,0.003161,0.003117,0.003158,0.003138,0.003142 +BST,random,Insert,0.046068,0.046076,0.042452,0.023921,0.023297,0.036363 +BST,random,Search,0.000274,0.000229,0.000228,0.000226,0.000226,0.000236 +BST,random,Delete,0.000362,0.000344,0.000343,0.000336,0.000347,0.000346 +BST,sorted,Insert,14.359368,13.962086,16.018128,14.777863,15.188039,14.861097 
# Residue of the surrounding patch (tail of docs/results.csv and the diff
# header of this file), kept verbatim as comments so that no data is lost:
#
# +BST,sorted,Search,0.056389,0.053998,0.057459,0.087951,0.084866,0.068133
# +BST,sorted,Delete,0.070368,0.066086,0.069282,0.066915,0.071834,0.068897
# diff --git a/YaroslavtsevAS/lab1.py b/YaroslavtsevAS/lab1.py
# new file mode 100644
# index 0000000..39cdf7c
# --- /dev/null
# +++ b/YaroslavtsevAS/lab1.py
# @@ -0,0 +1,616 @@
"""Phone-book benchmark: linked list vs. hash table vs. binary search tree.

Every structure stores ``name -> phone`` records in plain dictionaries:

* linked-list node: ``{"name", "phone", "next"}``
* hash table: a list of buckets, each bucket being a linked list (or None)
* BST node: ``{"name", "phone", "left", "right"}``

Running the module as a script measures insert/search/delete times on
shuffled and sorted input, writes ``docs/results.csv`` and draws
``docs/performance_comparison.png``.
"""

import csv
import os
import random
import sys
import time

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the data structures and the timing helpers stay
    # importable without matplotlib; plot_results() reports the problem.
    plt = None

# Kept from the original module. All tree routines below are iterative, so
# the module itself no longer depends on a raised limit.
sys.setrecursionlimit(20000)

STRUCTURES = ("LinkedList", "HashTable", "BST")
OPERATIONS = ("Insert", "Search", "Delete")


# --------------------------------------------------------------------------
# Linked list
# --------------------------------------------------------------------------

def ll_insert(head, name, phone):
    """Insert or update *name* in the list and return the (new) head.

    An existing record gets its phone overwritten in place; otherwise a new
    node is prepended. The duplicate check walks the whole list.
    """
    node = head
    while node:
        if node["name"] == name:
            node["phone"] = phone
            return head
        node = node["next"]
    return {"name": name, "phone": phone, "next": head}


def ll_find(head, name):
    """Return the phone stored for *name*, or None when it is absent."""
    node = head
    while node:
        if node["name"] == name:
            return node["phone"]
        node = node["next"]
    return None


def ll_delete(head, name):
    """Unlink the first node named *name* and return the (new) head."""
    if not head:
        return None
    if head["name"] == name:
        return head["next"]
    previous = head
    while previous["next"]:
        if previous["next"]["name"] == name:
            previous["next"] = previous["next"]["next"]
            return head
        previous = previous["next"]
    return head


def ll_list_all(head):
    """Return all ``(name, phone)`` pairs of the list, sorted by name."""
    records = []
    node = head
    while node:
        records.append((node["name"], node["phone"]))
        node = node["next"]
    records.sort(key=lambda record: record[0])
    return records


# --------------------------------------------------------------------------
# Hash table (separate chaining on top of the linked list)
# --------------------------------------------------------------------------

def hash_function(name, size):
    """Map *name* to a bucket index in ``range(size)``.

    Uses a positional (polynomial, base 31) hash. The previous plain sum of
    character codes ignored character order, so keys such as
    ``User_00012`` / ``User_00021`` always collided and the generated test
    names filled only a few dozen of the buckets.
    """
    value = 0
    for char in name:
        # Mask keeps the intermediate value bounded for long names.
        value = (value * 31 + ord(char)) & 0xFFFFFFFF
    return value % size


def ht_create(size=2000):
    """Return an empty table with *size* buckets.

    Raises:
        ValueError: if *size* is not positive (such a table could not hold
            any record and would fail later on the modulo).
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    return [None] * size


def ht_insert(buckets, name, phone):
    """Insert or update *name* in the table (in place)."""
    index = hash_function(name, len(buckets))
    buckets[index] = ll_insert(buckets[index], name, phone)


def ht_find(buckets, name):
    """Return the phone stored for *name*, or None when it is absent."""
    return ll_find(buckets[hash_function(name, len(buckets))], name)


def ht_delete(buckets, name):
    """Remove *name* from the table (in place); absent names are ignored."""
    index = hash_function(name, len(buckets))
    buckets[index] = ll_delete(buckets[index], name)


def ht_list_all(buckets):
    """Return all ``(name, phone)`` pairs of the table, sorted by name."""
    records = []
    for bucket in buckets:
        node = bucket
        while node:
            records.append((node["name"], node["phone"]))
            node = node["next"]
    records.sort(key=lambda record: record[0])
    return records


# --------------------------------------------------------------------------
# Binary search tree (unbalanced)
# --------------------------------------------------------------------------

def _new_tree_node(name, phone):
    """Return a fresh leaf node."""
    return {"name": name, "phone": phone, "left": None, "right": None}


def bst_insert(root, name, phone):
    """Insert or update *name* in the tree and return the root."""
    if root is None:
        return _new_tree_node(name, phone)

    current = root
    while True:
        if name == current["name"]:
            current["phone"] = phone
            return root
        side = "left" if name < current["name"] else "right"
        if current[side] is None:
            current[side] = _new_tree_node(name, phone)
            return root
        current = current[side]


def bst_find(root, name):
    """Return the phone stored for *name*, or None when it is absent."""
    current = root
    while current:
        if name == current["name"]:
            return current["phone"]
        current = current["left"] if name < current["name"] else current["right"]
    return None


def bst_find_min(node):
    """Return the left-most node of the subtree rooted at *node*."""
    current = node
    while current["left"]:
        current = current["left"]
    return current


def bst_delete(root, name):
    """Remove *name* from the tree and return the (new) root.

    Iterative on purpose: sorted input degenerates the tree into a chain
    whose depth equals the record count, and a recursive delete would need
    one stack frame per level.
    """
    parent = None
    node = root
    while node is not None and node["name"] != name:
        parent = node
        node = node["left"] if name < node["name"] else node["right"]
    if node is None:
        return root  # nothing to delete

    if node["left"] is not None and node["right"] is not None:
        # Two children: take over the in-order successor's record, then
        # splice the successor (which has no left child) out instead.
        successor_parent = node
        successor = node["right"]
        while successor["left"] is not None:
            successor_parent = successor
            successor = successor["left"]
        node["name"] = successor["name"]
        node["phone"] = successor["phone"]
        parent, node = successor_parent, successor

    # Here *node* has at most one child; hook that child to the parent.
    child = node["left"] if node["left"] is not None else node["right"]
    if parent is None:
        return child
    if parent["left"] is node:
        parent["left"] = child
    else:
        parent["right"] = child
    return root


def bst_list_all(root):
    """Return all ``(name, phone)`` pairs in ascending name order."""
    records = []
    stack = []
    current = root
    while stack or current:
        while current:
            stack.append(current)
            current = current["left"]
        current = stack.pop()
        records.append((current["name"], current["phone"]))
        current = current["right"]
    return records


# --------------------------------------------------------------------------
# Copies (so that deletion can be timed repeatedly on identical data)
# --------------------------------------------------------------------------

def copy_linked_list(head):
    """Return an independent copy of the list (None for an empty list)."""
    if not head:
        return None

    new_head = {"name": head["name"], "phone": head["phone"], "next": None}
    tail = new_head
    source = head["next"]
    while source:
        tail["next"] = {
            "name": source["name"],
            "phone": source["phone"],
            "next": None,
        }
        tail = tail["next"]
        source = source["next"]
    return new_head


def copy_bst(node):
    """Return an independent copy of the tree rooted at *node*.

    Uses an explicit stack instead of recursion so that degenerate
    (chain-shaped) trees of any depth can be copied.
    """
    if not node:
        return None

    root_copy = _new_tree_node(node["name"], node["phone"])
    pending = [(node, root_copy)]
    while pending:
        source, target = pending.pop()
        for side in ("left", "right"):
            child = source[side]
            if child:
                child_copy = _new_tree_node(child["name"], child["phone"])
                target[side] = child_copy
                pending.append((child, child_copy))
    return root_copy


# --------------------------------------------------------------------------
# Test data
# --------------------------------------------------------------------------

def generate_test_data(N=10000):
    """Return ``(shuffled, sorted)`` lists of N ``(name, phone)`` records."""
    records = [
        (f"User_{i:05d}", f"+7-999-{random.randint(1000000, 9999999)}")
        for i in range(N)
    ]
    records_shuffled = records.copy()
    random.shuffle(records_shuffled)
    records_sorted = sorted(records)
    return records_shuffled, records_sorted


def get_test_queries(records, existing_count=100, missing_count=10):
    """Return a shuffled list of names to look up.

    It holds up to *existing_count* names drawn from *records* plus
    *missing_count* names that are never stored. The sample size is clamped
    to ``len(records)`` so that small data sets do not raise ``ValueError``.
    """
    existing = random.sample(records, min(existing_count, len(records)))
    queries = [name for name, _ in existing]
    queries.extend(f"None_{i:05d}" for i in range(missing_count))
    random.shuffle(queries)
    return queries


def get_delete_names(records, count=50):
    """Return up to *count* distinct names drawn from *records*."""
    selected = random.sample(records, min(count, len(records)))
    return [name for name, _ in selected]


# --------------------------------------------------------------------------
# Measurements
# --------------------------------------------------------------------------

def build_structure(structure_type, records):
    """Build and return a structure of *structure_type* holding *records*.

    Any type other than "LinkedList" or "HashTable" is treated as "BST".
    """
    if structure_type == "HashTable":
        table = ht_create()
        for name, phone in records:
            ht_insert(table, name, phone)
        return table

    insert = ll_insert if structure_type == "LinkedList" else bst_insert
    structure = None
    for name, phone in records:
        structure = insert(structure, name, phone)
    return structure


def _clone_structure(structure_type, structure):
    """Return an independent copy of *structure*."""
    if structure_type == "LinkedList":
        return copy_linked_list(structure)
    if structure_type == "HashTable":
        return [copy_linked_list(bucket) for bucket in structure]
    return copy_bst(structure)


def _delete_all(structure_type, structure, names):
    """Delete every name in *names* and return the resulting structure."""
    if structure_type == "HashTable":
        for name in names:
            ht_delete(structure, name)
        return structure

    delete = ll_delete if structure_type == "LinkedList" else bst_delete
    for name in names:
        structure = delete(structure, name)
    return structure


def measure_insertion(structure_type, records, repeats=5):
    """Return the times (seconds) of *repeats* full builds from *records*."""
    times = []
    for _ in range(repeats):
        start = time.perf_counter()
        build_structure(structure_type, records)
        times.append(time.perf_counter() - start)
    return times


def measure_search(structure_type, structure, queries, repeats=5):
    """Return the times (seconds) of *repeats* passes over *queries*."""
    # Resolve the lookup function once so the timed loop only searches.
    if structure_type == "LinkedList":
        find = ll_find
    elif structure_type == "HashTable":
        find = ht_find
    else:
        find = bst_find

    times = []
    for _ in range(repeats):
        start = time.perf_counter()
        for name in queries:
            find(structure, name)
        times.append(time.perf_counter() - start)
    return times


def measure_deletion(structure_type, structure, delete_names, repeats=5):
    """Return the times (seconds) of *repeats* deletion passes.

    Every pass works on a fresh copy, so *structure* itself is not modified
    and all passes delete from identical data. Copying is not timed.
    """
    times = []
    for _ in range(repeats):
        scratch = _clone_structure(structure_type, structure)
        start = time.perf_counter()
        _delete_all(structure_type, scratch, delete_names)
        times.append(time.perf_counter() - start)
    return times


def _average(values):
    """Return the arithmetic mean of *values* (0.0 for an empty list)."""
    return sum(values) / len(values) if values else 0.0


def run_experiment(N=10000, repeats=5):
    """Benchmark every structure on shuffled and sorted data.

    Returns a list of dicts with the keys "Structure", "Mode", "Insert",
    "Search", "Delete" (lists of per-run seconds) and "AvgInsert",
    "AvgSearch", "AvgDelete" (their means).
    """
    records_shuffled, records_sorted = generate_test_data(N)
    queries = get_test_queries(records_shuffled)
    delete_names = get_delete_names(records_shuffled)
    modes = (("random", records_shuffled), ("sorted", records_sorted))

    results = []
    for structure in STRUCTURES:
        for mode_name, records in modes:
            insert_times = measure_insertion(structure, records, repeats)
            final_structure = build_structure(structure, records)
            search_times = measure_search(
                structure, final_structure, queries, repeats
            )
            delete_times = measure_deletion(
                structure, final_structure, delete_names, repeats
            )
            results.append({
                "Structure": structure,
                "Mode": mode_name,
                "Insert": insert_times,
                "Search": search_times,
                "Delete": delete_times,
                "AvgInsert": _average(insert_times),
                "AvgSearch": _average(search_times),
                "AvgDelete": _average(delete_times),
            })
    return results


# --------------------------------------------------------------------------
# Output
# --------------------------------------------------------------------------

def _ensure_parent_dir(path):
    """Create the directory that will contain *path*, if there is one."""
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)


def save_to_csv(results, path="docs/results.csv"):
    """Write one CSV row per structure/mode/operation to *path*.

    The number of ``RunN`` columns follows the data (five when *results* is
    empty); shorter rows are padded so "Average" stays in the last column.
    """
    run_count = max(
        (len(result[op]) for result in results for op in OPERATIONS),
        default=5,
    )
    header = ["Structure", "Mode", "Operation"]
    header.extend(f"Run{i}" for i in range(1, run_count + 1))
    header.append("Average")

    _ensure_parent_dir(path)
    with open(path, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(header)
        for result in results:
            for operation in OPERATIONS:
                runs = [f"{x:.6f}" for x in result[operation]]
                runs.extend([""] * (run_count - len(runs)))
                writer.writerow([
                    result["Structure"],
                    result["Mode"],
                    operation,
                    *runs,
                    f"{result['Avg' + operation]:.6f}",
                ])


def plot_results(results, path="docs/performance_comparison.png"):
    """Draw grouped bars (random vs. sorted) per operation and save to *path*.

    A structure/mode pair missing from *results* is drawn as a zero bar.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required to plot the results")

    by_key = {
        (result["Structure"], result["Mode"]): result for result in results
    }
    x = np.arange(len(STRUCTURES))
    width = 0.35
    series = (
        (-width / 2, "random", "Random"),
        (width / 2, "sorted", "Sorted"),
    )

    _fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    for ax, title in zip(axes, OPERATIONS):
        metric = "Avg" + title
        for offset, mode, label in series:
            values = [
                by_key.get((structure, mode), {}).get(metric, 0.0)
                for structure in STRUCTURES
            ]
            ax.bar(x + offset, values, width, label=label)
        ax.set_xticks(x)
        ax.set_xticklabels(list(STRUCTURES))
        ax.set_ylabel("Time (sec)")
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    _ensure_parent_dir(path)
    plt.savefig(path, dpi=300)
    plt.show()


def main():
    """Run the benchmark, store its results and print the conclusions."""
    print("\nTesting data structures...\n")
    results = run_experiment(N=10000)
    save_to_csv(results)
    plot_results(results)
    print("\nResults saved:")
    print("docs/results.csv")
    print("docs/performance_comparison.png")
    print("\nConclusions:\n")
    print(
        "1. LinkedList is the slowest structure "
        "for searching."
    )
    print(
        "2. HashTable shows the best "
        "search performance."
    )
    print("3. BST works well on random data.")
    print("4. Sorted data causes BST degradation.")
    print("5. HashTable is best for frequent search.")
    print("6. BST is useful for ordered data.")


if __name__ == "__main__":
    main()