diff --git a/kuznetsovTD/docs/data_task1/Figure_1.png b/kuznetsovTD/docs/data_task1/Figure_1.png new file mode 100644 index 0000000..47177e9 Binary files /dev/null and b/kuznetsovTD/docs/data_task1/Figure_1.png differ diff --git a/kuznetsovTD/docs/data_task1/Figure_2.png b/kuznetsovTD/docs/data_task1/Figure_2.png new file mode 100644 index 0000000..f779e6e Binary files /dev/null and b/kuznetsovTD/docs/data_task1/Figure_2.png differ diff --git a/kuznetsovTD/docs/data_task1/Figure_3.png b/kuznetsovTD/docs/data_task1/Figure_3.png new file mode 100644 index 0000000..98aa5bc Binary files /dev/null and b/kuznetsovTD/docs/data_task1/Figure_3.png differ diff --git a/kuznetsovTD/docs/data_task1/Figure_4.png b/kuznetsovTD/docs/data_task1/Figure_4.png new file mode 100644 index 0000000..e3b2f4c Binary files /dev/null and b/kuznetsovTD/docs/data_task1/Figure_4.png differ diff --git a/kuznetsovTD/docs/data_task1/Figure_5.png b/kuznetsovTD/docs/data_task1/Figure_5.png new file mode 100644 index 0000000..9676e76 Binary files /dev/null and b/kuznetsovTD/docs/data_task1/Figure_5.png differ diff --git a/kuznetsovTD/docs/data_task1/Figure_6.png b/kuznetsovTD/docs/data_task1/Figure_6.png new file mode 100644 index 0000000..53d5e16 Binary files /dev/null and b/kuznetsovTD/docs/data_task1/Figure_6.png differ diff --git a/kuznetsovTD/docs/data_task1/results.csv b/kuznetsovTD/docs/data_task1/results.csv new file mode 100644 index 0000000..81d3372 --- /dev/null +++ b/kuznetsovTD/docs/data_task1/results.csv @@ -0,0 +1,91 @@ +Structure,Order,Operation,Run,Time +LinkedList,shuffled,insert,1,0.0257587 +LinkedList,shuffled,find,1,0.000279 +LinkedList,shuffled,delete,1,0.0001351 +LinkedList,shuffled,insert,2,0.026379 +LinkedList,shuffled,find,2,0.0002401 +LinkedList,shuffled,delete,2,0.0001445 +LinkedList,shuffled,insert,3,0.0262926 +LinkedList,shuffled,find,3,0.0002492 +LinkedList,shuffled,delete,3,0.0001302 +LinkedList,shuffled,insert,4,0.0272092 +LinkedList,shuffled,find,4,0.0002737 
+LinkedList,shuffled,delete,4,0.0001722 +LinkedList,shuffled,insert,5,0.0273511 +LinkedList,shuffled,find,5,0.0003085 +LinkedList,shuffled,delete,5,0.000145 +HashTable,shuffled,insert,1,0.0048036 +HashTable,shuffled,find,1,5.49E-05 +HashTable,shuffled,delete,1,2.61E-05 +HashTable,shuffled,insert,2,0.0048772 +HashTable,shuffled,find,2,4.86E-05 +HashTable,shuffled,delete,2,2.67E-05 +HashTable,shuffled,insert,3,0.0049703 +HashTable,shuffled,find,3,4.97E-05 +HashTable,shuffled,delete,3,2.87E-05 +HashTable,shuffled,insert,4,0.0049025 +HashTable,shuffled,find,4,4.64E-05 +HashTable,shuffled,delete,4,2.44E-05 +HashTable,shuffled,insert,5,0.0044832 +HashTable,shuffled,find,5,4.71E-05 +HashTable,shuffled,delete,5,2.64E-05 +BST,shuffled,insert,1,0.0058005 +BST,shuffled,find,1,5.66E-05 +BST,shuffled,delete,1,6.04E-05 +BST,shuffled,insert,2,0.0065999 +BST,shuffled,find,2,5.00E-05 +BST,shuffled,delete,2,4.06E-05 +BST,shuffled,insert,3,0.0071857 +BST,shuffled,find,3,6.10E-05 +BST,shuffled,delete,3,4.86E-05 +BST,shuffled,insert,4,0.0068526 +BST,shuffled,find,4,5.29E-05 +BST,shuffled,delete,4,3.82E-05 +BST,shuffled,insert,5,0.006372 +BST,shuffled,find,5,5.53E-05 +BST,shuffled,delete,5,4.14E-05 +LinkedList,sorted,insert,1,0.0284396 +LinkedList,sorted,find,1,0.0002385 +LinkedList,sorted,delete,1,0.0001245 +LinkedList,sorted,insert,2,0.0278431 +LinkedList,sorted,find,2,0.0002442 +LinkedList,sorted,delete,2,0.0002502 +LinkedList,sorted,insert,3,0.0295056 +LinkedList,sorted,find,3,0.0002587 +LinkedList,sorted,delete,3,0.0001501 +LinkedList,sorted,insert,4,0.0284319 +LinkedList,sorted,find,4,0.0003089 +LinkedList,sorted,delete,4,0.0001414 +LinkedList,sorted,insert,5,0.0278425 +LinkedList,sorted,find,5,0.000254 +LinkedList,sorted,delete,5,0.0001282 +HashTable,sorted,insert,1,0.0049781 +HashTable,sorted,find,1,4.65E-05 +HashTable,sorted,delete,1,2.48E-05 +HashTable,sorted,insert,2,0.0048804 +HashTable,sorted,find,2,4.48E-05 +HashTable,sorted,delete,2,2.45E-05 
import csv
import random
import time


# ---------------------------------------------------------------------------
# kuznetsovTD/task1/generator/randomnames.py -- random phone-book records
# ---------------------------------------------------------------------------

# Pool of first names that records and queries are drawn from.
# The literal below lists "Pavel", "Gennady", "Artyomiy" and "Demyan" twice;
# dict.fromkeys() drops the repeats (keeping first-seen order) so that
# random.sample() can never return the same name twice and random.choice()
# is not biased towards the repeated names.
name_pool = tuple(dict.fromkeys((
    "Ivan", "Maria", "Peter", "Anna", "Sergey", "Elena", "Alexey", "Olga",
    "Dmitry", "Tatyana", "Mikhail", "Natalia", "Andrey", "Irina", "Nikolay",
    "Svetlana", "Vladimir", "Ekaterina", "Alexander", "Yulia", "Pavel", "Kseniya",
    "Victor", "Anastasia", "Artem", "Victoria", "Maxim", "Polina", "Daniil",
    "Sofia", "Evgeny", "Alice", "Stanislav", "Daria", "Georgy", "Veronika",
    "Kirill", "Margarita", "Timofey", "Arina", "Roman", "Valeria", "Igor",
    "Alina", "Oleg", "Diana", "Yuri", "Milana", "Vasily", "Eva", "Nikita",
    "Leonid", "Stepan", "Bogdan", "Gleb", "Matvey", "Arseny", "Denis",
    "Anton", "Vladislav", "Rodion", "Semyon", "Fedor", "Zahar", "Mark",
    "Lev", "Artyomiy", "Yaroslav", "Timur", "Ruslan", "Boris", "Vadim",
    "Konstantin", "Gennady", "Pavel", "Ilya", "Egor", "Nazar", "Damir",
    "Vsevolod", "Platon", "Savely", "Svyatoslav", "Miron", "Arkady",
    "Yevgeny", "Emil", "Arthur", "Demyan", "Rinat", "Marat", "Farid",
    "Rustam", "Ilshat", "Azamat", "Marcel", "Albert", "Eduard", "Viktor",
    "Rostislav", "Gennady", "Yegor", "Petr", "Zakhar",
    "Saveliy", "Gavriil", "Nestor", "Ignat", "Prokhor", "Taras",
    "Severin", "Luka", "Artyomiy", "Radion", "Demyan", "Yefim",
    "Neo", "Max", "Leo", "Sam", "Alex", "John", "Markus",
    "Kevin", "Daniel", "Robert", "James", "Michael", "David",
    "Andrew", "Chris", "Brian", "Steven", "Eric", "Thomas",
    "Ryan", "Justin", "Aaron", "Jason", "Nathan", "Luke",
)))


# Names that are not in name_pool; appended to query sets as guaranteed misses.
fake_names = [
    "Zero", "Kopek", "Half", "Quarter", "Eighth",
    "Pood", "Copper", "Silver", "Gold", "Ninth"
]


def generate_phone(length: int = 11) -> str:
    """Return a random decimal string of exactly ``length`` digits.

    The number is drawn uniformly from ``[10**(length-1), 10**length - 1]``,
    so the first digit is never ``0``.

    Raises:
        ValueError: if ``length`` is smaller than 1 (no such number exists).
    """
    if length < 1:
        raise ValueError(f"length must be >= 1, got {length}")

    start = 10 ** (length - 1)
    end = (10 ** length) - 1
    return str(random.randint(start, end))


def generate_dataset(size: int = 10000, sorted_flag: bool = False) -> list:
    """Return ``size`` random ``(name, phone)`` tuples.

    Names are drawn with replacement from ``name_pool``, so they repeat once
    ``size`` exceeds the pool size.  With ``sorted_flag`` set the records are
    ordered by name only (the sort is stable, phones are not compared).
    """
    dataset = [
        (random.choice(name_pool), generate_phone())
        for _ in range(size)
    ]

    if sorted_flag:
        dataset.sort(key=lambda record: record[0])

    return dataset


def generate_query_set() -> list:
    """Return up to 100 distinct pool names followed by every name in ``fake_names``."""
    # min() keeps this working if the pool ever shrinks below 100 names,
    # matching the guard build_search_set() already applies.
    return random.sample(name_pool, min(100, len(name_pool))) + fake_names


# ---------------------------------------------------------------------------
# kuznetsovTD/task1/exp.py -- timing experiment
# (name_pool / generate_dataset are the generator definitions above)
# ---------------------------------------------------------------------------

def build_search_set():
    """Return up to 100 distinct pool names plus ten ``Fake_<i>`` names that cannot match."""
    return random.sample(name_pool, min(100, len(name_pool))) + [
        f"Fake_{i}" for i in range(10)
    ]


def run_once(insert_f, find_f, delete_f, records, search_set, delete_set):
    """Time the insert, find and delete phases of one structure.

    Args:
        insert_f: ``insert_f(data, name, phone)`` returning the updated structure.
        find_f: ``find_f(data, name)``; its result is ignored.
        delete_f: ``delete_f(data, name)`` returning the updated structure.
        records: iterable of ``(name, phone)`` pairs to insert.
        search_set: names looked up after all inserts.
        delete_set: names removed after all lookups.

    Returns:
        ``(insert_time, find_time, delete_time)`` in seconds, measured with
        ``time.perf_counter``.  The structure starts as ``None``.
    """
    data = None

    # INSERT
    start = time.perf_counter()
    for name, phone in records:
        data = insert_f(data, name, phone)
    insert_time = time.perf_counter() - start

    # FIND
    start = time.perf_counter()
    for name in search_set:
        find_f(data, name)
    find_time = time.perf_counter() - start

    # DELETE
    start = time.perf_counter()
    for name in delete_set:
        data = delete_f(data, name)
    delete_time = time.perf_counter() - start

    return insert_time, find_time, delete_time


def _default_structures() -> list:
    """Return the ``(label, insert, find, delete)`` rows for the three project structures."""
    # Imported here rather than at module level so the timing helpers above
    # can be driven with caller-supplied structures even when the
    # ``structures`` package is not importable.
    from structures.LinkedList import ll_insert, ll_find, ll_delete
    from structures.HashTable import ht_insert, ht_find, ht_delete
    from structures.BinaryTree import bst_insert, bst_find, bst_delete

    return [
        ("LinkedList", ll_insert, ll_find, ll_delete),
        ("HashTable", ht_insert, ht_find, ht_delete),
        ("BST", bst_insert, bst_find, bst_delete),
    ]


def run_all(structures=None, runs: int = 5, size: int = 10000,
            delete_count: int = 50) -> list:
    """Benchmark every structure on shuffled and on name-sorted input.

    Args:
        structures: list of ``(label, insert, find, delete)`` tuples; defaults
            to LinkedList, HashTable and BST from the ``structures`` package.
        runs: repetitions per (order, structure) pair.
        size: number of records generated for each run.
        delete_count: how many distinct pool names are deleted per run
            (capped at the pool size).

    Returns:
        Rows ``[structure, order, operation, run, seconds]``; three rows
        (insert, find, delete) per run, run numbers starting at 1.
    """
    if structures is None:
        structures = _default_structures()

    results = []

    for order_name, sorted_flag in [("shuffled", False), ("sorted", True)]:
        for struct_name, ins, fnd, dlt in structures:
            for run_id in range(1, runs + 1):
                print(f"{struct_name} | {order_name} | run {run_id}")

                # A fresh dataset and fresh query sets are drawn for every run.
                records = generate_dataset(size, sorted_flag=sorted_flag)
                search_set = build_search_set()
                delete_set = random.sample(
                    name_pool, min(delete_count, len(name_pool))
                )

                timings = run_once(ins, fnd, dlt, records, search_set, delete_set)

                for operation, elapsed in zip(("insert", "find", "delete"), timings):
                    results.append(
                        [struct_name, order_name, operation, run_id, elapsed]
                    )

    return results


def save_csv(results, path: str = "results.csv"):
    """Write ``results`` rows to ``path`` as CSV under a fixed header row."""
    # newline="" is what the csv module requires to avoid doubled line endings.
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Structure", "Order", "Operation", "Run", "Time"])
        writer.writerows(results)


if __name__ == "__main__":
    results = run_all()
    save_csv(results)

    print("\nDONE -> results.csv created")
newline at end of file diff --git a/kuznetsovTD/task1/plot.py b/kuznetsovTD/task1/plot.py new file mode 100644 index 0000000..877a870 --- /dev/null +++ b/kuznetsovTD/task1/plot.py @@ -0,0 +1,49 @@ +import csv +import matplotlib.pyplot as plt +from collections import defaultdict + + +file_path = "results.csv" + +data = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) +# data[order][operation][structure] -> list of times + + +# читаем CSV +with open(file_path, "r") as f: + reader = csv.DictReader(f) + + for row in reader: + structure = row["Structure"] + order = row["Order"] + operation = row["Operation"] + time = float(row["Time"]) + + data[order][operation][structure].append(time) + + +def get_avg(order, operation, structure): + values = data[order][operation][structure] + return sum(values) / len(values) + + +def plot_hist(operation): + structures = ["LinkedList", "HashTable", "BST"] + orders = ["shuffled", "sorted"] + + for order in orders: + values = [get_avg(order, operation, s) for s in structures] + + plt.figure() + plt.bar(structures, values) + + plt.title(f"{operation.upper()} (order: {order})") + plt.ylabel("Time (seconds)") + + plt.show() + + +# 3 графика-гистограммы +plot_hist("insert") +plot_hist("find") +plot_hist("delete") \ No newline at end of file diff --git a/kuznetsovTD/task1/structures/BinaryTree.py b/kuznetsovTD/task1/structures/BinaryTree.py new file mode 100644 index 0000000..7de0a63 --- /dev/null +++ b/kuznetsovTD/task1/structures/BinaryTree.py @@ -0,0 +1,70 @@ +def bst_insert(root: dict | None, name: str, phone: str) -> dict: + if root is None: + return {'name': name, 'phone': phone, 'left': None, 'right': None} + + if name < root['name']: + root['left'] = bst_insert(root['left'], name, phone) + elif name > root['name']: + root['right'] = bst_insert(root['right'], name, phone) + else: + root['phone'] = phone + + return root + + +def bst_find(root: dict | None, name: str) -> str | None: + current = root + + while current is not 
None: + if name == current['name']: + return current['phone'] + elif name < current['name']: + current = current['left'] + else: + current = current['right'] + + return None + + +def _min_value(node: dict) -> dict: + while node['left'] is not None: + node = node['left'] + return node + + +def bst_delete(root: dict | None, name: str) -> dict | None: + if root is None: + return None + + if name < root['name']: + root['left'] = bst_delete(root['left'], name) + + elif name > root['name']: + root['right'] = bst_delete(root['right'], name) + + else: + + if root['left'] is None: + return root['right'] + if root['right'] is None: + return root['left'] + successor = _min_value(root['right']) + root['name'] = successor['name'] + root['phone'] = successor['phone'] + root['right'] = bst_delete(root['right'], successor['name']) + + return root + + +def bst_list_all(root: dict | None) -> list: + result = [] + + def walk(node: dict | None): + if node is None: + return + walk(node['left']) + result.append((node['name'], node['phone'])) + walk(node['right']) + + walk(root) + return result \ No newline at end of file diff --git a/kuznetsovTD/task1/structures/HashTable.py b/kuznetsovTD/task1/structures/HashTable.py new file mode 100644 index 0000000..ecd22d3 --- /dev/null +++ b/kuznetsovTD/task1/structures/HashTable.py @@ -0,0 +1,49 @@ +from structures.LinkedList import ll_insert, ll_find, ll_delete + + +def _hash_key(key: str, capacity: int) -> int: + acc = 0 + for ch in key: + acc = (acc * 31 + ord(ch)) % capacity + return acc + + +def ht_insert(storage: list | None, key: str, value: str, capacity: int = 50) -> list: + if storage is None: + storage = [None] * capacity + + idx = _hash_key(key, len(storage)) + storage[idx] = ll_insert(storage[idx], key, value) + return storage + + +def ht_find(storage: list | None, key: str) -> str | None: + if storage is None: + return None + + idx = _hash_key(key, len(storage)) + return ll_find(storage[idx], key) + + +def ht_delete(storage: list | 
None, key: str) -> list | None: + if storage is None: + return None + + idx = _hash_key(key, len(storage)) + storage[idx] = ll_delete(storage[idx], key) + return storage + + +def ht_list_all(storage: list | None) -> list: + if storage is None: + return [] + + result = [] + for chain in storage: + node = chain + while node is not None: + result.append((node['name'], node['phone'])) + node = node['next'] + + result.sort(key=lambda x: x[0]) + return result \ No newline at end of file diff --git a/kuznetsovTD/task1/structures/LinkedList.py b/kuznetsovTD/task1/structures/LinkedList.py new file mode 100644 index 0000000..6edeac5 --- /dev/null +++ b/kuznetsovTD/task1/structures/LinkedList.py @@ -0,0 +1,51 @@ +def ll_list_all(start: dict | None) -> list: + items = [] + node = start + while node is not None: + items.append((node['name'], node['phone'])) + node = node['next'] + items.sort(key=lambda x: x[0]) + return items + + +def ll_delete(start: dict | None, key: str) -> dict | None: + if start is None: + return None + + if start['name'] == key: + return start['next'] + + ptr = start + while ptr['next'] is not None: + if ptr['next']['name'] == key: + ptr['next'] = ptr['next']['next'] + return start + ptr = ptr['next'] + + return start + + +def ll_find(start: dict | None, key: str) -> str | None: + ptr = start + while ptr is not None: + if ptr['name'] == key: + return ptr['phone'] + ptr = ptr['next'] + return None + + +def ll_insert(start: dict | None, key: str, value: str) -> dict: + if start is None: + return {'name': key, 'phone': value, 'next': None} + + ptr = start + while True: + if ptr['name'] == key: + ptr['phone'] = value + return start + + if ptr['next'] is None: + ptr['next'] = {'name': key, 'phone': value, 'next': None} + return start + + ptr = ptr['next'] \ No newline at end of file diff --git a/kuznetsovTD/task1/structures/__init__.py b/kuznetsovTD/task1/structures/__init__.py new file mode 100644 index 0000000..e69de29