forked from UNN/2026-rff_mp
Start
This commit is contained in:
parent
1a041a4dac
commit
155b75f45d
3
BolonkinNM/Task 1/.idea/.gitignore
vendored
Normal file
3
BolonkinNM/Task 1/.idea/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
14
BolonkinNM/Task 1/.idea/ds_project_archive.iml
Normal file
14
BolonkinNM/Task 1/.idea/ds_project_archive.iml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyDocumentationSettings">
|
||||
<option name="format" value="PLAIN" />
|
||||
<option name="myDocStringFormat" value="Plain" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
4
BolonkinNM/Task 1/.idea/misc.xml
Normal file
4
BolonkinNM/Task 1/.idea/misc.xml
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (ds_project_archive)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
BolonkinNM/Task 1/.idea/modules.xml
Normal file
8
BolonkinNM/Task 1/.idea/modules.xml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/ds_project_archive.iml" filepath="$PROJECT_DIR$/.idea/ds_project_archive.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
18
BolonkinNM/Task 1/README.md
Normal file
18
BolonkinNM/Task 1/README.md
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Задание 1 — структуры данных
|
||||
|
||||
Процедурная реализация:
|
||||
- linked_list.py
|
||||
- hash_table.py
|
||||
- bst.py
|
||||
|
||||
Эксперименты и отчёты:
|
||||
- experiments.py
|
||||
- plot_results.py
|
||||
- results.csv
|
||||
- docs/report.md
|
||||
- docs/data/*.png
|
||||
|
||||
Запуск:
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
82
BolonkinNM/Task 1/Task 1.py
Normal file
82
BolonkinNM/Task 1/Task 1.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Labels for the seven numeric fields kept from the input file.
col_names = [
    'T(C)',
    'Td(C)',
    'HR%',
    'ff(kmh)',
    'Gust(kmh)',
    'P_0(HPa)',
    'P_sea(HPa)',
]

# Whitespace-separated text file: the first row is skipped and only the
# positional columns 2..8 are read, relabelled with col_names.
data = pd.read_csv(
    "data_meteo.txt",
    sep=r'\s+',
    skiprows=1,
    usecols=list(range(2, 9)),
    names=col_names,
    engine='python',
)

print("загружено записей (строк):", len(data))

# Coerce every cell to a number (cells that cannot be parsed become NaN),
# then drop each row that has a NaN in any of the tracked columns.
data = data.apply(pd.to_numeric, errors='coerce')
n_before = data.shape[0]
data = data.dropna(subset=col_names)
n_after = data.shape[0]

print(f"после приведения к числам и dropna: {n_after} строк (удалено {n_before - n_after})")
|
||||
|
||||
def correlation(vec1, vec2, center=False):
    """Return the normalised dot product of two equally long vectors.

    Args:
        vec1: First sequence of numbers; converted to a float array.
        vec2: Second sequence of numbers; converted to a float array.
        center: When true, each vector has its own mean subtracted before
            the product is formed.

    Returns:
        ``sum(v1 * v2) / (|v1| * |v2|)``, or ``np.nan`` when either
        (possibly centred) vector has a Euclidean norm of exactly zero.
    """
    a, b = (np.asarray(values, dtype=float) for values in (vec1, vec2))

    if center:
        a = a - np.mean(a)
        b = b - np.mean(b)

    numerator = np.sum(a * b)
    norm_a = np.sqrt(np.sum(a ** 2))
    norm_b = np.sqrt(np.sum(b ** 2))

    # A zero-length vector has no direction, so the ratio is undefined.
    if not (norm_a != 0 and norm_b != 0):
        return np.nan

    return numerator / (norm_a * norm_b)
|
||||
|
||||
n = len(col_names)

# n x n tables of pairwise coefficients: one computed on the raw columns,
# one on mean-centred columns.
raw_matrix = np.zeros((n, n))
center_matrix = np.zeros((n, n))

for i in range(n):
    column_i = data.iloc[:, i]
    for j in range(n):
        column_j = data.iloc[:, j]
        r_raw = correlation(column_i, column_j, center=False)
        r_center = correlation(column_i, column_j, center=True)

        # Stored rounded to three decimals; an undefined value stays NaN.
        raw_matrix[i, j] = np.nan if np.isnan(r_raw) else np.round(r_raw, 3)
        center_matrix[i, j] = np.nan if np.isnan(r_center) else np.round(r_center, 3)

df_raw = pd.DataFrame(raw_matrix, index=col_names, columns=col_names)
df_center = pd.DataFrame(center_matrix, index=col_names, columns=col_names)

for heading, table in (
    ("\nкорреляция (raw):", df_raw),
    ("\nкорреляция (centered):", df_center),
    ("\nразница (centered - raw):", (df_center - df_raw).round(3)),
):
    print(heading)
    print(table.to_string())

# One line per column, each shifted by its own mean so that all series
# share the zero baseline drawn below.
plt.figure(figsize=(14, 8))

for col in col_names:
    series = data[col]
    centered_values = series - series.mean()
    plt.plot(centered_values.values, label=col, marker='.', linewidth=1, markersize=4)

plt.axhline(0, linestyle='--')
plt.title("центрированные метеоданные")
plt.xlabel("номер измерения")
plt.ylabel("отклонение от среднего")
plt.legend()
plt.grid()
plt.tight_layout()

plt.savefig("meteo_analysis.png", dpi=150)
plt.show()
|
||||
118
BolonkinNM/Task 1/bst.py
Normal file
118
BolonkinNM/Task 1/bst.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
Node = Dict[str, Any]
|
||||
|
||||
|
||||
def _make_node(name: str, phone: str) -> Node:
|
||||
return {"name": name, "phone": phone, "left": None, "right": None}
|
||||
|
||||
|
||||
def bst_insert(root: Optional[Node], name: str, phone: str) -> Node:
    """Store ``phone`` under ``name``, overwriting the phone of an existing key.

    The tree is walked with a loop; no recursion is used.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key to insert or update.
        phone: Value to store.

    Returns:
        The root of the tree: the freshly created node when ``root`` was
        ``None``, otherwise ``root`` itself.
    """
    if root is None:
        return _make_node(name, phone)

    node = root
    while True:
        key = node["name"]
        if name < key:
            side = "left"
        elif name > key:
            side = "right"
        else:
            # Key already present: update in place, tree shape is unchanged.
            node["phone"] = phone
            return root

        child = node[side]
        if child is None:
            # Reached an empty slot on the search path: attach the new leaf.
            node[side] = _make_node(name, phone)
            return root
        node = child
|
||||
|
||||
|
||||
def bst_find(root: Optional[Node], name: str) -> Optional[str]:
    """Look up the phone stored under ``name``.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key to search for.

    Returns:
        The stored phone, or ``None`` when the key is not in the tree.
    """
    node = root
    while node is not None:
        key = node["name"]
        if not (name < key or name > key):
            return node["phone"]
        node = node["left"] if name < key else node["right"]
    return None
|
||||
|
||||
|
||||
def _find_min_node(node: Node) -> Node:
|
||||
current = node
|
||||
while current["left"] is not None:
|
||||
current = current["left"]
|
||||
return current
|
||||
|
||||
|
||||
def bst_delete(root: Optional[Node], name: str) -> Optional[Node]:
    """Remove the entry keyed by ``name`` and return the resulting root.

    A node with at most one child is replaced by that child (or by
    ``None``). A node with two children keeps its place in the tree: its
    ``name`` and ``phone`` are overwritten with those of its in-order
    successor (the leftmost node of its right subtree), and the successor
    node is unlinked instead.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key to remove.

    Returns:
        The root after removal. This is ``root`` itself unless the root
        node was removed while having fewer than two children, in which
        case it is that child or ``None``. A missing key leaves the tree
        untouched.
    """
    # Locate the target node and remember its parent.
    parent = None
    target = root
    while target is not None and target["name"] != name:
        parent = target
        target = target["left"] if name < target["name"] else target["right"]

    if target is None:
        # Empty tree or key not present.
        return root

    left, right = target["left"], target["right"]

    if left is not None and right is not None:
        # Two children: find the successor together with its parent.
        succ_parent = target
        successor = right
        while successor["left"] is not None:
            succ_parent, successor = successor, successor["left"]

        target["name"] = successor["name"]
        target["phone"] = successor["phone"]

        # The successor has no left child, so its right subtree takes its
        # place. It hangs on the right only when it is target's own child.
        if succ_parent["left"] is successor:
            succ_parent["left"] = successor["right"]
        else:
            succ_parent["right"] = successor["right"]
        return root

    # Zero or one child: splice the child into the target's position.
    child = left if left is not None else right
    if parent is None:
        return child
    if parent["left"] is target:
        parent["left"] = child
    else:
        parent["right"] = child
    return root
|
||||
|
||||
|
||||
def bst_list_all(root: Optional[Node]) -> List[Dict[str, str]]:
    """Return every entry of the tree in in-order (ascending key) sequence.

    The traversal keeps its own stack instead of recursing.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.

    Returns:
        A list of new ``{"name": ..., "phone": ...}`` dicts; the tree nodes
        themselves are not exposed.
    """
    entries: List[Dict[str, str]] = []
    pending: List[Node] = []
    node = root

    while True:
        # Walk down the left spine, remembering each node on the way.
        while node is not None:
            pending.append(node)
            node = node["left"]

        if not pending:
            return entries

        node = pending.pop()
        entries.append({key: node[key] for key in ("name", "phone")})
        node = node["right"]
|
||||
BIN
BolonkinNM/Task 1/docs/data/delete.png
Normal file
BIN
BolonkinNM/Task 1/docs/data/delete.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 61 KiB |
BIN
BolonkinNM/Task 1/docs/data/find.png
Normal file
BIN
BolonkinNM/Task 1/docs/data/find.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 51 KiB |
BIN
BolonkinNM/Task 1/docs/data/insert.png
Normal file
BIN
BolonkinNM/Task 1/docs/data/insert.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 48 KiB |
109
BolonkinNM/Task 1/docs/data/results.csv
Normal file
109
BolonkinNM/Task 1/docs/data/results.csv
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
Структура,Режим,Операция,Замер,Время (сек)
|
||||
LinkedList,случайный,insert,1,4.2622492010
|
||||
LinkedList,случайный,find,1,0.0314994130
|
||||
LinkedList,случайный,delete,1,0.0149069000
|
||||
LinkedList,случайный,insert,2,4.0154580330
|
||||
LinkedList,случайный,find,2,0.0393284500
|
||||
LinkedList,случайный,delete,2,0.0210732100
|
||||
LinkedList,случайный,insert,3,4.0436019780
|
||||
LinkedList,случайный,find,3,0.0344933660
|
||||
LinkedList,случайный,delete,3,0.0152639850
|
||||
LinkedList,случайный,insert,4,3.7182993220
|
||||
LinkedList,случайный,find,4,0.0327698850
|
||||
LinkedList,случайный,delete,4,0.0149959540
|
||||
LinkedList,случайный,insert,5,3.7082228200
|
||||
LinkedList,случайный,find,5,0.0303762490
|
||||
LinkedList,случайный,delete,5,0.0141406560
|
||||
LinkedList,случайный,insert,среднее,3.9495662708
|
||||
LinkedList,случайный,find,среднее,0.0336934726
|
||||
LinkedList,случайный,delete,среднее,0.0160761410
|
||||
HashTable,случайный,insert,1,0.2059865770
|
||||
HashTable,случайный,find,1,0.0014966100
|
||||
HashTable,случайный,delete,1,0.0006891700
|
||||
HashTable,случайный,insert,2,0.2024331460
|
||||
HashTable,случайный,find,2,0.0015934880
|
||||
HashTable,случайный,delete,2,0.0007212620
|
||||
HashTable,случайный,insert,3,0.2126128040
|
||||
HashTable,случайный,find,3,0.0016566220
|
||||
HashTable,случайный,delete,3,0.0008358420
|
||||
HashTable,случайный,insert,4,0.2157934910
|
||||
HashTable,случайный,find,4,0.0015542810
|
||||
HashTable,случайный,delete,4,0.0007269120
|
||||
HashTable,случайный,insert,5,0.2079924580
|
||||
HashTable,случайный,find,5,0.0013696990
|
||||
HashTable,случайный,delete,5,0.0006616050
|
||||
HashTable,случайный,insert,среднее,0.2089636952
|
||||
HashTable,случайный,find,среднее,0.0015341400
|
||||
HashTable,случайный,delete,среднее,0.0007269582
|
||||
BST,случайный,insert,1,0.0166981280
|
||||
BST,случайный,find,1,0.0001569360
|
||||
BST,случайный,delete,1,0.0000917280
|
||||
BST,случайный,insert,2,0.0184119040
|
||||
BST,случайный,find,2,0.0001517110
|
||||
BST,случайный,delete,2,0.0001163770
|
||||
BST,случайный,insert,3,0.0174662270
|
||||
BST,случайный,find,3,0.0001582930
|
||||
BST,случайный,delete,3,0.0000892660
|
||||
BST,случайный,insert,4,0.0191369100
|
||||
BST,случайный,find,4,0.0002087170
|
||||
BST,случайный,delete,4,0.0001067050
|
||||
BST,случайный,insert,5,0.0184276900
|
||||
BST,случайный,find,5,0.0002767720
|
||||
BST,случайный,delete,5,0.0001067660
|
||||
BST,случайный,insert,среднее,0.0180281718
|
||||
BST,случайный,find,среднее,0.0001904858
|
||||
BST,случайный,delete,среднее,0.0001021684
|
||||
LinkedList,отсортированный,insert,1,2.9875078340
|
||||
LinkedList,отсортированный,find,1,0.0237300610
|
||||
LinkedList,отсортированный,delete,1,0.0111698260
|
||||
LinkedList,отсортированный,insert,2,3.0573987940
|
||||
LinkedList,отсортированный,find,2,0.0243270360
|
||||
LinkedList,отсортированный,delete,2,0.0115366030
|
||||
LinkedList,отсортированный,insert,3,2.9641987260
|
||||
LinkedList,отсортированный,find,3,0.0236313330
|
||||
LinkedList,отсортированный,delete,3,0.0112848510
|
||||
LinkedList,отсортированный,insert,4,3.0345914950
|
||||
LinkedList,отсортированный,find,4,0.0240271220
|
||||
LinkedList,отсортированный,delete,4,0.0112117310
|
||||
LinkedList,отсортированный,insert,5,2.9481954700
|
||||
LinkedList,отсортированный,find,5,0.0239006100
|
||||
LinkedList,отсортированный,delete,5,0.0110857710
|
||||
LinkedList,отсортированный,insert,среднее,2.9983784638
|
||||
LinkedList,отсортированный,find,среднее,0.0239232324
|
||||
LinkedList,отсортированный,delete,среднее,0.0112577564
|
||||
HashTable,отсортированный,insert,1,0.1997087560
|
||||
HashTable,отсортированный,find,1,0.0017550400
|
||||
HashTable,отсортированный,delete,1,0.0008407980
|
||||
HashTable,отсортированный,insert,2,0.1968675190
|
||||
HashTable,отсортированный,find,2,0.0019886760
|
||||
HashTable,отсортированный,delete,2,0.0008920910
|
||||
HashTable,отсортированный,insert,3,0.1907563580
|
||||
HashTable,отсортированный,find,3,0.0018447440
|
||||
HashTable,отсортированный,delete,3,0.0008684640
|
||||
HashTable,отсортированный,insert,4,0.2625327630
|
||||
HashTable,отсортированный,find,4,0.0016053140
|
||||
HashTable,отсортированный,delete,4,0.0008098670
|
||||
HashTable,отсортированный,insert,5,0.1936840590
|
||||
HashTable,отсортированный,find,5,0.0019015160
|
||||
HashTable,отсортированный,delete,5,0.0009053780
|
||||
HashTable,отсортированный,insert,среднее,0.2087098910
|
||||
HashTable,отсортированный,find,среднее,0.0018190580
|
||||
HashTable,отсортированный,delete,среднее,0.0008633196
|
||||
BST,отсортированный,insert,1,4.2195800190
|
||||
BST,отсортированный,find,1,0.0389314570
|
||||
BST,отсортированный,delete,1,0.0190308920
|
||||
BST,отсортированный,insert,2,4.1356184250
|
||||
BST,отсортированный,find,2,0.0383339310
|
||||
BST,отсортированный,delete,2,0.0194247740
|
||||
BST,отсортированный,insert,3,4.1204731890
|
||||
BST,отсортированный,find,3,0.0388593320
|
||||
BST,отсортированный,delete,3,0.0215428460
|
||||
BST,отсортированный,insert,4,4.2120902370
|
||||
BST,отсортированный,find,4,0.0378190250
|
||||
BST,отсортированный,delete,4,0.0188528460
|
||||
BST,отсортированный,insert,5,4.1304951260
|
||||
BST,отсортированный,find,5,0.0359927840
|
||||
BST,отсортированный,delete,5,0.0179617110
|
||||
BST,отсортированный,insert,среднее,4.1636513992
|
||||
BST,отсортированный,find,среднее,0.0379873058
|
||||
BST,отсортированный,delete,среднее,0.0193626138
|
||||
|
101
BolonkinNM/Task 1/docs/report.md
Normal file
101
BolonkinNM/Task 1/docs/report.md
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
# Отчёт по заданию 1 — структуры данных
|
||||
|
||||
## Цель работы
|
||||
|
||||
Реализовать три структуры данных с нуля в процедурном стиле:
|
||||
|
||||
- связный список;
|
||||
- хеш-таблицу;
|
||||
- двоичное дерево поиска.
|
||||
|
||||
Также были выполнены измерения времени для операций `insert`, `find`, `delete` и построены графики по результатам эксперимента.
|
||||
|
||||
## Реализованные структуры
|
||||
|
||||
### Связный список
|
||||
|
||||
Узел хранится как словарь:
|
||||
|
||||
```python
|
||||
{"name": "Имя", "phone": "123", "next": None}
|
||||
```
|
||||
|
||||
### Хеш-таблица
|
||||
|
||||
Хранится как список бакетов фиксированной длины, где каждый бакет — голова связного списка или `None`.
|
||||
|
||||
### Двоичное дерево поиска
|
||||
|
||||
Узел хранится как словарь:
|
||||
|
||||
```python
|
||||
{"name": "Имя", "phone": "123", "left": None, "right": None}
|
||||
```
|
||||
|
||||
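Для BST использованы итеративные (нерекурсивные) операции: на отсортированных данных дерево вырождается в цепочку глубиной до `N` узлов, и рекурсивная реализация превысила бы стандартное ограничение глубины рекурсии Python.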
|
||||
|
||||
## Методика эксперимента
|
||||
|
||||
- Количество записей: `N = 10000`
|
||||
- Режимы данных:
|
||||
- случайный порядок;
|
||||
- отсортированный порядок.
|
||||
- Каждое измерение повторялось **5 раз**.
|
||||
- В CSV сохранены:
|
||||
- все отдельные замеры;
|
||||
- среднее время для каждой операции, структуры и режима.
|
||||
|
||||
Операции:
|
||||
|
||||
- вставка всех записей;
|
||||
- поиск 100 существующих и 10 отсутствующих имён;
|
||||
- удаление 50 случайных имён.
|
||||
|
||||
## Графики
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## Средние результаты
|
||||
|
||||
| Режим | Операция | LinkedList | HashTable | BST | Лучший результат |
|
||||
|---|---:|---:|---:|---:|---|
|
||||
| случайный | insert | 3.949566 | 0.208964 | 0.018028 | BST |
|
||||
| случайный | find | 0.033693 | 0.001534 | 0.000190 | BST |
|
||||
| случайный | delete | 0.016076 | 0.000727 | 0.000102 | BST |
|
||||
| отсортированный | insert | 2.998378 | 0.208710 | 4.163651 | HashTable |
|
||||
| отсортированный | find | 0.023923 | 0.001819 | 0.037987 | HashTable |
|
||||
| отсортированный | delete | 0.011258 | 0.000863 | 0.019363 | HashTable |
|
||||
|
||||
## Анализ результатов
|
||||
|
||||
### Влияние порядка входных данных на BST
|
||||
|
||||
На случайных данных BST работает значительно быстрее, чем на отсортированных. Это связано с тем, что при случайной вставке дерево остаётся ближе к сбалансированному состоянию.
|
||||
|
||||
На отсортированных данных дерево вырождается в цепочку, поэтому вставка становится медленной, а поиск и удаление тоже деградируют по времени.
|
||||
|
||||
### Почему хеш-таблица почти не чувствительна к порядку
|
||||
|
||||
Хеш-таблица распределяет элементы по бакетам через хеш-функцию, поэтому сам порядок входа почти не влияет на скорость. Влияние может появляться только из-за коллизий, но в целом поведение остаётся близким к постоянному времени.
|
||||
|
||||
### Почему связный список всегда медленен при поиске
|
||||
|
||||
Поиск в связном списке выполняется последовательным просмотром элементов. Поэтому при большом количестве записей приходится проходить много узлов, и операция остаётся линейной по времени.
|
||||
|
||||
### Как удаление работает в каждой структуре
|
||||
|
||||
- В связном списке нужно сначала найти нужный узел, затем переназначить ссылку.
|
||||
- В хеш-таблице сначала выбирается бакет, затем удаление выполняется внутри короткой цепочки.
|
||||
- В BST удаление зависит от числа потомков: если потомок один или ноль, операция простая; если два — нужно найти преемника.
|
||||
|
||||
## Вывод
|
||||
|
||||
Для частых вставок и особенно частого поиска в реальной задаче чаще всего лучше подходит **хеш-таблица**.
|
||||
|
||||
Если важно получать данные в отсортированном виде, удобнее использовать **BST**.
|
||||
|
||||
**Связный список** подходит для маленьких объёмов данных или очень простых сценариев, но при большом числе записей он проигрывает по скорости поиска.
|
||||
172
BolonkinNM/Task 1/experiments.py
Normal file
172
BolonkinNM/Task 1/experiments.py
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import random
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
from linked_list import ll_insert, ll_find, ll_delete
|
||||
from hash_table import ht_insert, ht_find, ht_delete
|
||||
from bst import bst_insert, bst_find, bst_delete
|
||||
from utils import generate_records, prepare_records_variants
|
||||
|
||||
|
||||
Record = Tuple[str, str]
|
||||
|
||||
|
||||
def make_missing_names(count: int = 10) -> List[str]:
    """Build ``count`` placeholder names ``None_0``, ``None_1``, ...

    Args:
        count: Number of names to produce.

    Returns:
        The names in ascending index order.
    """
    names: List[str] = []
    for i in range(count):
        names.append(f"None_{i}")
    return names
|
||||
|
||||
|
||||
def pick_existing_names(records: List[Record], count: int, seed: int = 42) -> List[str]:
    """Draw ``count`` distinct names from ``records`` reproducibly.

    Args:
        records: ``(name, phone)`` pairs; repeated names are counted once,
            in order of first appearance.
        count: Number of names to draw.
        seed: Seed of the private random generator used for sampling.

    Returns:
        ``count`` names sampled without replacement.

    Raises:
        ValueError: If ``records`` holds fewer than ``count`` unique names.
    """
    first_seen = {}
    for name, _phone in records:
        first_seen.setdefault(name, None)
    unique_names = list(first_seen)

    if count > len(unique_names):
        raise ValueError(f"Not enough unique names: need {count}, got {len(unique_names)}")

    return random.Random(seed).sample(unique_names, count)
|
||||
|
||||
|
||||
def pick_delete_names(records: List[Record], count: int = 50, seed: int = 43) -> List[str]:
    """Draw ``count`` distinct names to delete, reproducibly.

    Args:
        records: ``(name, phone)`` pairs; repeated names are counted once,
            in order of first appearance.
        count: Number of names to draw.
        seed: Seed of the private random generator used for sampling.

    Returns:
        ``count`` names sampled without replacement.

    Raises:
        ValueError: If ``records`` holds fewer than ``count`` unique names.
    """
    unique_names: List[str] = []
    seen = set()
    for name, _phone in records:
        if name not in seen:
            seen.add(name)
            unique_names.append(name)

    if count > len(unique_names):
        raise ValueError(f"Not enough unique names: need {count}, got {len(unique_names)}")

    return random.Random(seed).sample(unique_names, count)
|
||||
|
||||
|
||||
def build_structure(structure_name: str, records: List[Record], buckets_count: int = 2048):
    """Insert every record into a fresh structure of the requested kind.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        records: ``(name, phone)`` pairs, inserted in the given order.
        buckets_count: Number of buckets; only used for ``"hash_table"``.

    Returns:
        The populated structure as returned by the last insert call (the
        initial empty value when ``records`` is empty).

    Raises:
        ValueError: If ``structure_name`` is not one of the known kinds.
    """
    if structure_name == "linked_list":
        insert, structure = ll_insert, None
    elif structure_name == "hash_table":
        insert, structure = ht_insert, [None] * buckets_count
    elif structure_name == "bst":
        insert, structure = bst_insert, None
    else:
        raise ValueError(f"Unknown structure: {structure_name}")

    for name, phone in records:
        structure = insert(structure, name, phone)
    return structure
|
||||
|
||||
|
||||
def do_find(structure_name: str, structure: object, existing_names: List[str], missing_names: List[str]) -> None:
    """Look up every given name in ``structure``, discarding the results.

    The names in ``existing_names`` are looked up first, then those in
    ``missing_names``.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        structure: The structure to query, of the kind named above.
        existing_names: First batch of names to look up.
        missing_names: Second batch of names to look up.

    Raises:
        ValueError: If ``structure_name`` is not one of the known kinds.
    """
    finders = {
        "linked_list": ll_find,
        "hash_table": ht_find,
        "bst": bst_find,
    }
    if structure_name not in finders:
        raise ValueError(f"Unknown structure: {structure_name}")

    find = finders[structure_name]
    for batch in (existing_names, missing_names):
        for name in batch:
            find(structure, name)
|
||||
|
||||
|
||||
def do_delete(structure_name: str, structure: object, delete_names: List[str]):
    """Delete each of ``delete_names`` from ``structure`` in order.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        structure: The structure to modify, of the kind named above.
        delete_names: Names to remove.

    Returns:
        The structure as returned by the last delete call (``structure``
        itself when ``delete_names`` is empty).

    Raises:
        ValueError: If ``structure_name`` is not one of the known kinds.
    """
    deleters = {
        "linked_list": ll_delete,
        "hash_table": ht_delete,
        "bst": bst_delete,
    }
    if structure_name not in deleters:
        raise ValueError(f"Unknown structure: {structure_name}")

    delete = deleters[structure_name]
    for name in delete_names:
        structure = delete(structure, name)
    return structure
|
||||
|
||||
|
||||
def measure_once(structure_name: str, records: List[Record], buckets_count: int = 2048) -> Dict[str, float]:
    """Time one build / lookup / delete cycle for a single structure.

    The workload is fixed: all ``records`` are inserted, then 100 sampled
    names (seed 42) plus 10 placeholder names are looked up, then 50
    sampled names (seed 43) are deleted. Name selection happens before any
    timer starts.

    Args:
        structure_name: ``"linked_list"``, ``"hash_table"`` or ``"bst"``.
        records: ``(name, phone)`` pairs to insert.
        buckets_count: Number of buckets for the hash table.

    Returns:
        Elapsed ``time.perf_counter`` seconds under the keys ``"insert"``,
        ``"find"`` and ``"delete"``.
    """
    existing_names = pick_existing_names(records, 100, seed=42)
    missing_names = make_missing_names(10)
    delete_names = pick_delete_names(records, 50, seed=43)

    timings: Dict[str, float] = {}

    started = time.perf_counter()
    structure = build_structure(structure_name, records, buckets_count=buckets_count)
    timings["insert"] = time.perf_counter() - started

    started = time.perf_counter()
    do_find(structure_name, structure, existing_names, missing_names)
    timings["find"] = time.perf_counter() - started

    started = time.perf_counter()
    do_delete(structure_name, structure, delete_names)
    timings["delete"] = time.perf_counter() - started

    return timings
|
||||
|
||||
|
||||
def run_experiments(n: int = 10000, buckets_count: int = 2048, repeats: int = 5):
    """Benchmark all three structures on both orderings of one data set.

    Args:
        n: Number of records requested from ``generate_records``.
        buckets_count: Number of buckets for the hash table.
        repeats: How many times each structure/ordering pair is measured.

    Returns:
        Rows for a CSV file. The first row is the header; then, for each
        ordering and structure, one row per attempt and operation followed
        by one "среднее" (mean) row per operation. Times are formatted
        with ten decimal places.
    """
    records = generate_records(n, repeat_names=False)
    # The helper returns two orderings of the same records; they are
    # labelled here as shuffled and sorted.
    records_shuffled, records_sorted = prepare_records_variants(records)

    datasets = {
        "случайный": records_shuffled,
        "отсортированный": records_sorted,
    }
    structures = {
        "LinkedList": "linked_list",
        "HashTable": "hash_table",
        "BST": "bst",
    }
    operations = ("insert", "find", "delete")

    rows = [["Структура", "Режим", "Операция", "Замер", "Время (сек)"]]

    for mode_name, dataset_records in datasets.items():
        for human_name, structure_name in structures.items():
            samples = {op_name: [] for op_name in operations}

            for attempt in range(1, repeats + 1):
                result = measure_once(structure_name, dataset_records, buckets_count=buckets_count)
                for op_name in operations:
                    elapsed = result[op_name]
                    samples[op_name].append(elapsed)
                    rows.append([human_name, mode_name, op_name, attempt, f"{elapsed:.10f}"])

            for op_name, values in samples.items():
                avg_time = sum(values) / len(values)
                rows.append([human_name, mode_name, op_name, "среднее", f"{avg_time:.10f}"])

    return rows
|
||||
|
||||
|
||||
def save_results_csv(rows, filename: str = "results.csv"):
    """Write ``rows`` to ``filename`` as UTF-8 CSV, replacing any old file.

    Args:
        rows: Iterable of rows, each an iterable of cell values.
        filename: Path of the file to create.
    """
    # newline="" leaves line endings to the csv writer.
    with open(filename, "w", newline="", encoding="utf-8") as out:
        csv.writer(out).writerows(rows)
|
||||
|
||||
|
||||
def main():
    """Run the benchmark with its default settings and save results.csv."""
    output_name = "results.csv"
    save_results_csv(
        run_experiments(n=10000, buckets_count=2048, repeats=5),
        output_name,
    )
    print("Saved results.csv")


if __name__ == "__main__":
    main()
|
||||
44
BolonkinNM/Task 1/hash_table.py
Normal file
44
BolonkinNM/Task 1/hash_table.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from linked_list import ll_insert, ll_find, ll_delete, ll_list_all
|
||||
|
||||
|
||||
Bucket = Optional[Dict[str, Any]]
|
||||
|
||||
|
||||
def _hash_name(name: str, buckets_count: int) -> int:
|
||||
if buckets_count <= 0:
|
||||
return 0
|
||||
return sum(ord(ch) for ch in name) % buckets_count
|
||||
|
||||
|
||||
def ht_insert(buckets: List[Bucket], name: str, phone: str) -> List[Bucket]:
    """Insert or update ``name`` in the chain of its bucket.

    Args:
        buckets: The table; each slot is the head of a linked list or
            ``None``. Modified in place.
        name: Key to store.
        phone: Value to store.

    Returns:
        ``buckets`` itself. An empty table is returned unchanged and the
        entry is not stored.
    """
    if buckets:
        slot = _hash_name(name, len(buckets))
        buckets[slot] = ll_insert(buckets[slot], name, phone)
    return buckets
|
||||
|
||||
|
||||
def ht_find(buckets: List[Bucket], name: str) -> Optional[str]:
    """Look up ``name`` in the chain of its bucket.

    Args:
        buckets: The table; each slot is the head of a linked list or
            ``None``.
        name: Key to search for.

    Returns:
        Whatever ``ll_find`` returns for that chain, or ``None`` when the
        table is empty.
    """
    if not buckets:
        return None
    chain = buckets[_hash_name(name, len(buckets))]
    return ll_find(chain, name)
|
||||
|
||||
|
||||
def ht_delete(buckets: List[Bucket], name: str) -> List[Bucket]:
    """Remove ``name`` from the chain of its bucket.

    Args:
        buckets: The table; each slot is the head of a linked list or
            ``None``. Modified in place.
        name: Key to remove.

    Returns:
        ``buckets`` itself; an empty table is returned unchanged.
    """
    if buckets:
        slot = _hash_name(name, len(buckets))
        buckets[slot] = ll_delete(buckets[slot], name)
    return buckets
|
||||
|
||||
|
||||
def ht_list_all(buckets: List[Bucket]) -> List[Dict[str, str]]:
    """Collect the entries of every bucket, ordered by name.

    Args:
        buckets: The table; each slot is the head of a linked list or
            ``None``.

    Returns:
        The records returned by ``ll_list_all`` for each bucket, merged
        into one list and sorted by their ``"name"`` value.
    """
    records: List[Dict[str, str]] = [
        record for head in buckets for record in ll_list_all(head)
    ]
    records.sort(key=lambda x: x["name"])
    return records
|
||||
73
BolonkinNM/Task 1/linked_list.py
Normal file
73
BolonkinNM/Task 1/linked_list.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
Node = Dict[str, Any]
|
||||
|
||||
|
||||
def _make_node(name: str, phone: str) -> Node:
|
||||
return {"name": name, "phone": phone, "next": None}
|
||||
|
||||
|
||||
def sort_records(records: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Return a new list of ``records`` ordered by their ``"name"`` value.

    The input is left untouched; records with equal names keep their
    relative order.
    """
    ordered = list(records)
    ordered.sort(key=lambda x: x["name"])
    return ordered
|
||||
|
||||
|
||||
def ll_insert(head: Optional[Node], name: str, phone: str) -> Node:
    """Append ``name`` to the list, or overwrite its phone if already present.

    The whole list is scanned for an existing entry before appending, so
    the list never holds the same name twice.

    Args:
        head: First node of the list, or ``None`` for an empty list.
        name: Key to insert or update.
        phone: Value to store.

    Returns:
        The head of the list: the new node when ``head`` was ``None``,
        otherwise ``head`` itself.
    """
    if head is None:
        return _make_node(name, phone)

    current = head
    # Every iteration either returns or advances to an existing node, so
    # no statement after the loop can be reached.
    while True:
        if current["name"] == name:
            current["phone"] = phone
            return head
        if current["next"] is None:
            # A node is only allocated when one is actually appended.
            current["next"] = _make_node(name, phone)
            return head
        current = current["next"]
|
||||
|
||||
|
||||
def ll_find(head: Optional[Node], name: str) -> Optional[str]:
    """Scan the list from ``head`` for ``name``.

    Args:
        head: First node of the list, or ``None`` for an empty list.
        name: Key to search for.

    Returns:
        The phone of the first node whose name matches, or ``None`` when
        no node matches.
    """
    node = head
    while node is not None and node["name"] != name:
        node = node["next"]
    return None if node is None else node["phone"]
|
||||
|
||||
|
||||
def ll_delete(head: Optional[Node], name: str) -> Optional[Node]:
    """Unlink the first node named ``name`` and return the resulting head.

    Args:
        head: First node of the list, or ``None`` for an empty list.
        name: Key to remove.

    Returns:
        The head after removal: the old second node when the head itself
        was removed, otherwise ``head``. A missing name leaves the list
        unchanged.
    """
    if head is None:
        return None
    if head["name"] == name:
        return head["next"]

    previous = head
    while previous["next"] is not None:
        node = previous["next"]
        if node["name"] == name:
            # Bypass the matching node; only the first match is removed.
            previous["next"] = node["next"]
            break
        previous = node
    return head
|
||||
|
||||
|
||||
def ll_list_all(head: Optional[Node]) -> List[Dict[str, str]]:
    """Return every entry of the list, ordered by ``sort_records``.

    Args:
        head: First node of the list, or ``None`` for an empty list.

    Returns:
        New ``{"name": ..., "phone": ...}`` dicts, one per node, passed
        through ``sort_records``.
    """
    def _walk(node):
        # Yield a copy of each entry while following the ``next`` links.
        while node is not None:
            yield {"name": node["name"], "phone": node["phone"]}
            node = node["next"]

    return sort_records(list(_walk(head)))
|
||||
21
BolonkinNM/Task 1/main.py
Normal file
21
BolonkinNM/Task 1/main.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
from experiments import run_experiments, save_results_csv
|
||||
from plot_results import build_graphs, load_average_results
|
||||
|
||||
|
||||
def main():
    """Run the benchmark, save results.csv and draw the graphs from it."""
    csv_name = "results.csv"
    save_results_csv(
        run_experiments(n=10000, buckets_count=2048, repeats=5),
        csv_name,
    )
    # The graphs are built from the mean rows read back from the CSV file.
    build_graphs(load_average_results(csv_name), output_dir="docs/data")
    print("Done.")


if __name__ == "__main__":
    main()
|
||||
60
BolonkinNM/Task 1/plot_results.py
Normal file
60
BolonkinNM/Task 1/plot_results.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def load_average_results(csv_file: str):
    """Read the mean rows of a results CSV file.

    Only rows whose "Замер" column equals "среднее" are kept.

    Args:
        csv_file: Path of a UTF-8 CSV file with the columns "Структура",
            "Режим", "Операция", "Замер" and "Время (сек)".

    Returns:
        A list of dicts with the keys ``structure``, ``mode``,
        ``operation`` and ``time`` (a float), in file order.
    """
    with open(csv_file, "r", encoding="utf-8") as f:
        return [
            {
                "structure": row["Структура"],
                "mode": row["Режим"],
                "operation": row["Операция"],
                "time": float(row["Время (сек)"]),
            }
            for row in csv.DictReader(f)
            if row["Замер"] == "среднее"
        ]
|
||||
|
||||
|
||||
def build_graphs(results, output_dir: str = "docs/data"):
    """Draw one bar chart per operation and save each as a PNG file.

    Args:
        results: Dicts with the keys ``structure``, ``mode``, ``operation``
            and ``time``.
        output_dir: Directory for ``insert.png``, ``find.png`` and
            ``delete.png``; created if it does not exist.
    """
    output = Path(output_dir)
    output.mkdir(parents=True, exist_ok=True)

    # Bucket the rows by operation, keeping their original order.
    by_operation = {}
    for row in results:
        by_operation.setdefault(row["operation"], []).append(row)

    for operation in ("insert", "find", "delete"):
        labels = []
        values = []
        for r in by_operation.get(operation, []):
            labels.append(f"{r['structure']}\n{r['mode']}")
            values.append(r["time"])

        plt.figure(figsize=(11, 6))
        plt.bar(labels, values)
        plt.title(f"{operation.capitalize()} comparison")
        plt.xlabel("Structure / data order")
        plt.ylabel("Time, seconds")
        plt.xticks(rotation=20)
        plt.tight_layout()

        filename = output / f"{operation}.png"
        plt.savefig(filename, dpi=160)
        # Close the figure so that figures do not pile up between charts.
        plt.close()
        print(f"Saved {filename}")
|
||||
|
||||
|
||||
def main():
    """Draw the graphs from results.csv into the default output directory."""
    build_graphs(load_average_results("results.csv"))


if __name__ == "__main__":
    main()
|
||||
1
BolonkinNM/Task 1/requirements.txt
Normal file
1
BolonkinNM/Task 1/requirements.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
matplotlib>=3.8
numpy
pandas
|
||||
109
BolonkinNM/Task 1/results.csv
Normal file
109
BolonkinNM/Task 1/results.csv
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
Структура,Режим,Операция,Замер,Время (сек)
|
||||
LinkedList,случайный,insert,1,2.4210275000
|
||||
LinkedList,случайный,find,1,0.0214394000
|
||||
LinkedList,случайный,delete,1,0.0108667000
|
||||
LinkedList,случайный,insert,2,2.4208055000
|
||||
LinkedList,случайный,find,2,0.0216110000
|
||||
LinkedList,случайный,delete,2,0.0106216000
|
||||
LinkedList,случайный,insert,3,2.4210881000
|
||||
LinkedList,случайный,find,3,0.0216503000
|
||||
LinkedList,случайный,delete,3,0.0106497000
|
||||
LinkedList,случайный,insert,4,2.4530798000
|
||||
LinkedList,случайный,find,4,0.0222764000
|
||||
LinkedList,случайный,delete,4,0.0108350000
|
||||
LinkedList,случайный,insert,5,2.4567773000
|
||||
LinkedList,случайный,find,5,0.0219400000
|
||||
LinkedList,случайный,delete,5,0.0108697000
|
||||
LinkedList,случайный,insert,среднее,2.4345556400
|
||||
LinkedList,случайный,find,среднее,0.0217834200
|
||||
LinkedList,случайный,delete,среднее,0.0107685400
|
||||
HashTable,случайный,insert,1,0.1621210000
|
||||
HashTable,случайный,find,1,0.0011201000
|
||||
HashTable,случайный,delete,1,0.0005854000
|
||||
HashTable,случайный,insert,2,0.1732676000
|
||||
HashTable,случайный,find,2,0.0011247000
|
||||
HashTable,случайный,delete,2,0.0005818000
|
||||
HashTable,случайный,insert,3,0.1638609000
|
||||
HashTable,случайный,find,3,0.0011355000
|
||||
HashTable,случайный,delete,3,0.0005814000
|
||||
HashTable,случайный,insert,4,0.1642886000
|
||||
HashTable,случайный,find,4,0.0011268000
|
||||
HashTable,случайный,delete,4,0.0005785000
|
||||
HashTable,случайный,insert,5,0.1640916000
|
||||
HashTable,случайный,find,5,0.0011287000
|
||||
HashTable,случайный,delete,5,0.0005787000
|
||||
HashTable,случайный,insert,среднее,0.1655259400
|
||||
HashTable,случайный,find,среднее,0.0011271600
|
||||
HashTable,случайный,delete,среднее,0.0005811600
|
||||
BST,случайный,insert,1,0.0153754000
|
||||
BST,случайный,find,1,0.0001491000
|
||||
BST,случайный,delete,1,0.0000786000
|
||||
BST,случайный,insert,2,0.0155821000
|
||||
BST,случайный,find,2,0.0001453000
|
||||
BST,случайный,delete,2,0.0000724000
|
||||
BST,случайный,insert,3,0.0151360000
|
||||
BST,случайный,find,3,0.0001437000
|
||||
BST,случайный,delete,3,0.0000741000
|
||||
BST,случайный,insert,4,0.0153703000
|
||||
BST,случайный,find,4,0.0001425000
|
||||
BST,случайный,delete,4,0.0000715000
|
||||
BST,случайный,insert,5,0.0153753000
|
||||
BST,случайный,find,5,0.0001455000
|
||||
BST,случайный,delete,5,0.0000723000
|
||||
BST,случайный,insert,среднее,0.0153678200
|
||||
BST,случайный,find,среднее,0.0001452200
|
||||
BST,случайный,delete,среднее,0.0000737800
|
||||
LinkedList,отсортированный,insert,1,2.5884851000
|
||||
LinkedList,отсортированный,find,1,0.0227221000
|
||||
LinkedList,отсортированный,delete,1,0.0111309000
|
||||
LinkedList,отсортированный,insert,2,2.5095731000
|
||||
LinkedList,отсортированный,find,2,0.0217208000
|
||||
LinkedList,отсортированный,delete,2,0.0107773000
|
||||
LinkedList,отсортированный,insert,3,2.5642096000
|
||||
LinkedList,отсортированный,find,3,0.0228242000
|
||||
LinkedList,отсортированный,delete,3,0.0115945000
|
||||
LinkedList,отсортированный,insert,4,2.7163021000
|
||||
LinkedList,отсортированный,find,4,0.0431456000
|
||||
LinkedList,отсортированный,delete,4,0.0136020000
|
||||
LinkedList,отсортированный,insert,5,2.6891794000
|
||||
LinkedList,отсортированный,find,5,0.0217679000
|
||||
LinkedList,отсортированный,delete,5,0.0106384000
|
||||
LinkedList,отсортированный,insert,среднее,2.6135498600
|
||||
LinkedList,отсортированный,find,среднее,0.0264361200
|
||||
LinkedList,отсортированный,delete,среднее,0.0115486200
|
||||
HashTable,отсортированный,insert,1,0.1524640000
|
||||
HashTable,отсортированный,find,1,0.0014973000
|
||||
HashTable,отсортированный,delete,1,0.0006991000
|
||||
HashTable,отсортированный,insert,2,0.1537592000
|
||||
HashTable,отсортированный,find,2,0.0012225000
|
||||
HashTable,отсортированный,delete,2,0.0006561000
|
||||
HashTable,отсортированный,insert,3,0.1555816000
|
||||
HashTable,отсортированный,find,3,0.0012080000
|
||||
HashTable,отсортированный,delete,3,0.0006472000
|
||||
HashTable,отсортированный,insert,4,0.1546417000
|
||||
HashTable,отсортированный,find,4,0.0015017000
|
||||
HashTable,отсортированный,delete,4,0.0007512000
|
||||
HashTable,отсортированный,insert,5,0.1531659000
|
||||
HashTable,отсортированный,find,5,0.0012219000
|
||||
HashTable,отсортированный,delete,5,0.0006493000
|
||||
HashTable,отсортированный,insert,среднее,0.1539224800
|
||||
HashTable,отсортированный,find,среднее,0.0013302800
|
||||
HashTable,отсортированный,delete,среднее,0.0006805800
|
||||
BST,отсортированный,insert,1,4.5025059000
|
||||
BST,отсортированный,find,1,0.0387267000
|
||||
BST,отсортированный,delete,1,0.0162161000
|
||||
BST,отсортированный,insert,2,4.6704081000
|
||||
BST,отсортированный,find,2,0.0435012000
|
||||
BST,отсортированный,delete,2,0.0203211000
|
||||
BST,отсортированный,insert,3,6.2192950000
|
||||
BST,отсортированный,find,3,0.0578654000
|
||||
BST,отсортированный,delete,3,0.0327529000
|
||||
BST,отсортированный,insert,4,4.7844525000
|
||||
BST,отсортированный,find,4,0.0380228000
|
||||
BST,отсортированный,delete,4,0.0159740000
|
||||
BST,отсортированный,insert,5,4.4861403000
|
||||
BST,отсортированный,find,5,0.0382484000
|
||||
BST,отсортированный,delete,5,0.0159402000
|
||||
BST,отсортированный,insert,среднее,4.9325603600
|
||||
BST,отсортированный,find,среднее,0.0432729000
|
||||
BST,отсортированный,delete,среднее,0.0202408600
|
||||
|
35
BolonkinNM/Task 1/utils.py
Normal file
35
BolonkinNM/Task 1/utils.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import random
|
||||
from typing import List, Tuple
|
||||
|
||||
|
||||
# A record is a (name, phone) pair of strings, as built by generate_records.
Record = Tuple[str, str]
|
||||
|
||||
|
||||
def generate_records(n: int, repeat_names: bool = False, seed: int = 42) -> List[Record]:
    """Build ``n`` synthetic ``(name, phone)`` records.

    With ``repeat_names`` false (the default) the records are fully
    deterministic: record ``i`` is ``("User_<i zero-padded to 5 digits>",
    str(1000000000 + i))``, so every name is distinct and ``seed`` has no
    influence on the output.

    With ``repeat_names`` true each name is drawn from a fixed pool of ten
    names and each phone is a random ten-digit number, both taken from a
    generator seeded with ``seed``; the same seed yields the same list.
    """
    rng = random.Random(seed)

    if not repeat_names:
        return [
            (f"User_{idx:05d}", f"{1000000000 + idx}")
            for idx in range(n)
        ]

    candidates = (
        "User_Alex", "User_Bob", "User_Cat", "User_Dan", "User_Eva",
        "User_Fox", "User_Geo", "User_Hen", "User_Ira", "User_Leo",
    )
    # Tuple items are evaluated left to right, so the name is drawn before
    # the phone number for every record.
    return [
        (rng.choice(candidates), f"{rng.randint(1000000000, 9999999999)}")
        for _ in range(n)
    ]
|
||||
|
||||
|
||||
def prepare_records_variants(records: List[Record], seed: int = 42):
    """Return a shuffled copy and a name-sorted copy of ``records``.

    The input is left untouched. The shuffled copy is produced with a
    generator seeded by ``seed``, so it is reproducible. The sorted copy is
    ordered by the first element of each record (the name); records with
    equal names keep their relative input order.

    Returns:
        A ``(shuffled, sorted)`` tuple of two new lists.
    """
    in_name_order = sorted(records, key=lambda rec: rec[0])

    in_random_order = list(records)
    random.Random(seed).shuffle(in_random_order)

    return in_random_order, in_name_order
|
||||
BIN
BolonkinNM/Task 1/график.png
Normal file
BIN
BolonkinNM/Task 1/график.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 193 KiB |
BIN
BolonkinNM/Task 1/отчет.docx
Normal file
BIN
BolonkinNM/Task 1/отчет.docx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user