init
This commit is contained in:
commit
0c6ad3d920
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
7
.idea/misc.xml
Normal file
7
.idea/misc.xml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.12 (mlmodule1)" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (mlmodule1)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
10
.idea/mlmodule1.iml
Normal file
10
.idea/mlmodule1.iml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/mlmodule1.iml" filepath="$PROJECT_DIR$/.idea/mlmodule1.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
70
clearing_data.py
Normal file
70
clearing_data.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import ast
|
||||||
|
import numpy as np
|
||||||
|
import seaborn as sns
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
# Columns of interest after cleaning (kept for reference; not used below).
columns = [
    "experience",
    "employment",
    "salary_min",
    "salary_max",
]

# Columns fed into the correlation matrix. The "area_*" entries are the
# one-hot columns that pd.get_dummies() creates from the "area" column, so
# these cities must survive the row-count filter in clean_vacancies().
columns_for_cor = [
    "experience",
    "employment",
    "salary_min",
    "salary_max",
    "area_Краснодар",
    "area_Москва",
    "area_Санкт-Петербург",
]

# Ordinal encoding of the experience labels (despite the name, this mapping
# is applied to the "experience" column, not to a work schedule).
schedule = {
    "Нет опыта": 0,
    "От 1 года до 3 лет": 1,
    "От 3 до 6 лет": 2,
    "Более 6 лет": 3,
}

# Integer encoding of the employment-type labels.
employements = {
    "Полная занятость": 0,
    "Частичная занятость": 1,
    "Проектная работа": 2,
    "Волонтерство": 3,
    "Стажировка": 4,
}


def _name_of(raw):
    """Return the "name" entry of a dict stored as its Python-literal text."""
    return ast.literal_eval(raw)["name"]


def _salary_bounds(raw):
    """Return ``(salary_min, salary_max)`` for one serialized salary dict.

    The cell is parsed exactly once. When one of the "from"/"to" bounds is
    None, the other bound is used for both ends of the range.
    """
    salary = ast.literal_eval(raw)
    lower, upper = salary["from"], salary["to"]
    return (
        upper if lower is None else lower,
        lower if upper is None else upper,
    )


def clean_vacancies(raw):
    """Turn the raw vacancies table into a numeric, one-hot encoded frame.

    Args:
        raw: DataFrame whose "area", "experience", "employment" and "salary"
            columns hold dicts serialized as Python-literal strings.

    Returns:
        A new DataFrame (the input is not modified) in which
        * "experience" and "employment" are integer codes,
        * "salary_min" / "salary_max" are derived from "salary",
        * only areas with more than 30 rows are kept,
        * "area" is replaced by one-hot "area_<name>" columns.

    Raises:
        KeyError: if an experience or employment label is not present in
            ``schedule`` / ``employements``.
    """
    frame = raw.copy()

    for column in ("area", "experience", "employment"):
        frame[column] = frame[column].map(_name_of)

    # Parse every salary cell once, then split the pair into two columns.
    bounds = frame["salary"].map(_salary_bounds)
    frame["salary_min"] = bounds.map(lambda pair: pair[0])
    frame["salary_max"] = bounds.map(lambda pair: pair[1])

    # Explicit lookups so that an unknown label fails loudly with KeyError.
    frame["experience"] = frame["experience"].map(lambda label: schedule[label])
    frame["employment"] = frame["employment"].map(lambda label: employements[label])

    # Drop areas that have 30 rows or fewer.
    frame = frame.groupby("area").filter(lambda group: len(group) > 30)

    return pd.get_dummies(frame, columns=["area"])


def main():
    """Load raw_dataset.csv, clean it, then plot and print the correlations."""
    data = clean_vacancies(pd.read_csv("raw_dataset.csv"))
    data_cor = data[columns_for_cor].corr()

    sns.heatmap(data_cor)
    plt.show()
    print(data_cor)


if __name__ == "__main__":
    main()
|
43
extract.py
Normal file
43
extract.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
# Query parameters sent with every request; "page" is overridden per request.
params = {
    "page": 0,
    "per_page": 100,
    "text": "bi",
    "area": "113",
    "currency": "RUR",
    "date_from": "2024-07-22",
    "only_with_salary": True,
}

base_url = "https://api.hh.ru/vacancies"

# Columns of the API response that are written to the raw dataset.
columns = [
    "area",
    "salary",
    "schedule",
    "url",
    "experience",
    "employment",
]

# Seconds to wait for the server before giving up on a request, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30


def fetch_page(page):
    """Request one result page from ``base_url`` and return the decoded JSON.

    Args:
        page: zero-based page number to request.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (instead of failing later with an opaque KeyError on "items").
    """
    # Build a fresh query dict so the shared ``params`` is never mutated.
    query = {**params, "page": page}
    response = requests.get(base_url, params=query, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def collect_vacancies(fetch=fetch_page):
    """Gather the "items" of every result page into one list.

    Args:
        fetch: callable taking a page number and returning a mapping with
            "items" (list) and "pages" (total page count). Defaults to
            ``fetch_page``; a different callable can be supplied to read
            pages from another source.

    Returns:
        A list with the items of page 0 followed by those of pages
        1 .. pages-1, where ``pages`` is taken from the first response.
    """
    first = fetch(0)
    vacancies = list(first["items"])
    for page in range(1, first["pages"]):
        vacancies.extend(fetch(page)["items"])
    return vacancies


def main():
    """Download all matching vacancies and store them in raw_dataset.csv."""
    data = pd.DataFrame.from_dict(collect_vacancies())

    data.info()

    data = data[columns]

    data.to_csv("raw_dataset.csv")


if __name__ == "__main__":
    main()
|
1238
raw_dataset.csv
Normal file
1238
raw_dataset.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user