2024-10-22 18:28:36 +00:00
|
|
|
|
import pandas as pd
|
|
|
|
|
import ast
|
|
|
|
|
import numpy as np
|
2024-10-23 19:12:41 +00:00
|
|
|
|
|
2024-10-22 18:28:36 +00:00
|
|
|
|
# Load the raw dataset from "raw_dataset.csv" (path is relative to the
# current working directory) into the module-level frame used below.
data = pd.read_csv(filepath_or_buffer="raw_dataset.csv")
|
|
|
|
|
# Columns selected from `data` when the cleaned CSV is written, in output order.
columns = [
    "experience",
    "employment",
    "salary_min",
    "salary_max",
    "area",
]
|
|
|
|
|
|
|
|
|
|
# Column subset that is not referenced anywhere in the visible part of this
# script.
# NOTE(review): the name suggests it is meant for a correlation step, and the
# "area_<city>" entries (Krasnodar, Moscow, Saint Petersburg) look like
# one-hot encoded "area" columns - confirm where those columns are created.
columns_for_cor = [
    "experience",
    "employment",
    "salary_min",
    "salary_max",
    "area_Краснодар",
    "area_Москва",
    "area_Санкт-Петербург",
]
|
|
|
|
|
|
|
|
|
|
# Integer codes for the "experience" labels, ordered from least (0) to most
# (3) experience.
# NOTE: despite its name, this mapping encodes experience levels, not work
# schedules; the name is kept because other code refers to it.
schedule = {
    "Нет опыта": 0,  # no experience
    "От 1 года до 3 лет": 1,  # from 1 to 3 years
    "От 3 до 6 лет": 2,  # from 3 to 6 years
    "Более 6 лет": 3,  # more than 6 years
}
|
|
|
|
|
|
|
|
|
|
# Integer codes for the "employment" labels.
# The identifier's spelling is kept as-is because other code refers to it.
employements = {
    "Полная занятость": 0,  # full-time
    "Частичная занятость": 1,  # part-time
    "Проектная работа": 2,  # project work
    "Волонтерство": 3,  # volunteering
    "Стажировка": 4,  # internship
}
|
|
|
|
|
|
|
|
|
|
# Each cell in these columns is the text of a Python literal that is indexed
# with "name" after parsing; replace the cell with that "name" value.
# ast.literal_eval only evaluates literals, so no code from the CSV is executed.
# A cell that is not a valid literal string (e.g. a missing value) raises here.
for _name_column in ("area", "experience", "employment"):
    data[_name_column] = data[_name_column].map(
        lambda cell: ast.literal_eval(cell)["name"]
    )
|
|
|
|
|
|
|
|
|
|
# Parse every "salary" cell exactly once; the parsed value is indexed with
# "from" and "to" below. (Previously the same cell was re-parsed up to three
# times for each of the two derived columns.)
_parsed_salary = data["salary"].map(ast.literal_eval)

# Lower bound: use "from", falling back to "to" when "from" is None.
data["salary_min"] = _parsed_salary.map(
    lambda salary: salary["from"] if salary["from"] is not None else salary["to"]
)

# Upper bound: use "to", falling back to "from" when "to" is None.
# If both bounds are None the resulting value is None for both columns.
data["salary_max"] = _parsed_salary.map(
    lambda salary: salary["to"] if salary["to"] is not None else salary["from"]
)
|
|
|
|
|
|
|
|
|
|
# Replace the text labels with their integer codes. The dicts' __getitem__ is
# used (rather than passing the dict itself to map) so that a label missing
# from the mapping raises KeyError instead of silently becoming NaN.
data["experience"] = data["experience"].map(schedule.__getitem__)
data["employment"] = data["employment"].map(employements.__getitem__)
|
|
|
|
|
|
2024-10-23 19:12:41 +00:00
|
|
|
|
# Write only the columns listed in `columns` to "cleared.csv" (relative to the
# current working directory); the frame's index is written as well, since
# to_csv is called with its defaults.
data[columns].to_csv(path_or_buf="cleared.csv")
|
2024-10-22 18:28:36 +00:00
|
|
|
|
|