40 lines
939 B
Python
40 lines
939 B
Python
"""Exploratory analysis of the salary data stored in ``cleared.csv``.

The script runs, in order:

1. a Shapiro-Wilk normality test on ``salary_min`` and ``salary_max``;
2. KDE and box plots of the two salary columns;
3. a filter that keeps only areas with more than ``MIN_ROWS_PER_AREA`` rows;
4. a correlation heatmap over the columns listed in ``columns_for_cor``
   after one-hot encoding ``area``;
5. a per-area mean of the salary columns.
"""
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import shapiro

# An area must have strictly more rows than this to be kept in the analysis.
MIN_ROWS_PER_AREA = 30

data = pd.read_csv("cleared.csv")

# Columns that go into the correlation heatmap.  The ``area_*`` entries are
# the dummy columns that ``pd.get_dummies`` derives from the lower-cased
# ``area`` values.
columns_for_cor = [
    "experience",
    "employment",
    "salary_min",
    "salary_max",
    "area_краснодар",
    "area_москва",
    "area_санкт-петербург",
]

# Lower-case the area names so that spelling variants collapse into a single
# category.  Converting to the nullable "string" dtype first keeps missing
# values missing; ``str(area).lower()`` would turn them into the literal
# text "nan", which would then be treated as a real area.
data["area"] = data["area"].astype("string").str.lower()

# Shapiro-Wilk normality test.  Missing values are dropped because they would
# otherwise make the statistic NaN; the p-values are printed so the result of
# the test is actually visible.
_, p_min = shapiro(data["salary_min"].dropna())
_, p_max = shapiro(data["salary_max"].dropna())
print(f"Shapiro-Wilk p-value for salary_min: {p_min:.4g}")
print(f"Shapiro-Wilk p-value for salary_max: {p_max:.4g}")

data.info()

salary_columns = ["salary_max", "salary_min"]

# Every plot gets its own figure; without this all of them are drawn on the
# same implicit Axes and end up on top of each other.
plt.figure()
sns.kdeplot(data=data[salary_columns])

plt.figure()
sns.boxplot(data=data[salary_columns])

# Keep only the areas that have enough rows.
data = data.groupby("area").filter(
    lambda group: len(group) > MIN_ROWS_PER_AREA
)

data_dum = pd.get_dummies(data, columns=["area"])
print(data_dum.columns)
data_dum.info()

# A hard-coded area may have been removed by the filter above, in which case
# its dummy column does not exist.  Use the columns that are present and
# report the missing ones instead of failing with a KeyError.
missing_columns = [
    name for name in columns_for_cor if name not in data_dum.columns
]
if missing_columns:
    print(f"Columns missing from the correlation matrix: {missing_columns}")
present_columns = [
    name for name in columns_for_cor if name in data_dum.columns
]

plt.figure()
sns.heatmap(data_dum[present_columns].corr())

print(data[["salary_min", "salary_max", "area"]].groupby("area").mean())

# ``DataFrame.info`` prints by itself and returns None, so it must not be
# wrapped in ``print``.
data.info()

# ``plt.show`` blocks until the windows are closed, so it is called last to
# let all text output appear first.
plt.show()