ml_module1/extract.py

43 lines
709 B
Python
Raw Normal View History

2024-10-22 18:28:36 +00:00
import requests
import pandas as pd
params = {
"page": 0,
"per_page": 100,
"text": "bi",
"area": "113",
"currency": "RUR",
"date_from": "2024-07-22",
"only_with_salary": True
}
base_url = "https://api.hh.ru/vacancies"
result = requests.get(base_url, params).json()
vacancies = []
vacancies.extend(result["items"])
for i in range(1, result["pages"]):
params["page"] += 1
result = requests.get(base_url, params).json()
vacancies.extend(result["items"])
data = pd.DataFrame.from_dict(vacancies)
data.info()
columns = [
"area",
"salary",
"schedule",
"url",
"experience",
"employment"
]
data = data[columns]
data.to_csv("raw_dataset.csv")