Furst/Untitled.ipynb

810 lines
24 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 110,
"id": "f939b9bd-d446-4af6-8035-5f31787811da",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Rank': 'Рейтинг игры в списке', 'Image Link': 'Ссылка на изображение игры', 'Title': 'Название игры', 'Current': 'Текущее количество игроков', '24h Peak': 'Максимальное количество игроков за последние 24 часа', 'All-Time Peak': 'Максимальное количество игроков за все время', 'Genre': 'Жанр игры'}\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rank</th>\n",
" <th>Title</th>\n",
" <th>Current</th>\n",
" <th>24h Peak</th>\n",
" <th>All-Time Peak</th>\n",
" <th>Genre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Counter-Strike 2</td>\n",
" <td>1015721</td>\n",
" <td>1276702</td>\n",
" <td>1818773</td>\n",
" <td>First-person Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Dota 2</td>\n",
" <td>702487</td>\n",
" <td>805624</td>\n",
" <td>1295114</td>\n",
" <td>MOBA (Multiplayer Online Battle Arena)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Banana</td>\n",
" <td>409758</td>\n",
" <td>428540</td>\n",
" <td>917272</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>PUBG: BATTLEGROUNDS</td>\n",
" <td>371000</td>\n",
" <td>688475</td>\n",
" <td>3257248</td>\n",
" <td>Battle Royale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>ELDEN RING</td>\n",
" <td>294359</td>\n",
" <td>319707</td>\n",
" <td>953426</td>\n",
" <td>Action RPG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>96</td>\n",
" <td>Soundpad</td>\n",
" <td>16028</td>\n",
" <td>17064</td>\n",
" <td>21920</td>\n",
" <td>Simulation / Tycoon</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>97</td>\n",
" <td>Supermarket Simulator</td>\n",
" <td>15817</td>\n",
" <td>15875</td>\n",
" <td>51363</td>\n",
" <td>Vampire / Open world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>98</td>\n",
" <td>V Rising</td>\n",
" <td>15803</td>\n",
" <td>16275</td>\n",
" <td>150645</td>\n",
" <td>Tactical Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>99</td>\n",
" <td>Squad</td>\n",
" <td>15729</td>\n",
" <td>19288</td>\n",
" <td>35151</td>\n",
" <td>Grand Strategy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>100</td>\n",
" <td>Victoria 3</td>\n",
" <td>15609</td>\n",
" <td>17598</td>\n",
" <td>70100</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Rank Title Current 24h Peak All-Time Peak \\\n",
"0 1 Counter-Strike 2 1015721 1276702 1818773 \n",
"1 2 Dota 2 702487 805624 1295114 \n",
"2 3 Banana 409758 428540 917272 \n",
"3 4 PUBG: BATTLEGROUNDS 371000 688475 3257248 \n",
"4 5 ELDEN RING 294359 319707 953426 \n",
".. ... ... ... ... ... \n",
"95 96 Soundpad 16028 17064 21920 \n",
"96 97 Supermarket Simulator 15817 15875 51363 \n",
"97 98 V Rising 15803 16275 150645 \n",
"98 99 Squad 15729 19288 35151 \n",
"99 100 Victoria 3 15609 17598 70100 \n",
"\n",
" Genre \n",
"0 First-person Shooter \n",
"1 MOBA (Multiplayer Online Battle Arena) \n",
"2 Unknown \n",
"3 Battle Royale \n",
"4 Action RPG \n",
".. ... \n",
"95 Simulation / Tycoon \n",
"96 Vampire / Open world \n",
"97 Tactical Shooter \n",
"98 Grand Strategy \n",
"99 Unknown \n",
"\n",
"[100 rows x 6 columns]"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('sample-dataset-a-b-modules.csv')\n",
"data_description = {\n",
" 'Rank': 'Рейтинг игры в списке',\n",
" 'Image Link': 'Ссылка на изображение игры',\n",
" 'Title': 'Название игры',\n",
" 'Current': 'Текущее количество игроков',\n",
" '24h Peak': 'Максимальное количество игроков за последние 24 часа',\n",
" 'All-Time Peak': 'Максимальное количество игроков за все время',\n",
" 'Genre': 'Жанр игры'\n",
"}\n",
"print(data_description)\n",
"df\n",
"\n",
"#Удаление столбцов\n",
"df = df.drop(columns = ['Image Link'])\n",
"\n",
"#ДЛЯ СТОЛБЦА \"All-Time Peak\"\n",
"# 1. Заменяем запятые на точки (для десятичных разделителей)\n",
"df['All-Time Peak'] = df['All-Time Peak'].str.replace(',', '', regex=False) # Убираем запятые как разделители тысяч\n",
"\n",
"# 2. Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"df['All-Time Peak'] = pd.to_numeric(df['All-Time Peak'], errors='coerce')\n",
"\n",
"\n",
"#ДЛЯ СТОЛБЦА \"24h Peak\"\n",
"# 1. Заменяем запятые на точки (для десятичных разделителей)\n",
"df['24h Peak'] = df['24h Peak'].str.replace(',', '', regex=False) # Убираем запятые как разделители тысяч\n",
"\n",
"# 2. Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"df['24h Peak'] = pd.to_numeric(df['24h Peak'], errors='coerce')\n",
"\n",
"\n",
"#ДЛЯ СТОЛБЦА \"Current\"\n",
"# 1. Заменяем запятые на точки (для десятичных разделителей)\n",
"df['Current'] = df['Current'].str.replace(',', '', regex=False) # Убираем запятые как разделители тысяч\n",
"\n",
"# 2. Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"df['Current'] = pd.to_numeric(df['Current'], errors='coerce')\n",
"#df['All-Time Peak'] = df['All-Time Peak'].fillna(0).astype(int)\n",
"\n",
"#ДЛЯ СТОЛБЦА \"Rank\"\n",
"df['Rank'] = df['Rank'].fillna(0).astype(int)\n",
"\n",
"df.to_csv('1 sample-dataset-a-b-modules.csv')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "77314f8e-ad83-449b-b4f9-1a3611aef75d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество дубликатов в данных: 0\n",
"Количество дубликатов после удаления: 0\n"
]
}
],
"source": [
"# Проверка на дубликаты\n",
"duplicates = df.duplicated().sum()\n",
"print(f\"Количество дубликатов в данных: {duplicates}\")\n",
"\n",
"# Удаление дубликатов\n",
"df_cleaned = df.drop_duplicates()\n",
"\n",
"# Проверяем снова количество дубликатов после удаления\n",
"duplicates_after = df_cleaned.duplicated().sum()\n",
"print(f\"Количество дубликатов после удаления: {duplicates_after}\")"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "d1ec44e1-c01f-4d09-8fa9-01bbdd5bce12",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rank</th>\n",
" <th>Title</th>\n",
" <th>Current</th>\n",
" <th>24h Peak</th>\n",
" <th>All-Time Peak</th>\n",
" <th>Genre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Counter-Strike 2</td>\n",
" <td>1015721</td>\n",
" <td>1276702</td>\n",
" <td>1818773</td>\n",
" <td>First-person Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Dota 2</td>\n",
" <td>702487</td>\n",
" <td>805624</td>\n",
" <td>1295114</td>\n",
" <td>MOBA (Multiplayer Online Battle Arena)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Banana</td>\n",
" <td>409758</td>\n",
" <td>428540</td>\n",
" <td>917272</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>PUBG: BATTLEGROUNDS</td>\n",
" <td>371000</td>\n",
" <td>688475</td>\n",
" <td>3257248</td>\n",
" <td>Battle Royale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>ELDEN RING</td>\n",
" <td>294359</td>\n",
" <td>319707</td>\n",
" <td>953426</td>\n",
" <td>Action RPG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>96</td>\n",
" <td>Soundpad</td>\n",
" <td>16028</td>\n",
" <td>17064</td>\n",
" <td>21920</td>\n",
" <td>Simulation / Tycoon</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>97</td>\n",
" <td>Supermarket Simulator</td>\n",
" <td>15817</td>\n",
" <td>15875</td>\n",
" <td>51363</td>\n",
" <td>Vampire / Open world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>98</td>\n",
" <td>V Rising</td>\n",
" <td>15803</td>\n",
" <td>16275</td>\n",
" <td>150645</td>\n",
" <td>Tactical Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>99</td>\n",
" <td>Squad</td>\n",
" <td>15729</td>\n",
" <td>19288</td>\n",
" <td>35151</td>\n",
" <td>Grand Strategy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>100</td>\n",
" <td>Victoria 3</td>\n",
" <td>15609</td>\n",
" <td>17598</td>\n",
" <td>70100</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Rank Title Current 24h Peak All-Time Peak \\\n",
"0 1 Counter-Strike 2 1015721 1276702 1818773 \n",
"1 2 Dota 2 702487 805624 1295114 \n",
"2 3 Banana 409758 428540 917272 \n",
"3 4 PUBG: BATTLEGROUNDS 371000 688475 3257248 \n",
"4 5 ELDEN RING 294359 319707 953426 \n",
".. ... ... ... ... ... \n",
"95 96 Soundpad 16028 17064 21920 \n",
"96 97 Supermarket Simulator 15817 15875 51363 \n",
"97 98 V Rising 15803 16275 150645 \n",
"98 99 Squad 15729 19288 35151 \n",
"99 100 Victoria 3 15609 17598 70100 \n",
"\n",
" Genre \n",
"0 First-person Shooter \n",
"1 MOBA (Multiplayer Online Battle Arena) \n",
"2 Unknown \n",
"3 Battle Royale \n",
"4 Action RPG \n",
".. ... \n",
"95 Simulation / Tycoon \n",
"96 Vampire / Open world \n",
"97 Tactical Shooter \n",
"98 Grand Strategy \n",
"99 Unknown \n",
"\n",
"[100 rows x 6 columns]"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b0bdd60-6e93-4a14-99f0-96db1631cece",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "83d7bf65-b5c4-4fc7-a7d6-a58c6f160a3a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "72b289dc-df51-4f2e-bc10-641d23c3a327",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "396ef48c-4493-4375-938a-b549803a7d20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2590d246-0fa2-4e2e-9409-9aa319564158",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5fd17df-0047-4af7-95be-fc160d7068d7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "297a4789-4b65-442e-bcf3-ef95e4ca8477",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f3bb39a-b716-41d5-820c-96b884a4a90c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b5b15e2-09e7-440c-983c-343fd3191edc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1d78eac-7c7d-454e-b067-cc8d24fdfb8d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6083b2df-0edd-47a8-9ff6-e914f460af2e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "46652822-e590-4df7-80cd-d261f00a04ce",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "69ba20a3-b5ee-4ce4-a06c-751e62394f6a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "495817d8-4938-4232-8514-35211b5a769a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c9907de-2322-4917-bb3a-a2e133c8b689",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c356d01e-c92d-40b4-8d6b-0ebf379b2355",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "150c726c-b32f-4b0c-8035-985795abd168",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "46bbe37e-1295-4ff9-a8e2-d69ef0ef956b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4c0d6fb-31d3-4bb9-9afe-50ec280652b2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd199f05-2ed0-4b21-ad63-6b6607b68e87",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "032ed932-d424-4bf4-b4b4-a8bd2b06debd",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "42771134-5c3a-413e-af0f-203a1bf775bc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ecc7141-4b0e-44de-9dbd-98b79faeed9d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3dbeb49a-f727-49db-8843-ed11d5fb0a94",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "96ff95f6-7d95-48ae-a89f-038e9e2af234",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c06ed0fa-8de0-4ad9-8e28-2757d6181355",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cafd05ea-f766-4835-bad4-cd0e3e5f3e57",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba5e0481-00eb-45e0-b9bd-313ea85fea72",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d88fadd-304c-49c2-8229-7d6b226ea75c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9438dc61-19ae-4048-9de0-c1a52d19a767",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cc41d04-4166-4185-ab4b-95d47132ead9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "352a3263-c705-4070-bf2f-87c625f66017",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 24,
"id": "145f13d9-0abd-4696-8a99-8d4e0eec119b",
"metadata": {},
"outputs": [],
"source": [
"#КОМАНДЫ\n",
"\n",
"#df['All-Time Peak'] = pd.to_numeric(df['All-Time Peak'], errors='coerce').astype('Int64')\n",
"\n",
"\n",
"\n",
"## Заменяем запятые на точки\n",
"#df['All-Time Peak'] = df['All-Time Peak'].str.replace(',', '.', regex=False)\n",
"\n",
"# Преобразуем тип данных в вещественные числа\n",
"##df['All-Time Peak'] = df['All-Time Peak'].astype(float)\n",
"\n",
"## Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"#df['All-Time Peak'] = pd.to_numeric(df['All-Time Peak'], errors='coerce')\n",
"\n",
"## Проверим результат\n",
"#print(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3aa306a-76a4-4b16-968b-1b2a9f30fa10",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d86e9cb-e8a5-4e20-8c5a-bc6486c17219",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a70383b-7dfc-473c-aa1c-67bdd5f079ee",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1206df8e-784d-4d26-a647-d560d352ac69",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4f0eb2b-ef3b-4f57-9196-3fc4145666a4",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3be3dda-d8d4-4246-8bab-5630c9f890d7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6282011-c71e-46d2-8f3f-4e42cf2bc6bf",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b700b0f-16cf-49e9-82f4-3c817e1da1f0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:base] *",
"language": "python",
"name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}