Furst/Untitled.ipynb

810 lines
24 KiB
Plaintext
Raw Normal View History

2025-02-21 16:53:58 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 110,
"id": "f939b9bd-d446-4af6-8035-5f31787811da",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Rank': 'Рейтинг игры в списке', 'Image Link': 'Ссылка на изображение игры', 'Title': 'Название игры', 'Current': 'Текущее количество игроков', '24h Peak': 'Максимальное количество игроков за последние 24 часа', 'All-Time Peak': 'Максимальное количество игроков за все время', 'Genre': 'Жанр игры'}\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rank</th>\n",
" <th>Title</th>\n",
" <th>Current</th>\n",
" <th>24h Peak</th>\n",
" <th>All-Time Peak</th>\n",
" <th>Genre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Counter-Strike 2</td>\n",
" <td>1015721</td>\n",
" <td>1276702</td>\n",
" <td>1818773</td>\n",
" <td>First-person Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Dota 2</td>\n",
" <td>702487</td>\n",
" <td>805624</td>\n",
" <td>1295114</td>\n",
" <td>MOBA (Multiplayer Online Battle Arena)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Banana</td>\n",
" <td>409758</td>\n",
" <td>428540</td>\n",
" <td>917272</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>PUBG: BATTLEGROUNDS</td>\n",
" <td>371000</td>\n",
" <td>688475</td>\n",
" <td>3257248</td>\n",
" <td>Battle Royale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>ELDEN RING</td>\n",
" <td>294359</td>\n",
" <td>319707</td>\n",
" <td>953426</td>\n",
" <td>Action RPG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>96</td>\n",
" <td>Soundpad</td>\n",
" <td>16028</td>\n",
" <td>17064</td>\n",
" <td>21920</td>\n",
" <td>Simulation / Tycoon</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>97</td>\n",
" <td>Supermarket Simulator</td>\n",
" <td>15817</td>\n",
" <td>15875</td>\n",
" <td>51363</td>\n",
" <td>Vampire / Open world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>98</td>\n",
" <td>V Rising</td>\n",
" <td>15803</td>\n",
" <td>16275</td>\n",
" <td>150645</td>\n",
" <td>Tactical Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>99</td>\n",
" <td>Squad</td>\n",
" <td>15729</td>\n",
" <td>19288</td>\n",
" <td>35151</td>\n",
" <td>Grand Strategy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>100</td>\n",
" <td>Victoria 3</td>\n",
" <td>15609</td>\n",
" <td>17598</td>\n",
" <td>70100</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Rank Title Current 24h Peak All-Time Peak \\\n",
"0 1 Counter-Strike 2 1015721 1276702 1818773 \n",
"1 2 Dota 2 702487 805624 1295114 \n",
"2 3 Banana 409758 428540 917272 \n",
"3 4 PUBG: BATTLEGROUNDS 371000 688475 3257248 \n",
"4 5 ELDEN RING 294359 319707 953426 \n",
".. ... ... ... ... ... \n",
"95 96 Soundpad 16028 17064 21920 \n",
"96 97 Supermarket Simulator 15817 15875 51363 \n",
"97 98 V Rising 15803 16275 150645 \n",
"98 99 Squad 15729 19288 35151 \n",
"99 100 Victoria 3 15609 17598 70100 \n",
"\n",
" Genre \n",
"0 First-person Shooter \n",
"1 MOBA (Multiplayer Online Battle Arena) \n",
"2 Unknown \n",
"3 Battle Royale \n",
"4 Action RPG \n",
".. ... \n",
"95 Simulation / Tycoon \n",
"96 Vampire / Open world \n",
"97 Tactical Shooter \n",
"98 Grand Strategy \n",
"99 Unknown \n",
"\n",
"[100 rows x 6 columns]"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('sample-dataset-a-b-modules.csv')\n",
"data_description = {\n",
" 'Rank': 'Рейтинг игры в списке',\n",
" 'Image Link': 'Ссылка на изображение игры',\n",
" 'Title': 'Название игры',\n",
" 'Current': 'Текущее количество игроков',\n",
" '24h Peak': 'Максимальное количество игроков за последние 24 часа',\n",
" 'All-Time Peak': 'Максимальное количество игроков за все время',\n",
" 'Genre': 'Жанр игры'\n",
"}\n",
"print(data_description)\n",
"df\n",
"\n",
"#Удаление столбцов\n",
"df = df.drop(columns = ['Image Link'])\n",
"\n",
"#ДЛЯ СТОЛБЦА \"All-Time Peak\"\n",
"# 1. Заменяем запятые на точки (для десятичных разделителей)\n",
"df['All-Time Peak'] = df['All-Time Peak'].str.replace(',', '', regex=False) # Убираем запятые как разделители тысяч\n",
"\n",
"# 2. Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"df['All-Time Peak'] = pd.to_numeric(df['All-Time Peak'], errors='coerce')\n",
"\n",
"\n",
"#ДЛЯ СТОЛБЦА \"24h Peak\"\n",
"# 1. Заменяем запятые на точки (для десятичных разделителей)\n",
"df['24h Peak'] = df['24h Peak'].str.replace(',', '', regex=False) # Убираем запятые как разделители тысяч\n",
"\n",
"# 2. Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"df['24h Peak'] = pd.to_numeric(df['24h Peak'], errors='coerce')\n",
"\n",
"\n",
"#ДЛЯ СТОЛБЦА \"Current\"\n",
"# 1. Заменяем запятые на точки (для десятичных разделителей)\n",
"df['Current'] = df['Current'].str.replace(',', '', regex=False) # Убираем запятые как разделители тысяч\n",
"\n",
"# 2. Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"df['Current'] = pd.to_numeric(df['Current'], errors='coerce')\n",
"#df['All-Time Peak'] = df['All-Time Peak'].fillna(0).astype(int)\n",
"\n",
"#ДЛЯ СТОЛБЦА \"Rank\"\n",
"df['Rank'] = df['Rank'].fillna(0).astype(int)\n",
"\n",
"df.to_csv('1 sample-dataset-a-b-modules.csv')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "77314f8e-ad83-449b-b4f9-1a3611aef75d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество дубликатов в данных: 0\n",
"Количество дубликатов после удаления: 0\n"
]
}
],
"source": [
"# Проверка на дубликаты\n",
"duplicates = df.duplicated().sum()\n",
"print(f\"Количество дубликатов в данных: {duplicates}\")\n",
"\n",
"# Удаление дубликатов\n",
"df_cleaned = df.drop_duplicates()\n",
"\n",
"# Проверяем снова количество дубликатов после удаления\n",
"duplicates_after = df_cleaned.duplicated().sum()\n",
"print(f\"Количество дубликатов после удаления: {duplicates_after}\")"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "d1ec44e1-c01f-4d09-8fa9-01bbdd5bce12",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rank</th>\n",
" <th>Title</th>\n",
" <th>Current</th>\n",
" <th>24h Peak</th>\n",
" <th>All-Time Peak</th>\n",
" <th>Genre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Counter-Strike 2</td>\n",
" <td>1015721</td>\n",
" <td>1276702</td>\n",
" <td>1818773</td>\n",
" <td>First-person Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Dota 2</td>\n",
" <td>702487</td>\n",
" <td>805624</td>\n",
" <td>1295114</td>\n",
" <td>MOBA (Multiplayer Online Battle Arena)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Banana</td>\n",
" <td>409758</td>\n",
" <td>428540</td>\n",
" <td>917272</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>PUBG: BATTLEGROUNDS</td>\n",
" <td>371000</td>\n",
" <td>688475</td>\n",
" <td>3257248</td>\n",
" <td>Battle Royale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>ELDEN RING</td>\n",
" <td>294359</td>\n",
" <td>319707</td>\n",
" <td>953426</td>\n",
" <td>Action RPG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>96</td>\n",
" <td>Soundpad</td>\n",
" <td>16028</td>\n",
" <td>17064</td>\n",
" <td>21920</td>\n",
" <td>Simulation / Tycoon</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>97</td>\n",
" <td>Supermarket Simulator</td>\n",
" <td>15817</td>\n",
" <td>15875</td>\n",
" <td>51363</td>\n",
" <td>Vampire / Open world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>98</td>\n",
" <td>V Rising</td>\n",
" <td>15803</td>\n",
" <td>16275</td>\n",
" <td>150645</td>\n",
" <td>Tactical Shooter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>99</td>\n",
" <td>Squad</td>\n",
" <td>15729</td>\n",
" <td>19288</td>\n",
" <td>35151</td>\n",
" <td>Grand Strategy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>100</td>\n",
" <td>Victoria 3</td>\n",
" <td>15609</td>\n",
" <td>17598</td>\n",
" <td>70100</td>\n",
" <td>Unknown</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Rank Title Current 24h Peak All-Time Peak \\\n",
"0 1 Counter-Strike 2 1015721 1276702 1818773 \n",
"1 2 Dota 2 702487 805624 1295114 \n",
"2 3 Banana 409758 428540 917272 \n",
"3 4 PUBG: BATTLEGROUNDS 371000 688475 3257248 \n",
"4 5 ELDEN RING 294359 319707 953426 \n",
".. ... ... ... ... ... \n",
"95 96 Soundpad 16028 17064 21920 \n",
"96 97 Supermarket Simulator 15817 15875 51363 \n",
"97 98 V Rising 15803 16275 150645 \n",
"98 99 Squad 15729 19288 35151 \n",
"99 100 Victoria 3 15609 17598 70100 \n",
"\n",
" Genre \n",
"0 First-person Shooter \n",
"1 MOBA (Multiplayer Online Battle Arena) \n",
"2 Unknown \n",
"3 Battle Royale \n",
"4 Action RPG \n",
".. ... \n",
"95 Simulation / Tycoon \n",
"96 Vampire / Open world \n",
"97 Tactical Shooter \n",
"98 Grand Strategy \n",
"99 Unknown \n",
"\n",
"[100 rows x 6 columns]"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b0bdd60-6e93-4a14-99f0-96db1631cece",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "83d7bf65-b5c4-4fc7-a7d6-a58c6f160a3a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "72b289dc-df51-4f2e-bc10-641d23c3a327",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "396ef48c-4493-4375-938a-b549803a7d20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2590d246-0fa2-4e2e-9409-9aa319564158",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5fd17df-0047-4af7-95be-fc160d7068d7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "297a4789-4b65-442e-bcf3-ef95e4ca8477",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f3bb39a-b716-41d5-820c-96b884a4a90c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b5b15e2-09e7-440c-983c-343fd3191edc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1d78eac-7c7d-454e-b067-cc8d24fdfb8d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6083b2df-0edd-47a8-9ff6-e914f460af2e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "46652822-e590-4df7-80cd-d261f00a04ce",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "69ba20a3-b5ee-4ce4-a06c-751e62394f6a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "495817d8-4938-4232-8514-35211b5a769a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c9907de-2322-4917-bb3a-a2e133c8b689",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c356d01e-c92d-40b4-8d6b-0ebf379b2355",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "150c726c-b32f-4b0c-8035-985795abd168",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "46bbe37e-1295-4ff9-a8e2-d69ef0ef956b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4c0d6fb-31d3-4bb9-9afe-50ec280652b2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd199f05-2ed0-4b21-ad63-6b6607b68e87",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "032ed932-d424-4bf4-b4b4-a8bd2b06debd",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "42771134-5c3a-413e-af0f-203a1bf775bc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ecc7141-4b0e-44de-9dbd-98b79faeed9d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3dbeb49a-f727-49db-8843-ed11d5fb0a94",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "96ff95f6-7d95-48ae-a89f-038e9e2af234",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c06ed0fa-8de0-4ad9-8e28-2757d6181355",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cafd05ea-f766-4835-bad4-cd0e3e5f3e57",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba5e0481-00eb-45e0-b9bd-313ea85fea72",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d88fadd-304c-49c2-8229-7d6b226ea75c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9438dc61-19ae-4048-9de0-c1a52d19a767",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cc41d04-4166-4185-ab4b-95d47132ead9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "352a3263-c705-4070-bf2f-87c625f66017",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 24,
"id": "145f13d9-0abd-4696-8a99-8d4e0eec119b",
"metadata": {},
"outputs": [],
"source": [
"#КОМАНДЫ\n",
"\n",
"#df['All-Time Peak'] = pd.to_numeric(df['All-Time Peak'], errors='coerce').astype('Int64')\n",
"\n",
"\n",
"\n",
"## Заменяем запятые на точки\n",
"#df['All-Time Peak'] = df['All-Time Peak'].str.replace(',', '.', regex=False)\n",
"\n",
"# Преобразуем тип данных в вещественные числа\n",
"##df['All-Time Peak'] = df['All-Time Peak'].astype(float)\n",
"\n",
"## Преобразуем строку в вещественное число, заменяя ошибки на NaN\n",
"#df['All-Time Peak'] = pd.to_numeric(df['All-Time Peak'], errors='coerce')\n",
"\n",
"## Проверим результат\n",
"#print(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3aa306a-76a4-4b16-968b-1b2a9f30fa10",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d86e9cb-e8a5-4e20-8c5a-bc6486c17219",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a70383b-7dfc-473c-aa1c-67bdd5f079ee",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1206df8e-784d-4d26-a647-d560d352ac69",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4f0eb2b-ef3b-4f57-9196-3fc4145666a4",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3be3dda-d8d4-4246-8bab-5630c9f890d7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6282011-c71e-46d2-8f3f-4e42cf2bc6bf",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b700b0f-16cf-49e9-82f4-3c817e1da1f0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:base] *",
"language": "python",
"name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}