# %% [markdown]
# # Game player-count dataset: load, clean, de-duplicate
#
# Reads the raw CSV, drops the image-link column, turns the player-count
# columns (stored as text such as "1,015,721") into numbers, saves the cleaned
# table, and finally checks the result for duplicate rows.

# %% Imports and configuration
import pandas as pd

RAW_CSV = 'sample-dataset-a-b-modules.csv'
CLEAN_CSV = '1 sample-dataset-a-b-modules.csv'

# Columns whose values are player counts written with "," as thousands separator.
PLAYER_COUNT_COLUMNS = ['All-Time Peak', '24h Peak', 'Current']

# Human-readable (Russian) description of every column of the raw CSV.
data_description = {
    'Rank': 'Рейтинг игры в списке',
    'Image Link': 'Ссылка на изображение игры',
    'Title': 'Название игры',
    'Current': 'Текущее количество игроков',
    '24h Peak': 'Максимальное количество игроков за последние 24 часа',
    'All-Time Peak': 'Максимальное количество игроков за все время',
    'Genre': 'Жанр игры'
}
print(data_description)


# %% Helper
def _strip_thousands(column: pd.Series) -> pd.Series:
    """Return ``column`` as numbers, treating "," as a thousands separator.

    Parameters
    ----------
    column : pd.Series
        Either text values such as ``"1,015,721"`` or values pandas has
        already parsed as numbers.

    Returns
    -------
    pd.Series
        Numeric series. Text that still cannot be parsed after the commas are
        removed becomes NaN (``errors='coerce'``).
    """
    # read_csv yields a numeric column when no value contains a comma; the
    # .str accessor would raise AttributeError on it, so pass it through.
    if pd.api.types.is_numeric_dtype(column):
        return column
    # Commas are removed, not replaced by dots: they group thousands and are
    # not decimal separators.
    without_commas = column.str.replace(',', '', regex=False)
    return pd.to_numeric(without_commas, errors='coerce')


# %% Load and clean
df = pd.read_csv(RAW_CSV)

# The image URL is not used by the analysis.
df = df.drop(columns=['Image Link'])

for count_column in PLAYER_COUNT_COLUMNS:
    df[count_column] = _strip_thousands(df[count_column])

# Missing ranks become 0 so the column can be stored as plain integers.
df['Rank'] = df['Rank'].fillna(0).astype(int)

# Saved with pandas' default index=True, i.e. the row index is written as an
# extra leading column.
# NOTE(review): the file is written before the duplicate check below, so it is
# not de-duplicated — confirm this is intended.
df.to_csv(CLEAN_CSV)
df

# %% Duplicate check
duplicates = df.duplicated().sum()
print(f"Количество дубликатов в данных: {duplicates}")

# Keep the first occurrence of every fully identical row.
df_cleaned = df.drop_duplicates()

# Sanity check: must print 0 after drop_duplicates().
duplicates_after = df_cleaned.duplicated().sum()
print(f"Количество дубликатов после удаления: {duplicates_after}")

# %% Inspect the result
# NOTE(review): this shows `df`, not the de-duplicated `df_cleaned` — confirm
# which frame the following analysis should use.
df