{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 1.Required packages" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pylab as plt\n", "\n", "import re\n", "import string\n", "\n", "from nltk.stem import PorterStemmer\n", "from nltk.corpus import stopwords\n", "from nltk.stem import WordNetLemmatizer\n", "from nltk.tokenize import word_tokenize\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from sklearn.feature_extraction.text import TfidfVectorizer # tfidf\n", "from sklearn.decomposition import TruncatedSVD\n", "\n", "\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import classification_report\n", "\n", "from joblib import load, dump" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 2.Reading data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idapp_idcontentauthor_idis_positive
0181331361100At least its a counter strike -1/1007.656120e+16Negative
1180872601100Uh... So far my playthrough has not been great...7.656120e+16Negative
2177836246100Better mechanics than cs27.656120e+16Negative
3177287444100buggy mess and NOT fun to play at all7.656120e+16Negative
4176678990100Whoever came up with this, is gonna fucking ge...7.656120e+16Negative
..................
201139118775009570This trash for sick anime clowns!7.656120e+16Negative
201140118771828570Not noob friendly7.656120e+16Negative
201141118771331570Total trash for anime clowns!7.656120e+16Negative
201142182234883730Unplayable. It keeps trying to update but then...7.656120e+16Negative
201143177842193730Bring back csgo7.656120e+16Negative
\n", "

201144 rows × 5 columns

\n", "
" ], "text/plain": [ " id app_id content \\\n", "0 181331361 100 At least its a counter strike -1/100 \n", "1 180872601 100 Uh... So far my playthrough has not been great... \n", "2 177836246 100 Better mechanics than cs2 \n", "3 177287444 100 buggy mess and NOT fun to play at all \n", "4 176678990 100 Whoever came up with this, is gonna fucking ge... \n", "... ... ... ... \n", "201139 118775009 570 This trash for sick anime clowns! \n", "201140 118771828 570 Not noob friendly \n", "201141 118771331 570 Total trash for anime clowns! \n", "201142 182234883 730 Unplayable. It keeps trying to update but then... \n", "201143 177842193 730 Bring back csgo \n", "\n", " author_id is_positive \n", "0 7.656120e+16 Negative \n", "1 7.656120e+16 Negative \n", "2 7.656120e+16 Negative \n", "3 7.656120e+16 Negative \n", "4 7.656120e+16 Negative \n", "... ... ... \n", "201139 7.656120e+16 Negative \n", "201140 7.656120e+16 Negative \n", "201141 7.656120e+16 Negative \n", "201142 7.656120e+16 Negative \n", "201143 7.656120e+16 Negative \n", "\n", "[201144 rows x 5 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "game_df=pd.read_csv('output.csv',encoding='ISO-8859-1')\n", "game_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 3.EDA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.1 Drop unwanted columns" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Remove the identifier columns, keeping only the review text and its label.\n", "# Reassign instead of using inplace=True, and ignore columns that are already\n", "# gone, so re-running this cell does not raise a KeyError.\n", "game_df = game_df.drop(columns=[\"id\", \"app_id\", \"author_id\"], errors=\"ignore\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
contentis_positive
0At least its a counter strike -1/100Negative
1Uh... So far my playthrough has not been great...Negative
2Better mechanics than cs2Negative
3buggy mess and NOT fun to play at allNegative
4Whoever came up with this, is gonna fucking ge...Negative
.........
201139This trash for sick anime clowns!Negative
201140Not noob friendlyNegative
201141Total trash for anime clowns!Negative
201142Unplayable. It keeps trying to update but then...Negative
201143Bring back csgoNegative
\n", "

201144 rows × 2 columns

\n", "
" ], "text/plain": [ " content is_positive\n", "0 At least its a counter strike -1/100 Negative\n", "1 Uh... So far my playthrough has not been great... Negative\n", "2 Better mechanics than cs2 Negative\n", "3 buggy mess and NOT fun to play at all Negative\n", "4 Whoever came up with this, is gonna fucking ge... Negative\n", "... ... ...\n", "201139 This trash for sick anime clowns! Negative\n", "201140 Not noob friendly Negative\n", "201141 Total trash for anime clowns! Negative\n", "201142 Unplayable. It keeps trying to update but then... Negative\n", "201143 Bring back csgo Negative\n", "\n", "[201144 rows x 2 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect the frame now that only the text and label columns remain.\n", "game_df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "content 427\n", "is_positive 0\n", "dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count the missing values in each column.\n", "game_df.isna().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.2 Drop null values " ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "content 0\n", "is_positive 0\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Discard every row that has a missing value in any column,\n", "# then confirm that no nulls are left.\n", "game_df = game_df.dropna(axis=0, how=\"any\")\n", "game_df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_5644\\320265834.py:1: FutureWarning: \n", "\n", "Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. 
Assign the `x` variable to `hue` and set `legend=False` for the same effect.\n", "\n", " ax=sns.countplot(x=game_df.is_positive,palette=\"viridis\")\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAHHCAYAAACWQK1nAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAABRuElEQVR4nO3deVgV9f///wcgmyDggiBKQGgqRpqa+5okLlmmmZi5hUt+QHPLstzSytJccsmlTSt9i7aYqaGkpmbkmvtemqWClgKKCgjz+8Mv8/ME7qOA3m/Xda4r5vWc1zxn7MjDmTlz7AzDMAQAAIDbYp/XDQAAANwLCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVcA97NKlSxo8eLD8/f1lb2+v1q1b53VLptmzZ8vOzk5Hjhy5bu1PP/0kOzs7/fTTT3e8r7zSqFEjNWrUKK/buGvs7Ow0cuTIvG7jtjRq1EgPP/ywpXMGBgaqa9euls6Ju4dQhXve77//rl69eunBBx+Ui4uLPDw8VLduXX3wwQe6cOFCXrcnSfrwww81e/Zsy+f99NNPNW7cOD377LOaM2eO+vfvf9XaRo0ayc7OznwVK1ZMjz32mD799FNlZWVZ3ltu7tRxuJcEBgbKzs5Offr0yTGWHT6/+uqrPOgsp2XLluW74GRnZ6fo6Oi8bgP3qEJ53QBwJy1dulTt2rWTs7OzOnfurIcffljp6en6+eef9corr2j37t2aNWtWXrepDz/8UCVKlLD8X6irVq1S6dKlNXHixBuqL1OmjMaMGSNJOnXqlD7//HNFRkbqwIEDevfddy3trVOnToqIiJCzs7O57GrHoUGDBrpw4YKcnJws7aEg++ijjzRkyBD5+fnldStXtWzZMk2bNi3XYHXhwgUVKsSvINxb+D8a96zDhw8rIiJCAQEBWrVqlUqVKmWORUVF6dChQ1q6dGkednjnnTx5Ul5eXjdc7+npqRdeeMH8uVevXipfvrymTp2q0aNHy9HR0bLeHBwc5ODgcEO19vb2cnFxsWzbBV2lSpW0f/9+vfvuu5o8eXJet3NL+PPEvYjLf7hnjR07VufOndMnn3xiE6iylS1bVi+//LL586VLlzR69GgFBwfL2dlZgYGBev3115WWlmaz3tXuBfnvvRDZ9wytX79eAwYMkLe3t9zc3PTMM8/o1KlTNuvt3r1ba9asMS+9Xe/emtTUVA0cOFD+/v5ydnZW+fLl9f7778swDEnSkSNHZGdnp9WrV2v37t3mvDd7T1LhwoVVq1Ytpaammj3/8ccfateunYoVK2aO5xZOp0yZokqVKqlw4cIqWrSoqlevrnnz5uU4Ptn3VF3rOPz3nqro6Gi5u7vr/PnzObbboUMH+fr6KjMz01z2ww8/qH79+nJzc1ORIkXUsmVL7d69+7r7f/r0aQ0aNEihoaFyd3eXh4eHmjdvru3bt9vUZfe3YMECvf322ypTpoxcXFzUpEkTHTp0KMe8s2bNUnBwsFxdXVWjRg2tW7fuur1cKTAwUJ07d9ZHH32k48ePX7f+2LFjevHFF+Xj4yNnZ2dVqlRJn376aY66P//8U0899ZTc3NxUsmRJ9e/fX8uXL8/x/866devUrl07PfDAA3J2dpa/v7/69+9vczm9a9eumjZtmiTZXFbOduX76KuvvpKdnZ3WrFmTo6eZM2fKzs5Ou3btMpft27dPzz77rIoVKyYXFxdVr15dixcvvu5xuFHfffedWrZsKT8/Pzk7Oys4OFijR4+2+X/qSlu2bFGdOnXk6
uqqoKAgzZgxI0dNWlqaRowYobJly5rHbPDgwTn+fvmvjIwMvfnmmypXrpxcXFxUvHhx1atXT3FxcZbsK6zFmSrcs77//ns9+OCDqlOnzg3Vd+/eXXPmzNGzzz6rgQMHasOGDRozZoz27t2rb7/99pb76NOnj4oWLaoRI0boyJEjmjRpkqKjoxUTEyNJmjRpkvr06SN3d3e98cYbkiQfH5+rzmcYhp566imtXr1akZGRqlKlipYvX65XXnlFx44d08SJE+Xt7a0vvvhCb7/9ts6dO2de0qtYseJN9//HH3/IwcFBXl5eSkxMVJ06dXT+/Hn17dtXxYsX15w5c/TUU0/pq6++0jPPPCPp8qWpvn376tlnn9XLL7+sixcvaseOHdqwYYOef/75XLdzM8ehffv2mjZtmnl5N9v58+f1/fffq2vXruZZsC+++EJdunRReHi43nvvPZ0/f17Tp09XvXr19NtvvykwMPCa+75o0SK1a9dOQUFBSkxM1MyZM9WwYUPt2bMnx6W3d999V/b29ho0aJCSk5M1duxYdezYURs2bDBrPvnkE/Xq1Ut16tRRv3799Mcff+ipp55SsWLF5O/vf/0/kP/njTfe0Oeff37ds1WJiYmqVauWeS+Rt7e3fvjhB0VGRiolJUX9+vWTdDmoP/744zpx4oRefvll+fr6at68eVq9enWOORcuXKjz58+rd+/eKl68uDZu3KgpU6bo77//1sKFCyVdPst5/PhxxcXF6YsvvrjmvrRs2VLu7u5asGCBGjZsaDMWExOjSpUqmTeE7969W3Xr1lXp0qX12muvyc3NTQsWLFDr1q319ddfm/8P3o7Zs2fL3d1dAwYMkLu7u1atWqXhw4crJSVF48aNs6k9c+aMWrRooeeee04dOnTQggUL1Lt3bzk5OenFF1+UJGVlZempp57Szz//rJ49e6pixYrauXOnJk6cqAMHDmjRokVX7WXkyJEaM2aMunfvrho1aiglJUWbN2/W1q1b9cQTT9z2vsJiBnAPSk5ONiQZTz/99A3Vb9u2zZBkdO/e3Wb5oEGDDEnGqlWrzGWSjBEjRuSYIyAgwOjSpYv582effWZIMsLCwoysrCxzef/+/Q0HBwcjKSnJXFapUiWjYcOGN9TrokWLDEnGW2+9ZbP82WefNezs7IxDhw6Zyxo2bGhUqlTphuZt2LChUaFCBePUqVPGqVOnjL179xp9+/Y1JBmtWrUyDMMw+vXrZ0gy1q1bZ6539uxZIygoyAgMDDQyMzMNwzCMp59++rrbzT4+hw8fNpdd7TisXr3akGSsXr3aMAzDyMrKMkqXLm20bdvWpm7BggWGJGPt2rVmb15eXkaPHj1s6hISEgxPT88cy//r4sWL5j5lO3z4sOHs7GyMGjUqR38VK1Y00tLSzOUffPCBIcnYuXOnYRiGkZ6ebpQsWdKoUqWKTd2sWbMMSTf0/0BAQIDRsmVLwzAMo1u3boaLi4tx/Phxmz4WLlxo1kdGRhqlSpUy/vnnH5t5IiIiDE9PT+P8+fOGYRjG+PHjDUnGokWLzJoLFy4YFSpUsDn2hmGY61xpzJgxhp2dnfHnn3+ay6Kiooyr/Zr57/uoQ4cORsmSJY1Lly6Zy06cOGHY29vbHOsmTZoYoaGhxsWLF81lWVlZRp06dYxy5crluq3/bjcqKuqaNbntX69evYzChQvbbLdhw4aGJGP8+PHmsrS0NKNKlSpGyZIljfT0dMMwDOOLL74w7O3tbd43hmEYM2bMMCQZ69evN5f99++RypUrm3/eyP+4/Id7UkpKiiSpSJEiN1S/bNkySdKAAQNslg8cOFCSbuveq549e9pc9qhfv74yMzP1559/3tJ8y5Ytk4ODg/r27ZujV8Mw9MMPP9xyr/v27ZO3t7e8vb1VsWJFTZkyRS1btjQvFS1btkw1atRQvXr1zHXc3d3Vs2dPHTlyRHv27JEkeXl56e+//9amTZtuuZdrsbOzU7t27bRs2TKdO
3fOXB4TE6PSpUub/cXFxSkpKUkdOnTQP//8Y74cHBxUs2bNXM/CXMnZ2Vn29pf/mszMzNS///4rd3d3lS9fXlu3bs1R361bN5ub6evXry/p8hkvSdq8ebNOnjypl156yaaua9eu8vT0vOnjMHToUF26dOmqHyIwDENff/21WrVqJcMwbI5BeHi4kpOTzf2IjY1V6dKl9dRTT5nru7i4qEePHjnmdXV1Nf87NTVV//zzj+rUqSPDMPTbb7/d9H5Il88+njx50uYy41dffaWsrCy1b99e0uXLsatWrdJzzz2ns2fPmvvy77//Kjw8XAcPHtSxY8duaftXunL/srdTv359nT9/Xvv27bOpLVSokHr16mX+7OTkpF69eunkyZPasmWLpMtn9ipWrKgKFSrY/Bk8/vjjknTN/w+9vLy0e/duHTx48Lb3C3ceoQr3JA8PD0mX/0K8EX/++afs7e1VtmxZm+W+vr7y8vK65QAkSQ888IDNz0WLFpV0+bLBrfjzzz/l5+eXIzBmX9q7nV4DAwMVFxenH3/8UT///LMSEhK0ZMkSlShRwpy7fPnyOdb777ZfffVVubu7q0aNGipXrpyioqK0fv36W+4rN+3bt9eFCxfMe2nOnTunZcuWqV27dmaIzf5F9Pjjj5thMfu1YsUKnTx58prbyMrK0sSJE1WuXDk5OzurRIkS8vb21o4dO5ScnJyj/np/1tnHp1y5cjZ1jo6OevDBB2/2EOjBBx9Up06dNGvWLJ04cSLH+KlTp5SUlKRZs2bl2P9u3bpJknkM/vzzTwUHB9v8A0BSjveEJB09elRdu3ZVsWLF5O7uLm9vb/OyXW7H5UY0a9ZMnp6e5mVx6XJIrlKlih566CFJ0qFDh2QYhoYNG5Zjf0aMGGGzP7dj9+7deuaZZ+Tp6SkPDw95e3ubH+D47/75+fnJzc3NZll2v9n3Cx48eFC7d+/O0XN23bV6HjVqlJKSkvTQQw8pNDRUr7zyinbs2HHb+4g7g3uqcE/y8PCQn5+fzc2tN+K/v1BuxtVuYr3aJ9yM/3dTeX7i5uamsLCw256nYsWK2r9/v5YsWaLY2Fh9/fXX+vDDDzV8+HC9+eabFnQq1apVS4GBgVqwYIGef/55ff/997pw4YJ5VkOS+XytL774Qr6+vjnmuN5H+t955x0NGzZML774okaPHq1ixYrJ3t5e/fr1y/XZXXnxZ/3GG2/oiy++0HvvvZfj4a7ZPb7wwgvq0qVLrus/8sgjN7W9zMxMPfHEEzp9+rReffVVVahQQW5ubjp27Ji6du16y880c3Z2VuvWrfXtt9/qww8/VGJiotavX6933nknx/4MGjRI4eHhuc6TWwi8GUlJSWrYsKE8PDw0atQoBQcHy8XFRVu3btWrr756S/uXlZWl0NBQTZgwIdfxa91L16BBA/3+++/67rvvtGLFCn388ceaOHGiZsyYoe7du990L7izCFW4Zz355JOaNWuW4uPjVbt27WvWBgQEKCsrSwcPHrS5mTsxMVFJSUkKCAgwlxUtWlRJSUk266enp+d6puBG3UyYCwgI0I8//qizZ8/anK3KvixxZa9WCwgI0P79+3Msz23bbm5uat++vdq3b6/09HS1adNGb7/9toYMGXLVj9PfbKh97rnn9MEHHyglJUUxMTEKDAxUrVq1zPHg4GBJUsmSJW8pLH711Vdq3LixPvnkE5vlSUlJ5tm7m5F9fA4ePGhe+pEuf8Lr8OHDqly58k3PGRwcrBdeeEEzZ85UzZo1bca8vb1VpEgRZWZmXnf/AwICtGfPHhmGYfPn8N9PL+7cuVMHDhzQnDlz1LlzZ3N5bp9Gu9k/z/bt22vOnDlauXKl9u7dK8MwbEJy9tk8R0dHS8J/bn766Sf9+++/+uabb9SgQQNz+eHDh3OtP378uFJTU23OVh04cECSzA9BBAcHa/v27WrSpMkt/cOtWLFi6tatm7p166Zz586pQYMGGjlyJKEqH+LyH+5Zg
wcPlpubm7p3767ExMQc47///rs++OADSVKLFi0kXf4E2pWy/2XZsmVLc1lwcLDWrl1rUzdr1qyrnqm6EW5ubjmC2tW0aNFCmZmZmjp1qs3yiRMnys7OTs2bN7/lPm5k2xs3blR8fLy5LDU1VbNmzVJgYKBCQkIkSf/++6/Nek5OTgoJCZFhGMrIyLjq/DdzHKTLv4TT0tI0Z84cxcbG6rnnnrMZDw8Pl4eHh955551ct3vloy1y4+DgkOMs08KFC2/5vp3q1avL29tbM2bMUHp6url89uzZN7Xf/zV06FBlZGRo7NixNssdHBzUtm1bff3117metb1y/8PDw3Xs2DGbRxNcvHhRH330UY45Jduzb4ZhmO+lK2UHjRvdt7CwMBUrVkwxMTGKiYlRjRo1FBQUZI6XLFlSjRo10syZM696ufN25bZ/6enp+vDDD3Otv3TpkmbOnGlTO3PmTHl7e6tatWqSLof/Y8eO5TiW0uWHoKampl61n/++l9zd3VW2bNnrPooBeYMzVbhnBQcHa968eWrfvr0qVqxo80T1X375RQsXLjSfK1W5cmV16dJFs2bNMk//b9y4UXPmzFHr1q3VuHFjc97u3bvrpZdeUtu2bfXEE09o+/btWr58+S2duchWrVo1TZ8+XW+99ZbKli2rkiVL2pzJuFKrVq3UuHFjvfHGGzpy5IgqV66sFStW6LvvvlO/fv3MszN3wmuvvab//e9/at68ufr27atixYppzpw5Onz4sL7++mvzpu6mTZvK19dXdevWlY+Pj/bu3aupU6eqZcuW1/zwwM0cB0mqWrWqypYtqzfeeENpaWk2ZzWky5eBp0+frk6dOqlq1aqKiIiQt7e3jh49qqVLl6pu3bo5wumVnnzySY0aNUrdunVTnTp1tHPnTs2dO/eW7n+SLp9heeutt9SrVy89/vjjat++vQ4fPqzPPvvslueU/v+zVXPmzMkx9u6772r16tWqWbOmevTooZCQEJ0+fVpbt27Vjz/+qNOnT0u6/AiEqVOnqkOHDnr55ZdVqlQpzZ071zyrmH2GpUKFCgoODtagQYN07NgxeXh46Ouvv871HsHsUNG3b1+Fh4fLwcFBERER1zw+bdq00fz585Wamqr3338/R820adNUr149hYaGqkePHnrwwQeVmJio+Ph4/f333zmeIZabzZs366233sqxvFGjRqpTp46KFi2qLl26qG/fvrKzs9MXX3xx1Uu4fn5+eu+993TkyBE99NBDiomJ0bZt2zRr1izzYbmdOnXSggUL9NJLL2n16tWqW7euMjMztW/fPi1YsEDLly9X9erVc50/JCREjRo1UrVq1VSsWDFt3rxZX331FV+1k1/lwScOgbvqwIEDRo8ePYzAwEDDycnJKFKkiFG3bl1jypQpNh+PzsjIMN58800jKCjIcHR0NPz9/Y0hQ4bY1BiGYWRmZhqvvvqqUaJECaNw4cJGeHi4cejQoas+UmHTpk026//38QCGcfkj/i1btjSKFClyQx+tP3v2rNG/f3/Dz8/PcHR0NMqVK2eMGzfO5tENhnHzj1S4kdrff//dePbZZw0vLy/DxcXFqFGjhrFkyRKbmpkzZxoNGjQwihcvbjg7OxvBwcHGK6+8YiQnJ5s1uT1S4WrHIbdjlu2NN94wJBlly5a9as+rV682wsPDDU9PT8PFxcUIDg42unbtamzevPma+3rx4kVj4MCBRqlSpQxXV1ejbt26Rnx8vNGwYUObP6PcHmVgGJcfvyDJ+Oyzz2yWf/jhh0ZQUJDh7OxsVK9e3Vi7dm2OOa/mykcqXOngwYOGg4NDrn0kJiYaUVFRhr+/v+Ho6Gj4+voaTZo0MWbNmmVT98cffxgtW7Y0XF1dDW9vb2PgwIHG119/bUgyfv31V7Nuz549RlhYmOHu7m6UKFHC6NGjh7F9+/Yc+3rp0iWjT58+hre3t2FnZ2fzeAVd5dEkcXFxhiTDzs7O+Ouvv3I9Br///rvRuXNnw
9fX13B0dDRKly5tPPnkk8ZXX3113eMn6aqv0aNHG4ZhGOvXrzdq1apluLq6Gn5+fsbgwYON5cuX5/h/MPs9s3nzZqN27dqGi4uLERAQYEydOjXHdtPT04333nvPqFSpkuHs7GwULVrUqFatmvHmm2/avC/++/fIW2+9ZdSoUcPw8vIyXF1djQoVKhhvv/22+bgG5C92hpEP75YFAOQLkyZNUv/+/fX333+rdOnSed0OkK8RqgAAki7f33PlM5ouXryoRx99VJmZmebN1wCujnuqAACSpDZt2uiBBx5QlSpVlJycrC+//FL79u3T3Llz87o1oEAgVAEAJF3+BODHH3+suXPnKjMzUyEhIZo/f36ODwAAyB2X/wAAACzAc6oAAAAsQKgCAACwAPdU3UVZWVk6fvy4ihQpclvfMQcAAO4ewzB09uxZ+fn5mQ85zg2h6i46fvz4Nb84EwAA5F9//fWXypQpc9VxQtVdlP31HH/99Zc8PDzyuBsAAHAjUlJS5O/vf82v2ZIIVXdV9iU/Dw8PQhUAAAXM9W7d4UZ1AAAACxCqAAAALECoAgDcc9auXatWrVrJz89PdnZ2WrRokc24YRgaPny4SpUqJVdXV4WFhengwYPm+JEjRxQZGamgoCC5uroqODhYI0aMUHp6ulmzf/9+NW7cWD4+PnJxcdGDDz6ooUOHKiMjw6xp1KiR7Ozscrxatmxp1uQ2bmdnp3Hjxt25A4Q7gnuqAAD3nNTUVFWuXFkvvvii2rRpk2N87Nixmjx5subMmaOgoCANGzZM4eHh2rNnj1xcXLRv3z5lZWVp5syZKlu2rHbt2qUePXooNTVV77//viTJ0dFRnTt3VtWqVeXl5aXt27erR48eysrK0jvvvCNJ+uabb2yC2L///qvKlSurXbt25rITJ07Y9PbDDz8oMjJSbdu2vROHBncQX1NzF6WkpMjT01PJycncqA4Ad4mdnZ2+/fZbtW7dWtLls1R+fn4aOHCgBg0aJElKTk6Wj4+PZs+erYiIiFznGTdunKZPn64//vjjqtsaMGCANm3apHXr1uU6PmnSJA0fPlwnTpyQm5tbrjWtW7fW2bNntXLlypvYS9xJN/r7m8t/AID7yuHDh5WQkKCwsDBzmaenp2rWrKn4+PirrpecnKxixYpddfzQoUOKjY1Vw4YNr1rzySefKCIi4qqBKjExUUuXLlVkZOQN7AnyG0IVAOC+kpCQIEny8fGxWe7j42OO/dehQ4c0ZcoU9erVK8dYnTp15OLionLlyql+/foaNWpUrnNs3LhRu3btUvfu3a/a25w5c1SkSJFcL1ki/yNUAQBwDceOHVOzZs3Url079ejRI8d4TEyMtm7dqnnz5mnp0qXmPVf/9cknnyg0NFQ1atS46rY+/fRTdezYUS4uLpb1j7uHG9UBAPcVX19fSZcvtZUqVcpcnpiYqCpVqtjUHj9+XI0bN1adOnU0a9asXOfL/vqxkJAQZWZmqmfPnho4cKAcHBzMmtTUVM2fP/+qZ7Ekad26ddq/f79iYmJuddeQxzhTBQC4rwQFBcnX19fmRvCUlBRt2LBBtWvXNpcdO3ZMjRo1UrVq1fTZZ59d84t0s2VlZSkjI0NZWVk2yxcuXKi0tDS98MILV133k08+UbVq1VS5cuVb2CvkB5ypAgDcc86dO6dDhw6ZPx8+fFjbtm1TsWLF9MADD6hfv3566623VK5cOfORCn5+fuYnBLMDVUBAgN5//32dOnXKnCv7TNfcuXPl6Oio0NBQOTs7a/PmzRoyZIjat28vR0dHm34++eQTtW7dWsWLF8+135SUFC1cuFDjx4+3+EjgbiJUAQDuOZs3b1bjxo3NnwcMGCBJ6tKli2bPnq3BgwcrNTVVPXv2VFJSkurVq6fY2FjzXqa4uDgdOnRIhw4dUpkyZWzmzn4SUaFChfTee+/pwIEDMgxDAQEBio6OVv/+/W3q9+/fr59//lkrVqy4ar/z58+XYRjq0KGDJ
fuPvMFzqu4inlMFAEDBw3OqAAAA7iJCFQAAgAW4pwoACpCm84fkdQtAvrMiYkxetyCJM1WAjbNnz6pfv34KCAiQq6ur6tSpo02bNpnj586dU3R0tMqUKSNXV1eFhIRoxowZNnP06tVLwcHBcnV1lbe3t55++mnt27cv1+39+++/KlOmjOzs7JSUlGQz9tNPP6lq1apydnZW2bJlNXv2bKt3FwBgIUIVcIXu3bsrLi5OX3zxhXbu3KmmTZsqLCxMx44dk3T5E0SxsbH68ssvtXfvXvXr10/R0dFavHixOUf2M2327t2r5cuXyzAMNW3aVJmZmTm2FxkZqUceeSTH8sOHD6tly5Zq3Lixtm3bpn79+ql79+5avnz5ndt5AMBt4dN/dxGf/svfLly4oCJFiui7775Ty5YtzeXVqlVT8+bN9dZbb+nhhx9W+/btNWzYsFzHc7Njxw5VrlxZhw4dUnBwsLl8+vTpiomJ0fDhw9WkSROdOXNGXl5ekqRXX31VS5cu1a5du8z6iIgIJSUlKTY21uI9R0HC5T8gpzt9+a9AfPpv7dq1atWqlfz8/GRnZ6dFixbZjBuGoeHDh6tUqVJydXVVWFiYDh48aFNz+vRpdezYUR4eHvLy8lJkZKTOnTtnU7Njxw7Vr19fLi4u8vf319ixY3P0snDhQlWoUEEuLi4KDQ3VsmXLbroXFGyXLl1SZmZmju/ccnV11c8//yzp8henLl68WMeOHZNhGFq9erUOHDigpk2b5jpnamqqPvvsMwUFBZlfZSFJe/bs0ahRo/T555/n+pTm+Ph4hYWF2SwLDw9XfHz87e4mAOAOydNQlZqaqsqVK2vatGm5jo8dO1aTJ0/WjBkztGHDBrm5uSk8PFwXL140azp27Kjdu3crLi5OS5Ys0dq1a9WzZ09zPCUlRU2bNlVAQIC2bNmicePGaeTIkTbf4fTLL7+oQ4cOioyM1G+//abWrVurdevWNmcJbqQXFGxFihRR7dq1NXr0aB0/flyZmZn68ssvFR8frxMnTkiSpkyZopCQEJUpU0ZOTk5q1qyZpk2bpgYNGtjM9eGHH8rd3V3u7u764YcfFBcXJycnJ0lSWlqaOnTooHHjxumBBx7ItZeEhAT5+PjYLPPx8VFKSoouXLhwB/YeAHC78jRUZV8yeeaZZ3KMGYahSZMmaejQoXr66af1yCOP6PPPP9fx48fNM1p79+5VbGysPv74Y9WsWVP16tXTlClTNH/+fB0/flzS5a8RSE9P16effqpKlSopIiJCffv21YQJE8xtffDBB2rWrJleeeUVVaxYUaNHj1bVqlU1derUG+4F94YvvvhChmGodOnScnZ21uTJk9WhQwfzbNKUKVP066+/avHixdqyZYvGjx+vqKgo/fjjjzbzdOzYUb/99pvWrFmjhx56SM8995wZwIcMGaKKFSte8zvAAAAFT769Uf3w4cNKSEiwuQTi6empmjVrmpdA4uPj5eXlperVq5s1YWFhsre314YNG8yaBg0amGcJpMuXUfbv368zZ86YNde61HIjveQmLS1NKSkpNi/kb8HBwVqzZo3OnTunv/76Sxs3blRGRoYefPBBXbhwQa+//romTJigVq1a6ZFHHlF0dLTat2+v999/32YeT09PlStXTg0aNNBXX32lffv26dtvv5UkrVq1SgsXLlShQoVUqFAhNWnSRJJUokQJjRgxQtLl7xZLTEy0mTMxMVEeHh5ydXW9C0cCAHCz8u1zqhISEiQp10sg2WMJCQkqWbKkzXihQoVUrFgxm5qgoKAcc2SPFS1a9KqXWq6c43q95GbMmDF68803r7+zyHfc3Nzk5uamM2fOaPny5Ro7dqwyMjKUkZGR4x4oBweHHN9IfyXDMGQYhtLS0iRJX3/9tc0lvE2bNunFF1/UunXrzBvZa9euneO+vri4ONWuXduqXQQAWCzfhqp7wZAhQ8wv8ZQu39915c3KyH+yH4FQvnx5HTp0SK+88ooqV
Kigbt26ydHRUQ0bNtQrr7wiV1dXBQQEaM2aNfr888/Ny8l//PGHYmJi1LRpU3l7e+vvv//Wu+++K1dXV7Vo0UKSbD4BKEn//POPJKlixYrmp/9eeuklTZ06VYMHD9aLL76oVatWacGCBVq6dOndOxgAgJuSby//+fr6SlKul0Cyx3x9fXXy5Emb8UuXLun06dM2NbnNceU2rlZz5fj1esmNs7OzPDw8bF7I35KTkxUVFaUKFSqoc+fOqlevnpYvXy5HR0dJl79J/rHHHlPHjh0VEhKid999V2+//bZeeuklSZKLi4vWrVunFi1aqGzZsmrfvr2KFCmiX375JcdZ1WsJCgrS0qVLFRcXp8qVK2v8+PH6+OOPFR4efkf2GwBw+/LtmaqgoCD5+vpq5cqVqlKliqTLZ3o2bNig3r17S7p8iSQpKUlbtmxRtWrVJF2+XyUrK0s1a9Y0a9544w1lZGSYvxjj4uJUvnx5FS1a1KxZuXKl+vXrZ27/ykstN9IL7g3PPfecnnvuuauO+/r66rPPPrvquJ+fX47LdtfTqFEj5fa4uEaNGum33367qbkAAHknT89UnTt3Ttu2bdO2bdskXb4hfNu2bTp69Kjs7OzUr18/vfXWW1q8eLF27typzp07y8/PT61bt5Z0+XJJs2bN1KNHD23cuFHr169XdHS0IiIi5OfnJ0l6/vnn5eTkpMjISO3evVsxMTH64IMPbC7Lvfzyy4qNjdX48eO1b98+jRw5Ups3b1Z0dLQk3VAvAADg/panZ6o2b96sxo0bmz9nB50uXbpo9uzZGjx4sFJTU9WzZ08lJSWpXr16io2NtXk449y5cxUdHa0mTZrI3t5ebdu21eTJk81xT09PrVixQlFRUapWrZpKlCih4cOH2zzLqk6dOpo3b56GDh2q119/XeXKldOiRYv08MMPmzU30gsAALh/8TU1d9Hd+pqa+r1G37G5gYJq3cxh1y8qAPiaGiAnvqYGAADgHkKoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAskK9DVWZmpoYNG6agoCC5uroqODhYo0ePlmEYZo1hGBo+fLhKlSolV1dXhYWF6eDBgzbznD59Wh07dpSHh4e8vLwUGRmpc+fO2dTs2LFD9evXl4uLi/z9/TV27Ngc/SxcuFAVKlSQi4uLQkNDtWzZsjuz4wAAoMDJ16Hqvffe0/Tp0zV16lTt3btX7733nsaOHaspU6aYNWPHjtXkyZM1Y8YMbdiwQW5ubgoPD9fFixfNmo4dO2r37t2Ki4vTkiVLtHbtWvXs2dMcT0lJUdOmTRUQEKAtW7Zo3LhxGjlypGbNmmXW/PLLL+rQoYMiIyP122+/qXXr1mrdurV27dp1dw4GAADI1+yMK0/75DNPPvmkfHx89Mknn5jL2rZtK1dXV3355ZcyDEN+fn4aOHCgBg0aJElKTk6Wj4+PZs+erYiICO3du1chISHatGmTqlevLkmKjY1VixYt9Pfff8vPz0/Tp0/XG2+8oYSEBDk5OUmSXnvtNS1atEj79u2TJLVv316pqalasmSJ2UutWrVUpUoVz
Zgx44b2JyUlRZ6enkpOTpaHh4clxyg39XuNvmNzAwXVupnD8roFSzSdPySvWwDynRURY+7o/Df6+ztfn6mqU6eOVq5cqQMHDkiStm/frp9//lnNmzeXJB0+fFgJCQkKCwsz1/H09FTNmjUVHx8vSYqPj5eXl5cZqCQpLCxM9vb22rBhg1nToEEDM1BJUnh4uPbv368zZ86YNVduJ7smezsAAOD+ViivG7iW1157TSkpKapQoYIcHByUmZmpt99+Wx07dpQkJSQkSJJ8fHxs1vPx8THHEhISVLJkSZvxQoUKqVixYjY1QUFBOebIHitatKgSEhKuuZ3cpKWlKS0tzfw5JSXlhvcdAAAULPn6TNWCBQs0d+5czZs3T1u3btWcOXP0/vvva86cOXnd2g0ZM2aMPD09zZe/v39etwQAAO6QfB2qXnnlFb322muKiIhQaGioOnXqpP79+2vMmMvXTn19fSVJiYmJNuslJiaaY76+vjp58qTN+KVLl3T69GmbmtzmuHIbV6vJHs/NkCFDlJycbL7++uuvm9p/AABQcOTrUHX+/HnZ29u26ODgoKysLElSUFCQfH19tXLlSnM8JSVFGzZsUO3atSVJtWvXVlJSkrZs2WLWrFq1SllZWapZs6ZZs3btWmVkZJg1cXFxKl++vIoWLWrWXLmd7Jrs7eTG2dlZHh4eNi8AAHBvytehqlWrVnr77be1dOlSHTlyRN9++60mTJigZ555RpJkZ2enfv366a233tLixYu1c+dOde7cWX5+fmrdurUkqWLFimrWrJl69OihjRs3av369YqOjlZERIT8/PwkSc8//7ycnJwUGRmp3bt3KyYmRh988IEGDBhg9vLyyy8rNjZW48eP1759+zRy5Eht3rxZ0dHRd/24AACA/Cdf36g+ZcoUDRs2TP/3f/+nkydPys/PT7169dLw4cPNmsGDBys1NVU9e/ZUUlKS6tWrp9jYWLm4uJg1c+fOVXR0tJo0aSJ7e3u1bdtWkydPNsc9PT21YsUKRUVFqVq1aipRooSGDx9u8yyrOnXqaN68eRo6dKhef/11lStXTosWLdLDDz98dw4GAADI1/L1c6ruNTynCsg7PKcKuHfxnCoAAIB7CKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAC+T5UHTt2TC+88IKKFy8uV1dXhYaGavPmzea4YRgaPny4SpUqJVdXV4WFhengwYM2c5w+fVodO3aUh4eHvLy8FBkZqXPnztnU7NixQ/Xr15eLi4v8/f01duzYHL0sXLhQFSpUkIuLi0JDQ7Vs2bI7s9MAAKDAydeh6syZM6pbt64cHR31ww8/aM+ePRo/fryKFi1q1owdO1aTJ0/WjBkztGHDBrm5uSk8PFwXL140azp27Kjdu3crLi5OS5Ys0dq1a9WzZ09zPCUlRU2bNlVAQIC2bNmicePGaeTIkZo1a5ZZ88svv6hDhw6KjIzUb7/9ptatW6t169batWvX3TkYAAAgX7MzDMPI6yau5rXXXtP69eu1bt26XMcNw5Cfn58GDhyoQYMGSZKSk5Pl4
+Oj2bNnKyIiQnv37lVISIg2bdqk6tWrS5JiY2PVokUL/f333/Lz89P06dP1xhtvKCEhQU5OTua2Fy1apH379kmS2rdvr9TUVC1ZssTcfq1atVSlShXNmDHjhvYnJSVFnp6eSk5OloeHxy0fl+up32v0HZsbKKjWzRyW1y1Youn8IXndApDvrIgYc0fnv9Hf3/n6TNXixYtVvXp1tWvXTiVLltSjjz6qjz76yBw/fPiwEhISFBYWZi7z9PRUzZo1FR8fL0mKj4+Xl5eXGagkKSwsTPb29tqwYYNZ06BBAzNQSVJ4eLj279+vM2fOmDVXbie7Jns7uUlLS1NKSorNCwAA3Jvydaj6448/NH36dJUrV07Lly9X79691bdvX82ZM0eSlJCQIEny8fGxWc/Hx8ccS0hIUMmSJW3GCxUqpGLFitnU5DbHldu4Wk32eG7GjBkjT09P8+Xv739T+w8AAAqOfB2qsrKyVLVqVb3zzjt69NFH1bNnT/Xo0eOGL7fltSFDhig5Odl8/fXXX3ndEgAAuEPydagqVaqUQkJCbJZVrFhRR48elST5+vpKkhITE21qEhMTzTFfX1+dPHnSZvzSpUs6ffq0TU1uc1y5javVZI/nxtnZWR4eHjYvAABwb8rXoapu3brav3+/zbIDBw4oICBAkhQUFCRfX1+tXLnSHE9JSdGGDRtUu3ZtSVLt2rWVlJSkLVu2mDWrVq1SVlaWatasadasXbtWGRkZZk1cXJzKly9vftKwdu3aNtvJrsneDgAAuL/dUqh68MEH9e+//+ZYnpSUpAcffPC2m8rWv39//frrr3rnnXd06NAhzZs3T7NmzVJUVJQkyc7OTv369dNbb72lxYsXa+fOnercubP8/PzUunVrSZfPbDVr1kw9evTQxo0btX79ekVHRysiIkJ+fn6SpOeff15OTk6KjIzU7t27FRMTow8++EADBgwwe3n55ZcVGxur8ePHa9++fRo5cqQ2b96s6Ohoy/YXAAAUXIVuZaUjR44oMzMzx/K0tDQdO3bstpvK9thjj+nbb7/VkCFDNGrUKAUFBWnSpEnq2LGjWTN48GClpqaqZ8+eSkpKUr169RQbGysXFxezZu7cuYqOjlaTJk1kb2+vtm3bavLkyea4p6enVqxYoaioKFWrVk0lSpTQ8OHDbZ5lVadOHc2bN09Dhw7V66+/rnLlymnRokV6+OGHLdtfAABQcN3Uc6oWL14sSWrdurXmzJkjT09PcywzM1MrV65UXFxcjkt2uIznVAF5h+dUAfeu/PKcqps6U5V9Sc3Ozk5dunSxGXN0dFRgYKDGjx9/890CAAAUcDcVqrKysiRdvkF806ZNKlGixB1pCgAAoKC5pXuqDh8+bHUfAAAABdothSpJWrlypVauXKmTJ0+aZ7Cyffrpp7fdGAAAQEFyS6HqzTff1KhRo1S9enWVKlVKdnZ2VvcFAABQoNxSqJoxY4Zmz56tTp06Wd0PAABAgXRLD/9MT09XnTp1rO4FAACgwLqlUNW9e3fNmzfP6l4AAAAKrFu6/Hfx4kXNmjVLP/74ox555BE5OjrajE+YMMGS5gAAAAqKWwpVO3bsUJUqVSRJu3btshnjpnUAAHA/uqVQtXr1aqv7AAAAKNBu6Z4qAAAA2LqlM1WNGze+5mW+VatW3XJDAAAABdEthars+6myZWRkaNu2bdq1a1eOL1oGAAC4H9xSqJo4cWKuy0eOHKlz587dVkMAAAAFkaX3VL3wwgt87x8AALgvWRqq4uPj5eLiYuWUAAAABcItXf5r06aNzc+GYejEiRPavHmzhg0bZkljAAAABckthSpPT0+bn+3t7VW+fHmNGjVKTZs2taQxAACAguSWQtVnn31mdR8AAAAF2i2FqmxbtmzR3r17JUmVKlXSo48+aklTAAAABc0thaqTJ08qIiJCP/30k7y8vCRJSUlJaty4sebPny9vb28rewQAAMj3bunTf3369NHZs2e1e/dunT59W
qdPn9auXbuUkpKivn37Wt0jAABAvndLZ6piY2P1448/qmLFiuaykJAQTZs2jRvVAQDAfemWzlRlZWXJ0dExx3JHR0dlZWXddlMAAAAFzS2Fqscff1wvv/yyjh8/bi47duyY+vfvryZNmljWHAAAQEFxS6Fq6tSpSklJUWBgoIKDgxUcHKygoCClpKRoypQpVvcIAACQ793SPVX+/v7aunWrfvzxR+3bt0+SVLFiRYWFhVnaHAAAQEFxU2eqVq1apZCQEKWkpMjOzk5PPPGE+vTpoz59+uixxx5TpUqVtG7dujvVKwAAQL51U6Fq0qRJ6tGjhzw8PHKMeXp6qlevXpowYYJlzQEAABQUNxWqtm/frmbNml11vGnTptqyZcttNwUAAFDQ3FSoSkxMzPVRCtkKFSqkU6dO3XZTAAAABc1NharSpUtr165dVx3fsWOHSpUqddtNAQAAFDQ3FapatGihYcOG6eLFiznGLly4oBEjRujJJ5+0rDkAAICC4qYeqTB06FB98803euihhxQdHa3y5ctLkvbt26dp06YpMzNTb7zxxh1pFAAAID+7qVDl4+OjX375Rb1799aQIUNkGIYkyc7OTuHh4Zo2bZp8fHzuSKMAAAD52U0//DMgIEDLli3TmTNndOjQIRmGoXLlyqlo0aJ3oj8AAIAC4ZaeqC5JRYsW1WOPPWZlLwAAAAXWLX33HwAAAGwRqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAoQqAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgAAAAsQqgAAACxAqAIAALAAoQoAAMAChCoAAAALEKoAAAAsQKgCAACwAKEKAADAAgUqVL377ruys7NTv379zGUXL15UVFSUihcvLnd3d7Vt21aJiYk26x09elQtW7ZU4cKFVbJkSb3yyiu6dOmSTc1PP/2kqlWrytnZWWXLltXs2bNzbH/atGkKDAyUi4uLatasqY0bN96J3QQAAAVQgQlVmzZt0syZM/XII4/YLO/fv7++//57LVy4UGvWrNHx48fVpk0bczwzM1MtW7ZUenq6fvnlF82ZM0ezZ8/W8OHDzZrDhw+rZcuWaty4sbZt26Z+/fqpe/fuWr58uVkTExOjAQMGaMSIEdq6dasqV66s8PBwnTx58s7vPAAAyPcKRKg6d+6cOnbsqI8++khFixY1lycnJ+uTTz7RhAkT9Pjjj6tatWr67LPP9Msvv+jXX3+VJK1YsUJ79uzRl19+qSpVqqh58+YaPXq0pk2bpvT0dEnSjBkzFBQUpPHjx6tixYqKjo7Ws88+q4kTJ5rbmjBhgnr06KFu3bopJCREM2bMUOHChfXpp5/e3YMBAADypQIRqqKiotSyZUuFhYXZLN+yZYsyMjJslleoUEEPPPCA4uPjJUnx8fEKDQ2Vj4+PWRMeHq6UlBTt3r3brPnv3OHh4eYc6enp2rJli02Nvb29wsLCzJrcpKWlKSUlxeYFAADuTYXyuoHrmT9/vrZu3apNmzblGEtISJCTk5O8vLxslvv4+CghIcGsuTJQZY9nj12rJiUlRRcuXNCZM2eUmZmZa82+ffuu2vuYMWP05ptv3tiOAgCAAi1fn6n666+/9PLLL2vu3LlycXHJ63Zu2pAhQ5ScnGy+/vrrr7xuCQAA3CH5OlRt2bJFJ0+eVNWqVVWoUCEVKlRIa9as0eTJk1WoUCH5+PgoPT1dSUlJNuslJibK19dXkuTr65vj04DZP1+vxsPDQ66uripRooQcHBxyrcmeIzfOzs7y8PCweQEAgHtTvg5VTZo00c6dO7Vt2zbzVb16dXXs2NH8b0dHR61cudJcZ//+/Tp69Khq164tSapdu7Z27txp8ym9uLg4eXh4KCQkxKy5co7smuw5nJycVK1aNZuarKwsrVy50qwBAAD3t3x9T1WRIkX08MMP2yxzc3NT8eLFzeWRkZEaMGCAihUrJg8PD/Xp00e1a
9dWrVq1JElNmzZVSEiIOnXqpLFjxyohIUFDhw5VVFSUnJ2dJUkvvfSSpk6dqsGDB+vFF1/UqlWrtGDBAi1dutTc7oABA9SlSxdVr15dNWrU0KRJk5Samqpu3brdpaMBAADys3wdqm7ExIkTZW9vr7Zt2yotLU3h4eH68MMPzXEHBwctWbJEvXv3Vu3ateXm5qYuXbpo1KhRZk1QUJCWLl2q/v3764MPPlCZMmX08ccfKzw83Kxp3769Tp06peHDhyshIUFVqlRRbGxsjpvXAQDA/cnOMAwjr5u4X6SkpMjT01PJycl39P6q+r1G37G5gYJq3cxhed2CJZrOH5LXLQD5zoqIMXd0/hv9/Z2v76kCAAAoKAhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWyNehasyYMXrsscdUpEgRlSxZUq1bt9b+/fttai5evKioqCgVL15c7u7uatu2rRITE21qjh49qpYtW6pw4cIqWbKkXnnlFV26dMmm5qefflLVqlXl7OyssmXLavbs2Tn6mTZtmgIDA+Xi4qKaNWtq48aNlu8zAAAomPJ1qFqzZo2ioqL066+/Ki4uThkZGWratKlSU1PNmv79++v777/XwoULtWbNGh0/flxt2rQxxzMzM9WyZUulp6frl19+0Zw5czR79mwNHz7crDl8+LBatmypxo0ba9u2berXr5+6d++u5cuXmzUxMTEaMGCARowYoa1bt6py5coKDw/XyZMn787BAAAA+ZqdYRhGXjdxo06dOqWSJUtqzZo1atCggZKTk+Xt7a158+bp2WeflSTt27dPFStWVHx8vGrVqqUffvhBTz75pI4fPy4fHx9J0owZM/Tqq6/q1KlTcnJy0quvvqqlS5dq165d5rYiIiKUlJSk2NhYSVLNmjX12GOPaerUqZKkrKws+fv7q0+fPnrttdduqP+UlBR5enoqOTlZHh4eVh4aG/V7jb5jcwMF1bqZw/K6BUs0nT8kr1sA8p0VEWPu6Pw3+vs7X5+p+q/k5GRJUrFixSRJW7ZsUUZGhsLCwsyaChUq6IEHHlB8fLwkKT4+XqGhoWagkqTw8HClpKRo9+7dZs2Vc2TXZM+Rnp6uLVu22NTY29srLCzMrMlNWlqaUlJSbF4AAODeVGBCVVZWlvr166e6devq4YcfliQlJCTIyclJXl5eNrU+Pj5KSEgwa64MVNnj2WPXqklJSdGFCxf0zz//KDMzM9ea7DlyM2bMGHl6epovf3//m99xAABQIBSYUBUVFaVdu3Zp/vz5ed3KDRsyZIiSk5PN119//ZXXLQEAgDukUF43cCOio6O1ZMkSrV27VmXKlDGX+/r6Kj09XUlJSTZnqxITE+Xr62vW/PdTetmfDryy5r+fGExMTJSHh4dcXV3l4OAgBweHXGuy58iNs7OznJ2db36HAQBAgZOvz1QZhqHo6Gh9++23WrVqlYKCgmzGq1WrJkdHR61cudJctn//fh09elS1a9eWJNWuXVs7d+60+ZReXFycPDw8FBISYtZcOUd2TfYcTk5Oqlatmk1NVlaWVq5cadYAA
ID7W74+UxUVFaV58+bpu+++U5EiRcz7lzw9PeXq6ipPT09FRkZqwIABKlasmDw8PNSnTx/Vrl1btWrVkiQ1bdpUISEh6tSpk8aOHauEhAQNHTpUUVFR5lmkl156SVOnTtXgwYP14osvatWqVVqwYIGWLl1q9jJgwAB16dJF1atXV40aNTRp0iSlpqaqW7dud//AAACAfCdfh6rp06dLkho1amSz/LPPPlPXrl0lSRMnTpS9vb3atm2rtLQ0hYeH68MPPzRrHRwctGTJEvXu3Vu1a9eWm5ubunTpolGjRpk1QUFBWrp0qfr3768PPvhAZcqU0ccff6zw8HCzpn379jp16pSGDx+uhIQEValSRbGxsTluXgcAAPenAvWcqoKO51QBeYfnVAH3Lp5TBQAAcA8hVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFiAUAUAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBU3aRp06YpMDBQLi4uqlmzpjZu3JjXLQEAgHyAUHUTYmJiNGDAAI0YMUJbt25V5cqVFR4erpMnT+Z1awAAII8Rqm7ChAkT1KNHD3Xr1k0hISGaMWOGChcurE8//TSvWwMAAHmMUHWD0tPTtWXLFoWFhZnL7O3tFRYWpvj4+DzsDAAA5AeF8rqBguKff/5RZmamfHx8bJb7+Pho3759ua6TlpamtLQ08+fk5GRJUkpKyp1rVNKl9It3dH6gILrT77u75dL5tOsXAfeZO/3+zp7fMIxr1hGq7qAxY8bozTffzLHc398/D7oB7m+es9/J6xYA3CGekRPvynbOnj0rT0/Pq44Tqm5QiRIl5ODgoMTERJvliYmJ8vX1zXWdIUOGaMCAAebPWVlZOn36tIoXLy47O7s72i/yXkpKivz9/fXXX3/Jw8Mjr9sBYCHe3/cXwzB09uxZ+fn5XbOOUHWDnJycVK1aNa1cuVKtW7eWdDkkrVy5UtHR0bmu4+zsLGdnZ5tlXl5ed7hT5DceHh78pQvco3h/3z+udYYqG6HqJgwYMEBdunRR9erVVaNGDU2aNEmpqanq1q1bXrcGAADyGKHqJrRv316nTp3S8OHDlZCQoCpVqig2NjbHzesAAOD+Q6i6SdHR0Ve93AdcydnZWSNGjMhxCRhAwcf7G7mxM673+UAAAABcFw//BAAAsAChCgAAwAKEKgAAAAsQqoB8IjAwUJMmTcrrNgBcw08//SQ7OzslJSVds4738/2JUIX7QteuXWVnZ6d3333XZvmiRYvu+tPtZ8+enetDYDdt2qSePXve1V6Ae1X2e97Ozk5OTk4qW7asRo0apUuXLt3WvHXq1NGJEyfMB0HyfsaVCFW4b7i4uOi9997TmTNn8rqVXHl7e6tw4cJ53QZwz2jWrJlOnDihgwcPauDAgRo5cqTGjRt3W3M6OTnJ19f3uv8Y4/18fyJU4b4RFhYmX19fjRkz5qo1P//8s+rXry9XV1f5+/urb9++Sk1NNcdPnDihli1bytXVVUFBQZo3b16O0/wTJkxQaGio3Nzc5O/vr//7v//TuXPnJF2+dNCtWzclJyeb/4oeOXKkJNvLBc8//7zat29v01tGRoZKlCihz
z//XNLlr0kaM2aMgoKC5OrqqsqVK+urr76y4EgB9wZnZ2f5+voqICBAvXv3VlhYmBYvXqwzZ86oc+fOKlq0qAoXLqzmzZvr4MGD5np//vmnWrVqpaJFi8rNzU2VKlXSsmXLJNle/uP9jP8iVOG+4eDgoHfeeUdTpkzR33//nWP8999/V7NmzdS2bVvt2LFDMTEx+vnnn20e9tq5c2cdP35cP/30k77++mvNmjVLJ0+etJnH3t5ekydP1u7duzVnzhytWrVKgwcPlnT50sGkSZPk4eGhEydO6MSJExo0aFCOXjp27Kjvv//eDGOStHz5cp0/f17PPPOMJGnMmDH6/PPPNWPGDO3evVv9+/fXCy+8oDVr1lhyvIB7jaurq9LT09W1a1dt3rxZixcvVnx8vAzDUIsWLZSRkSFJioqKUlpamtauXaudO3fqvffek7u7e475eD8jBwO4D3Tp0sV4+umnDcMwjFq1ahkvvviiYRiG8e233xrZb4PIyEijZ8+eNuutW7fOsLe3Ny5cuGDs3bvXkGRs2rTJHD948KAhyZg4ceJVt71w4UKjePHi5s+fffaZ4enpmaMuICDAnCcjI8MoUaKE8fnnn5vjHTp0MNq3b28YhmFcvHjRKFy4sPHLL7/YzBEZGWl06NDh2gcDuA9c+Z7Pysoy4uLiDGdnZ6N169aGJGP9+vVm7T///GO4uroaCxYsMAzDMEJDQ42RI0fmOu/q1asNScaZM2cMw+D9DFt8TQ3uO++9954ef/zxHP+i3L59u3bs2KG5c+eaywzDUFZWlg4fPqwDBw6oUKFCqlq1qjletmxZFS1a1GaeH3/8UWPGjNG+ffuUkpKiS5cu6eLFizp//vwN32NRqFAhPffcc5o7d646deqk1NRUfffdd5o/f74k6dChQzp//ryeeOIJm/XS09P16KOP3tTxAO5VS5Yskbu7uzIyMpSVlaXnn39ebdq00ZIlS1SzZk2zrnjx4ipfvrz27t0rSerbt6969+6tFStWKCwsTG3bttUjjzxyy33wfr5/EKpw32nQoIHCw8M1ZMgQde3a1Vx+7tw59erVS3379s2xzgMPPKADBw5cd+4jR47oySefVO/evfX222+rWLFi+vnnnxUZGan09PSbunG1Y8eOatiwoU6ePKm4uDi5urqqWbNmZq+StHTpUpUuXdpmPb6LDLiscePGmj59upycnOTn56dChQpp8eLF112ve/fuCg8P19KlS7VixQqNGTNG48ePV58+fW65F97P9wdCFe5L7777rqpUqaLy5cuby6pWrao9e/aobNmyua5Tvnx5Xbp0Sb/99puqVasm6fK/MK/8NOGWLVuUlZWl8ePHy97+8i2LCxYssJnHyclJmZmZ1+2xTp068vf3V0xMjH744Qe1a9dOjo6OkqSQkBA5Ozvr6NGjatiw4c3tPHCfcHNzy/F+rlixoi5duqQNGzaoTp06kqR///1X+/fvV0hIiFnn7++vl156SS+99JKGDBmijz76KNdQxfsZVyJU4b4UGhqqjh07avLkyeayV199VbVq1VJ0dLS6d+8uNzc37dmzR3FxcZo6daoqVKigsLAw9ezZU9OnT5ejo6MGDhwoV1dX8+PVZcuWVUZGhqZMmaJWrVpp/fr1mjFjhs22AwMDde7cOa1cuVKVK1dW4cKFr3oG6/nnn9eMGTN04MABrV692lxepEgRDRo0SP3791dWVpbq1aun5ORkrV+/Xh4eHurSpcsdOGpAwVeuXDk9/fTT6tGjh2bOnKkiRYrotddeU+nSpfX0009Lkvr166fmzZvroYce0pkzZ7R69WpVrFgx1/l4P8NGXt/UBdwNV960mu3w4cOGk5OTceXbYOPGjcYTTzxhuLu7G25ubsYjjzxivP322+b48ePHjebNmxvOzs5GQECAMW/ePKNkyZLGjBkzzJoJEyYYpUqVMlxdXY3w8HDj888/t7mx1TAM46WXXjKKFy9uSDJGjBhhGIbtja3Z9
uzZY0gyAgICjKysLJuxrKwsY9KkSUb58uUNR0dHw9vb2wgPDzfWrFlzewcLuAfk9p7Pdvr0aaNTp06Gp6en+T49cOCAOR4dHW0EBwcbzs7Ohre3t9GpUyfjn3/+MQwj543qhsH7Gf8/O8MwjDzMdECB9vfff8vf318//vijmjRpktftAADyEKEKuAmrVq3SuXPnFBoaqhMnTmjw4ME6duyYDhw4YN4fAQC4P3FPFXATMjIy9Prrr+uPP/5QkSJFVKdOHc2dO5dABQDgTBUAAIAV+JoaAAAACxCqAAAALECoAgAAsAChCgAAwAKEKgC4gxo1aqR+/fpds2b27Nny8vK6K/0AuHMIVQAKvK5du6p169a3tO6RI0dkZ2dnvooXL66mTZvqt99+s6S3b775RqNHjzZ/DgwM1KRJk2xq2rdvf0Nf2A0gfyNUAYCkH3/8USdOnNDy5ct17tw5NW/eXElJSbc9b7FixVSkSJFr1ri6uqpkyZK3vS0AeYtQBeCe89VXXyk0NFSurq4qXry4wsLClJqaes11ihcvLl9fX1WvXl3vv/++EhMTtWHDBknS119/rUqVKsnZ2VmBgYEaP368zboffvihypUrJxcXF/n4+OjZZ581x668/NeoUSP9+eef6t+/v3lmTLK9/HfgwAHZ2dlp3759NtuYOHGigoODzZ937dql5s2by93dXT4+PurUqZP++eefWzpeAKxBqAJwTzlx4oQ6dOigF198UXv37tVPP/2kNm3a6Gaec+zq6ipJSk9P15YtW/Tcc88pIiJCO3fu1MiRIzVs2DDNnj1bkrR582b17dtXo0aN0v79+xUbG6sGDRrkOu8333yjMmXKaNSoUTpx4oROnDiRo+ahhx5S9erVNXfuXJvlc+fO1fPPPy9JSkpK0uOPP65HH31UmzdvVmxsrBITE/Xcc8/d8D4CsB5fUwPgnnLixAldunRJbdq0UUBAgCQpNDT0htdPSkrS6NGj5e7urho1amjAgAFq0qSJhg0bJuly6NmzZ4/GjRunrl276ujRo3Jzc9OTTz6pIkWKKCAgQI8++miucxcrVkwODg4qUqSIfH19r9pDx44dNXXqVPNerAMHDmjLli368ssvJUlTp07Vo48+qnfeecdc59NPP5W/v78OHDighx566Ib3F4B1OFMF4J5SuXJlNWnSRKGhoWrXrp0++ugjnTlz5rrr1alTR+7u7ipatKi2b9+umJgY+fj4aO/evapbt65Nbd26dXXw4EFlZmbqiSeeUEBAgB588EF16tRJc+fO1fnz529rHyIiInTkyBH9+uuvki6fpapataoqVKggSdq+fbtWr14td3d385U99vvvv9/WtgHcOkIVgHuKg4OD4uLi9MMPPygkJERTpkxR+fLldfjw4WuuFxMTo+3bt+vMmTP6/fff1aJFixvaXpEiRbR161b973//U6lSpTR8+HBVrlz5tm5y9/X11eOPP6558+ZJkubNm6eOHTua4+fOnVOrVq20bds2m9fBgweveukRwJ1HqAJwz7Gzs1PdunX15ptv6rfffpOTk5O+/fbba67j7++v4ODgHM+LqlixotavX2+zbP369XrooYfk4OAgSSpUqJDCwsI0duxY7dixQ0eOHNGqVaty3Y6Tk5MyMzOvuw8dO3ZUTEyM4uPj9ccffygiIsIcq1q1qnbv3q3AwECVLVvW5uXm5nbduQHcGYQqAPeUDRs26J133tHmzZt19OhRffPNNzp16pQqVqx4S/MNHDhQK1eu1OjRo3XgwAHNmTNHU6dO1aBBgyRJS5Ys0eTJk7Vt2zb9+eef+vzzz5WVlaXy5cvnOl9gYKDWrl2rY8eOXfPTem3atNHZs2fVu3dvNW7cWH5+fuZYVFSUTp8+rQ4dOmjTpk36/ffftXz5cnXr1u2GAhuAO4NQBeCe4uHhobVr16pFixZ66KGHNHToUI0fP17Nmze/pfmqVq2qBQsWaP78+Xr44Yc1fPhwjRo1Sl27d
pUkeXl56ZtvvtHjjz+uihUrasaMGfrf//6nSpUq5TrfqFGjdOTIEQUHB8vb2/uq2y1SpIhatWql7du321z6kyQ/Pz+tX79emZmZatq0qUJDQ9WvXz95eXnJ3p6/1oG8YmfczOeMAQAAkCv+SQMAAGABQhUAAIAFCFUAAAAWIFQBAABYgFAFAABgAUIVAACABQhVAAAAFiBUAQAAWIBQBQAAYAFCFQAAgAUIVQAAABYgVAEAAFjg/wO99jzl2lXTAgAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ax=sns.countplot(x=game_df.is_positive,palette=\"viridis\")\n", "# Add counts on top of each bar\n", "for p in ax.patches:\n", " count = int(p.get_height()) # Get the height of the bar (count)\n", " ax.annotate(f'{count}', (p.get_x() + p.get_width() / 2, p.get_height()), \n", " ha='center', va='bottom', fontsize=10, color='black')\n", "\n", "# Customize the plot\n", "plt.title('Count of Positive and Negative Labels')\n", "plt.xlabel('Is Positive')\n", "plt.ylabel('Count')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3.3 preprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1. lowercase all the words\n", "2. Remove numbers\n", "3. Remove punctuation\n", "4. Remove stopwords\n", "5. Stemming/Lemmatization\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [
"# Pre-processing helpers.\n",
"# Built once when this cell runs: the stopword set, lemmatizer and regexes\n",
"# do not change between calls, so they are not rebuilt for every review.\n",
"_STOP_WORDS = frozenset(stopwords.words('english'))\n",
"_NEGATIONS = frozenset({'no', 'nor', 'not'})\n",
"_LEMMATIZER = WordNetLemmatizer()\n",
"_DIGITS_RE = re.compile(r'\\d+')\n",
"_PUNCT_RE = re.compile(r'[^\\w\\s]')\n",
"\n",
"\n",
"def preprocess_text(text, keep_negations=False):\n",
"    \"\"\"Clean one review and return it as a space-joined string of tokens.\n",
"\n",
"    Steps, in order: lowercase, remove digits, remove punctuation,\n",
"    tokenize, drop English stopwords, lemmatize with WordNet.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    text : str\n",
"        Raw review text. Any non-string value (for example NaN from a\n",
"        missing review) yields '' instead of raising AttributeError.\n",
"    keep_negations : bool, default False\n",
"        If True, the words 'no', 'nor' and 'not' survive stopword removal,\n",
"        so 'not fun' is not collapsed to 'fun'. The default removes them,\n",
"        which is the behaviour this function has always had.\n",
"\n",
"    Returns\n",
"    -------\n",
"    str\n",
"        Cleaned text; may be empty when nothing survives the filters.\n",
"    \"\"\"\n",
"    if not isinstance(text, str):\n",
"        return ''\n",
"\n",
"    # Lowercase first, then strip digits and punctuation\n",
"    text = _DIGITS_RE.sub('', text.lower())\n",
"    text = _PUNCT_RE.sub('', text)\n",
"\n",
"    stop_words = _STOP_WORDS - _NEGATIONS if keep_negations else _STOP_WORDS\n",
"    kept_tokens = [tok for tok in word_tokenize(text) if tok not in stop_words]\n",
"\n",
"    return ' '.join(_LEMMATIZER.lemmatize(tok) for tok in kept_tokens)"
] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_5644\\1109731849.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a 
slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " game_df['clean_content']=game_df.content.apply(preprocess_text)\n" ] } ], "source": [
"# Build a new frame with the extra column rather than assigning onto what\n",
"# pandas treats as a slice of another DataFrame; the in-place assignment\n",
"# is what raises SettingWithCopyWarning.\n",
"game_df = game_df.assign(clean_content=game_df['content'].apply(preprocess_text))"
] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
contentis_positiveclean_content
0At least its a counter strike -1/100Negativeleast counter strike
1Uh... So far my playthrough has not been great...Negativeuh far playthrough great glitched texture cont...
2Better mechanics than cs2Negativebetter mechanic c
3buggy mess and NOT fun to play at allNegativebuggy mess fun play
4Whoever came up with this, is gonna fucking ge...Negativewhoever came gon na fucking get negative revie...
............
201139This trash for sick anime clowns!Negativetrash sick anime clown
201140Not noob friendlyNegativenoob friendly
201141Total trash for anime clowns!Negativetotal trash anime clown
201142Unplayable. It keeps trying to update but then...Negativeunplayable keep trying update immediately stop...
201143Bring back csgoNegativebring back csgo
\n", "

200717 rows × 3 columns

\n", "
" ], "text/plain": [ " content is_positive \\\n", "0 At least its a counter strike -1/100 Negative \n", "1 Uh... So far my playthrough has not been great... Negative \n", "2 Better mechanics than cs2 Negative \n", "3 buggy mess and NOT fun to play at all Negative \n", "4 Whoever came up with this, is gonna fucking ge... Negative \n", "... ... ... \n", "201139 This trash for sick anime clowns! Negative \n", "201140 Not noob friendly Negative \n", "201141 Total trash for anime clowns! Negative \n", "201142 Unplayable. It keeps trying to update but then... Negative \n", "201143 Bring back csgo Negative \n", "\n", " clean_content \n", "0 least counter strike \n", "1 uh far playthrough great glitched texture cont... \n", "2 better mechanic c \n", "3 buggy mess fun play \n", "4 whoever came gon na fucking get negative revie... \n", "... ... \n", "201139 trash sick anime clown \n", "201140 noob friendly \n", "201141 total trash anime clown \n", "201142 unplayable keep trying update immediately stop... \n", "201143 bring back csgo \n", "\n", "[200717 rows x 3 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "game_df" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_5644\\1073721943.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " game_df['is_positive']=np.where(game_df.is_positive=='Negative',0,1)\n" ] } ], "source": [
"# Encode the label as an integer: 'Negative' -> 0, any other label -> 1.\n",
"# The dtype guard keeps this cell idempotent: run a second time on the\n",
"# already-encoded column, the comparison with 'Negative' is False for every\n",
"# row and the whole column would become 1.\n",
"if not pd.api.types.is_numeric_dtype(game_df['is_positive']):\n",
"    # assign() returns a new frame, which avoids SettingWithCopyWarning\n",
"    game_df = game_df.assign(\n",
"        is_positive=np.where(game_df['is_positive'] == 'Negative', 0, 1)\n",
"    )"
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
contentis_positiveclean_content
0At least its a counter strike -1/1000least counter strike
1Uh... So far my playthrough has not been great...0uh far playthrough great glitched texture cont...
2Better mechanics than cs20better mechanic c
3buggy mess and NOT fun to play at all0buggy mess fun play
4Whoever came up with this, is gonna fucking ge...0whoever came gon na fucking get negative revie...
\n", "
" ], "text/plain": [ " content is_positive \\\n", "0 At least its a counter strike -1/100 0 \n", "1 Uh... So far my playthrough has not been great... 0 \n", "2 Better mechanics than cs2 0 \n", "3 buggy mess and NOT fun to play at all 0 \n", "4 Whoever came up with this, is gonna fucking ge... 0 \n", "\n", " clean_content \n", "0 least counter strike \n", "1 uh far playthrough great glitched texture cont... \n", "2 better mechanic c \n", "3 buggy mess fun play \n", "4 whoever came gon na fucking get negative revie... " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [
"game_df.head()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Train Test split" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(140501, 1)\n", "(140501, 1)\n", "(60216, 1)\n", "(60216, 1)\n" ] } ], "source": [
"# Features and target are both kept as one-column DataFrames\n",
"X = game_df[['clean_content']]\n",
"y = game_df[['is_positive']]\n",
"\n",
"# 70/30 split with a fixed seed\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.3, random_state=0\n",
")\n",
"\n",
"# Shapes are printed in the order X_train, y_train, X_test, y_test\n",
"for part in (X_train, y_train, X_test, y_test):\n",
"    print(part.shape)\n"
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [
"# Training documents as a plain Python list of strings\n",
"corpus = X_train['clean_content'].tolist()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. 
TF_IDF construction" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [
"# Unigrams through trigrams, vocabulary capped at 50,000 terms.\n",
"# The vectorizer is fitted on the training corpus only.\n",
"TF_IDF = TfidfVectorizer(max_features=50000, ngram_range=(1, 3))\n",
"tfidf_matrix = TF_IDF.fit_transform(corpus)"
] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(140501, 50000)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tfidf_matrix.shape" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [
"# Reduce the sparse TF-IDF matrix to 300 dense components.\n",
"# TruncatedSVD is already imported in the first code cell, so it is not\n",
"# re-imported here.\n",
"svd = TruncatedSVD(n_components=300, random_state=42)\n",
"reduced_matrix = svd.fit_transform(tfidf_matrix)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6. Model building" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\USER\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\sklearn\\base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", " return fit_method(estimator, *args, **kwargs)\n" ] }, { "data": { "text/html": [ "
RandomForestClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestClassifier()" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Fixed seed so the trained forest (and the model saved later) is reproducible\n",
"RF = RandomForestClassifier(random_state=42)\n",
"# y_train is a one-column DataFrame; flatten it to the 1-D (n_samples,)\n",
"# shape scikit-learn expects, which avoids DataConversionWarning.\n",
"RF.fit(reduced_matrix, np.ravel(y_train))"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 7. Convert test data into vectors" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(60216, 50000)\n", "(60216, 300)\n" ] } ], "source": [
"# Only transform() here: the vectorizer and SVD were fitted on the training data\n",
"test_matrix = TF_IDF.transform(X_test.clean_content)\n",
"reduced_test_matrix = svd.transform(test_matrix)\n",
"\n",
"print(test_matrix.shape)\n",
"print(reduced_test_matrix.shape)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 8. Predictions and classification report" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "predict = RF.predict(reduced_test_matrix)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.77 0.82 0.79 27553\n", " 1 0.84 0.79 0.81 32663\n", "\n", " accuracy 0.80 60216\n", " macro avg 0.80 0.81 0.80 60216\n", "weighted avg 0.81 0.80 0.81 60216\n", "\n" ] } ], "source": [
"# Argument order is (y_true, y_pred); passing them the other way round\n",
"# swaps precision with recall and reports support for predicted classes.\n",
"print(classification_report(y_test, predict))"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 9. 
Save Model and Vectorizer" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['random_forest_model.pkl']" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# dump/load come from the joblib import in the first code cell\n",
"\n",
"# Save the TF-IDF Vectorizer\n",
"dump(TF_IDF, 'tfidf_vectorizer.pkl')\n",
"\n",
"# Save the Dimensionality Reduction Transformer (SVD)\n",
"dump(svd, 'dimensionality_reduction.pkl')\n",
"\n",
"# Save the Trained Model\n",
"dump(RF, 'random_forest_model.pkl')\n"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 10. Load model and vectorizer" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [
"# Reload the three fitted artifacts saved above\n",
"tf_idf = load('tfidf_vectorizer.pkl')\n",
"isa_pipeline = load('dimensionality_reduction.pkl')\n",
"model = load('random_forest_model.pkl')"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 11. Sample prompt" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1])" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [
"prompt = \"nice game\"\n",
"# Clean the prompt with the same preprocess_text used on the training text,\n",
"# and use separate names so the training matrices tfidf_matrix and\n",
"# reduced_matrix are not overwritten by this single-row example.\n",
"prompt_tfidf = tf_idf.transform([preprocess_text(prompt)])\n",
"prompt_reduced = isa_pipeline.transform(prompt_tfidf)\n",
"model.predict(prompt_reduced)"
] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }