{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Required packages" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pylab as plt\n", "\n", "import re\n", "import string\n", "\n", "from nltk.stem import PorterStemmer\n", "from nltk.corpus import stopwords\n", "from nltk.stem import WordNetLemmatizer\n", "from nltk.tokenize import word_tokenize\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from sklearn.feature_extraction.text import TfidfVectorizer # tfidf\n", "from sklearn.decomposition import TruncatedSVD\n", "\n", "\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import classification_report\n", "\n", "from joblib import load, dump" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 2. Reading data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | id | \n", "app_id | \n", "content | \n", "author_id | \n", "is_positive | \n", "
|---|---|---|---|---|---|
| 0 | \n", "181331361 | \n", "100 | \n", "At least its a counter strike -1/100 | \n", "7.656120e+16 | \n", "Negative | \n", "
| 1 | \n", "180872601 | \n", "100 | \n", "Uh... So far my playthrough has not been great... | \n", "7.656120e+16 | \n", "Negative | \n", "
| 2 | \n", "177836246 | \n", "100 | \n", "Better mechanics than cs2 | \n", "7.656120e+16 | \n", "Negative | \n", "
| 3 | \n", "177287444 | \n", "100 | \n", "buggy mess and NOT fun to play at all | \n", "7.656120e+16 | \n", "Negative | \n", "
| 4 | \n", "176678990 | \n", "100 | \n", "Whoever came up with this, is gonna fucking ge... | \n", "7.656120e+16 | \n", "Negative | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 201139 | \n", "118775009 | \n", "570 | \n", "This trash for sick anime clowns! | \n", "7.656120e+16 | \n", "Negative | \n", "
| 201140 | \n", "118771828 | \n", "570 | \n", "Not noob friendly | \n", "7.656120e+16 | \n", "Negative | \n", "
| 201141 | \n", "118771331 | \n", "570 | \n", "Total trash for anime clowns! | \n", "7.656120e+16 | \n", "Negative | \n", "
| 201142 | \n", "182234883 | \n", "730 | \n", "Unplayable. It keeps trying to update but then... | \n", "7.656120e+16 | \n", "Negative | \n", "
| 201143 | \n", "177842193 | \n", "730 | \n", "Bring back csgo | \n", "7.656120e+16 | \n", "Negative | \n", "
201144 rows × 5 columns
\n", "| \n", " | content | \n", "is_positive | \n", "
|---|---|---|
| 0 | \n", "At least its a counter strike -1/100 | \n", "Negative | \n", "
| 1 | \n", "Uh... So far my playthrough has not been great... | \n", "Negative | \n", "
| 2 | \n", "Better mechanics than cs2 | \n", "Negative | \n", "
| 3 | \n", "buggy mess and NOT fun to play at all | \n", "Negative | \n", "
| 4 | \n", "Whoever came up with this, is gonna fucking ge... | \n", "Negative | \n", "
| ... | \n", "... | \n", "... | \n", "
| 201139 | \n", "This trash for sick anime clowns! | \n", "Negative | \n", "
| 201140 | \n", "Not noob friendly | \n", "Negative | \n", "
| 201141 | \n", "Total trash for anime clowns! | \n", "Negative | \n", "
| 201142 | \n", "Unplayable. It keeps trying to update but then... | \n", "Negative | \n", "
| 201143 | \n", "Bring back csgo | \n", "Negative | \n", "
201144 rows × 2 columns
\n", "| \n", " | content | \n", "is_positive | \n", "clean_content | \n", "
|---|---|---|---|
| 0 | \n", "At least its a counter strike -1/100 | \n", "Negative | \n", "least counter strike | \n", "
| 1 | \n", "Uh... So far my playthrough has not been great... | \n", "Negative | \n", "uh far playthrough great glitched texture cont... | \n", "
| 2 | \n", "Better mechanics than cs2 | \n", "Negative | \n", "better mechanic c | \n", "
| 3 | \n", "buggy mess and NOT fun to play at all | \n", "Negative | \n", "buggy mess fun play | \n", "
| 4 | \n", "Whoever came up with this, is gonna fucking ge... | \n", "Negative | \n", "whoever came gon na fucking get negative revie... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 201139 | \n", "This trash for sick anime clowns! | \n", "Negative | \n", "trash sick anime clown | \n", "
| 201140 | \n", "Not noob friendly | \n", "Negative | \n", "noob friendly | \n", "
| 201141 | \n", "Total trash for anime clowns! | \n", "Negative | \n", "total trash anime clown | \n", "
| 201142 | \n", "Unplayable. It keeps trying to update but then... | \n", "Negative | \n", "unplayable keep trying update immediately stop... | \n", "
| 201143 | \n", "Bring back csgo | \n", "Negative | \n", "bring back csgo | \n", "
200717 rows × 3 columns
\n", "| \n", " | content | \n", "is_positive | \n", "clean_content | \n", "
|---|---|---|---|
| 0 | \n", "At least its a counter strike -1/100 | \n", "0 | \n", "least counter strike | \n", "
| 1 | \n", "Uh... So far my playthrough has not been great... | \n", "0 | \n", "uh far playthrough great glitched texture cont... | \n", "
| 2 | \n", "Better mechanics than cs2 | \n", "0 | \n", "better mechanic c | \n", "
| 3 | \n", "buggy mess and NOT fun to play at all | \n", "0 | \n", "buggy mess fun play | \n", "
| 4 | \n", "Whoever came up with this, is gonna fucking ge... | \n", "0 | \n", "whoever came gon na fucking get negative revie... | \n", "
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier()