{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import re\n", "import nltk\n", "from nltk.corpus import stopwords\n", "from nltk.stem import WordNetLemmatizer\n", "import tensorflow as tf\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Fetch the NLTK data packages this notebook downloads up front.\n", "# quiet=True keeps the per-package download log, which embeds the local\n", "# nltk_data directory path, out of the saved cell output.\n", "NLTK_PACKAGES = ('stopwords', 'wordnet', 'omw-1.4', 'punkt')\n", "\n", "# nltk.download returns a boolean; collect the packages that did not succeed\n", "# so a failed download is reported instead of being silently discarded.\n", "failed_packages = [name for name in NLTK_PACKAGES if not nltk.download(name, quiet=True)]\n", "if failed_packages:\n", "    print(f'Could not download NLTK packages: {failed_packages}')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | review | \n", "sentiment | \n", "
|---|---|---|
| 0 | \n", "One of the other reviewers has mentioned that ... | \n", "positive | \n", "
| 1 | \n", "A wonderful little production. <br /><br />The... | \n", "positive | \n", "
| 2 | \n", "I thought this was a wonderful way to spend ti... | \n", "positive | \n", "
| 3 | \n", "Basically there's a family where a little boy ... | \n", "negative | \n", "
| 4 | \n", "Petter Mattei's \"Love in the Time of Money\" is... | \n", "positive | \n", "
| ... | \n", "... | \n", "... | \n", "
| 49995 | \n", "I thought this movie did a down right good job... | \n", "positive | \n", "
| 49996 | \n", "Bad plot, bad dialogue, bad acting, idiotic di... | \n", "negative | \n", "
| 49997 | \n", "I am a Catholic taught in parochial elementary... | \n", "negative | \n", "
| 49998 | \n", "I'm going to have to disagree with the previou... | \n", "negative | \n", "
| 49999 | \n", "No one expects the Star Trek movies to be high... | \n", "negative | \n", "
50000 rows × 2 columns
\n", "| \n", " | review | \n", "sentiment | \n", "c_review | \n", "n_sentiment | \n", "
|---|---|---|---|---|
| 0 | \n", "One of the other reviewers has mentioned that ... | \n", "positive | \n", "one of the other reviewers has mentioned that ... | \n", "1 | \n", "
| 1 | \n", "A wonderful little production. <br /><br />The... | \n", "positive | \n", "a wonderful little production the filming tech... | \n", "1 | \n", "
| 2 | \n", "I thought this was a wonderful way to spend ti... | \n", "positive | \n", "i thought this was a wonderful way to spend ti... | \n", "1 | \n", "
| 3 | \n", "Basically there's a family where a little boy ... | \n", "negative | \n", "basically theres a family where a little boy j... | \n", "0 | \n", "
| 4 | \n", "Petter Mattei's \"Love in the Time of Money\" is... | \n", "positive | \n", "petter matteis love in the time of money is a ... | \n", "1 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 49995 | \n", "I thought this movie did a down right good job... | \n", "positive | \n", "i thought this movie did a down right good job... | \n", "1 | \n", "
| 49996 | \n", "Bad plot, bad dialogue, bad acting, idiotic di... | \n", "negative | \n", "bad plot bad dialogue bad acting idiotic direc... | \n", "0 | \n", "
| 49997 | \n", "I am a Catholic taught in parochial elementary... | \n", "negative | \n", "i am a catholic taught in parochial elementary... | \n", "0 | \n", "
| 49998 | \n", "I'm going to have to disagree with the previou... | \n", "negative | \n", "im going to have to disagree with the previous... | \n", "0 | \n", "
| 49999 | \n", "No one expects the Star Trek movies to be high... | \n", "negative | \n", "no one expects the star trek movies to be high... | \n", "0 | \n", "
50000 rows × 4 columns
\n", "