{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "***Importing Libraries***" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import preprocess_kgptalkie as ps" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | review | \n", "sentiment | \n", "
|---|---|---|
| 0 | \n", "One of the other reviewers has mentioned that ... | \n", "positive | \n", "
| 1 | \n", "A wonderful little production. <br /><br />The... | \n", "positive | \n", "
| 2 | \n", "I thought this was a wonderful way to spend ti... | \n", "positive | \n", "
| 3 | \n", "Basically there's a family where a little boy ... | \n", "negative | \n", "
| 4 | \n", "Petter Mattei's \"Love in the Time of Money\" is... | \n", "positive | \n", "
| ... | \n", "... | \n", "... | \n", "
| 49995 | \n", "I thought this movie did a down right good job... | \n", "positive | \n", "
| 49996 | \n", "Bad plot, bad dialogue, bad acting, idiotic di... | \n", "negative | \n", "
| 49997 | \n", "I am a Catholic taught in parochial elementary... | \n", "negative | \n", "
| 49998 | \n", "I'm going to have to disagree with the previou... | \n", "negative | \n", "
| 49999 | \n", "No one expects the Star Trek movies to be high... | \n", "negative | \n", "
50000 rows × 2 columns
\n", "| \n", " | review | \n", "sentiment | \n", "
|---|---|---|
| 0 | \n", "One of the other reviewers has mentioned that ... | \n", "positive | \n", "
| 1 | \n", "A wonderful little production. <br /><br />The... | \n", "positive | \n", "
| 2 | \n", "I thought this was a wonderful way to spend ti... | \n", "positive | \n", "
| 3 | \n", "Basically there's a family where a little boy ... | \n", "negative | \n", "
| 4 | \n", "Petter Mattei's \"Love in the Time of Money\" is... | \n", "positive | \n", "
| \n", " | review | \n", "sentiment | \n", "word_counts | \n", "char_counts | \n", "avg_wordlength | \n", "stopwords_counts | \n", "review_without_stopwords | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "one of the other reviewers has mentioned that ... | \n", "positive | \n", "307 | \n", "1455 | \n", "4.739414 | \n", "135 | \n", "reviewers mentioned watching 1 oz episode you'... | \n", "
| 1 | \n", "a wonderful little production. the filming tec... | \n", "positive | \n", "162 | \n", "837 | \n", "5.166667 | \n", "71 | \n", "wonderful little production. filming technique... | \n", "
| 2 | \n", "i thought this was a wonderful way to spend ti... | \n", "positive | \n", "166 | \n", "761 | \n", "4.584337 | \n", "81 | \n", "thought wonderful way spend time hot summer we... | \n", "
| 3 | \n", "basically there's a family where a little boy ... | \n", "negative | \n", "138 | \n", "611 | \n", "4.427536 | \n", "63 | \n", "basically there's family little boy (jake) thi... | \n", "
| 4 | \n", "petter mattei's \"love in the time of money\" is... | \n", "positive | \n", "230 | \n", "1088 | \n", "4.730435 | \n", "107 | \n", "petter mattei's \"love time money\" visually stu... | \n", "
Pipeline(steps=[('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Pipeline(steps=[('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])TfidfVectorizer()
LinearSVC()