Upload Emotion Detection in Text.ipynb
Browse files- Emotion Detection in Text.ipynb +785 -0
Emotion Detection in Text.ipynb
ADDED
|
@@ -0,0 +1,785 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "56cccab6",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"# Emotion Detection in Text"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": 1,
|
| 14 |
+
"id": "f0814628-3d83-4fd6-a511-2eccf79f9f1e",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# Load EDA Pkgs\n",
|
| 19 |
+
"import pandas as pd\n",
|
| 20 |
+
"import numpy as np\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"# Load Data Viz Pkgs\n",
|
| 23 |
+
"import seaborn as sns\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"# Load Text Cleaning Pkgs\n",
|
| 26 |
+
"import neattext.functions as nfx\n",
|
| 27 |
+
"\n",
|
| 28 |
+
"# Load ML Pkgs\n",
|
| 29 |
+
"# Estimators\n",
|
| 30 |
+
"from sklearn.linear_model import LogisticRegression\n",
|
| 31 |
+
"from sklearn.naive_bayes import MultinomialNB\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"# Transformers\n",
|
| 34 |
+
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
| 35 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 36 |
+
"from sklearn.metrics import accuracy_score,classification_report,confusion_matrix"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": 2,
|
| 42 |
+
"id": "b209e004-ab77-4407-8689-b4318944d47f",
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"source": [
|
| 46 |
+
"# Load Dataset\n",
|
| 47 |
+
"df = pd.read_csv(\"../data/emotion_dataset_raw.csv\")"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "code",
|
| 52 |
+
"execution_count": 3,
|
| 53 |
+
"id": "fea2d4c0-3bdd-405e-ab69-507ceaac36cb",
|
| 54 |
+
"metadata": {},
|
| 55 |
+
"outputs": [
|
| 56 |
+
{
|
| 57 |
+
"data": {
|
| 58 |
+
"text/html": [
|
| 59 |
+
"<div>\n",
|
| 60 |
+
"<style scoped>\n",
|
| 61 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 62 |
+
" vertical-align: middle;\n",
|
| 63 |
+
" }\n",
|
| 64 |
+
"\n",
|
| 65 |
+
" .dataframe tbody tr th {\n",
|
| 66 |
+
" vertical-align: top;\n",
|
| 67 |
+
" }\n",
|
| 68 |
+
"\n",
|
| 69 |
+
" .dataframe thead th {\n",
|
| 70 |
+
" text-align: right;\n",
|
| 71 |
+
" }\n",
|
| 72 |
+
"</style>\n",
|
| 73 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 74 |
+
" <thead>\n",
|
| 75 |
+
" <tr style=\"text-align: right;\">\n",
|
| 76 |
+
" <th></th>\n",
|
| 77 |
+
" <th>Emotion</th>\n",
|
| 78 |
+
" <th>Text</th>\n",
|
| 79 |
+
" </tr>\n",
|
| 80 |
+
" </thead>\n",
|
| 81 |
+
" <tbody>\n",
|
| 82 |
+
" <tr>\n",
|
| 83 |
+
" <th>0</th>\n",
|
| 84 |
+
" <td>neutral</td>\n",
|
| 85 |
+
" <td>Why ?</td>\n",
|
| 86 |
+
" </tr>\n",
|
| 87 |
+
" <tr>\n",
|
| 88 |
+
" <th>1</th>\n",
|
| 89 |
+
" <td>joy</td>\n",
|
| 90 |
+
" <td>Sage Act upgrade on my to do list for tommorow.</td>\n",
|
| 91 |
+
" </tr>\n",
|
| 92 |
+
" <tr>\n",
|
| 93 |
+
" <th>2</th>\n",
|
| 94 |
+
" <td>sadness</td>\n",
|
| 95 |
+
" <td>ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...</td>\n",
|
| 96 |
+
" </tr>\n",
|
| 97 |
+
" <tr>\n",
|
| 98 |
+
" <th>3</th>\n",
|
| 99 |
+
" <td>joy</td>\n",
|
| 100 |
+
" <td>Such an eye ! The true hazel eye-and so brill...</td>\n",
|
| 101 |
+
" </tr>\n",
|
| 102 |
+
" <tr>\n",
|
| 103 |
+
" <th>4</th>\n",
|
| 104 |
+
" <td>joy</td>\n",
|
| 105 |
+
" <td>@Iluvmiasantos ugh babe.. hugggzzz for u .! b...</td>\n",
|
| 106 |
+
" </tr>\n",
|
| 107 |
+
" </tbody>\n",
|
| 108 |
+
"</table>\n",
|
| 109 |
+
"</div>"
|
| 110 |
+
],
|
| 111 |
+
"text/plain": [
|
| 112 |
+
" Emotion Text\n",
|
| 113 |
+
"0 neutral Why ? \n",
|
| 114 |
+
"1 joy Sage Act upgrade on my to do list for tommorow.\n",
|
| 115 |
+
"2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...\n",
|
| 116 |
+
"3 joy Such an eye ! The true hazel eye-and so brill...\n",
|
| 117 |
+
"4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b..."
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
"execution_count": 3,
|
| 121 |
+
"metadata": {},
|
| 122 |
+
"output_type": "execute_result"
|
| 123 |
+
}
|
| 124 |
+
],
|
| 125 |
+
"source": [
|
| 126 |
+
"df.head()"
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "code",
|
| 131 |
+
"execution_count": 4,
|
| 132 |
+
"id": "430565a3-cf3b-4c6f-afa5-bafd084f5676",
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [
|
| 135 |
+
{
|
| 136 |
+
"data": {
|
| 137 |
+
"text/plain": [
|
| 138 |
+
"joy 11045\n",
|
| 139 |
+
"sadness 6722\n",
|
| 140 |
+
"fear 5410\n",
|
| 141 |
+
"anger 4297\n",
|
| 142 |
+
"surprise 4062\n",
|
| 143 |
+
"neutral 2254\n",
|
| 144 |
+
"disgust 856\n",
|
| 145 |
+
"shame 146\n",
|
| 146 |
+
"Name: Emotion, dtype: int64"
|
| 147 |
+
]
|
| 148 |
+
},
|
| 149 |
+
"execution_count": 4,
|
| 150 |
+
"metadata": {},
|
| 151 |
+
"output_type": "execute_result"
|
| 152 |
+
}
|
| 153 |
+
],
|
| 154 |
+
"source": [
|
| 155 |
+
"# Value Counts\n",
|
| 156 |
+
"df['Emotion'].value_counts()"
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"cell_type": "code",
|
| 161 |
+
"execution_count": 5,
|
| 162 |
+
"id": "531d3449-a959-4a19-bff0-3ffed551e619",
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [
|
| 165 |
+
{
|
| 166 |
+
"data": {
|
| 167 |
+
"text/plain": [
|
| 168 |
+
"<Axes: xlabel='Emotion', ylabel='count'>"
|
| 169 |
+
]
|
| 170 |
+
},
|
| 171 |
+
"execution_count": 5,
|
| 172 |
+
"metadata": {},
|
| 173 |
+
"output_type": "execute_result"
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"data": {
|
| 177 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAGwCAYAAAC0HlECAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA9IUlEQVR4nO3de1yUdf7//+cIMg6nSVBOK3lIRFRS01K01PLUAc12V9tsSTdTy5JIzXItIzd1szyttqZuq62H7LuVbQdlPZSu5jGUPBGaYeoKYS2CmgHC+/dHH6+fIx6uEATscb/d5lbzvl7XNa/3cA3z9JprLhzGGCMAAABcUo3KbgAAAKA6IDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wru4FrSUlJiY4ePaqAgAA5HI7KbgcAANhgjNGJEycUERGhGjUufjyJ0FSOjh49qsjIyMpuAwAAlMHhw4dVr169iy4nNJWjgIAAST896YGBgZXcDQAAsCM/P1+RkZHW+/jFEJrK0dmP5AIDAwlNAABUM5c7tYYTwQEAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wruwFc2w6Nj63sFq7Y9eN2VXYLAIAqgCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsKFSQ9N//vMf9erVSxEREXI4HHr//fc9lhtjlJycrIiICLlcLnXp0kV79uzxqCkoKNDw4cNVp04d+fn5qXfv3jpy5IhHTW5urhISEuR2u+V2u5WQkKDjx4971Bw6dEi9evWSn5+f6tSpo8TERBUWFlbEtAEAQDVUqaHp1KlTatmypWbNmnXB5ZMnT9bUqVM1a9Ysbdu2TWFhYerevbtOnDhh1SQlJWnZsmVaunSpNmzYoJMnTyo+Pl7FxcVWTf/+/ZWWlqaUlBSlpKQoLS1NCQkJ1vLi4mLdc889OnXqlDZs2KClS5fq3Xff1ciRIytu8gAAoFpxGGNMZTchSQ6HQ8uWLVOfPn0k/XSUKSIiQklJSXrmmWck/XRUKTQ0VC+//LKGDh2qvLw81a1bVwsXLtT9998vSTp69KgiIyO1fPly9ezZU+np6WrWrJk2b96sdu3aSZI2b96suLg4ffnll4qOjtaKFSsUHx+vw4cPKyIiQpK0dOlSDRw4UDk5OQoMDLQ1h/z8fLndbuXl5dle51p3aHxsZbdwxa4ft6uyWwAAVCC7799V9pymzMxMZWdnq0ePHtaY0+lU586dtXHjRklSamqqioqKPGoiIiLUokULq2bTpk1yu91WYJKk9u3by+12e9S0aNHCCkyS1LNnTxUUFCg1NfWiPRYUFCg/P9/jBgAArk1VNjRlZ2dLkkJDQz3GQ0NDrWXZ2dny8fFR7dq1L1kTEhJSavshISEeNec/Tu3ateXj42PVXMikSZOs86TcbrciIyN/5iwBAEB1UWVD01kOh8PjvjGm1Nj5zq+5UH1Zas43ZswY5eXlWbfDhw9fsi8AAFB9VdnQFBYWJkmljvTk5ORYR4XCwsJUWFio3NzcS9Z8++23pbZ/7Ngxj5rzHyc3N1dFRUWljkCdy+l0KjAw0OMGAACuTVU2NDVs2FBhYWFatWqVNVZYWKh169apQ4cOkqQ2bdqoZs2aHjVZWVnavXu3VRMXF6e8vDxt3brVqtmyZYvy8vI8anbv3q2srCyrZuXKlXI6nWrTpk2Fzh
MAAFQP3pX54CdPntRXX31l3c/MzFRaWpqCgoJ0/fXXKykpSRMnTlRUVJSioqI0ceJE+fr6qn///pIkt9utQYMGaeTIkQoODlZQUJBGjRql2NhYdevWTZIUExOjO++8U4MHD9acOXMkSUOGDFF8fLyio6MlST169FCzZs2UkJCgV155Rf/73/80atQoDR48mKNHAABAUiWHps8//1y33367dX/EiBGSpAEDBmjBggUaPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYK0zbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7d2+PaUF5eXvr44481bNgwdezYUS6XS/3799err75a0U8BAACoJqrMdZquBVynqTSu0wQAqOqq/XWaAAAAqhJCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGyo0qHpzJkzeu6559SwYUO5XC41atRI48ePV0lJiVVjjFFycrIiIiLkcrnUpUsX7dmzx2M7BQUFGj58uOrUqSM/Pz/17t1bR44c8ajJzc1VQkKC3G633G63EhISdPz48asxTQAAUA1U6dD08ssv6/XXX9esWbOUnp6uyZMn65VXXtHMmTOtmsmTJ2vq1KmaNWuWtm3bprCwMHXv3l0nTpywapKSkrRs2TItXbpUGzZs0MmTJxUfH6/i4mKrpn///kpLS1NKSopSUlKUlpamhISEqzpfAABQdTmMMaaym7iY+Ph4hYaG6o033rDGfvOb38jX11cLFy6UMUYRERFKSkrSM888I+mno0qhoaF6+eWXNXToUOXl5alu3bpauHCh7r//fknS0aNHFRkZqeXLl6tnz55KT09Xs2bNtHnzZrVr106StHnzZsXFxenLL79UdHS0rX7z8/PldruVl5enwMDAcn42qqdD42Mru4Urdv24XZXdAgCgAtl9/67SR5puvfVWrVmzRvv27ZMkffHFF9qwYYPuvvtuSVJmZqays7PVo0cPax2n06nOnTtr48aNkqTU1FQVFRV51ERERKhFixZWzaZNm+R2u63AJEnt27eX2+22ai6koKBA+fn5HjcAAHBt8q7sBi7lmWeeUV5enpo2bSovLy8VFxdrwoQJeuCBByRJ2dnZkqTQ0FCP9UJDQ/XNN99YNT4+Pqpdu3apmrPrZ2dnKyQkpNTjh4SEWDUXMmnSJL344otlnyAAAKg2qvSRprfffluLFi3SkiVLtH37dr355pt69dVX9eabb3rUORwOj/vGmFJj5zu/5kL1l9vOmDFjlJeXZ90OHz5sZ1oAAKAaqtJHmp5++mk9++yz+t3vfidJio2N1TfffKNJkyZpwIABCgsLk/TTkaLw8HBrvZycHOvoU1hYmAoLC5Wbm+txtCknJ0cdOnSwar799ttSj3/s2LFSR7HO5XQ65XQ6r3yiAACgyqvSR5p++OEH1ajh2aKXl5d1yYGGDRsqLCxMq1atspYXFhZq3bp1ViBq06aNatas6VGTlZWl3bt3WzVxcXHKy8vT1q1brZotW7YoLy/PqgEAAL9sVfpIU69evTRhwgRdf/31at68uXbs2KGpU6fq4YcflvTTR2pJSUmaOHGioqKiFBUVpYkTJ8
rX11f9+/eXJLndbg0aNEgjR45UcHCwgoKCNGrUKMXGxqpbt26SpJiYGN15550aPHiw5syZI0kaMmSI4uPjbX9zDgAAXNuqdGiaOXOmnn/+eQ0bNkw5OTmKiIjQ0KFDNW7cOKtm9OjROn36tIYNG6bc3Fy1a9dOK1euVEBAgFUzbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7dW7Nmzbp6kwUAAFValb5OU3XDdZpK4zpNAICq7pq4ThMAAEBVQWgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2OBd2Q0A16KOMztWdgtX7LPhn1V2CwBQpXCkCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2lCk03XHHHTp+/Hip8fz8fN1xxx1X2hMAAECVU6bQtHbtWhUWFpYa//HHH7V+/forbgoAAKCq8f45xTt37rT+f+/evcrOzrbuFxcXKyUlRb/61a/KrzsAAIAq4meFplatWsnhcMjhcFzwYziXy6WZM2eWW3MAAABVxc8KTZmZmTLGqFGjRtq6davq1q1rLfPx8VFISIi8vLzKvUkAAIDK9rNCU/369SVJJSUlFdIMAABAVfWzQtO59u3bp7Vr1yonJ6dUiBo3btwVNwYAAFCVlOnbc/PmzVOzZs00btw4vfPOO1q2bJl1e//998u1wf/+97/6/e9/r+DgYPn6+qpVq1ZKTU21lhtjlJycrIiICLlcLnXp0kV79uzx2EZBQYGGDx+uOnXqyM/PT71799aRI0c8anJzc5WQkCC32y23262EhIQLXlYBAAD8MpUpNL300kuaMGGCsrOzlZaWph07dli37du3l1tzubm56tixo2rWrKkVK1Zo7969mjJliq677jqrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISGh3OYCAACqtzJ9PJebm6u+ffuWdy+lvPzyy4qMjNT8+fOtsQYNGlj/b4zR9OnTNXbsWP3617+WJL355psKDQ3VkiVLNHToUOXl5emNN97QwoUL1a1bN0nSokWLFBkZqdWrV6tnz55KT09XSkqKNm/erHbt2kn66WhaXFycMjIyFB0dXeFzBQAAVVuZjjT17dtXK1euLO9eSvnggw/Utm1b9e3bVyEhIWrdurXmzZtnLc/MzFR2drZ69OhhjTmdTnXu3FkbN26UJKWmpqqoqMijJiIiQi1atLBqNm3aJLfbbQUmSWrfvr3cbrdVcyEFBQXKz8/3uAEAgGtTmY40NW7cWM8//7w2b96s2NhY1axZ02N5YmJiuTT39ddfa/bs2RoxYoT++Mc/auvWrUpMTJTT6dRDDz1kXVwzNDTUY73Q0FB98803kqTs7Gz5+Piodu3apWrOrp+dna2QkJBSjx8SEuJxAc/zTZo0SS+++OIVzREAAFQPZQpNc+fOlb+/v9atW6d169Z5LHM4HOUWmkpKStS2bVtNnDhRktS6dWvt2bNHs2fP1kMPPeTxmOcyxpQaO9/5NReqv9x2xowZoxEjRlj38/PzFRkZeelJAQCAaqlMoSkzM7O8+7ig8PBwNWvWzGMsJiZG7777riQpLCxM0k9HisLDw62anJwc6+hTWFiYCgsLlZub63G0KScnRx06dLBqvv3221KPf+zYsV
JHsc7ldDrldDrLODsAAFCdlOmcpqulY8eOysjI8Bjbt2+fdZHNhg0bKiwsTKtWrbKWFxYWat26dVYgatOmjWrWrOlRk5WVpd27d1s1cXFxysvL09atW62aLVu2KC8vz6oBAAC/bGU60vTwww9fcvnf//73MjVzvqeeekodOnTQxIkT1a9fP23dulVz587V3LlzJf30kVpSUpImTpyoqKgoRUVFaeLEifL19VX//v0lSW63W4MGDdLIkSMVHBysoKAgjRo1SrGxsda36WJiYnTnnXdq8ODBmjNnjiRpyJAhio+P55tzAABA0hVccuBcRUVF2r17t44fP37BP+RbVjfffLOWLVumMWPGaPz48WrYsKGmT5+uBx980KoZPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYNVMmzZN3t7e6tevn06fPq2uXbtqwYIFHn8nb/HixUpMTLS+Zde7d2/NmjWr3OYCAACqN4cxxpTHhkpKSjRs2DA1atRIo0ePLo9NVjv5+flyu93Ky8tTYGBgZbdTJRwaH1vZLVyx68ft+tnrdJzZsQI6ubo+G/5ZZbcAAFeF3ffvcjunqUaNGnrqqac0bdq08tokAABAlVGuJ4IfOHBAZ86cKc9NAgAAVAllOqfp3GsTST9dzygrK0sff/yxBgwYUC6NAQAAVCVlCk07duzwuF+jRg3VrVtXU6ZMuew36wAAAKqjMoWmTz/9tLz7AAAAqNLKFJrOOnbsmDIyMuRwONSkSRPVrVu3vPoCAACoUsp0IvipU6f08MMPKzw8XJ06ddJtt92miIgIDRo0SD/88EN59wgAAFDpyhSaRowYoXXr1unDDz/U8ePHdfz4cf3rX//SunXrNHLkyPLuEQAAoNKV6eO5d999V++88466dOlijd19991yuVzq16+fZs+eXV79AQAAVAllOtL0ww8/KDQ0tNR4SEgIH88BAIBrUplCU1xcnF544QX9+OOP1tjp06f14osvKi4urtyaAwAAqCrK9PHc9OnTddddd6levXpq2bKlHA6H0tLS5HQ6tXLlyvLuEUA1sa5T58pu4Yp1/s+6ym4BQBVVptAUGxur/fv3a9GiRfryyy9ljNHvfvc7Pfjgg3K5XOXdIwAAQKUrU2iaNGmSQkNDNXjwYI/xv//97zp27JieeeaZcmkOAACgqijTOU1z5sxR06ZNS403b95cr7/++hU3BQAAUNWUKTRlZ2crPDy81HjdunWVlZV1xU0BAABUNWUKTZGRkfrss89KjX/22WeKiIi44qYAAACqmjKd0/TII48oKSlJRUVFuuOOOyRJa9as0ejRo7kiOAAAuCaVKTSNHj1a//vf/zRs2DAVFhZKkmrVqqVnnnlGY8aMKdcGAQAAqoIyhSaHw6GXX35Zzz//vNLT0+VyuRQVFSWn01ne/QEAAFQJZQpNZ/n7++vmm28ur14AAACqrDKdCA4AAPBLQ2gCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwwbuyGwAAVE8Tfv/bym7hio1d9E5lt4BqhCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYEO1Ck2TJk2Sw+FQUlKSNWaMUXJysiIiIuRyudSlSxft2bPHY72CggINHz5cderUkZ+fn3r37q0jR4541OTm5iohIUFut1tut1sJCQk6fvz4VZgVAACoDqrNJQe2bdumuXPn6sYbb/QYnzx5sqZOnaoFCxaoSZMmeumll9S9e3dlZGQoICBAkpSUlKQPP/xQS5cuVXBwsEaOHKn4+HilpqbKy8tLktS/f38dOXJEKSkpkqQhQ4YoISFBH3744dWdKI
BqZ9bI6v974okpvSq7BaDKqxZHmk6ePKkHH3xQ8+bNU+3ata1xY4ymT5+usWPH6te//rVatGihN998Uz/88IOWLFkiScrLy9Mbb7yhKVOmqFu3bmrdurUWLVqkXbt2afXq1ZKk9PR0paSk6G9/+5vi4uIUFxenefPm6aOPPlJGRsZF+yooKFB+fr7HDQAAXJuqRWh6/PHHdc8996hbt24e45mZmcrOzlaPHj2sMafTqc6dO2vjxo2SpNTUVBUVFXnUREREqEWLFlbNpk2b5Ha71a5dO6umffv2crvdVs2FTJo0yfo4z+12KzIyslzmCwAAqp4qH5qWLl2q1NRUTZo0qdSy7OxsSVJoaKjHeGhoqLUsOztbPj4+HkeoLlQTEhJSavshISFWzYWMGTNGeXl51u3w4cM/b3IAAKDaqNLnNB0+fFhPPvmkVq5cqVq1al20zuFweNw3xpQaO9/5NReqv9x2nE6nnE7nJR8HAABcG6r0kabU1FTl5OSoTZs28vb2lre3t9atW6e//OUv8vb2to4wnX80KCcnx1oWFhamwsJC5ebmXrLm22+/LfX4x44dK3UUCwAA/DJV6dDUtWtX7dq1S2lpadatbdu2evDBB5WWlqZGjRopLCxMq1atstYpLCzUunXr1KFDB0lSmzZtVLNmTY+arKws7d6926qJi4tTXl6etm7datVs2bJFeXl5Vg0AAPhlq9IfzwUEBKhFixYeY35+fgoODrbGk5KSNHHiREVFRSkqKkoTJ06Ur6+v+vfvL0lyu90aNGiQRo4cqeDgYAUFBWnUqFGKjY21TiyPiYnRnXfeqcGDB2vOnDmSfrrkQHx8vKKjo6/ijAEAQFVVpUOTHaNHj9bp06c1bNgw5ebmql27dlq5cqV1jSZJmjZtmry9vdWvXz+dPn1aXbt21YIFC6xrNEnS4sWLlZiYaH3Lrnfv3po1a9ZVnw8AAKiaql1oWrt2rcd9h8Oh5ORkJScnX3SdWrVqaebMmZo5c+ZFa4KCgrRo0aJy6hIAAFxrqvQ5TQAAAFUFoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7Ib+KVo8/Q/KruFK5b6ykOV3QIAAJWGI00AAAA2EJoAAABsIDQBAADYUKVD06RJk3TzzTcrICBAISEh6tOnjzIyMjxqjDFKTk5WRESEXC6XunTpoj179njUFBQUaPjw4apTp478/PzUu3dvHTlyxKMmNzdXCQkJcrvdcrvdSkhI0PHjxyt6igAAoJqo0qFp3bp1evzxx7V582atWrVKZ86cUY8ePXTq1CmrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISHhqs4XAABUXVX623MpKSke9+fPn6+QkBClpqaqU6dOMsZo+vTpGjt2rH79619Lkt58802FhoZqyZIlGjp0qPLy8vTGG29o4cKF6tatmyRp0aJFioyM1OrVq9WzZ0+lp6crJSVFmzdvVrt27SRJ8+bNU1xcnDIyMhQdHX11Jw4AAKqcKn2k6Xx5eXmSpKCgIElSZmamsrOz1aNHD6vG6XSqc+fO2rhxoyQpNTVVRUVFHjURERFq0aKFVb
Np0ya53W4rMElS+/bt5Xa7rZoLKSgoUH5+vscNAABcm6pNaDLGaMSIEbr11lvVokULSVJ2drYkKTQ01KM2NDTUWpadnS0fHx/Vrl37kjUhISGlHjMkJMSquZBJkyZZ50C53W5FRkaWfYIAAKBKqzah6YknntDOnTv11ltvlVrmcDg87htjSo2d7/yaC9VfbjtjxoxRXl6edTt8+PDlpgEAAKqpahGahg8frg8++ECffvqp6tWrZ42HhYVJUqmjQTk5OdbRp7CwMBUWFio3N/eSNd9++22pxz127Fipo1jncjqdCgwM9LgBAIBrU5UOTcYYPfHEE3rvvff0ySefqGHDhh7LGzZsqLCwMK1atcoaKyws1Lp169ShQwdJUps2bVSzZk2PmqysLO3evduqiYuLU15enrZu3WrVbNmyRXl5eVYNAAD4ZavS3557/PHHtWTJEv3rX/9SQECAdUTJ7XbL5XLJ4XAoKSlJEydOVFRUlKKiojRx4kT5+vqqf//+Vu2gQYM0cuRIBQcHKygoSKNGjVJsbKz1bbqYmBjdeeedGjx4sObMmSNJGjJkiOLj4/nmHAAAkFTFQ9Ps2bMlSV26dPEYnz9/vgYOHChJGj16tE6fPq1hw4YpNzdX7dq108qVKxUQEGDVT5s2Td7e3urXr59Onz6trl27asGCBfLy8rJqFi9erMTEROtbdr1799asWbMqdoIAAKDaqNKhyRhz2RqHw6Hk5GQlJydftKZWrVqaOXOmZs6cedGaoKAgLVq0qCxtAgCAX4AqfU4TAABAVUFoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA3eld0AAACo+pKTkyu7hSt2pXPgSBMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7IbAACgOkmf8Ellt3DFYsbeUdktVEscaTrPX//6VzVs2FC1atVSmzZttH79+spuCQAAVAGEpnO8/fbbSkpK0tixY7Vjxw7ddtttuuuuu3To0KHKbg0AAFQyQtM5pk6dqkGDBumRRx5RTEyMpk+frsjISM2ePbuyWwMAAJWMc5r+T2FhoVJTU/Xss896jPfo0UMbN2684DoFBQUqKCiw7ufl5UmS8vPzS9UWF5wux24rx4XmdTknfiyugE6urrLM+8zpMxXQydVVlnmfOvPLnPfpgh8qoJOrqyzz/rGoqAI6ubrKMu+TP56qgE6urrLM+9z3u+rqYvM+O26MufQGDIwxxvz3v/81ksxnn33mMT5hwgTTpEmTC67zwgsvGEncuHHjxo0bt2vgdvjw4UtmBY40ncfhcHjcN8aUGjtrzJgxGjFihHW/pKRE//vf/xQcHHzRdSpKfn6+IiMjdfjwYQUGBl7Vx65MzJt5/xIwb+b9S1CZ8zbG6MSJE4qIiLhkHaHp/9SpU0deXl7Kzs72GM/JyVFoaOgF13E6nXI6nR5j1113XUW1aEtgYOAv6kV2FvP+ZWHevyzM+5elsubtdrsvW8OJ4P/Hx8dHbdq00apVqzzGV61apQ4dOlRSVwAAoKrgSNM5RowYoYSEBLVt21ZxcXGaO3euDh06pEcffbSyWwMAAJWM0HSO+++/X99//73Gjx+vrKwstWjRQsuXL1f9+vUru7XLcjqdeuGFF0p9XHitY97M+5eAeTPvX4LqMG+HMZ
f7fh0AAAA4pwkAAMAGQhMAAIANhCYAAAAbCE24pAYNGmj69OmV9vgDBw5Unz59Ku3xK5vD4dD7779f2W1UGGOMhgwZoqCgIDkcDqWlpVV2S9VWcnKyWrVqVdlt4P/8kn53denSRUlJSZIq/z2jovHtuWtMly5d1KpVq2tmp50xY8bl/xYQqq2UlBQtWLBAa9euVaNGjVSnTp3KbqnaGjVqlIYPH17ZbeAXbtu2bfLz86vsNiRJBw8eVMOGDbVjx45y+wcFoekXyBij4uJieXtX/R+/nSu0ovo6cOCAwsPDK/QCsoWFhfLx8amw7ZeXsvZ59vXs7+8vf3//Cujs2lFUVKSaNWtWdhvXtLp161Z2CxWKj+euoi5duigxMVGjR49WUFCQwsLClJycbC3Py8vTkCFDFBISosDAQN1xxx364osvrOUXOtyblJSkLl26WMvXrVunGTNmyOFwyOFw6ODBg1q7dq0cDof+/e9/q23btnI6nVq/fr0OHDige++9V6GhofL399fNN9+s1atXX4Vnwr5z51xQUKDExESFhISoVq1auvXWW7Vt2zZJP71xNG7cWK+++qrH+rt371aNGjV04MCBq9LvO++8o9jYWLlcLgUHB6tbt246deqUtm3bpu7du6tOnTpyu93q3Lmztm/f7rHu/v371alTJ9WqVUvNmjUrdXX6gwcPyuFw6L333tPtt98uX19ftWzZUps2bfKo27hxozp16iSXy6XIyEglJibq1Kn//6+y//Wvf1VUVJRq1aql0NBQ/fa3v71s/xVh4MCBGj58uA4dOiSHw6EGDRrIGKPJkyerUaNGcrlcatmypd555x1rneLiYg0aNEgNGzaUy+VSdHS0ZsyYUWq7ffr00aRJkxQREaEmTZpUSP/SxZ+vcz+uOKtPnz4aOHCgdb9BgwZ66aWXNHDgQLndbg0ePNj6GS9dulQdOnRQrVq11Lx5c61du9Za72Kv5/M/nlu7dq1uueUW+fn56brrrlPHjh31zTffWMs//PBDtWnTRrVq1VKjRo304osv6syZM+XyvKSkpOjWW2/Vddddp+DgYMXHx1uvQbv78bx58xQZGSlfX1/dd999mjp1aqk/U3W5OTgcDr3++uu699575efnp5deeqlc5neuy71mXn31VYWHhys4OFiPP/64ioqKrGWLFi1S27ZtFRAQoLCwMPXv3185OTnW8nN/1q1bt5bL5dIdd9yhnJwcrVixQjExMQoMDNQDDzygH374wVrvcq+jK3Hq1Ck99NBD8vf3V3h4uKZMmeKx/PyP55KTk3X99dfL6XQqIiJCiYmJ1rKsrCzdc889crlcatiwoZYsWeKx/tl95dyP7Y8fPy6Hw2G9JnJzc/Xggw+qbt26crlcioqK0vz58yVJDRs2lCS1bt1aDofDeq+8Ipf8c74oV507dzaBgYEmOTnZ7Nu3z7z55pvG4XCYlStXmpKSEtOxY0fTq1cvs23bNrNv3z4zcuRIExwcbL7//ntjjDEDBgww9957r8c2n3zySdO5c2djjDHHjx83cXFxZvDgwSYrK8tkZWWZM2fOmE8//dRIMjfeeKNZuXKl+eqrr8x3331n0tLSzOuvv2527txp9u3bZ8aOHWtq1aplvvnmG2v79evXN9OmTbtKz1Bp5845MTHRREREmOXLl5s9e/aYAQMGmNq1a1vPz4QJE0yzZs081n/qqadMp06drkqvR48eNd7e3mbq1KkmMzPT7Ny507z22mvmxIkTZs2aNWbhwoVm7969Zu/evWbQoEEmNDTU5OfnG2OMKS4uNi1atDBdunQxO3bsMOvWrTOtW7c2ksyyZcuMMcZkZmYaSaZp06bmo48+MhkZGea3v/2tqV+/vikqKjLGGLNz507j7+9vpk2bZvbt22c+++wz07p1azNw4EBjjDHbtm0zXl5eZsmSJebgwYNm+/btZsaMGZftvyIcP37cjB8/3tSrV89kZWWZnJwc88c//tE0bdrUpKSkmA
MHDpj58+cbp9Np1q5da4wxprCw0IwbN85s3brVfP3112bRokXG19fXvP3229Z2BwwYYPz9/U1CQoLZvXu32bVrV4X0f6nnq3PnzubJJ5/0qL/33nvNgAEDrPv169c3gYGB5pVXXjH79+83+/fvt37G9erVM++8847Zu3eveeSRR0xAQID57rvvjDHmoq/nF154wbRs2dIYY0xRUZFxu91m1KhR5quvvjJ79+41CxYssF7bKSkpJjAw0CxYsMAcOHDArFy50jRo0MAkJyeXy3PzzjvvmHfffdfs27fP7Nixw/Tq1cvExsaa4uJiW/vxhg0bTI0aNcwrr7xiMjIyzGuvvWaCgoKM2+22HsPOHCSZkJAQ88Ybb5gDBw6YgwcPlsv8zrrUPjBgwAATGBhoHn30UZOenm4+/PBD4+vra+bOnWut/8Ybb5jly5ebAwcOmE2bNpn27dubu+66y1p+9mfdvn17s2HDBrN9+3bTuHFj07lzZ9OjRw+zfft285///McEBwebP//5z9Z6l3sdXYnHHnvM1KtXz6xcudLs3LnTxMfHG39/f2t/P/c945///KcJDAw0y5cvN998843ZsmWLx/y7detmWrVqZTZv3mxSU1NN586djcvlstY/u6/s2LHDWic3N9dIMp9++qkxxpjHH3/ctGrVymzbts1kZmaaVatWmQ8++MAYY8zWrVuNJLN69WqTlZVlvVdcCULTVdS5c2dz6623eozdfPPN5plnnjFr1qwxgYGB5scff/RYfsMNN5g5c+YYYy4fms4+xvm/rM++8N5///3L9tisWTMzc+ZM635VCU0nT540NWvWNIsXL7aWFRYWmoiICDN58mRjzE+/wLy8vMyWLVus5XXr1jULFiy4Kr2mpqYaSbZ+MZ85c8YEBASYDz/80BhjzL///W/j5eVlDh8+bNWsWLHigqHpb3/7m1WzZ88eI8mkp6cbY4xJSEgwQ4YM8Xis9evXmxo1apjTp0+bd9991wQGBlphraz9l5dp06aZ+vXrG2OMOXnypKlVq5bZuHGjR82gQYPMAw88cNFtDBs2zPzmN7+x7g8YMMCEhoaagoKCCun5rEs9X3ZDU58+fTxqzv6Mz30DLCoqMvXq1TMvv/yyMebir+dzQ9P3339vJF30TfK2224zEydO9BhbuHChCQ8Pv+ScyyonJ8dIMrt27bK1H99///3mnnvu8djGgw8+6BGa7MxBkklKSqqAGf3kUvvAgAEDTP369c2ZM2essb59+5r777//ots7+yZ/9h8qZ3/Wq1evtmomTZpkJJkDBw5YY0OHDjU9e/Y0xpT9dWTHiRMnjI+Pj1m6dKk19v333xuXy3XB0DRlyhTTpEkTU1hYWGpb6enpRpLZtm2bNbZ//34j6WeFpl69epk//OEPF+z3QutfKT6eu8puvPFGj/vh4eHKyclRamqqTp48qeDgYOvcBH9/f2VmZpbbR0tt27b1uH/q1CmNHj1azZo103XXXSd/f399+eWXOnToULk8Xnk6cOCAioqK1LFjR2usZs2auuWWW5Seni7pp+fynnvu0d///ndJ0kcffaQff/xRffv2vSo9tmzZUl27dlVsbKz69u2refPmKTc3V5KUk5OjRx99VE2aNJHb7Zbb7dbJkyet5zo9PV3XX3+96tWrZ20vLi7ugo9z7j4UHh5ubV+SUlNTtWDBAo99qGfPniopKVFmZqa6d++u+vXrq1GjRkpISNDixYutw/qX6v9q2Lt3r3788Ud1797do/9//OMfHq+B119/XW3btlXdunXl7++vefPmldpnY2NjK/w8pvJ4vs5/TZ517s/e29tbbdu2tfbzy60rSUFBQRo4cKB69uypXr16acaMGcrKyrKWp6amavz48R7P8+DBg5WVleXxMU9ZHThwQP3791ejRo0UGBhofUxy7s/pUvtxRkaGbrnlFo9tnn/f7hwu9TxdqcvtA82bN5eXl5d1/+zv+7N27Nihe++9V/Xr11dAQI
D18dH5+/O5z1VoaKh8fX3VqFEjj7Gz27X7OiqLAwcOqLCw0GP/DAoKUnR09AXr+/btq9OnT6tRo0YaPHiwli1bZn18mpGRIW9vb910001WfePGjVW7du2f1dNjjz2mpUuXqlWrVho9erQ2btxYhpnZR2i6ys4/CdHhcKikpEQlJSUKDw9XWlqaxy0jI0NPP/20JKlGjRqlvkl27ufjl3P+Nxqefvppvfvuu5owYYLWr1+vtLQ0xcbGqrCwsIyzqzhn5+1wOEqNnzv2yCOPaOnSpTp9+rTmz5+v+++/X76+vlelRy8vL61atUorVqxQs2bNNHPmTEVHRyszM1MDBw5Uamqqpk+fro0bNyotLU3BwcHWc33+z1UqPdezzt2HztaUlJRY/x06dKjHPvTFF19o//79uuGGGxQQEKDt27frrbfeUnh4uMaNG6eWLVvq+PHjl+z/ajg7h48//tij/71791rnY/y///f/9NRTT+nhhx/WypUrlZaWpj/84Q+l9tmr8e2dSz1fdl+rP6fP8/eHy607f/58bdq0SR06dNDbb7+tJk2aaPPmzZJ+eq5ffPFFj+d5165d2r9/v2rVqmW7p4vp1auXvv/+e82bN09btmzRli1bJMnj53Sp/fj81/XZsXPZnUNF7guXe81c7Pe99NM/Wnv06CF/f38tWrRI27Zt07JlyySp1P58/nN1qe3aeR2V1YV+T11KZGSkMjIy9Nprr8nlcmnYsGHq1KmTioqKLrqtc8dr1KhRauz819Fdd92lb775RklJSTp69Ki6du2qUaNG/aw+fw5CUxVx0003KTs7W97e3mrcuLHH7ezXsOvWrevxr0VJpa5r4+Pjo+LiYluPuX79eg0cOFD33XefYmNjFRYWpoMHD5bHdMpd48aN5ePjow0bNlhjRUVF+vzzzxUTE2ON3X333fLz89Ps2bO1YsUKPfzww1e1T4fDoY4dO+rFF1/Ujh075OPjo2XLlmn9+vVKTEzU3XffrebNm8vpdOq7776z1mvWrJkOHTqko0ePWmPnnxhrx0033aQ9e/aU2ofOPn/ST0cuunXrpsmTJ2vnzp06ePCgPvnkk0v2fzU0a9ZMTqdThw4dKtV7ZGSkpJ/22Q4dOmjYsGFq3bq1GjdufNVO8r+Qiz1f579Wi4uLtXv3btvbPRtuJOnMmTNKTU1V06ZNf3Z/rVu31pgxY7Rx40a1aNFCS5YskfTTfpKRkXHB/eTsG1VZff/990pPT9dzzz2nrl27KiYm5mcfgWvatKm2bt3qMfb555973K/IOfwcZX3NfPnll/ruu+/05z//WbfddpuaNm3qcRSqrOy8jsqqcePGqlmzpsf+mZubq3379l10HZfLpd69e+svf/mL1q5dq02bNmnXrl1q2rSpzpw5ox07dli1X331lY4fP27dP/tNvHNfSxe6llvdunU1cOBALVq0SNOnT9fcuXMlyfqdZ/c90Y6q/53zX4hu3bopLi5Offr00csvv6zo6GgdPXpUy5cvV58+fdS2bVvdcccdeuWVV/SPf/xDcXFxWrRokXbv3q3WrVtb22nQoIG2bNmigwcPyt/fX0FBQRd9zMaNG+u9995Tr1695HA49Pzzz1v/Sqlq/Pz89Nhjj+npp59WUFCQrr/+ek2ePFk//PCDBg0aZNV5eXlp4MCBGjNmjBo3bnzRj7gqwpYtW7RmzRr16NFDISEh2rJli44dO6aYmBg1btxYCxcuVNu2bZWfn6+nn35aLpfLWrdbt26Kjo7WQw89pClTpig/P19jx4792T0888wzat++vR5//HENHjxYfn5+Sk9P16pVqzRz5kx99NFH+vrrr9WpUyfVrl1by5cvV0lJiaKjoy/Z/9UQEBCgUaNG6amnnlJJSYluvfVW5efna+PGjfL399eAAQPUuHFj/eMf/9C///1vNWzYUAsXLtS2bdusj3+upks9X35+fhoxYoQ+/vhj3XDDDZo2bZrHm8HlvPbaa4qKilJMTI
ymTZum3Nzcn/UPgMzMTM2dO1e9e/dWRESEMjIytG/fPj300EOSpHHjxik+Pl6RkZHq27evatSooZ07d2rXrl1X/A2z2rVrKzg4WHPnzlV4eLgOHTqkZ5999mdtY/jw4erUqZOmTp2qXr166ZNPPtGKFSs8jj5V5BzsutQ+sHPnzkuue/3118vHx0czZ87Uo48+qt27d+tPf/rTFfdk53VUVv7+/ho0aJCefvppBQcHKzQ0VGPHjr1oSF2wYIGKi4vVrl07+fr6auHChXK5XKpfv771TcMhQ4Zo9uzZqlmzpkaOHCmXy2X9nF0ul9q3b68///nPatCggb777js999xzHo8xbtw4tWnTRs2bN1dBQYE++ugj63dWSEiIXC6XUlJSVK9ePdWqVevKL2NTbmdH4bIud3Jofn6+GT58uImIiDA1a9Y0kZGR5sEHHzSHDh2y6seNG2dCQ0ON2+02Tz31lHniiSc8TgTPyMgw7du3Ny6Xy0gymZmZ1smEubm5Ho+dmZlpbr/9duNyuUxkZKSZNWtWqR6ryongxhhz+vRpM3z4cFOnTh3jdDpNx44dzdatW0utc+DAASPJOkH8atm7d6/p2bOnqVu3rnE6naZJkybWSfXbt283bdu2NU6n00RFRZl//vOfpZ7bjIwMc+uttxofHx/TpEkTk5KScsETwS91UqQxP51M2r17d+Pv72/8/PzMjTfeaCZMmGCM+emk8M6dO5vatWsbl8tlbrzxRuubZ5fqv6KceyK4McaUlJSYGTNmmOjoaFOzZk1Tt25d07NnT7Nu3TpjjDE//vijGThwoHG73ea6664zjz32mHn22WetE6CNufAXJirCpZ6vwsJC89hjj5mgoCATEhJiJk2adMETwc9/bZ39GS9ZssS0a9fO+Pj4mJiYGLNmzRqr5mKv53NPBM/OzjZ9+vQx4eHhxsfHx9SvX9+MGzfOFBcXW/UpKSmmQ4cOxuVymcDAQHPLLbd4fLPpSqxatcrExMQYp9NpbrzxRrN27VprX7a7H8+dO9f86le/Mi6Xy/Tp08e89NJLJiwszONxLjeHc18/FeFS+4CdL+4sWbLENGjQwDidThMXF2c++OADj+fmQj/r+fPne5wQb4znz96Yy7+OrsSJEyfM73//e+Pr62tCQ0PN5MmTPd43zt2vly1bZtq1a2cCAwONn5+fad++vcdJ7UePHjV33XWXcTqdpn79+mbJkiUmJCTEvP7661bN3r17rfe0Vq1amZUrV3rsK3/6059MTEyMcblcJigoyNx7773m66+/ttafN2+eiYyMNDVq1PB47svKYQyXW0bV9cADD8jLy0uLFi2yvc5nn32mLl266MiRIwoNDa3A7oDyVRFXML5WDB48WF9++aXWr19f2a2gghw5ckSRkZFavXq1unbtWtntXBAfz6FKOnPmjPbt26dNmzZp6NChttYpKCjQ4cOH9fzzz6tfv34EJqAae/XVV9W9e3f5+flpxYoVevPNN/XXv/61sttCOfrkk0908uRJxcbGKisrS6NHj1aDBg3UqVOnym7tojgRHFXS7t271bZtWzVv3lyPPvqorXXeeustRUdHKy8vT5MnT67gDgFUpK1bt6p79+6KjY3V66+/rr/85S965JFHKrstlKOioiL98Y9/VPPmzXXfffepbt26Wrt2bZX+Uzd8PAcAAGADR5oAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAJSD5ORkLkgJXOMITQCqrYEDB8rhcJS63XnnnRX6uA6HQ++//77H2KhRo7RmzZoKfVwAlYsrggOo1u68807Nnz/fY8zpdF71Pvz9/eXv73/VHxfA1cORJgDVmtPpVFhYmMetdu3akn46IjRnzhzFx8fL19dXMTEx2rRpk7766it16dJFfn5+iouL04EDBzy2OXv2bN1www3y8fFRdHS0Fi5caC1r0KCBJOm+++6Tw+Gw7p//8VxJSYnGjx+vevXqyel0qlWrVk
pJSbGWHzx4UA6HQ++9955uv/12+fr6qmXLltq0aVPFPFEArhihCcA17U9/+pMeeughpaWlqWnTpurfv7+GDh2qMWPG6PPPP5ckPfHEE1b9smXL9OSTT2rkyJHavXu3hg4dqj/84Q/69NNPJUnbtm2TJM2fP19ZWVnW/fPNmDFDU6ZM0auvvqqdO3eqZ8+e6t27t/bv3+9RN3bsWI0aNUppaWlq0qSJHnjgAZ05c6YingoAV8oAQDU1YMAA4+XlZfz8/Dxu48ePN8YYI8k899xzVv2mTZuMJPPGG29YY2+99ZapVauWdb9Dhw5m8ODBHo/Tt29fc/fdd1v3JZlly5Z51LzwwgumZcuW1v2IiAgzYcIEj5qbb77ZDBs2zBhjTGZmppFk/va3v1nL9+zZYySZ9PT0n/lMALgaOKcJQLV2++23a/bs2R5jQUFB1v/feOON1v+HhoZKkmJjYz3GfvzxR+Xn5yswMFDp6ekaMmSIx/Y6duyoGTNm2O4pPz9fR48eVceOHUtt54svvvAYO7e/8PBwSVJOTo6aNm1q+/EAXB2EJgDVmp+fnxo3bnzR5ef+xXSHw3HRsZKSklJjZxljSo3ZYWc7l+sFQNXBOU0AcI6YmBht2LDBY2zjxo2KiYmx7tesWVPFxcUX3UZgYKAiIiIuux0A1QtHmgBUawUFBcrOzvYY8/b2Vp06dcq0vaefflr9+vXTTTfdpK5du+rDDz/Ue++9p9WrV1s1DRo00Jo1a9SxY0c5nU7r23rnb+eFF17QDTfcoFatWmn+/PlKS0vT4sWLy9QXgMpHaAJQraWkpFjnAp0VHR2tL7/8skzb69Onj2bMmKFXXnlFiYmJatiwoebPn68uXbpYNVOmTNGIESM0b948/epXv9LBgwdLbScxMVH5+fkaOXKkcnJy1KxZM33wwQeKiooqU18AKp/DGGMquwkAAICqjnOaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDh/wNHGcb1uChEfwAAAABJRU5ErkJggg==\n",
|
| 178 |
+
"text/plain": [
|
| 179 |
+
"<Figure size 640x480 with 1 Axes>"
|
| 180 |
+
]
|
| 181 |
+
},
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"output_type": "display_data"
|
| 184 |
+
}
|
| 185 |
+
],
|
| 186 |
+
"source": [
|
| 187 |
+
"# Plot\n",
|
| 188 |
+
"sns.countplot(x='Emotion',data=df)"
|
| 189 |
+
]
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"cell_type": "code",
|
| 193 |
+
"execution_count": 6,
|
| 194 |
+
"id": "40f991d0-952f-40c1-bf00-f3476ce0436d",
|
| 195 |
+
"metadata": {
|
| 196 |
+
"jupyter": {
|
| 197 |
+
"outputs_hidden": true
|
| 198 |
+
},
|
| 199 |
+
"scrolled": false,
|
| 200 |
+
"tags": []
|
| 201 |
+
},
|
| 202 |
+
"outputs": [
|
| 203 |
+
{
|
| 204 |
+
"data": {
|
| 205 |
+
"text/plain": [
|
| 206 |
+
"['BTC_ADDRESS_REGEX',\n",
|
| 207 |
+
" 'CURRENCY_REGEX',\n",
|
| 208 |
+
" 'CURRENCY_SYMB_REGEX',\n",
|
| 209 |
+
" 'Counter',\n",
|
| 210 |
+
" 'DATE_REGEX',\n",
|
| 211 |
+
" 'EMAIL_REGEX',\n",
|
| 212 |
+
" 'EMOJI_REGEX',\n",
|
| 213 |
+
" 'HASTAG_REGEX',\n",
|
| 214 |
+
" 'MASTERCard_REGEX',\n",
|
| 215 |
+
" 'MD5_SHA_REGEX',\n",
|
| 216 |
+
" 'MOST_COMMON_PUNCT_REGEX',\n",
|
| 217 |
+
" 'NUMBERS_REGEX',\n",
|
| 218 |
+
" 'PHONE_REGEX',\n",
|
| 219 |
+
" 'PoBOX_REGEX',\n",
|
| 220 |
+
" 'SPECIAL_CHARACTERS_REGEX',\n",
|
| 221 |
+
" 'STOPWORDS',\n",
|
| 222 |
+
" 'STOPWORDS_de',\n",
|
| 223 |
+
" 'STOPWORDS_en',\n",
|
| 224 |
+
" 'STOPWORDS_es',\n",
|
| 225 |
+
" 'STOPWORDS_fr',\n",
|
| 226 |
+
" 'STOPWORDS_ru',\n",
|
| 227 |
+
" 'STOPWORDS_yo',\n",
|
| 228 |
+
" 'STREET_ADDRESS_REGEX',\n",
|
| 229 |
+
" 'TextFrame',\n",
|
| 230 |
+
" 'URL_PATTERN',\n",
|
| 231 |
+
" 'USER_HANDLES_REGEX',\n",
|
| 232 |
+
" 'VISACard_REGEX',\n",
|
| 233 |
+
" '__builtins__',\n",
|
| 234 |
+
" '__cached__',\n",
|
| 235 |
+
" '__doc__',\n",
|
| 236 |
+
" '__file__',\n",
|
| 237 |
+
" '__generate_text',\n",
|
| 238 |
+
" '__loader__',\n",
|
| 239 |
+
" '__name__',\n",
|
| 240 |
+
" '__numbers_dict',\n",
|
| 241 |
+
" '__package__',\n",
|
| 242 |
+
" '__spec__',\n",
|
| 243 |
+
" '_lex_richness_herdan',\n",
|
| 244 |
+
" '_lex_richness_maas_ttr',\n",
|
| 245 |
+
" 'clean_text',\n",
|
| 246 |
+
" 'defaultdict',\n",
|
| 247 |
+
" 'digit2words',\n",
|
| 248 |
+
" 'extract_btc_address',\n",
|
| 249 |
+
" 'extract_currencies',\n",
|
| 250 |
+
" 'extract_currency_symbols',\n",
|
| 251 |
+
" 'extract_dates',\n",
|
| 252 |
+
" 'extract_emails',\n",
|
| 253 |
+
" 'extract_emojis',\n",
|
| 254 |
+
" 'extract_hashtags',\n",
|
| 255 |
+
" 'extract_html_tags',\n",
|
| 256 |
+
" 'extract_mastercard_addr',\n",
|
| 257 |
+
" 'extract_md5sha',\n",
|
| 258 |
+
" 'extract_numbers',\n",
|
| 259 |
+
" 'extract_pattern',\n",
|
| 260 |
+
" 'extract_phone_numbers',\n",
|
| 261 |
+
" 'extract_postoffice_box',\n",
|
| 262 |
+
" 'extract_shortwords',\n",
|
| 263 |
+
" 'extract_special_characters',\n",
|
| 264 |
+
" 'extract_stopwords',\n",
|
| 265 |
+
" 'extract_street_address',\n",
|
| 266 |
+
" 'extract_terms_in_bracket',\n",
|
| 267 |
+
" 'extract_urls',\n",
|
| 268 |
+
" 'extract_userhandles',\n",
|
| 269 |
+
" 'extract_visacard_addr',\n",
|
| 270 |
+
" 'fix_contractions',\n",
|
| 271 |
+
" 'generate_sentence',\n",
|
| 272 |
+
" 'hamming_distance',\n",
|
| 273 |
+
" 'inverse_df',\n",
|
| 274 |
+
" 'lexical_richness',\n",
|
| 275 |
+
" 'markov_chain',\n",
|
| 276 |
+
" 'math',\n",
|
| 277 |
+
" 'nlargest',\n",
|
| 278 |
+
" 'normalize',\n",
|
| 279 |
+
" 'num2words',\n",
|
| 280 |
+
" 'random',\n",
|
| 281 |
+
" 're',\n",
|
| 282 |
+
" 'read_txt',\n",
|
| 283 |
+
" 'remove_accents',\n",
|
| 284 |
+
" 'remove_bad_quotes',\n",
|
| 285 |
+
" 'remove_btc_address',\n",
|
| 286 |
+
" 'remove_currencies',\n",
|
| 287 |
+
" 'remove_currency_symbols',\n",
|
| 288 |
+
" 'remove_custom_pattern',\n",
|
| 289 |
+
" 'remove_custom_words',\n",
|
| 290 |
+
" 'remove_dates',\n",
|
| 291 |
+
" 'remove_emails',\n",
|
| 292 |
+
" 'remove_emojis',\n",
|
| 293 |
+
" 'remove_hashtags',\n",
|
| 294 |
+
" 'remove_html_tags',\n",
|
| 295 |
+
" 'remove_mastercard_addr',\n",
|
| 296 |
+
" 'remove_md5sha',\n",
|
| 297 |
+
" 'remove_multiple_spaces',\n",
|
| 298 |
+
" 'remove_non_ascii',\n",
|
| 299 |
+
" 'remove_numbers',\n",
|
| 300 |
+
" 'remove_phone_numbers',\n",
|
| 301 |
+
" 'remove_postoffice_box',\n",
|
| 302 |
+
" 'remove_puncts',\n",
|
| 303 |
+
" 'remove_punctuations',\n",
|
| 304 |
+
" 'remove_shortwords',\n",
|
| 305 |
+
" 'remove_special_characters',\n",
|
| 306 |
+
" 'remove_stopwords',\n",
|
| 307 |
+
" 'remove_street_address',\n",
|
| 308 |
+
" 'remove_terms_in_bracket',\n",
|
| 309 |
+
" 'remove_urls',\n",
|
| 310 |
+
" 'remove_userhandles',\n",
|
| 311 |
+
" 'remove_visacard_addr',\n",
|
| 312 |
+
" 'replace_bad_quotes',\n",
|
| 313 |
+
" 'replace_currencies',\n",
|
| 314 |
+
" 'replace_currency_symbols',\n",
|
| 315 |
+
" 'replace_dates',\n",
|
| 316 |
+
" 'replace_emails',\n",
|
| 317 |
+
" 'replace_emojis',\n",
|
| 318 |
+
" 'replace_numbers',\n",
|
| 319 |
+
" 'replace_phone_numbers',\n",
|
| 320 |
+
" 'replace_special_characters',\n",
|
| 321 |
+
" 'replace_term',\n",
|
| 322 |
+
" 'replace_urls',\n",
|
| 323 |
+
" 'string',\n",
|
| 324 |
+
" 'term_freq',\n",
|
| 325 |
+
" 'to_txt',\n",
|
| 326 |
+
" 'unicodedata',\n",
|
| 327 |
+
" 'word_freq',\n",
|
| 328 |
+
" 'word_length_freq']"
|
| 329 |
+
]
|
| 330 |
+
},
|
| 331 |
+
"execution_count": 6,
|
| 332 |
+
"metadata": {},
|
| 333 |
+
"output_type": "execute_result"
|
| 334 |
+
}
|
| 335 |
+
],
|
| 336 |
+
"source": [
|
| 337 |
+
"# Data Cleaning\n",
|
| 338 |
+
"dir(nfx)"
|
| 339 |
+
]
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"cell_type": "code",
|
| 343 |
+
"execution_count": 7,
|
| 344 |
+
"id": "b1f87847-a91c-4bd6-a307-d746eb5aa9a0",
|
| 345 |
+
"metadata": {},
|
| 346 |
+
"outputs": [],
|
| 347 |
+
"source": [
|
| 348 |
+
"# User handles\n",
|
| 349 |
+
"df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)"
|
| 350 |
+
]
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"cell_type": "code",
|
| 354 |
+
"execution_count": 8,
|
| 355 |
+
"id": "03886bc3-1ac4-4f1b-842b-e5d2d770ff81",
|
| 356 |
+
"metadata": {},
|
| 357 |
+
"outputs": [],
|
| 358 |
+
"source": [
|
| 359 |
+
"# Stopwords\n",
|
| 360 |
+
"df['Clean_Text'] = df['Clean_Text'].apply(nfx.remove_stopwords)"
|
| 361 |
+
]
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"cell_type": "markdown",
|
| 365 |
+
"id": "0ffcf4c7",
|
| 366 |
+
"metadata": {},
|
| 367 |
+
"source": [
|
| 368 |
+
"## We are not removing special characters because some rows consist only of special characters, and removing them would leave those rows empty."
|
| 369 |
+
]
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"cell_type": "code",
|
| 373 |
+
"execution_count": 9,
|
| 374 |
+
"id": "0a0fcc0c-4adf-4f0b-b226-164659ad70ba",
|
| 375 |
+
"metadata": {
|
| 376 |
+
"jupyter": {
|
| 377 |
+
"outputs_hidden": true
|
| 378 |
+
},
|
| 379 |
+
"tags": []
|
| 380 |
+
},
|
| 381 |
+
"outputs": [
|
| 382 |
+
{
|
| 383 |
+
"data": {
|
| 384 |
+
"text/html": [
|
| 385 |
+
"<div>\n",
|
| 386 |
+
"<style scoped>\n",
|
| 387 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 388 |
+
" vertical-align: middle;\n",
|
| 389 |
+
" }\n",
|
| 390 |
+
"\n",
|
| 391 |
+
" .dataframe tbody tr th {\n",
|
| 392 |
+
" vertical-align: top;\n",
|
| 393 |
+
" }\n",
|
| 394 |
+
"\n",
|
| 395 |
+
" .dataframe thead th {\n",
|
| 396 |
+
" text-align: right;\n",
|
| 397 |
+
" }\n",
|
| 398 |
+
"</style>\n",
|
| 399 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 400 |
+
" <thead>\n",
|
| 401 |
+
" <tr style=\"text-align: right;\">\n",
|
| 402 |
+
" <th></th>\n",
|
| 403 |
+
" <th>Emotion</th>\n",
|
| 404 |
+
" <th>Text</th>\n",
|
| 405 |
+
" <th>Clean_Text</th>\n",
|
| 406 |
+
" </tr>\n",
|
| 407 |
+
" </thead>\n",
|
| 408 |
+
" <tbody>\n",
|
| 409 |
+
" <tr>\n",
|
| 410 |
+
" <th>0</th>\n",
|
| 411 |
+
" <td>neutral</td>\n",
|
| 412 |
+
" <td>Why ?</td>\n",
|
| 413 |
+
" <td>?</td>\n",
|
| 414 |
+
" </tr>\n",
|
| 415 |
+
" <tr>\n",
|
| 416 |
+
" <th>1</th>\n",
|
| 417 |
+
" <td>joy</td>\n",
|
| 418 |
+
" <td>Sage Act upgrade on my to do list for tommorow.</td>\n",
|
| 419 |
+
" <td>Sage Act upgrade list tommorow.</td>\n",
|
| 420 |
+
" </tr>\n",
|
| 421 |
+
" <tr>\n",
|
| 422 |
+
" <th>2</th>\n",
|
| 423 |
+
" <td>sadness</td>\n",
|
| 424 |
+
" <td>ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...</td>\n",
|
| 425 |
+
" <td>WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS...</td>\n",
|
| 426 |
+
" </tr>\n",
|
| 427 |
+
" <tr>\n",
|
| 428 |
+
" <th>3</th>\n",
|
| 429 |
+
" <td>joy</td>\n",
|
| 430 |
+
" <td>Such an eye ! The true hazel eye-and so brill...</td>\n",
|
| 431 |
+
" <td>eye ! true hazel eye-and brilliant ! Regular f...</td>\n",
|
| 432 |
+
" </tr>\n",
|
| 433 |
+
" <tr>\n",
|
| 434 |
+
" <th>4</th>\n",
|
| 435 |
+
" <td>joy</td>\n",
|
| 436 |
+
" <td>@Iluvmiasantos ugh babe.. hugggzzz for u .! b...</td>\n",
|
| 437 |
+
" <td>ugh babe.. hugggzzz u .! babe naamazed nga ako...</td>\n",
|
| 438 |
+
" </tr>\n",
|
| 439 |
+
" <tr>\n",
|
| 440 |
+
" <th>...</th>\n",
|
| 441 |
+
" <td>...</td>\n",
|
| 442 |
+
" <td>...</td>\n",
|
| 443 |
+
" <td>...</td>\n",
|
| 444 |
+
" </tr>\n",
|
| 445 |
+
" <tr>\n",
|
| 446 |
+
" <th>34787</th>\n",
|
| 447 |
+
" <td>surprise</td>\n",
|
| 448 |
+
" <td>@MichelGW have you gift! Hope you like it! It'...</td>\n",
|
| 449 |
+
" <td>gift! Hope like it! hand wear ! It'll warm! Lol</td>\n",
|
| 450 |
+
" </tr>\n",
|
| 451 |
+
" <tr>\n",
|
| 452 |
+
" <th>34788</th>\n",
|
| 453 |
+
" <td>joy</td>\n",
|
| 454 |
+
" <td>The world didnt give it to me..so the world MO...</td>\n",
|
| 455 |
+
" <td>world didnt me..so world DEFINITELY cnt away!!!</td>\n",
|
| 456 |
+
" </tr>\n",
|
| 457 |
+
" <tr>\n",
|
| 458 |
+
" <th>34789</th>\n",
|
| 459 |
+
" <td>anger</td>\n",
|
| 460 |
+
" <td>A man robbed me today .</td>\n",
|
| 461 |
+
" <td>man robbed today .</td>\n",
|
| 462 |
+
" </tr>\n",
|
| 463 |
+
" <tr>\n",
|
| 464 |
+
" <th>34790</th>\n",
|
| 465 |
+
" <td>fear</td>\n",
|
| 466 |
+
" <td>Youu call it JEALOUSY, I call it of #Losing YO...</td>\n",
|
| 467 |
+
" <td>Youu JEALOUSY, #Losing YOU...</td>\n",
|
| 468 |
+
" </tr>\n",
|
| 469 |
+
" <tr>\n",
|
| 470 |
+
" <th>34791</th>\n",
|
| 471 |
+
" <td>sadness</td>\n",
|
| 472 |
+
" <td>I think about you baby, and I dream about you ...</td>\n",
|
| 473 |
+
" <td>think baby, dream time</td>\n",
|
| 474 |
+
" </tr>\n",
|
| 475 |
+
" </tbody>\n",
|
| 476 |
+
"</table>\n",
|
| 477 |
+
"<p>34792 rows × 3 columns</p>\n",
|
| 478 |
+
"</div>"
|
| 479 |
+
],
|
| 480 |
+
"text/plain": [
|
| 481 |
+
" Emotion Text \\\n",
|
| 482 |
+
"0 neutral Why ? \n",
|
| 483 |
+
"1 joy Sage Act upgrade on my to do list for tommorow. \n",
|
| 484 |
+
"2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ... \n",
|
| 485 |
+
"3 joy Such an eye ! The true hazel eye-and so brill... \n",
|
| 486 |
+
"4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b... \n",
|
| 487 |
+
"... ... ... \n",
|
| 488 |
+
"34787 surprise @MichelGW have you gift! Hope you like it! It'... \n",
|
| 489 |
+
"34788 joy The world didnt give it to me..so the world MO... \n",
|
| 490 |
+
"34789 anger A man robbed me today . \n",
|
| 491 |
+
"34790 fear Youu call it JEALOUSY, I call it of #Losing YO... \n",
|
| 492 |
+
"34791 sadness I think about you baby, and I dream about you ... \n",
|
| 493 |
+
"\n",
|
| 494 |
+
" Clean_Text \n",
|
| 495 |
+
"0 ? \n",
|
| 496 |
+
"1 Sage Act upgrade list tommorow. \n",
|
| 497 |
+
"2 WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS... \n",
|
| 498 |
+
"3 eye ! true hazel eye-and brilliant ! Regular f... \n",
|
| 499 |
+
"4 ugh babe.. hugggzzz u .! babe naamazed nga ako... \n",
|
| 500 |
+
"... ... \n",
|
| 501 |
+
"34787 gift! Hope like it! hand wear ! It'll warm! Lol \n",
|
| 502 |
+
"34788 world didnt me..so world DEFINITELY cnt away!!! \n",
|
| 503 |
+
"34789 man robbed today . \n",
|
| 504 |
+
"34790 Youu JEALOUSY, #Losing YOU... \n",
|
| 505 |
+
"34791 think baby, dream time \n",
|
| 506 |
+
"\n",
|
| 507 |
+
"[34792 rows x 3 columns]"
|
| 508 |
+
]
|
| 509 |
+
},
|
| 510 |
+
"execution_count": 9,
|
| 511 |
+
"metadata": {},
|
| 512 |
+
"output_type": "execute_result"
|
| 513 |
+
}
|
| 514 |
+
],
|
| 515 |
+
"source": [
|
| 516 |
+
"df"
|
| 517 |
+
]
|
| 518 |
+
},
|
| 519 |
+
{
|
| 520 |
+
"cell_type": "code",
|
| 521 |
+
"execution_count": 10,
|
| 522 |
+
"id": "450c39c0-79dd-4eaf-85fe-57e344eb81bd",
|
| 523 |
+
"metadata": {},
|
| 524 |
+
"outputs": [],
|
| 525 |
+
"source": [
|
| 526 |
+
"# Features & Labels\n",
|
| 527 |
+
"Xfeatures = df['Clean_Text']\n",
|
| 528 |
+
"ylabels = df['Emotion']"
|
| 529 |
+
]
|
| 530 |
+
},
|
| 531 |
+
{
|
| 532 |
+
"cell_type": "markdown",
|
| 533 |
+
"id": "edde3d4b",
|
| 534 |
+
"metadata": {},
|
| 535 |
+
"source": [
|
| 536 |
+
"# Split the data before fitting the pipeline: fitting the vectorizer and model on the training set only prevents data leakage from the test set."
|
| 537 |
+
]
|
| 538 |
+
},
|
| 539 |
+
{
|
| 540 |
+
"cell_type": "code",
|
| 541 |
+
"execution_count": 11,
|
| 542 |
+
"id": "27d7f976-c28f-449e-ae1a-53a42bbda4e8",
|
| 543 |
+
"metadata": {},
|
| 544 |
+
"outputs": [],
|
| 545 |
+
"source": [
|
| 546 |
+
"# Split Data\n",
|
| 547 |
+
"x_train,x_test,y_train,y_test = train_test_split(Xfeatures,ylabels,test_size=0.3,random_state=42)"
|
| 548 |
+
]
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"cell_type": "code",
|
| 552 |
+
"execution_count": 12,
|
| 553 |
+
"id": "2f086f29-dba9-40d2-a9dd-f06a6cca3a4c",
|
| 554 |
+
"metadata": {},
|
| 555 |
+
"outputs": [],
|
| 556 |
+
"source": [
|
| 557 |
+
"# Build Pipeline\n",
|
| 558 |
+
"from sklearn.pipeline import Pipeline"
|
| 559 |
+
]
|
| 560 |
+
},
|
| 561 |
+
{
|
| 562 |
+
"cell_type": "code",
|
| 563 |
+
"execution_count": 13,
|
| 564 |
+
"id": "6b81cc86-2bef-40c2-b9a3-668caaadaff0",
|
| 565 |
+
"metadata": {},
|
| 566 |
+
"outputs": [],
|
| 567 |
+
"source": [
|
| 568 |
+
"# LogisticRegression Pipeline\n",
|
| 569 |
+
"pipe_lr = Pipeline(steps=[('cv',CountVectorizer()),('lr',LogisticRegression())])"
|
| 570 |
+
]
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"cell_type": "code",
|
| 574 |
+
"execution_count": 14,
|
| 575 |
+
"id": "dc64b9a7-efe2-4bc4-a0e7-46dff1d52b31",
|
| 576 |
+
"metadata": {
|
| 577 |
+
"jupyter": {
|
| 578 |
+
"outputs_hidden": true
|
| 579 |
+
},
|
| 580 |
+
"scrolled": false,
|
| 581 |
+
"tags": []
|
| 582 |
+
},
|
| 583 |
+
"outputs": [
|
| 584 |
+
{
|
| 585 |
+
"name": "stderr",
|
| 586 |
+
"output_type": "stream",
|
| 587 |
+
"text": [
|
| 588 |
+
"C:\\Users\\Sanket\\anaconda3\\envs\\nlp\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
|
| 589 |
+
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
|
| 590 |
+
"\n",
|
| 591 |
+
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
|
| 592 |
+
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
|
| 593 |
+
"Please also refer to the documentation for alternative solver options:\n",
|
| 594 |
+
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
|
| 595 |
+
" n_iter_i = _check_optimize_result(\n"
|
| 596 |
+
]
|
| 597 |
+
},
|
| 598 |
+
{
|
| 599 |
+
"data": {
|
| 600 |
+
"text/html": [
|
| 601 |
+
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CountVectorizer</label><div class=\"sk-toggleable__content\"><pre>CountVectorizer()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div></div></div>"
|
| 602 |
+
],
|
| 603 |
+
"text/plain": [
|
| 604 |
+
"Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])"
|
| 605 |
+
]
|
| 606 |
+
},
|
| 607 |
+
"execution_count": 14,
|
| 608 |
+
"metadata": {},
|
| 609 |
+
"output_type": "execute_result"
|
| 610 |
+
}
|
| 611 |
+
],
|
| 612 |
+
"source": [
|
| 613 |
+
"# Train and Fit Data\n",
|
| 614 |
+
"pipe_lr.fit(x_train,y_train)"
|
| 615 |
+
]
|
| 616 |
+
},
|
| 617 |
+
{
|
| 618 |
+
"cell_type": "code",
|
| 619 |
+
"execution_count": 15,
|
| 620 |
+
"id": "135ed6f8-56ff-4d53-85e3-541e3a7ae2d7",
|
| 621 |
+
"metadata": {},
|
| 622 |
+
"outputs": [
|
| 623 |
+
{
|
| 624 |
+
"data": {
|
| 625 |
+
"text/html": [
|
| 626 |
+
"<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CountVectorizer</label><div class=\"sk-toggleable__content\"><pre>CountVectorizer()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div></div></div>"
|
| 627 |
+
],
|
| 628 |
+
"text/plain": [
|
| 629 |
+
"Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])"
|
| 630 |
+
]
|
| 631 |
+
},
|
| 632 |
+
"execution_count": 15,
|
| 633 |
+
"metadata": {},
|
| 634 |
+
"output_type": "execute_result"
|
| 635 |
+
}
|
| 636 |
+
],
|
| 637 |
+
"source": [
|
| 638 |
+
"pipe_lr"
|
| 639 |
+
]
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"cell_type": "code",
|
| 643 |
+
"execution_count": 16,
|
| 644 |
+
"id": "28396371-5f5c-4a3b-b974-164e047764f3",
|
| 645 |
+
"metadata": {},
|
| 646 |
+
"outputs": [
|
| 647 |
+
{
|
| 648 |
+
"data": {
|
| 649 |
+
"text/plain": [
|
| 650 |
+
"0.619946349875455"
|
| 651 |
+
]
|
| 652 |
+
},
|
| 653 |
+
"execution_count": 16,
|
| 654 |
+
"metadata": {},
|
| 655 |
+
"output_type": "execute_result"
|
| 656 |
+
}
|
| 657 |
+
],
|
| 658 |
+
"source": [
|
| 659 |
+
"# Check Accuracy\n",
|
| 660 |
+
"pipe_lr.score(x_test,y_test)"
|
| 661 |
+
]
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"cell_type": "code",
|
| 665 |
+
"execution_count": 17,
|
| 666 |
+
"id": "eb3a26b6-d09e-422f-991b-b08c48f55b75",
|
| 667 |
+
"metadata": {},
|
| 668 |
+
"outputs": [],
|
| 669 |
+
"source": [
|
| 670 |
+
"# Make A Prediction\n",
|
| 671 |
+
"ex1 = \"This book was so interesting it made me happy\""
|
| 672 |
+
]
|
| 673 |
+
},
|
| 674 |
+
{
|
| 675 |
+
"cell_type": "code",
|
| 676 |
+
"execution_count": 18,
|
| 677 |
+
"id": "b08597d9-6f59-45cb-a648-95b0da1ce313",
|
| 678 |
+
"metadata": {},
|
| 679 |
+
"outputs": [
|
| 680 |
+
{
|
| 681 |
+
"data": {
|
| 682 |
+
"text/plain": [
|
| 683 |
+
"array(['joy'], dtype=object)"
|
| 684 |
+
]
|
| 685 |
+
},
|
| 686 |
+
"execution_count": 18,
|
| 687 |
+
"metadata": {},
|
| 688 |
+
"output_type": "execute_result"
|
| 689 |
+
}
|
| 690 |
+
],
|
| 691 |
+
"source": [
|
| 692 |
+
"pipe_lr.predict([ex1])"
|
| 693 |
+
]
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"cell_type": "code",
|
| 697 |
+
"execution_count": 19,
|
| 698 |
+
"id": "5b3822ac-17fc-43dd-9bb7-8dad07a4d32c",
|
| 699 |
+
"metadata": {},
|
| 700 |
+
"outputs": [
|
| 701 |
+
{
|
| 702 |
+
"data": {
|
| 703 |
+
"text/plain": [
|
| 704 |
+
"array([[1.60505334e-03, 7.06448086e-03, 6.95652453e-03, 9.43810868e-01,\n",
|
| 705 |
+
" 1.00440585e-04, 2.63232385e-02, 6.63277122e-05, 1.40730665e-02]])"
|
| 706 |
+
]
|
| 707 |
+
},
|
| 708 |
+
"execution_count": 19,
|
| 709 |
+
"metadata": {},
|
| 710 |
+
"output_type": "execute_result"
|
| 711 |
+
}
|
| 712 |
+
],
|
| 713 |
+
"source": [
|
| 714 |
+
"# Prediction probabilities (one value per class)\n",
|
| 715 |
+
"pipe_lr.predict_proba([ex1])"
|
| 716 |
+
]
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"cell_type": "code",
|
| 720 |
+
"execution_count": 20,
|
| 721 |
+
"id": "5b7c4596-d643-48e5-a777-79a6f55c49da",
|
| 722 |
+
"metadata": {},
|
| 723 |
+
"outputs": [
|
| 724 |
+
{
|
| 725 |
+
"data": {
|
| 726 |
+
"text/plain": [
|
| 727 |
+
"array(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'shame',\n",
|
| 728 |
+
" 'surprise'], dtype=object)"
|
| 729 |
+
]
|
| 730 |
+
},
|
| 731 |
+
"execution_count": 20,
|
| 732 |
+
"metadata": {},
|
| 733 |
+
"output_type": "execute_result"
|
| 734 |
+
}
|
| 735 |
+
],
|
| 736 |
+
"source": [
|
| 737 |
+
"# Class labels, in the same order as the predict_proba columns\n",
|
| 738 |
+
"pipe_lr.classes_"
|
| 739 |
+
]
|
| 740 |
+
},
|
| 741 |
+
{
|
| 742 |
+
"cell_type": "code",
|
| 743 |
+
"execution_count": 21,
|
| 744 |
+
"id": "c0d40f62-b1fd-4748-a279-c8f50c748f26",
|
| 745 |
+
"metadata": {},
|
| 746 |
+
"outputs": [],
|
| 747 |
+
"source": [
|
| 748 |
+
"# Save Model & Pipeline\n",
|
| 749 |
+
"import joblib\n",
|
| 750 |
+
"pipeline_file = open(\"../models/emotion_classifier_pipe_lr.pkl\",\"wb\")\n",
|
| 751 |
+
"joblib.dump(pipe_lr,pipeline_file)\n",
|
| 752 |
+
"pipeline_file.close()"
|
| 753 |
+
]
|
| 754 |
+
},
|
| 755 |
+
{
|
| 756 |
+
"cell_type": "code",
|
| 757 |
+
"execution_count": null,
|
| 758 |
+
"id": "377c4e98-67f0-45e5-8dd5-0417585754f0",
|
| 759 |
+
"metadata": {},
|
| 760 |
+
"outputs": [],
|
| 761 |
+
"source": []
|
| 762 |
+
}
|
| 763 |
+
],
|
| 764 |
+
"metadata": {
|
| 765 |
+
"kernelspec": {
|
| 766 |
+
"display_name": "Python 3 (ipykernel)",
|
| 767 |
+
"language": "python",
|
| 768 |
+
"name": "python3"
|
| 769 |
+
},
|
| 770 |
+
"language_info": {
|
| 771 |
+
"codemirror_mode": {
|
| 772 |
+
"name": "ipython",
|
| 773 |
+
"version": 3
|
| 774 |
+
},
|
| 775 |
+
"file_extension": ".py",
|
| 776 |
+
"mimetype": "text/x-python",
|
| 777 |
+
"name": "python",
|
| 778 |
+
"nbconvert_exporter": "python",
|
| 779 |
+
"pygments_lexer": "ipython3",
|
| 780 |
+
"version": "3.10.9"
|
| 781 |
+
}
|
| 782 |
+
},
|
| 783 |
+
"nbformat": 4,
|
| 784 |
+
"nbformat_minor": 5
|
| 785 |
+
}
|