Spaces:
Sleeping
Sleeping
Upload 26 files
Browse files- .gitattributes +6 -0
- Airline Passenger Feedback Portal.pptx +3 -0
- Notebooks/Cleaning_&_Preprocessing_raw_data.ipynb +0 -0
- Notebooks/Cleaning_reviews.ipynb +0 -0
- Notebooks/Machine_Leaning_Model_using_Ratings.ipynb +0 -0
- Notebooks/Ratings_Model_Pipeline.ipynb +459 -0
- Notebooks/raw_data.xlsx +3 -0
- app.py +93 -0
- data.db +3 -0
- images/air_white.jpeg +0 -0
- images/air_white_flip.jpg +0 -0
- images/dark_bg_home.jpg +0 -0
- images/dashboard_bg.jpg +0 -0
- images/form_bg.jpg +3 -0
- images/logo.png +0 -0
- nlp_model/fingerprint.pb +3 -0
- nlp_model/keras_metadata.pb +3 -0
- nlp_model/saved_model.pb +3 -0
- nlp_model/variables/variables.data-00000-of-00001 +3 -0
- nlp_model/variables/variables.index +0 -0
- pages/2_βοΈ_Feedback.py +398 -0
- pages/3_π_Dashboard.py +214 -0
- ratings_model.joblib +3 -0
- raw_data.xlsx +3 -0
- requirements.txt +12 -0
- stopwords/acronym.json +1 -0
- stopwords/contractions.json +1 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Airline[[:space:]]Passenger[[:space:]]Feedback[[:space:]]Portal.pptx filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
data.db filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
images/form_bg.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
nlp_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
Notebooks/raw_data.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
raw_data.xlsx filter=lfs diff=lfs merge=lfs -text
|
Airline Passenger Feedback Portal.pptx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a81066397780d7b50ce13a3c44ff1b6d1ed7ddc77d305c0b95ae5449b4594cc
|
| 3 |
+
size 6137901
|
Notebooks/Cleaning_&_Preprocessing_raw_data.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Notebooks/Cleaning_reviews.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Notebooks/Machine_Leaning_Model_using_Ratings.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Notebooks/Ratings_Model_Pipeline.ipynb
ADDED
|
@@ -0,0 +1,459 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "80096ce4",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"# Importing Necessary Libraries"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": 1,
|
| 14 |
+
"id": "ae548824",
|
| 15 |
+
"metadata": {
|
| 16 |
+
"tags": []
|
| 17 |
+
},
|
| 18 |
+
"outputs": [],
|
| 19 |
+
"source": [
|
| 20 |
+
"import sqlite3\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"import numpy as np\n",
|
| 23 |
+
"import pandas as pd\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 26 |
+
"from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder\n",
|
| 27 |
+
"from sklearn.ensemble import RandomForestClassifier\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"from sklearn.compose import ColumnTransformer\n",
|
| 30 |
+
"from sklearn.pipeline import Pipeline\n",
|
| 31 |
+
"from sklearn.metrics import classification_report\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"import joblib"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "markdown",
|
| 38 |
+
"id": "878bccd1",
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"source": [
|
| 41 |
+
"# Reading the dataset"
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"cell_type": "code",
|
| 46 |
+
"execution_count": 2,
|
| 47 |
+
"id": "70f7f30a",
|
| 48 |
+
"metadata": {
|
| 49 |
+
"tags": []
|
| 50 |
+
},
|
| 51 |
+
"outputs": [
|
| 52 |
+
{
|
| 53 |
+
"ename": "DatabaseError",
|
| 54 |
+
"evalue": "Execution failed on sql 'SELECT * FROM airline_reviews': no such table: airline_reviews",
|
| 55 |
+
"output_type": "error",
|
| 56 |
+
"traceback": [
|
| 57 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 58 |
+
"\u001b[1;31mOperationalError\u001b[0m Traceback (most recent call last)",
|
| 59 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2018\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2017\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 2018\u001b[0m cur\u001b[38;5;241m.\u001b[39mexecute(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 2019\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cur\n",
|
| 60 |
+
"\u001b[1;31mOperationalError\u001b[0m: no such table: airline_reviews",
|
| 61 |
+
"\nThe above exception was the direct cause of the following exception:\n",
|
| 62 |
+
"\u001b[1;31mDatabaseError\u001b[0m Traceback (most recent call last)",
|
| 63 |
+
"Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m connection \u001b[38;5;241m=\u001b[39m sqlite3\u001b[38;5;241m.\u001b[39mconnect(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata.db\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_sql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[38;5;124;43mSELECT * FROM airline_reviews\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcon\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreview_date\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m df\u001b[38;5;241m.\u001b[39mdrop(columns \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview_date\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mauthor\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcustomer_review\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview_clean\u001b[39m\u001b[38;5;124m'\u001b[39m], inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 4\u001b[0m df\u001b[38;5;241m.\u001b[39mhead()\n",
|
| 64 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:564\u001b[0m, in \u001b[0;36mread_sql\u001b[1;34m(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)\u001b[0m\n\u001b[0;32m 561\u001b[0m pandas_sql \u001b[38;5;241m=\u001b[39m pandasSQL_builder(con)\n\u001b[0;32m 563\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pandas_sql, SQLiteDatabase):\n\u001b[1;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpandas_sql\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex_col\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 567\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 568\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoerce_float\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoerce_float\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 569\u001b[0m \u001b[43m \u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparse_dates\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 570\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 571\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 573\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 574\u001b[0m _is_table_name \u001b[38;5;241m=\u001b[39m pandas_sql\u001b[38;5;241m.\u001b[39mhas_table(sql)\n",
|
| 65 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2078\u001b[0m, in \u001b[0;36mSQLiteDatabase.read_query\u001b[1;34m(self, sql, index_col, coerce_float, params, parse_dates, chunksize, dtype)\u001b[0m\n\u001b[0;32m 2066\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_query\u001b[39m(\n\u001b[0;32m 2067\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 2068\u001b[0m sql,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2074\u001b[0m dtype: DtypeArg \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 2075\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Iterator[DataFrame]:\n\u001b[0;32m 2077\u001b[0m args \u001b[38;5;241m=\u001b[39m _convert_params(sql, params)\n\u001b[1;32m-> 2078\u001b[0m cursor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2079\u001b[0m columns \u001b[38;5;241m=\u001b[39m [col_desc[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m col_desc \u001b[38;5;129;01min\u001b[39;00m cursor\u001b[38;5;241m.\u001b[39mdescription]\n\u001b[0;32m 2081\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
| 66 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2030\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minner_exc\u001b[39;00m\n\u001b[0;32m 2029\u001b[0m ex \u001b[38;5;241m=\u001b[39m DatabaseError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExecution failed on sql \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 2030\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n",
|
| 67 |
+
"\u001b[1;31mDatabaseError\u001b[0m: Execution failed on sql 'SELECT * FROM airline_reviews': no such table: airline_reviews"
|
| 68 |
+
]
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
+
"source": [
|
| 72 |
+
"connection = sqlite3.connect('data.db')\n",
|
| 73 |
+
"df = pd.read_sql(sql=\"\"\"SELECT * FROM airline_reviews\"\"\", con=connection, parse_dates=['review_date'])\n",
|
| 74 |
+
"df.drop(columns = ['review_date', 'author', 'customer_review', 'review_clean'], inplace=True)\n",
|
| 75 |
+
"df.head()"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "markdown",
|
| 80 |
+
"id": "26adeb4a",
|
| 81 |
+
"metadata": {},
|
| 82 |
+
"source": [
|
| 83 |
+
"# Spliting Data into training and test sets"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": 7,
|
| 89 |
+
"id": "429fdf53",
|
| 90 |
+
"metadata": {},
|
| 91 |
+
"outputs": [],
|
| 92 |
+
"source": [
|
| 93 |
+
"X = df.drop(columns = \"recommended\")\n",
|
| 94 |
+
"y = df[\"recommended\"]"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": 8,
|
| 100 |
+
"id": "214fbd25",
|
| 101 |
+
"metadata": {},
|
| 102 |
+
"outputs": [
|
| 103 |
+
{
|
| 104 |
+
"name": "stdout",
|
| 105 |
+
"output_type": "stream",
|
| 106 |
+
"text": [
|
| 107 |
+
"Shape of X_train: (18260, 12)\n",
|
| 108 |
+
"Shape of y_train: (18260,)\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"Shape of X_test: (4566, 12)\n",
|
| 111 |
+
"Shape of y_test: (4566,)\n"
|
| 112 |
+
]
|
| 113 |
+
}
|
| 114 |
+
],
|
| 115 |
+
"source": [
|
| 116 |
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"# Printing shapes of train and test data\n",
|
| 119 |
+
"print(f'Shape of X_train: {X_train.shape}')\n",
|
| 120 |
+
"print(f'Shape of y_train: {y_train.shape}\\n')\n",
|
| 121 |
+
"print(f'Shape of X_test: {X_test.shape}')\n",
|
| 122 |
+
"print(f'Shape of y_test: {y_test.shape}')"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"cell_type": "markdown",
|
| 127 |
+
"id": "2fac7fe1",
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"source": [
|
| 130 |
+
"# Column Transformer\n",
|
| 131 |
+
"- Column Transformer shall be used to encode categorical columns\n",
|
| 132 |
+
"- Ordinal Encoder:\n",
|
| 133 |
+
" - `cabin`, `type_of_flight`, `frequency` columns\n",
|
| 134 |
+
"- One Hot Encoder:\n",
|
| 135 |
+
" - `airline` and `traveller_type` columns"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"cell_type": "code",
|
| 140 |
+
"execution_count": 9,
|
| 141 |
+
"id": "c1c76301",
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [],
|
| 144 |
+
"source": [
|
| 145 |
+
"ct_encoding = ColumnTransformer(transformers = [\n",
|
| 146 |
+
" ('ohe_enc', OneHotEncoder(handle_unknown = \"ignore\", sparse_output = False), [0,1]),\n",
|
| 147 |
+
" (\"ord_enc\", OrdinalEncoder(categories = [[\"Economy Class\", \"Premium Economy\", \"Business Class\", \"First Class\"], [\"Direct\", \"Indirect\"], [\"Rarely\", \"Occasionally\", \"Often\"]], encoded_missing_value = 0), [2,3,4]),\n",
|
| 148 |
+
"], remainder = 'passthrough')"
|
| 149 |
+
]
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"cell_type": "code",
|
| 153 |
+
"execution_count": 10,
|
| 154 |
+
"id": "06fa281d",
|
| 155 |
+
"metadata": {},
|
| 156 |
+
"outputs": [
|
| 157 |
+
{
|
| 158 |
+
"data": {
|
| 159 |
+
"text/plain": [
|
| 160 |
+
"array([[ 0., 0., 0., ..., 4., 5., 8.],\n",
|
| 161 |
+
" [ 0., 0., 0., ..., 1., 1., 1.],\n",
|
| 162 |
+
" [ 0., 0., 0., ..., 1., 1., 1.],\n",
|
| 163 |
+
" ...,\n",
|
| 164 |
+
" [ 0., 0., 0., ..., 5., 5., 10.],\n",
|
| 165 |
+
" [ 0., 0., 0., ..., 5., 5., 8.],\n",
|
| 166 |
+
" [ 0., 0., 0., ..., 5., 3., 8.]])"
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
"execution_count": 10,
|
| 170 |
+
"metadata": {},
|
| 171 |
+
"output_type": "execute_result"
|
| 172 |
+
}
|
| 173 |
+
],
|
| 174 |
+
"source": [
|
| 175 |
+
"ct_encoding.fit_transform(X_train)"
|
| 176 |
+
]
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"cell_type": "code",
|
| 180 |
+
"execution_count": 11,
|
| 181 |
+
"id": "a6aa28ce",
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [
|
| 184 |
+
{
|
| 185 |
+
"data": {
|
| 186 |
+
"text/plain": [
|
| 187 |
+
"(18260, 80)"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
"execution_count": 11,
|
| 191 |
+
"metadata": {},
|
| 192 |
+
"output_type": "execute_result"
|
| 193 |
+
}
|
| 194 |
+
],
|
| 195 |
+
"source": [
|
| 196 |
+
"ct_encoding.fit_transform(X_train).shape"
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"cell_type": "markdown",
|
| 201 |
+
"id": "83a0f447",
|
| 202 |
+
"metadata": {},
|
| 203 |
+
"source": [
|
| 204 |
+
"# Defining model\n",
|
| 205 |
+
"- We have already finalized our ML model to be applied by analyzing various models and performing extensive hyperparameter tuning.\n",
|
| 206 |
+
"- We will be using Random Forest Classifier as our ML model."
|
| 207 |
+
]
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"cell_type": "code",
|
| 211 |
+
"execution_count": 12,
|
| 212 |
+
"id": "46c88017",
|
| 213 |
+
"metadata": {},
|
| 214 |
+
"outputs": [],
|
| 215 |
+
"source": [
|
| 216 |
+
"model = RandomForestClassifier(n_estimators=359, \n",
|
| 217 |
+
" criterion='gini', \n",
|
| 218 |
+
" max_depth=16, \n",
|
| 219 |
+
" max_features='log2', \n",
|
| 220 |
+
" min_samples_split=25, \n",
|
| 221 |
+
" random_state=42)"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "markdown",
|
| 226 |
+
"id": "c3dcdf2f",
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"source": [
|
| 229 |
+
"# Creating Pipeline"
|
| 230 |
+
]
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"cell_type": "code",
|
| 234 |
+
"execution_count": 13,
|
| 235 |
+
"id": "605e7f32",
|
| 236 |
+
"metadata": {},
|
| 237 |
+
"outputs": [],
|
| 238 |
+
"source": [
|
| 239 |
+
"pipe = Pipeline(steps=[('encoding', ct_encoding), ('model_deploy', model)])"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": 14,
|
| 245 |
+
"id": "04e7ad13",
|
| 246 |
+
"metadata": {},
|
| 247 |
+
"outputs": [
|
| 248 |
+
{
|
| 249 |
+
"data": {
|
| 250 |
+
"text/html": [
|
| 251 |
+
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('encoding',\n",
|
| 252 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
| 253 |
+
" transformers=[('ohe_enc',\n",
|
| 254 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 255 |
+
" sparse_output=False),\n",
|
| 256 |
+
" [0, 1]),\n",
|
| 257 |
+
" ('ord_enc',\n",
|
| 258 |
+
" OrdinalEncoder(categories=[['Economy '\n",
|
| 259 |
+
" 'Class',\n",
|
| 260 |
+
" 'Premium '\n",
|
| 261 |
+
" 'Economy',\n",
|
| 262 |
+
" 'Business '\n",
|
| 263 |
+
" 'Class',\n",
|
| 264 |
+
" 'First '\n",
|
| 265 |
+
" 'Class'],\n",
|
| 266 |
+
" ['Direct',\n",
|
| 267 |
+
" 'Indirect'],\n",
|
| 268 |
+
" ['Rarely',\n",
|
| 269 |
+
" 'Occasionally',\n",
|
| 270 |
+
" 'Often']],\n",
|
| 271 |
+
" encoded_missing_value=0),\n",
|
| 272 |
+
" [2, 3, 4])])),\n",
|
| 273 |
+
" ('model_deploy',\n",
|
| 274 |
+
" RandomForestClassifier(max_depth=16, max_features='log2',\n",
|
| 275 |
+
" min_samples_split=25, n_estimators=359,\n",
|
| 276 |
+
" random_state=42))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('encoding',\n",
|
| 277 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
| 278 |
+
" transformers=[('ohe_enc',\n",
|
| 279 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 280 |
+
" sparse_output=False),\n",
|
| 281 |
+
" [0, 1]),\n",
|
| 282 |
+
" ('ord_enc',\n",
|
| 283 |
+
" OrdinalEncoder(categories=[['Economy '\n",
|
| 284 |
+
" 'Class',\n",
|
| 285 |
+
" 'Premium '\n",
|
| 286 |
+
" 'Economy',\n",
|
| 287 |
+
" 'Business '\n",
|
| 288 |
+
" 'Class',\n",
|
| 289 |
+
" 'First '\n",
|
| 290 |
+
" 'Class'],\n",
|
| 291 |
+
" ['Direct',\n",
|
| 292 |
+
" 'Indirect'],\n",
|
| 293 |
+
" ['Rarely',\n",
|
| 294 |
+
" 'Occasionally',\n",
|
| 295 |
+
" 'Often']],\n",
|
| 296 |
+
" encoded_missing_value=0),\n",
|
| 297 |
+
" [2, 3, 4])])),\n",
|
| 298 |
+
" ('model_deploy',\n",
|
| 299 |
+
" RandomForestClassifier(max_depth=16, max_features='log2',\n",
|
| 300 |
+
" min_samples_split=25, n_estimators=359,\n",
|
| 301 |
+
" random_state=42))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">encoding: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(remainder='passthrough',\n",
|
| 302 |
+
" transformers=[('ohe_enc',\n",
|
| 303 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 304 |
+
" sparse_output=False),\n",
|
| 305 |
+
" [0, 1]),\n",
|
| 306 |
+
" ('ord_enc',\n",
|
| 307 |
+
" OrdinalEncoder(categories=[['Economy Class',\n",
|
| 308 |
+
" 'Premium Economy',\n",
|
| 309 |
+
" 'Business Class',\n",
|
| 310 |
+
" 'First Class'],\n",
|
| 311 |
+
" ['Direct',\n",
|
| 312 |
+
" 'Indirect'],\n",
|
| 313 |
+
" ['Rarely',\n",
|
| 314 |
+
" 'Occasionally',\n",
|
| 315 |
+
" 'Often']],\n",
|
| 316 |
+
" encoded_missing_value=0),\n",
|
| 317 |
+
" [2, 3, 4])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ohe_enc</label><div class=\"sk-toggleable__content\"><pre>[0, 1]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ord_enc</label><div class=\"sk-toggleable__content\"><pre>[2, 3, 4]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(categories=[['Economy Class', 'Premium Economy',\n",
|
| 318 |
+
" 'Business Class', 'First Class'],\n",
|
| 319 |
+
" ['Direct', 'Indirect'],\n",
|
| 320 |
+
" ['Rarely', 'Occasionally', 'Often']],\n",
|
| 321 |
+
" encoded_missing_value=0)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">remainder</label><div class=\"sk-toggleable__content\"><pre>['seat_comfort', 'cabin_service', 'food_bev', 'entertainment', 'ground_service', 'value_for_money', 'overall']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">passthrough</label><div class=\"sk-toggleable__content\"><pre>passthrough</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(max_depth=16, max_features='log2', min_samples_split=25,\n",
|
| 322 |
+
" n_estimators=359, random_state=42)</pre></div></div></div></div></div></div></div>"
|
| 323 |
+
],
|
| 324 |
+
"text/plain": [
|
| 325 |
+
"Pipeline(steps=[('encoding',\n",
|
| 326 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
| 327 |
+
" transformers=[('ohe_enc',\n",
|
| 328 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 329 |
+
" sparse_output=False),\n",
|
| 330 |
+
" [0, 1]),\n",
|
| 331 |
+
" ('ord_enc',\n",
|
| 332 |
+
" OrdinalEncoder(categories=[['Economy '\n",
|
| 333 |
+
" 'Class',\n",
|
| 334 |
+
" 'Premium '\n",
|
| 335 |
+
" 'Economy',\n",
|
| 336 |
+
" 'Business '\n",
|
| 337 |
+
" 'Class',\n",
|
| 338 |
+
" 'First '\n",
|
| 339 |
+
" 'Class'],\n",
|
| 340 |
+
" ['Direct',\n",
|
| 341 |
+
" 'Indirect'],\n",
|
| 342 |
+
" ['Rarely',\n",
|
| 343 |
+
" 'Occasionally',\n",
|
| 344 |
+
" 'Often']],\n",
|
| 345 |
+
" encoded_missing_value=0),\n",
|
| 346 |
+
" [2, 3, 4])])),\n",
|
| 347 |
+
" ('model_deploy',\n",
|
| 348 |
+
" RandomForestClassifier(max_depth=16, max_features='log2',\n",
|
| 349 |
+
" min_samples_split=25, n_estimators=359,\n",
|
| 350 |
+
" random_state=42))])"
|
| 351 |
+
]
|
| 352 |
+
},
|
| 353 |
+
"execution_count": 14,
|
| 354 |
+
"metadata": {},
|
| 355 |
+
"output_type": "execute_result"
|
| 356 |
+
}
|
| 357 |
+
],
|
| 358 |
+
"source": [
|
| 359 |
+
"pipe.fit(X_train, y_train)"
|
| 360 |
+
]
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"cell_type": "markdown",
|
| 364 |
+
"id": "641083dd",
|
| 365 |
+
"metadata": {},
|
| 366 |
+
"source": [
|
| 367 |
+
"# Exporting pipeline model as a joblib file"
|
| 368 |
+
]
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"cell_type": "code",
|
| 372 |
+
"execution_count": 16,
|
| 373 |
+
"id": "db7f494a",
|
| 374 |
+
"metadata": {},
|
| 375 |
+
"outputs": [
|
| 376 |
+
{
|
| 377 |
+
"data": {
|
| 378 |
+
"text/plain": [
|
| 379 |
+
"['ratings_model.joblib']"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
"execution_count": 16,
|
| 383 |
+
"metadata": {},
|
| 384 |
+
"output_type": "execute_result"
|
| 385 |
+
}
|
| 386 |
+
],
|
| 387 |
+
"source": [
|
| 388 |
+
"# joblib.dump(pipe, 'ratings_model.joblib')"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "markdown",
|
| 393 |
+
"id": "3ebceee1",
|
| 394 |
+
"metadata": {},
|
| 395 |
+
"source": [
|
| 396 |
+
"# Loading and taking predictions using exported model"
|
| 397 |
+
]
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"cell_type": "code",
|
| 401 |
+
"execution_count": 19,
|
| 402 |
+
"id": "d4219971",
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [
|
| 405 |
+
{
|
| 406 |
+
"name": "stdout",
|
| 407 |
+
"output_type": "stream",
|
| 408 |
+
"text": [
|
| 409 |
+
" precision recall f1-score support\n",
|
| 410 |
+
"\n",
|
| 411 |
+
" no 0.96 0.96 0.96 2333\n",
|
| 412 |
+
" yes 0.96 0.95 0.96 2233\n",
|
| 413 |
+
"\n",
|
| 414 |
+
" accuracy 0.96 4566\n",
|
| 415 |
+
" macro avg 0.96 0.96 0.96 4566\n",
|
| 416 |
+
"weighted avg 0.96 0.96 0.96 4566\n",
|
| 417 |
+
"\n"
|
| 418 |
+
]
|
| 419 |
+
}
|
| 420 |
+
],
|
| 421 |
+
"source": [
|
| 422 |
+
"load_model = joblib.load('ratings_model.joblib')\n",
|
| 423 |
+
"y_pred = load_model.predict(X_test)\n",
|
| 424 |
+
"print(classification_report(y_test, y_pred))"
|
| 425 |
+
]
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"cell_type": "markdown",
|
| 429 |
+
"id": "18279bbf",
|
| 430 |
+
"metadata": {},
|
| 431 |
+
"source": [
|
| 432 |
+
"### FINAL NOTES\n",
|
| 433 |
+
"- The model has been exported to a joblib file which can be used to deploy the model to a production environment.\n",
|
| 434 |
+
"- We have cross-checked the exported model and it is giving 96% accuracy on the test data."
|
| 435 |
+
]
|
| 436 |
+
}
|
| 437 |
+
],
|
| 438 |
+
"metadata": {
|
| 439 |
+
"kernelspec": {
|
| 440 |
+
"display_name": "Python 3 (ipykernel)",
|
| 441 |
+
"language": "python",
|
| 442 |
+
"name": "python3"
|
| 443 |
+
},
|
| 444 |
+
"language_info": {
|
| 445 |
+
"codemirror_mode": {
|
| 446 |
+
"name": "ipython",
|
| 447 |
+
"version": 3
|
| 448 |
+
},
|
| 449 |
+
"file_extension": ".py",
|
| 450 |
+
"mimetype": "text/x-python",
|
| 451 |
+
"name": "python",
|
| 452 |
+
"nbconvert_exporter": "python",
|
| 453 |
+
"pygments_lexer": "ipython3",
|
| 454 |
+
"version": "3.10.10"
|
| 455 |
+
}
|
| 456 |
+
},
|
| 457 |
+
"nbformat": 4,
|
| 458 |
+
"nbformat_minor": 5
|
| 459 |
+
}
|
Notebooks/raw_data.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8518b2905c0bb31fa4a15ee12505e6c4b2e375e4296a93b008044ecd81c047cf
|
| 3 |
+
size 24238491
|
app.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import base64

# Setting page config
st.set_page_config(page_title="Home", page_icon=":house:", layout="wide")

#-------------------------------- Background and custom CSS -------------------------------------#
# Implement background formatting
def set_bg_hack(main_bg):
    """Render a full-page background image behind the Streamlit app.

    Parameters
    ----------
    main_bg : str
        Path to a .jpg image file used as the page background.
    """
    # set bg name
    main_bg_ext = "jpg"
    # Read the image through a context manager so the file handle is closed
    # (the original called open(...).read() inside the f-string and leaked it).
    with open(main_bg, "rb") as bg_file:
        encoded_bg = base64.b64encode(bg_file.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{encoded_bg});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95% ;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )

set_bg_hack("images/dark_bg_home.jpg")

# Setting custom css: make the Streamlit header transparent so the
# background image shows through.
css = f"""
<style>

[data-testid="stHeader"] {{
background: rgba(0,0,0,0);
}}

</style>
"""
st.markdown(css, unsafe_allow_html=True)

#-------------------------------- Sidebar Modification -------------------------------------#
# Setting logo on sidebar
st.sidebar.image("images/logo.png", caption="About this app")
st.sidebar.markdown("##")
st.sidebar.markdown("##")
st.sidebar.markdown("##")
st.sidebar.markdown("[](https://github.com/prithush92)")
st.sidebar.markdown("[](https://www.linkedin.com/in/prithush92/)")

# Page title
st.title("Airline Passenger Feedback Portal ✈️")

# Description
st.markdown("""
Welcome to the Airline Passenger Feedback Portal! This app analyzes passenger ratings and reviews and predicts how likely is a passenger to recommend the airline. \n
""")

# Features
st.header("🛩 Features", divider="red")
st.markdown("""
- **Analyze Passenger Ratings**: A Machine Learning model is deployed to predict customer recommendation status based on Ratings.
- **Review Sentiment Analysis**: Customer Reviews are deeply analyzed and sentiment analysis is performed to determine the sentiment of the review.
- **Overall Recommendation Prediction**: Finally using both the Ratings and Review Sentiment, Overall Recommendation Status of the user is predicted.
- **Airline Reviews Dashboard**: For the convenience of Airline Companies, a dashboard is designed to easily visualize Passenger Ratings
and Important Keywords in both Positive and Negative Reviews.
""")

# Technologies Used
st.header("🚀 Technologies Used", divider="blue")
st.markdown("""
- **SQLite**: Database management system for storing and retrieving passenger feedback data.
- **Pandas**: Data manipulation and analysis library for handling datasets.
- **NumPy**: Numerical computing library for performing mathematical operations.
- **Matplotlib**: Visualization library for creating insightful plots and charts.
- **WordCloud**: Visualization tool for generating word clouds from textual data.
- **TensorFlow**: Deep learning framework for building and training machine learning models.
""")

# About the Developer
st.header("👨🏻‍💻 About the Developer", divider="green")
st.write("""
This app is developed by **Prithu Sharma**.
""")
st.markdown(
    """
    [](https://github.com/prithush92)
    [](https://www.linkedin.com/in/prithush92/)
    """
)

# Footer
st.markdown("---")
st.write("Explore the app and make informed decisions based on passenger feedback!")
|
data.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e54875f79f986d64329099ed965236b0cf77ceee9a4139c70c64339aaa7fd2e
|
| 3 |
+
size 40427520
|
images/air_white.jpeg
ADDED
|
images/air_white_flip.jpg
ADDED
|
images/dark_bg_home.jpg
ADDED
|
images/dashboard_bg.jpg
ADDED
|
images/form_bg.jpg
ADDED
|
Git LFS Details
|
images/logo.png
ADDED
|
nlp_model/fingerprint.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a50311c25f3ca296cb576f49c98420f5d1d944ec0729fd1365a70bb78fc6929
|
| 3 |
+
size 54
|
nlp_model/keras_metadata.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2161136a12f9a758f2283b7a7dcdea7a6304ee5b165b79218601b270e2d9d25
|
| 3 |
+
size 32086
|
nlp_model/saved_model.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37a501dc32d63688682a828760f2d109dec97473e0e39e761d83bc251c5e891f
|
| 3 |
+
size 11251034
|
nlp_model/variables/variables.data-00000-of-00001
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:719f227365b36776469b240ebe2e5a5a125b04a03a7af4691646a6e76eb05a41
|
| 3 |
+
size 1029836856
|
nlp_model/variables/variables.index
ADDED
|
Binary file (15.8 kB). View file
|
|
|
pages/2_βοΈ_Feedback.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importing Necessary Libraries
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import sqlite3
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import joblib
|
| 7 |
+
import tensorflow as tf
|
| 8 |
+
import re
|
| 9 |
+
import string
|
| 10 |
+
from nltk.tokenize import RegexpTokenizer
|
| 11 |
+
import spacy
|
| 12 |
+
import datetime
|
| 13 |
+
import base64
|
| 14 |
+
|
| 15 |
+
#-------------------------------- Setting Page Style -------------------------------------#
|
| 16 |
+
st.set_page_config(layout="wide", page_title="Customer Feedback", page_icon="βοΈ")
|
| 17 |
+
|
| 18 |
+
#impliment background formating
|
| 19 |
+
def set_bg_hack(main_bg):
    """Render a full-page background image behind the Streamlit app.

    Parameters
    ----------
    main_bg : str
        Path to a .jpg image file used as the page background.
    """
    # set bg name
    main_bg_ext = "jpg"
    # Read the image through a context manager so the file handle is closed
    # (the original called open(...).read() inside the f-string and leaked it).
    with open(main_bg, "rb") as bg_file:
        encoded_bg = base64.b64encode(bg_file.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{encoded_bg});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95% ;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )
|
| 36 |
+
|
| 37 |
+
set_bg_hack("images/form_bg.jpg")
|
| 38 |
+
|
| 39 |
+
css = f"""
|
| 40 |
+
<style>
|
| 41 |
+
|
| 42 |
+
[data-testid="stHeader"] {{
|
| 43 |
+
background: rgba(0,0,0,0);
|
| 44 |
+
}}
|
| 45 |
+
|
| 46 |
+
[data-testid="stExpander"] {{
|
| 47 |
+
background: rgba(0,0,0,0.4);
|
| 48 |
+
border: 2px solid #000071;
|
| 49 |
+
border-radius: 10px;
|
| 50 |
+
}}
|
| 51 |
+
|
| 52 |
+
</style>
|
| 53 |
+
"""
|
| 54 |
+
st.markdown(css, unsafe_allow_html=True)
|
| 55 |
+
|
| 56 |
+
# Setting logo on sidebar
|
| 57 |
+
st.sidebar.image("images/logo.png", caption="Passenger Feedback Form")
|
| 58 |
+
|
| 59 |
+
#-------------------------------- Connecting to database and loading data -------------------------------------#
|
| 60 |
+
connection = sqlite3.connect('data.db')
|
| 61 |
+
df = pd.read_sql(sql="SELECT * FROM airline_reviews", con=connection, parse_dates=['review_date'])
|
| 62 |
+
connection.close()
|
| 63 |
+
|
| 64 |
+
#-------------------------------- Function to clean reviews -------------------------------------#
|
| 65 |
+
|
| 66 |
+
# Defining acronyms and contractions
|
| 67 |
+
acronyms_dict = pd.read_json("stopwords/acronym.json", typ="series")
|
| 68 |
+
contractions_dict = pd.read_json("stopwords/contractions.json", typ="series")
|
| 69 |
+
|
| 70 |
+
# Defining stopwords
|
| 71 |
+
alphabets = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
|
| 72 |
+
others = ["Γ£", "Γ₯", "Γ¬", "Γ»", "Γ»Βͺm", "ûó", "ûò", "ìñ", "Γ»Βͺre", "Γ»Βͺve", "Γ»Βͺ", "Γ»Βͺs", "ûówe", "Γ―", "ûï", "Γ’β¬β’"]
|
| 73 |
+
common_words = ["flight", "fly", "airline", "via"]
|
| 74 |
+
airline_names = [airline.split()[0].lower() for airline in df['airline'].unique()]
|
| 75 |
+
stops = alphabets + others + common_words + airline_names
|
| 76 |
+
stops = list(set(stops))
|
| 77 |
+
|
| 78 |
+
# Defining Tokenizer
|
| 79 |
+
regexp = RegexpTokenizer("[\w']+")
|
| 80 |
+
nlp = spacy.load("en_core_web_sm")
|
| 81 |
+
|
| 82 |
+
# Defining function to clean reviews
|
| 83 |
+
def preprocess(text):
    """Clean a raw review string for the sentiment model.

    Pipeline: lowercase -> strip -> remove HTML tags, emoji, URLs, Twitter
    handles -> strip punctuation/digits -> expand acronyms and contractions
    (from the module-level ``acronyms_dict`` / ``contractions_dict``) ->
    lemmatize with spaCy -> remove stopwords (``stops``) -> drop noisy/short
    tokens -> drop certain POS tags.

    Parameters
    ----------
    text : str
        Raw review text as typed by the user.

    Returns
    -------
    str
        Space-separated cleaned tokens; may be "" if nothing survives.
    """
    # lowercase
    text = text.lower()

    # remove surrounding whitespace
    text = text.strip()

    # removing html tags
    html = re.compile(r'<.*?>')
    text = html.sub(r'', text)

    # removing emoji patterns
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags = re.UNICODE)
    text = emoji_pattern.sub(r'', text)

    # removing urls (raw string: the original used "\S"/"\." in a plain
    # string literal, which is an invalid escape sequence on modern Python)
    http = r"https?://\S+|www\.\S+"
    pattern = r"({})".format(http)
    text = re.sub(pattern, "", text)

    # removing twitter usernames if they exist
    pattern = r'@[\w_]+'
    text = re.sub(pattern, "", text)

    # Removing punctuations and numbers except ' and -
    punct_str = string.punctuation + string.digits
    punct_str = punct_str.replace("'", "")
    punct_str = punct_str.replace("-", "")
    text = text.translate(str.maketrans('', '', punct_str))

    # Replacing "-" in text with empty space
    text = text.replace("-", " ")

    # Substituting acronyms with their expansions
    words = []
    for word in regexp.tokenize(text):
        if word in acronyms_dict.index:
            words = words + acronyms_dict[word].split()
        else:
            words = words + word.split()
    text = ' '.join(words)  # acronyms

    # Substituting contractions with their expansions
    words = []
    for word in regexp.tokenize(text):
        if word in contractions_dict.index:
            words = words + contractions_dict[word].split()
        else:
            words = words + word.split()
    text = " ".join(words)

    # Removing punctuations again (expansions may reintroduce apostrophes)
    punct_str = string.punctuation
    text = text.translate(str.maketrans('', '', punct_str))

    # Lemmatization using spacy
    text = " ".join([token.lemma_ for token in nlp(text)])

    # Stopwords removal
    text = ' '.join([word for word in regexp.tokenize(text) if word not in stops])

    # Removing words with one alphabet occurring more than 3 times continuously
    pattern = r'\b\w*?(.)\1{2,}\w*\b'
    text = re.sub(pattern, "", text).strip()

    # Removing words with less than 3 characters
    pattern = r'\b\w{1,2}\b'
    text = re.sub(pattern, "", text).strip()

    # Removing all characters except alphabets and " " (space);
    # loop variable renamed so it no longer shadows the builtin chr()
    filter_text = string.ascii_letters + " "
    text = "".join([ch for ch in text if ch in filter_text])

    # Replacing multiple spaces with one single space
    pattern = r'\s+'
    text = re.sub(pattern, " ", text).strip()

    # Removing certain POS tags from the text
    remove_tags = ['PROPN', 'DET', 'CCONJ', 'PRON', 'AUX']
    text = " ".join([token.text for token in nlp(text) if token.pos_ not in remove_tags])

    # return final output
    return text
|
| 174 |
+
|
| 175 |
+
#================================== Web App Designing Begins ==================================#
|
| 176 |
+
|
| 177 |
+
#-------------------------------- Container 1 for Heading -------------------------------------#
|
| 178 |
+
container_1 = st.container()
|
| 179 |
+
with container_1:
|
| 180 |
+
empty1, head2, empty3 = st.columns(spec = [1.5,3,1.5], gap = 'medium')
|
| 181 |
+
with empty1:
|
| 182 |
+
st.empty()
|
| 183 |
+
with head2:
|
| 184 |
+
st.markdown("<h1><center>Welcome Aboard</center></h1>",unsafe_allow_html=True)
|
| 185 |
+
st.markdown("<h2><center>Tell us about your Experience βοΈ </center></h2>", unsafe_allow_html=True)
|
| 186 |
+
with empty3:
|
| 187 |
+
st.empty()
|
| 188 |
+
|
| 189 |
+
#-------------------------------- Container 2 for main_content --------------------------------#
|
| 190 |
+
container_2 = st.container()
|
| 191 |
+
with container_2:
|
| 192 |
+
col1, col2, col3, col4 = st.columns(spec = [1,3,3,1], gap = 'medium')
|
| 193 |
+
with col1:
|
| 194 |
+
st.empty()
|
| 195 |
+
|
| 196 |
+
with col2:
|
| 197 |
+
expander_1 = st.expander(label = "**Your Trip Info**", expanded = True)
|
| 198 |
+
with expander_1:
|
| 199 |
+
|
| 200 |
+
author = st.text_input(
|
| 201 |
+
label = "Please Enter your Name",
|
| 202 |
+
placeholder = "Enter your name"
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
+
airline = st.selectbox(
|
| 206 |
+
label = "Select your Airline",
|
| 207 |
+
options = tuple(sorted(df['airline'].unique())),
|
| 208 |
+
index = 0,
|
| 209 |
+
placeholder = "Choose an option..."
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
traveller_type = st.selectbox(
|
| 213 |
+
label = "Select your Trip type",
|
| 214 |
+
options = ("Business", "Solo Leisure", "Couple Leisure", "Family Leisure"),
|
| 215 |
+
index = 0,
|
| 216 |
+
placeholder = "Choose an option..."
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
cabin = st.selectbox(
|
| 220 |
+
label = "Select your Seat Class",
|
| 221 |
+
options = ("Economy Class", "Premium Economy", "Business Class", "First Class"),
|
| 222 |
+
index = 0,
|
| 223 |
+
placeholder = "Choose an option..."
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
type_of_flight = st.radio(
|
| 227 |
+
label = "Select your Flight Type",
|
| 228 |
+
options = ("Direct", 'Indirect'),
|
| 229 |
+
index = 0,
|
| 230 |
+
)
|
| 231 |
+
|
| 232 |
+
frequency = st.radio(
|
| 233 |
+
label = "How often do you fly?",
|
| 234 |
+
options = ('Often', 'Occasionally', 'Rarely'),
|
| 235 |
+
index = 1,
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
with col3:
|
| 239 |
+
expander_2 = st.expander(label = "Your Ratings", expanded = True)
|
| 240 |
+
with expander_2:
|
| 241 |
+
seat_comfort = st.slider(
|
| 242 |
+
label = "How comfortable are you with your seat?",
|
| 243 |
+
min_value = 1,
|
| 244 |
+
max_value = 5,
|
| 245 |
+
value = 3
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
cabin_service = st.slider(
|
| 249 |
+
label = "Please Rate your Cabin Service",
|
| 250 |
+
min_value = 1,
|
| 251 |
+
max_value = 5,
|
| 252 |
+
value = 3
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
food_bev = st.slider(
|
| 256 |
+
label = "Please rate the quality of food/beverages",
|
| 257 |
+
min_value = 1,
|
| 258 |
+
max_value = 5,
|
| 259 |
+
value = 3
|
| 260 |
+
)
|
| 261 |
+
|
| 262 |
+
entertainment = st.slider(
|
| 263 |
+
label = "Please rate the Entertainment Service",
|
| 264 |
+
min_value = 1,
|
| 265 |
+
max_value = 5,
|
| 266 |
+
value = 3
|
| 267 |
+
)
|
| 268 |
+
|
| 269 |
+
ground_service = st.slider(
|
| 270 |
+
label = "Please rate the Ground Service",
|
| 271 |
+
min_value = 1,
|
| 272 |
+
max_value = 5,
|
| 273 |
+
value = 3
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
+
value_for_money = st.slider(
|
| 277 |
+
label = "Value for Money Rating",
|
| 278 |
+
min_value = 1,
|
| 279 |
+
max_value = 5,
|
| 280 |
+
value = 3
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
with col4:
|
| 284 |
+
st.empty()
|
| 285 |
+
|
| 286 |
+
#-------------------------------- Container 3 for Final Rating Slider and Customer Review--------------------------------#
|
| 287 |
+
container_3 = st.container()
|
| 288 |
+
with container_3:
|
| 289 |
+
empty1, head2, empty3 = st.columns(spec = [1,3,1], gap = 'medium')
|
| 290 |
+
with empty1:
|
| 291 |
+
st.empty()
|
| 292 |
+
with head2:
|
| 293 |
+
overall = st.slider(
|
| 294 |
+
label = "How was your overall experience with the Airline?",
|
| 295 |
+
min_value = 1,
|
| 296 |
+
max_value = 10,
|
| 297 |
+
value = 7
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
review = st.text_area("Enter your review")
|
| 301 |
+
with empty3:
|
| 302 |
+
st.empty()
|
| 303 |
+
|
| 304 |
+
#-------------------------------- Creating DataFrame to pass into ML model --------------------------------#
|
| 305 |
+
temp_df = pd.DataFrame(
|
| 306 |
+
data = [[airline, traveller_type, cabin, type_of_flight, frequency,
|
| 307 |
+
seat_comfort, cabin_service, food_bev, entertainment,
|
| 308 |
+
ground_service, value_for_money, overall]],
|
| 309 |
+
columns = ['airline', 'traveller_type', 'cabin', 'type_of_flight', 'frequency',
|
| 310 |
+
'seat_comfort', 'cabin_service', 'food_bev', 'entertainment',
|
| 311 |
+
'ground_service', 'value_for_money', 'overall']
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
# Loading ML model using joblib file
|
| 315 |
+
model = joblib.load('ratings_model.joblib')
|
| 316 |
+
|
| 317 |
+
# Defining a function to store the nlp_model in streamlit cache memory
|
| 318 |
+
@st.cache_resource
|
| 319 |
+
def cache_model(model_name):
|
| 320 |
+
model = tf.keras.models.load_model(model_name)
|
| 321 |
+
return model
|
| 322 |
+
|
| 323 |
+
# Loading the nlp_model
|
| 324 |
+
nlp_model = cache_model("nlp_model")
|
| 325 |
+
|
| 326 |
+
#-------------------------------- Container 4 for Final Predictions --------------------------------#
|
| 327 |
+
|
| 328 |
+
container_4 = st.container()
with container_4:
    empty1, head2, empty3 = st.columns(spec = [1,3,1], gap = 'medium')
    with empty1:
        st.empty()

    with head2:

        # Creating a toggle button to save form to database
        save_to_db = st.toggle("Save to Database")

        # Creating a button to get prediction
        if st.button('Submit'):
            # Validate user input BEFORE predicting or saving: the original
            # code appended the row to the database first and only then
            # checked for empty fields, persisting invalid rows.
            if author=="":
                st.error("Please Enter your Name")
                st.stop()

            if review=="":
                st.error("Please write your Review")
                st.stop()

            clean_review = preprocess(review)
            if clean_review=="":
                st.error("Please write a proper review")
                st.stop()

            # Ratings-based model: class label and class probabilities
            y_pred = model.predict(temp_df)
            y_pred_prob = model.predict_proba(temp_df)

            # Sentiment model: positive-class probability, thresholded at 0.5
            review_pred_proba = nlp_model.predict([clean_review])
            review_pred = np.where(review_pred_proba > 0.5, 1, 0)[0][0]

            review_date = datetime.datetime.now()

            # if sum of probabilities of both ratings model and nlp_model >=1
            # then the author has recommended the airline
            # (single if/else instead of the original's two disjoint ifs)
            if y_pred_prob[:,1] + review_pred_proba >= 1:
                recommended = "yes"
            else:
                recommended = "no"

            append_df = pd.DataFrame(data = [[review_date, author, airline, traveller_type, cabin, type_of_flight, frequency,
                                              seat_comfort, cabin_service, food_bev, entertainment,
                                              ground_service, value_for_money, overall, review, clean_review, recommended]],
                                     columns = ['review_date', 'author', 'airline', 'traveller_type', 'cabin', 'type_of_flight', 'frequency',
                                                'seat_comfort', 'cabin_service', 'food_bev', 'entertainment',
                                                'ground_service', 'value_for_money', 'overall', 'customer_review', 'review_clean', 'recommended'])

            # If save_to_db toggle is True, then append to database
            if save_to_db:
                # Creating a connection to the database
                connection = sqlite3.connect('data.db')
                # Appending append_df to airline_reviews table in data.db
                append_df.to_sql(name='airline_reviews', con=connection, if_exists='append', index=False)
                # Closing the connection to the database
                connection.close()

            # Show the entered data
            st.dataframe(append_df)

            # Feedback message depends on the agreement between the two models
            if (y_pred[0] == 'yes') & (review_pred == 1):
                st.success("Thank you for your positive feedback! \nWe're delighted to hear that you had a great experience with our service.")
                st.balloons()
            elif (y_pred[0] == 'yes') & (review_pred == 0):
                st.warning("We appreciate your positive rating, but we're sorry to hear about your concerns in the review. \nPlease share more details so we can address them and enhance your experience.")
            elif (y_pred[0] == 'no') & (review_pred == 0):
                st.error("We apologize for falling short of your expectations. \nYour feedback is valuable, and we're committed to improving. \nPlease provide specific details about your experience for us to better understand and address the issues.")
            elif (y_pred[0] == 'no') & (review_pred == 1):
                st.error("We're sorry to hear about your negative rating, but we're glad to see your positive comments in the review. \nWe'd like to learn more about your concerns to ensure we address any issues and enhance your satisfaction.")

    with empty3:
        st.empty()
|
pages/3_π_Dashboard.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import sqlite3
|
| 4 |
+
import numpy as np
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
from wordcloud import WordCloud
|
| 7 |
+
from PIL import Image
|
| 8 |
+
import base64
|
| 9 |
+
|
| 10 |
+
#-------------------------------- Setting Page Style -------------------------------------#
|
| 11 |
+
st.set_option('deprecation.showPyplotGlobalUse', False)
|
| 12 |
+
|
| 13 |
+
st.set_page_config(page_title="Dashboard", page_icon=":bar_chart:", layout="wide")
|
| 14 |
+
# st.subheader("Dashboard")
|
| 15 |
+
# st.markdown("##")
|
| 16 |
+
|
| 17 |
+
#-------------------------------- Background and custom CSS -------------------------------------#
|
| 18 |
+
#impliment background formating
|
| 19 |
+
def set_bg_hack(main_bg):
|
| 20 |
+
# set bg name
|
| 21 |
+
main_bg_ext = "jpg"
|
| 22 |
+
st.markdown(
|
| 23 |
+
f"""
|
| 24 |
+
<style>
|
| 25 |
+
.stApp {{
|
| 26 |
+
background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
|
| 27 |
+
background-repeat: no-repeat;
|
| 28 |
+
background-position: right 50% bottom 95% ;
|
| 29 |
+
background-size: cover;
|
| 30 |
+
background-attachment: scroll;
|
| 31 |
+
}}
|
| 32 |
+
</style>
|
| 33 |
+
""",
|
| 34 |
+
unsafe_allow_html=True,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
set_bg_hack("images/dashboard_bg.jpg")
|
| 38 |
+
|
| 39 |
+
# Setting custom css
|
| 40 |
+
css = f"""
|
| 41 |
+
<style>
|
| 42 |
+
|
| 43 |
+
[data-testid="stHeader"] {{
|
| 44 |
+
background: rgba(0,0,0,0);
|
| 45 |
+
}}
|
| 46 |
+
|
| 47 |
+
</style>
|
| 48 |
+
"""
|
| 49 |
+
st.markdown(css, unsafe_allow_html=True)
|
| 50 |
+
|
| 51 |
+
#-------------------------------- Connecting to database and loading data -------------------------------------#
|
| 52 |
+
connection = sqlite3.connect('data.db')
|
| 53 |
+
df = pd.read_sql(sql="SELECT * FROM airline_reviews", con=connection, parse_dates=['review_date'])
|
| 54 |
+
|
| 55 |
+
# Setting logo on sidebar
|
| 56 |
+
st.sidebar.image("images/logo.png", caption='Airline Reviews Dashboard')
|
| 57 |
+
|
| 58 |
+
#-------------------------------- Creating Filters --------------------------------#
|
| 59 |
+
st.sidebar.header("Filters")
|
| 60 |
+
|
| 61 |
+
airline = st.sidebar.selectbox(
|
| 62 |
+
label = "Select your Airline",
|
| 63 |
+
options=df["airline"].unique(),
|
| 64 |
+
index=6
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
traveller_type = st.sidebar.multiselect(
|
| 68 |
+
label = "Select Traveller Type",
|
| 69 |
+
options=df["traveller_type"].unique(),
|
| 70 |
+
default=df["traveller_type"].unique()
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
cabin = st.sidebar.multiselect(
|
| 74 |
+
label = "Select Cabin",
|
| 75 |
+
options=df["cabin"].unique(),
|
| 76 |
+
default=df["cabin"].unique()
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
type_of_flight = st.sidebar.multiselect(
|
| 80 |
+
label = "Select flight type",
|
| 81 |
+
options=df["type_of_flight"].unique(),
|
| 82 |
+
default=df["type_of_flight"].unique()
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
frequency = st.sidebar.multiselect(
|
| 86 |
+
label = "Select Frequency",
|
| 87 |
+
options=df["frequency"].unique(),
|
| 88 |
+
default=df["frequency"].unique()
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
df_selection = df.query(
|
| 92 |
+
"airline == @airline & traveller_type == @traveller_type & cabin == @cabin & type_of_flight == @type_of_flight & frequency == @frequency"
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
#-------------------------------- Defining Function to show KPIs --------------------------------#
|
| 96 |
+
def KPI():
    """Render a raw-data expander and five headline KPI metrics for the
    currently filtered reviews (``df_selection``)."""
    with st.expander("View Data"):
        showdata = st.multiselect(label='Filter:', options=df_selection.columns,
                                  default=['review_date', 'author', 'airline', 'cabin', 'seat_comfort',
                                           'cabin_service', 'food_bev', 'entertainment',
                                           'ground_service', 'value_for_money',
                                           'overall', 'review_clean', 'recommended'])
        st.write(df_selection[showdata])

    # Designing KPIs
    total_reviews = len(df_selection)
    total_positive_reviews = len(df_selection[df_selection["recommended"] == 'yes'])
    total_negative_reviews = len(df_selection[df_selection["recommended"] == 'no'])
    # Guard against ZeroDivisionError when the sidebar filters exclude every row.
    if total_reviews > 0:
        positive_percentage = float((total_positive_reviews / total_reviews) * 100)
        negative_percentage = float((total_negative_reviews / total_reviews) * 100)
    else:
        positive_percentage = 0.0
        negative_percentage = 0.0

    # Creating columns for all KPIs
    col1, col2, col3, col4, col5 = st.columns(5, gap="medium")

    # Assigning KPIs to columns one by one
    with col1:
        st.info('Total Reviews', icon='📑')
        st.metric(label = 'Total Reviews', value=total_reviews)

    with col2:
        # NOTE(review): the icon glyph was mojibake in the source paste;
        # reconstructed as a checkmark — confirm against the original file.
        st.info('Total Positive Reviews', icon='✅')
        st.metric(label = 'Total Positive Reviews', value=total_positive_reviews)

    with col3:
        st.info('Total Negative Reviews', icon='❌')
        st.metric(label = 'Total Negative Reviews', value=total_negative_reviews)

    with col4:
        st.info('Positive Percentage', icon='📈')
        st.metric(label = 'Positive Percentage', value=f"{positive_percentage:,.2f}")

    with col5:
        st.info('Negative Percentage', icon='📉')
        st.metric(label = 'Negative Percentage', value=f"{negative_percentage:,.2f}")
|
| 135 |
+
|
| 136 |
+
# st.markdown("---")
|
| 137 |
+
|
| 138 |
+
#-------------------------------- Defining Function to Wordcloud --------------------------------#
|
| 139 |
+
def graphs():
|
| 140 |
+
|
| 141 |
+
positive_review_text = " ".join(review for review in df_selection[df_selection["recommended"] == 'yes']['review_clean'])
|
| 142 |
+
negative_review_text = " ".join(review for review in df_selection[df_selection["recommended"] == 'no']['review_clean'])
|
| 143 |
+
|
| 144 |
+
plot1, plot2 = st.columns(2)
|
| 145 |
+
|
| 146 |
+
with plot1:
|
| 147 |
+
air_mask = np.array(Image.open("images/air_white.jpeg"))
|
| 148 |
+
pos_wordcloud = WordCloud(max_words=50,
|
| 149 |
+
mask = air_mask,
|
| 150 |
+
colormap="summer",
|
| 151 |
+
min_word_length=3,
|
| 152 |
+
background_color="black").generate(positive_review_text)
|
| 153 |
+
plt.imshow(pos_wordcloud, interpolation="bilinear")
|
| 154 |
+
plt.gcf().set_facecolor("black")
|
| 155 |
+
plt.axis("off")
|
| 156 |
+
plt.title("Positive Reviews", fontsize=15, fontweight="bold", color="green")
|
| 157 |
+
st.pyplot()
|
| 158 |
+
|
| 159 |
+
with plot2:
|
| 160 |
+
air_mask_flip = np.array(Image.open("images/air_white_flip.jpg"))
|
| 161 |
+
neg_wordcloud = WordCloud(max_words=50,
|
| 162 |
+
mask = air_mask_flip,
|
| 163 |
+
colormap="autumn",
|
| 164 |
+
min_word_length=3,
|
| 165 |
+
background_color="black").generate(negative_review_text)
|
| 166 |
+
plt.imshow(neg_wordcloud, interpolation="bilinear")
|
| 167 |
+
plt.gcf().set_facecolor("black")
|
| 168 |
+
plt.axis("off")
|
| 169 |
+
plt.title("Negative Reviews", fontsize=15, fontweight="bold", color="red")
|
| 170 |
+
st.pyplot()
|
| 171 |
+
|
| 172 |
+
#-------------------------------- Defining Function to showcase Average Ratings --------------------------------#
|
| 173 |
+
def Ratings():
|
| 174 |
+
# Creating an expander showcasing average ratings for all amenities
|
| 175 |
+
with st.expander("View Average Ratings", expanded=True):
|
| 176 |
+
avg_seat_comfort = float(df_selection["seat_comfort"].mean())
|
| 177 |
+
avg_cabin_service = float(df_selection["cabin_service"].mean())
|
| 178 |
+
avg_food_bev = float(df_selection["food_bev"].mean())
|
| 179 |
+
avg_entertainment = float(df_selection["entertainment"].mean())
|
| 180 |
+
avg_ground_service = float(df_selection["ground_service"].mean())
|
| 181 |
+
avg_value_for_money = float(df_selection["value_for_money"].mean())
|
| 182 |
+
|
| 183 |
+
avg1, avg2, avg3, avg4, avg5, avg6 = st.columns(6, gap="medium")
|
| 184 |
+
|
| 185 |
+
with avg1:
|
| 186 |
+
st.info('Seat Comfort', icon='πΊ')
|
| 187 |
+
st.metric(label = 'Average', value=f"{avg_seat_comfort:,.1f}")
|
| 188 |
+
|
| 189 |
+
with avg2:
|
| 190 |
+
st.info('Cabin Service', icon='ποΈ')
|
| 191 |
+
st.metric(label = 'Average', value=f"{avg_cabin_service:,.1f}")
|
| 192 |
+
|
| 193 |
+
with avg3:
|
| 194 |
+
st.info('Food & Beverage', icon='π')
|
| 195 |
+
st.metric(label = 'Average', value=f"{avg_food_bev:,.1f}")
|
| 196 |
+
|
| 197 |
+
with avg4:
|
| 198 |
+
st.info('Entertainment', icon='π¬')
|
| 199 |
+
st.metric(label = 'Average', value=f"{avg_entertainment:,.1f}")
|
| 200 |
+
|
| 201 |
+
with avg5:
|
| 202 |
+
st.info('Ground Service', icon='π')
|
| 203 |
+
st.metric(label = 'Average', value=f"{avg_ground_service:,.1f}")
|
| 204 |
+
|
| 205 |
+
with avg6:
|
| 206 |
+
st.info('Value for Money', icon='π°')
|
| 207 |
+
st.metric(label = 'Average', value=f"{avg_value_for_money:,.1f}")
|
| 208 |
+
|
| 209 |
+
# st.markdown("---")
|
| 210 |
+
|
| 211 |
+
#--------------------------------------- Calling Functions ----------------------------------------#
|
| 212 |
+
KPI()
|
| 213 |
+
graphs()
|
| 214 |
+
Ratings()
|
ratings_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67f9e41a2389e5d3550d8d551dd4c3568d91044964c92738c712325da7c2ec0b
|
| 3 |
+
size 18548591
|
raw_data.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8518b2905c0bb31fa4a15ee12505e6c4b2e375e4296a93b008044ecd81c047cf
|
| 3 |
+
size 24238491
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.3.0
|
| 2 |
+
Pillow == 9.4.0
|
| 3 |
+
nltk == 3.8.1
|
| 4 |
+
numpy == 1.24.3
|
| 5 |
+
pandas == 2.0.3
|
| 6 |
+
tensorflow == 2.14.0
|
| 7 |
+
regex == 2022.7.9
|
| 8 |
+
joblib
|
| 9 |
+
spacy == 3.7.2
|
| 10 |
+
scikit-learn == 1.3.2
|
| 11 |
+
wordcloud == 1.9.3
|
| 12 |
+
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
stopwords/acronym.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"aka":"also known as","asap":"as soon as possible","brb":"be right back","btw":"by the way","dob":"date of birth","faq":"frequently asked questions","fyi":"for your information","idk":"i don't know","idc":"i don't care","iirc":"if i recall correctly","imo":"in my opinion","irl":"in real life","lmk":"let me know","lol":"laugh out loud","ngl":"not gonna lie","noyb":"none of your business","nvm":"never mind","ofc":"of course","omg":"oh my god","pfa":"please find attached","rofl":"rolling on the floor laughing","stfu":"shut the fuck up","tba":"to be announced","tbc":"to be continued","tbd":"to be determined","tbh":"to be honest","ttyl":"talk to you later","wtf":"what the fuck","wth":"what the heck"}
|
stopwords/contractions.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"'aight":"alright","ain't":"are not","amn't":"am not","arencha":"are not you","aren't":"are not","'bout":"about","can't":"cannot","cap'n":"captain","'cause":"because","'cept":"except","could've":"could have","couldn't":"could not","couldn't've":"could not have","dammit":"damn it","daren't":"dare not","daresn't":"dare not","dasn't":"dare not","didn't":"did not","didn":"did not","doesn't":"does not","doin'":"doing","don't":"do not","dunno":"do not know","d'ye":"do you","e'en":"even","e'er":"ever","'em":"them","everybody's":"everybody is","everyone's":"everyone is","fo'c'sle":"forecastle","finna":"fixing to","'gainst":"against","g'day":"good day","gimme":"give me","giv'n":"given","gonna":"going to","gon't":"go not","gotcha":"got you","gotta":"got to","gtg":"got to go","hadn't":"had not","had've":"had have","hasn't":"has not","haven't":"have not","he'd":"he had","he'll":"he shall","helluva":"hell of a","he's":"he is","here's":"here is","he've":"he have","how'd":"how would","howdy":"how do you do","how'll":"how will","how're":"how are","how's":"how is","i'd":"i had","i'd've":"i would have","i'll":"i shall","i'm":"i am","imma":"i am about to","i'm'a":"i am about to","i'm'o":"i am going to","innit":"is it not","ion":"i do not","i've":"i have","isn't":"is not","it'd":"it would","it'll":"it shall","it's":"it is","iunno":"i do not know","kinda":"kind of","let's":"let us","li'l":"little","ma'am":"madam","mayn't":"may not","may've":"may have","methinks":"me thinks","mightn't":"might not","might've":"might have","mustn't":"must not","mustn't've":"must not have","must've":"must have","'neath":"beneath","needn't":"need not","nal":"and all","ne'er":"never","o'clock":"of the clock","o'er":"over","ol'":"old","oughtn't":"ought not","'round":"around","'s":"is","shalln't":"shall not","shan't":"shall not","she'd":"she had","she'll":"she shall","she's":"she is","should've":"should have","shouldn't":"should not","shouldn't've":"should not have","somebody's":"somebody 
is","someone's":"someone is","something's":"something is","so're":"so are","so's":"so is","so've":"so have","that'll":"that shall","that're":"that are","that's":"that is","that'd":"that would","there'd":"there had","there'll":"there shall","there're":"there are","there's":"there is","these're":"these are","these've":"these have","they'd":"they had","they'll":"they shall","they're":"they are","they've":"they have","this's":"this is","those're":"those are","those've":"those have","'thout":"without","'til":"until","'tis":"it is","to've":"to have","'twas":"it was","'tween":"between","'twhere":"it were","wanna":"want to","wasn't":"was not","we'd":"we had","we'd've":"we would have","we'll":"we shall","we're":"we are","we've":"we have","weren't":"were not","whatcha":"what are you","what'd":"what did","what'll":"what shall","what're":"what are","what's":"what is","what've":"what have","when's":"when is","where'd":"where did","where'll":"where shall","where're":"where are","where's":"where is","where've":"where have","which'd":"which had","which'll":"which shall","which're":"which are","which's":"which is","which've":"which have","who'd":"who would","who'd've":"who would have","who'll":"who shall","who're":"who are","who's":"who is","who've":"who have","why'd":"why did","why're":"why are","why's":"why is","willn't":"will not","won't":"will not","wonnot":"will not","would've":"would have","wouldn't":"would not","wouldn't've":"would not have","y'all":"you all","y'all'd've":"you all would have","y'all'd'n't've":"you all would not have","y'all're":"you all are","y'all'ren't":"you all are not","y'at":"you at","yes'm":"yes madam","yessir":"yes sir","you'd":"you had","you'll":"you shall","you're":"you are","you've":"you have","aight":"alright","aint":"are not","amnt":"am not","arent":"are not","cant":"cannot","cause":"because","couldve":"could have","couldnt":"could not","couldntve":"could not have","darent":"dare not","daresnt":"dare not","dasnt":"dare not","didnt":"did 
not","doesnt":"does not","doin":"doing","dont":"do not","eer":"ever","everybodys":"everybody is","everyones":"everyone is","gday":"good day","givn":"given","gont":"go not","hadnt":"had not","hadve":"had have","hasnt":"has not","havent":"have not","hed":"he had","hell":"he shall","hes":"he is","heve":"he have","howd":"how did","howll":"how will","howre":"how are","hows":"how is","idve":"i would have","ill":"i shall","im":"i am","ima":"i am about to","imo":"i am going to","ive":"i have","isnt":"is not","itd":"it would","itll":"it shall","its":"it is","lets":"let us","lil":"little","maam":"madam","maynt":"may not","mayve":"may have","mightnt":"might not","mightve":"might have","mustnt":"must not","mustntve":"must not have","mustve":"must have","neednt":"need not","neer":"never","oclock":"of the clock","oer":"over","ol":"old","oughtnt":"ought not","shallnt":"shall not","shant":"shall not","shed":"she had","shell":"she shall","shes":"she is","shouldve":"should have","shouldnt":"should not","shouldntve":"should not have","somebodys":"somebody is","someones":"someone is","somethings":"something is","thatll":"that shall","thatre":"that are","thatd":"that would","thered":"there had","therell":"there shall","therere":"there are","theres":"there is","thesere":"these are","theseve":"these have","theyd":"they had","theyll":"they shall","theyre":"they are","theyve":"they have","thiss":"this is","thosere":"those are","thoseve":"those have","tis":"it is","tove":"to have","twas":"it was","wasnt":"was not","wed":"we had","wedve":"we would have","were":"we are","weve":"we have","werent":"were not","whatd":"what did","whatll":"what shall","whatre":"what are","whats":"what is","whatve":"what have","whens":"when is","whered":"where did","wherell":"where shall","wherere":"where are","wheres":"where is","whereve":"where have","whichd":"which had","whichll":"which shall","whichre":"which are","whichs":"which is","whichve":"which have","whod":"who would","whodve":"who would 
have","wholl":"who shall","whore":"who are","whos":"who is","whove":"who have","whyd":"why did","whyre":"why are","whys":"why is","wont":"will not","wouldve":"would have","wouldnt":"would not","wouldntve":"would not have","yall":"you all","yalldve":"you all would have","yallre":"you all are","youd":"you had","youll":"you shall","youre":"you are","youve":"you have","'re":"are","thats":"that is"}
|