Spaces:
Sleeping
Sleeping
Upload 26 files
Browse files- .gitattributes +6 -0
- Airline Passenger Feedback Portal.pptx +3 -0
- Notebooks/Cleaning_&_Preprocessing_raw_data.ipynb +0 -0
- Notebooks/Cleaning_reviews.ipynb +0 -0
- Notebooks/Machine_Leaning_Model_using_Ratings.ipynb +0 -0
- Notebooks/Ratings_Model_Pipeline.ipynb +459 -0
- Notebooks/raw_data.xlsx +3 -0
- app.py +93 -0
- data.db +3 -0
- images/air_white.jpeg +0 -0
- images/air_white_flip.jpg +0 -0
- images/dark_bg_home.jpg +0 -0
- images/dashboard_bg.jpg +0 -0
- images/form_bg.jpg +3 -0
- images/logo.png +0 -0
- nlp_model/fingerprint.pb +3 -0
- nlp_model/keras_metadata.pb +3 -0
- nlp_model/saved_model.pb +3 -0
- nlp_model/variables/variables.data-00000-of-00001 +3 -0
- nlp_model/variables/variables.index +0 -0
- pages/2_βοΈ_Feedback.py +398 -0
- pages/3_π_Dashboard.py +214 -0
- ratings_model.joblib +3 -0
- raw_data.xlsx +3 -0
- requirements.txt +12 -0
- stopwords/acronym.json +1 -0
- stopwords/contractions.json +1 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Airline[[:space:]]Passenger[[:space:]]Feedback[[:space:]]Portal.pptx filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
data.db filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
images/form_bg.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
nlp_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
Notebooks/raw_data.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
raw_data.xlsx filter=lfs diff=lfs merge=lfs -text
|
Airline Passenger Feedback Portal.pptx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a81066397780d7b50ce13a3c44ff1b6d1ed7ddc77d305c0b95ae5449b4594cc
|
| 3 |
+
size 6137901
|
Notebooks/Cleaning_&_Preprocessing_raw_data.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Notebooks/Cleaning_reviews.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Notebooks/Machine_Leaning_Model_using_Ratings.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Notebooks/Ratings_Model_Pipeline.ipynb
ADDED
|
@@ -0,0 +1,459 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "80096ce4",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"# Importing Necessary Libraries"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": 1,
|
| 14 |
+
"id": "ae548824",
|
| 15 |
+
"metadata": {
|
| 16 |
+
"tags": []
|
| 17 |
+
},
|
| 18 |
+
"outputs": [],
|
| 19 |
+
"source": [
|
| 20 |
+
"import sqlite3\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"import numpy as np\n",
|
| 23 |
+
"import pandas as pd\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 26 |
+
"from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder\n",
|
| 27 |
+
"from sklearn.ensemble import RandomForestClassifier\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"from sklearn.compose import ColumnTransformer\n",
|
| 30 |
+
"from sklearn.pipeline import Pipeline\n",
|
| 31 |
+
"from sklearn.metrics import classification_report\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"import joblib"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "markdown",
|
| 38 |
+
"id": "878bccd1",
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"source": [
|
| 41 |
+
"# Reading the dataset"
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"cell_type": "code",
|
| 46 |
+
"execution_count": 2,
|
| 47 |
+
"id": "70f7f30a",
|
| 48 |
+
"metadata": {
|
| 49 |
+
"tags": []
|
| 50 |
+
},
|
| 51 |
+
"outputs": [
|
| 52 |
+
{
|
| 53 |
+
"ename": "DatabaseError",
|
| 54 |
+
"evalue": "Execution failed on sql 'SELECT * FROM airline_reviews': no such table: airline_reviews",
|
| 55 |
+
"output_type": "error",
|
| 56 |
+
"traceback": [
|
| 57 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 58 |
+
"\u001b[1;31mOperationalError\u001b[0m Traceback (most recent call last)",
|
| 59 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2018\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2017\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 2018\u001b[0m cur\u001b[38;5;241m.\u001b[39mexecute(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 2019\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cur\n",
|
| 60 |
+
"\u001b[1;31mOperationalError\u001b[0m: no such table: airline_reviews",
|
| 61 |
+
"\nThe above exception was the direct cause of the following exception:\n",
|
| 62 |
+
"\u001b[1;31mDatabaseError\u001b[0m Traceback (most recent call last)",
|
| 63 |
+
"Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m connection \u001b[38;5;241m=\u001b[39m sqlite3\u001b[38;5;241m.\u001b[39mconnect(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata.db\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_sql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[38;5;124;43mSELECT * FROM airline_reviews\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcon\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreview_date\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m df\u001b[38;5;241m.\u001b[39mdrop(columns \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview_date\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mauthor\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcustomer_review\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview_clean\u001b[39m\u001b[38;5;124m'\u001b[39m], inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 4\u001b[0m df\u001b[38;5;241m.\u001b[39mhead()\n",
|
| 64 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:564\u001b[0m, in \u001b[0;36mread_sql\u001b[1;34m(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)\u001b[0m\n\u001b[0;32m 561\u001b[0m pandas_sql \u001b[38;5;241m=\u001b[39m pandasSQL_builder(con)\n\u001b[0;32m 563\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pandas_sql, SQLiteDatabase):\n\u001b[1;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpandas_sql\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex_col\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 567\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 568\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoerce_float\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoerce_float\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 569\u001b[0m \u001b[43m \u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparse_dates\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 570\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 571\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 573\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 574\u001b[0m _is_table_name \u001b[38;5;241m=\u001b[39m pandas_sql\u001b[38;5;241m.\u001b[39mhas_table(sql)\n",
|
| 65 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2078\u001b[0m, in \u001b[0;36mSQLiteDatabase.read_query\u001b[1;34m(self, sql, index_col, coerce_float, params, parse_dates, chunksize, dtype)\u001b[0m\n\u001b[0;32m 2066\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_query\u001b[39m(\n\u001b[0;32m 2067\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 2068\u001b[0m sql,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2074\u001b[0m dtype: DtypeArg \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 2075\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Iterator[DataFrame]:\n\u001b[0;32m 2077\u001b[0m args \u001b[38;5;241m=\u001b[39m _convert_params(sql, params)\n\u001b[1;32m-> 2078\u001b[0m cursor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2079\u001b[0m columns \u001b[38;5;241m=\u001b[39m [col_desc[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m col_desc \u001b[38;5;129;01min\u001b[39;00m cursor\u001b[38;5;241m.\u001b[39mdescription]\n\u001b[0;32m 2081\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
| 66 |
+
"File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2030\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minner_exc\u001b[39;00m\n\u001b[0;32m 2029\u001b[0m ex \u001b[38;5;241m=\u001b[39m DatabaseError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExecution failed on sql \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 2030\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n",
|
| 67 |
+
"\u001b[1;31mDatabaseError\u001b[0m: Execution failed on sql 'SELECT * FROM airline_reviews': no such table: airline_reviews"
|
| 68 |
+
]
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
+
"source": [
|
| 72 |
+
"connection = sqlite3.connect('data.db')\n",
|
| 73 |
+
"df = pd.read_sql(sql=\"\"\"SELECT * FROM airline_reviews\"\"\", con=connection, parse_dates=['review_date'])\n",
|
| 74 |
+
"df.drop(columns = ['review_date', 'author', 'customer_review', 'review_clean'], inplace=True)\n",
|
| 75 |
+
"df.head()"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "markdown",
|
| 80 |
+
"id": "26adeb4a",
|
| 81 |
+
"metadata": {},
|
| 82 |
+
"source": [
|
| 83 |
+
"# Spliting Data into training and test sets"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": 7,
|
| 89 |
+
"id": "429fdf53",
|
| 90 |
+
"metadata": {},
|
| 91 |
+
"outputs": [],
|
| 92 |
+
"source": [
|
| 93 |
+
"X = df.drop(columns = \"recommended\")\n",
|
| 94 |
+
"y = df[\"recommended\"]"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": 8,
|
| 100 |
+
"id": "214fbd25",
|
| 101 |
+
"metadata": {},
|
| 102 |
+
"outputs": [
|
| 103 |
+
{
|
| 104 |
+
"name": "stdout",
|
| 105 |
+
"output_type": "stream",
|
| 106 |
+
"text": [
|
| 107 |
+
"Shape of X_train: (18260, 12)\n",
|
| 108 |
+
"Shape of y_train: (18260,)\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"Shape of X_test: (4566, 12)\n",
|
| 111 |
+
"Shape of y_test: (4566,)\n"
|
| 112 |
+
]
|
| 113 |
+
}
|
| 114 |
+
],
|
| 115 |
+
"source": [
|
| 116 |
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"# Printing shapes of train and test data\n",
|
| 119 |
+
"print(f'Shape of X_train: {X_train.shape}')\n",
|
| 120 |
+
"print(f'Shape of y_train: {y_train.shape}\\n')\n",
|
| 121 |
+
"print(f'Shape of X_test: {X_test.shape}')\n",
|
| 122 |
+
"print(f'Shape of y_test: {y_test.shape}')"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"cell_type": "markdown",
|
| 127 |
+
"id": "2fac7fe1",
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"source": [
|
| 130 |
+
"# Column Transformer\n",
|
| 131 |
+
"- Column Transformer shall be used to encode categorical columns\n",
|
| 132 |
+
"- Ordinal Encoder:\n",
|
| 133 |
+
" - `cabin`, `type_of_flight`, `frequency` columns\n",
|
| 134 |
+
"- One Hot Encoder:\n",
|
| 135 |
+
" - `airline` and `traveller_type` columns"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"cell_type": "code",
|
| 140 |
+
"execution_count": 9,
|
| 141 |
+
"id": "c1c76301",
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [],
|
| 144 |
+
"source": [
|
| 145 |
+
"ct_encoding = ColumnTransformer(transformers = [\n",
|
| 146 |
+
" ('ohe_enc', OneHotEncoder(handle_unknown = \"ignore\", sparse_output = False), [0,1]),\n",
|
| 147 |
+
" (\"ord_enc\", OrdinalEncoder(categories = [[\"Economy Class\", \"Premium Economy\", \"Business Class\", \"First Class\"], [\"Direct\", \"Indirect\"], [\"Rarely\", \"Occasionally\", \"Often\"]], encoded_missing_value = 0), [2,3,4]),\n",
|
| 148 |
+
"], remainder = 'passthrough')"
|
| 149 |
+
]
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"cell_type": "code",
|
| 153 |
+
"execution_count": 10,
|
| 154 |
+
"id": "06fa281d",
|
| 155 |
+
"metadata": {},
|
| 156 |
+
"outputs": [
|
| 157 |
+
{
|
| 158 |
+
"data": {
|
| 159 |
+
"text/plain": [
|
| 160 |
+
"array([[ 0., 0., 0., ..., 4., 5., 8.],\n",
|
| 161 |
+
" [ 0., 0., 0., ..., 1., 1., 1.],\n",
|
| 162 |
+
" [ 0., 0., 0., ..., 1., 1., 1.],\n",
|
| 163 |
+
" ...,\n",
|
| 164 |
+
" [ 0., 0., 0., ..., 5., 5., 10.],\n",
|
| 165 |
+
" [ 0., 0., 0., ..., 5., 5., 8.],\n",
|
| 166 |
+
" [ 0., 0., 0., ..., 5., 3., 8.]])"
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
"execution_count": 10,
|
| 170 |
+
"metadata": {},
|
| 171 |
+
"output_type": "execute_result"
|
| 172 |
+
}
|
| 173 |
+
],
|
| 174 |
+
"source": [
|
| 175 |
+
"ct_encoding.fit_transform(X_train)"
|
| 176 |
+
]
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"cell_type": "code",
|
| 180 |
+
"execution_count": 11,
|
| 181 |
+
"id": "a6aa28ce",
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [
|
| 184 |
+
{
|
| 185 |
+
"data": {
|
| 186 |
+
"text/plain": [
|
| 187 |
+
"(18260, 80)"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
"execution_count": 11,
|
| 191 |
+
"metadata": {},
|
| 192 |
+
"output_type": "execute_result"
|
| 193 |
+
}
|
| 194 |
+
],
|
| 195 |
+
"source": [
|
| 196 |
+
"ct_encoding.fit_transform(X_train).shape"
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"cell_type": "markdown",
|
| 201 |
+
"id": "83a0f447",
|
| 202 |
+
"metadata": {},
|
| 203 |
+
"source": [
|
| 204 |
+
"# Defining model\n",
|
| 205 |
+
"- We have already finalized our ML model to be applied by analyzing various models and performing extensive hyperparameter tuning.\n",
|
| 206 |
+
"- We will be using Random Forest Classifier as our ML model."
|
| 207 |
+
]
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"cell_type": "code",
|
| 211 |
+
"execution_count": 12,
|
| 212 |
+
"id": "46c88017",
|
| 213 |
+
"metadata": {},
|
| 214 |
+
"outputs": [],
|
| 215 |
+
"source": [
|
| 216 |
+
"model = RandomForestClassifier(n_estimators=359, \n",
|
| 217 |
+
" criterion='gini', \n",
|
| 218 |
+
" max_depth=16, \n",
|
| 219 |
+
" max_features='log2', \n",
|
| 220 |
+
" min_samples_split=25, \n",
|
| 221 |
+
" random_state=42)"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "markdown",
|
| 226 |
+
"id": "c3dcdf2f",
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"source": [
|
| 229 |
+
"# Creating Pipeline"
|
| 230 |
+
]
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"cell_type": "code",
|
| 234 |
+
"execution_count": 13,
|
| 235 |
+
"id": "605e7f32",
|
| 236 |
+
"metadata": {},
|
| 237 |
+
"outputs": [],
|
| 238 |
+
"source": [
|
| 239 |
+
"pipe = Pipeline(steps=[('encoding', ct_encoding), ('model_deploy', model)])"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": 14,
|
| 245 |
+
"id": "04e7ad13",
|
| 246 |
+
"metadata": {},
|
| 247 |
+
"outputs": [
|
| 248 |
+
{
|
| 249 |
+
"data": {
|
| 250 |
+
"text/html": [
|
| 251 |
+
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('encoding',\n",
|
| 252 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
| 253 |
+
" transformers=[('ohe_enc',\n",
|
| 254 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 255 |
+
" sparse_output=False),\n",
|
| 256 |
+
" [0, 1]),\n",
|
| 257 |
+
" ('ord_enc',\n",
|
| 258 |
+
" OrdinalEncoder(categories=[['Economy '\n",
|
| 259 |
+
" 'Class',\n",
|
| 260 |
+
" 'Premium '\n",
|
| 261 |
+
" 'Economy',\n",
|
| 262 |
+
" 'Business '\n",
|
| 263 |
+
" 'Class',\n",
|
| 264 |
+
" 'First '\n",
|
| 265 |
+
" 'Class'],\n",
|
| 266 |
+
" ['Direct',\n",
|
| 267 |
+
" 'Indirect'],\n",
|
| 268 |
+
" ['Rarely',\n",
|
| 269 |
+
" 'Occasionally',\n",
|
| 270 |
+
" 'Often']],\n",
|
| 271 |
+
" encoded_missing_value=0),\n",
|
| 272 |
+
" [2, 3, 4])])),\n",
|
| 273 |
+
" ('model_deploy',\n",
|
| 274 |
+
" RandomForestClassifier(max_depth=16, max_features='log2',\n",
|
| 275 |
+
" min_samples_split=25, n_estimators=359,\n",
|
| 276 |
+
" random_state=42))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('encoding',\n",
|
| 277 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
| 278 |
+
" transformers=[('ohe_enc',\n",
|
| 279 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 280 |
+
" sparse_output=False),\n",
|
| 281 |
+
" [0, 1]),\n",
|
| 282 |
+
" ('ord_enc',\n",
|
| 283 |
+
" OrdinalEncoder(categories=[['Economy '\n",
|
| 284 |
+
" 'Class',\n",
|
| 285 |
+
" 'Premium '\n",
|
| 286 |
+
" 'Economy',\n",
|
| 287 |
+
" 'Business '\n",
|
| 288 |
+
" 'Class',\n",
|
| 289 |
+
" 'First '\n",
|
| 290 |
+
" 'Class'],\n",
|
| 291 |
+
" ['Direct',\n",
|
| 292 |
+
" 'Indirect'],\n",
|
| 293 |
+
" ['Rarely',\n",
|
| 294 |
+
" 'Occasionally',\n",
|
| 295 |
+
" 'Often']],\n",
|
| 296 |
+
" encoded_missing_value=0),\n",
|
| 297 |
+
" [2, 3, 4])])),\n",
|
| 298 |
+
" ('model_deploy',\n",
|
| 299 |
+
" RandomForestClassifier(max_depth=16, max_features='log2',\n",
|
| 300 |
+
" min_samples_split=25, n_estimators=359,\n",
|
| 301 |
+
" random_state=42))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">encoding: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(remainder='passthrough',\n",
|
| 302 |
+
" transformers=[('ohe_enc',\n",
|
| 303 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 304 |
+
" sparse_output=False),\n",
|
| 305 |
+
" [0, 1]),\n",
|
| 306 |
+
" ('ord_enc',\n",
|
| 307 |
+
" OrdinalEncoder(categories=[['Economy Class',\n",
|
| 308 |
+
" 'Premium Economy',\n",
|
| 309 |
+
" 'Business Class',\n",
|
| 310 |
+
" 'First Class'],\n",
|
| 311 |
+
" ['Direct',\n",
|
| 312 |
+
" 'Indirect'],\n",
|
| 313 |
+
" ['Rarely',\n",
|
| 314 |
+
" 'Occasionally',\n",
|
| 315 |
+
" 'Often']],\n",
|
| 316 |
+
" encoded_missing_value=0),\n",
|
| 317 |
+
" [2, 3, 4])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ohe_enc</label><div class=\"sk-toggleable__content\"><pre>[0, 1]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ord_enc</label><div class=\"sk-toggleable__content\"><pre>[2, 3, 4]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(categories=[['Economy Class', 'Premium Economy',\n",
|
| 318 |
+
" 'Business Class', 'First Class'],\n",
|
| 319 |
+
" ['Direct', 'Indirect'],\n",
|
| 320 |
+
" ['Rarely', 'Occasionally', 'Often']],\n",
|
| 321 |
+
" encoded_missing_value=0)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">remainder</label><div class=\"sk-toggleable__content\"><pre>['seat_comfort', 'cabin_service', 'food_bev', 'entertainment', 'ground_service', 'value_for_money', 'overall']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">passthrough</label><div class=\"sk-toggleable__content\"><pre>passthrough</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(max_depth=16, max_features='log2', min_samples_split=25,\n",
|
| 322 |
+
" n_estimators=359, random_state=42)</pre></div></div></div></div></div></div></div>"
|
| 323 |
+
],
|
| 324 |
+
"text/plain": [
|
| 325 |
+
"Pipeline(steps=[('encoding',\n",
|
| 326 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
| 327 |
+
" transformers=[('ohe_enc',\n",
|
| 328 |
+
" OneHotEncoder(handle_unknown='ignore',\n",
|
| 329 |
+
" sparse_output=False),\n",
|
| 330 |
+
" [0, 1]),\n",
|
| 331 |
+
" ('ord_enc',\n",
|
| 332 |
+
" OrdinalEncoder(categories=[['Economy '\n",
|
| 333 |
+
" 'Class',\n",
|
| 334 |
+
" 'Premium '\n",
|
| 335 |
+
" 'Economy',\n",
|
| 336 |
+
" 'Business '\n",
|
| 337 |
+
" 'Class',\n",
|
| 338 |
+
" 'First '\n",
|
| 339 |
+
" 'Class'],\n",
|
| 340 |
+
" ['Direct',\n",
|
| 341 |
+
" 'Indirect'],\n",
|
| 342 |
+
" ['Rarely',\n",
|
| 343 |
+
" 'Occasionally',\n",
|
| 344 |
+
" 'Often']],\n",
|
| 345 |
+
" encoded_missing_value=0),\n",
|
| 346 |
+
" [2, 3, 4])])),\n",
|
| 347 |
+
" ('model_deploy',\n",
|
| 348 |
+
" RandomForestClassifier(max_depth=16, max_features='log2',\n",
|
| 349 |
+
" min_samples_split=25, n_estimators=359,\n",
|
| 350 |
+
" random_state=42))])"
|
| 351 |
+
]
|
| 352 |
+
},
|
| 353 |
+
"execution_count": 14,
|
| 354 |
+
"metadata": {},
|
| 355 |
+
"output_type": "execute_result"
|
| 356 |
+
}
|
| 357 |
+
],
|
| 358 |
+
"source": [
|
| 359 |
+
"pipe.fit(X_train, y_train)"
|
| 360 |
+
]
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"cell_type": "markdown",
|
| 364 |
+
"id": "641083dd",
|
| 365 |
+
"metadata": {},
|
| 366 |
+
"source": [
|
| 367 |
+
"# Exporting pipeline model as a joblib file"
|
| 368 |
+
]
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"cell_type": "code",
|
| 372 |
+
"execution_count": 16,
|
| 373 |
+
"id": "db7f494a",
|
| 374 |
+
"metadata": {},
|
| 375 |
+
"outputs": [
|
| 376 |
+
{
|
| 377 |
+
"data": {
|
| 378 |
+
"text/plain": [
|
| 379 |
+
"['ratings_model.joblib']"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
"execution_count": 16,
|
| 383 |
+
"metadata": {},
|
| 384 |
+
"output_type": "execute_result"
|
| 385 |
+
}
|
| 386 |
+
],
|
| 387 |
+
"source": [
|
| 388 |
+
"# joblib.dump(pipe, 'ratings_model.joblib')"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "markdown",
|
| 393 |
+
"id": "3ebceee1",
|
| 394 |
+
"metadata": {},
|
| 395 |
+
"source": [
|
| 396 |
+
"# Loading and taking predictions using exported model"
|
| 397 |
+
]
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"cell_type": "code",
|
| 401 |
+
"execution_count": 19,
|
| 402 |
+
"id": "d4219971",
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [
|
| 405 |
+
{
|
| 406 |
+
"name": "stdout",
|
| 407 |
+
"output_type": "stream",
|
| 408 |
+
"text": [
|
| 409 |
+
" precision recall f1-score support\n",
|
| 410 |
+
"\n",
|
| 411 |
+
" no 0.96 0.96 0.96 2333\n",
|
| 412 |
+
" yes 0.96 0.95 0.96 2233\n",
|
| 413 |
+
"\n",
|
| 414 |
+
" accuracy 0.96 4566\n",
|
| 415 |
+
" macro avg 0.96 0.96 0.96 4566\n",
|
| 416 |
+
"weighted avg 0.96 0.96 0.96 4566\n",
|
| 417 |
+
"\n"
|
| 418 |
+
]
|
| 419 |
+
}
|
| 420 |
+
],
|
| 421 |
+
"source": [
|
| 422 |
+
"load_model = joblib.load('ratings_model.joblib')\n",
|
| 423 |
+
"y_pred = load_model.predict(X_test)\n",
|
| 424 |
+
"print(classification_report(y_test, y_pred))"
|
| 425 |
+
]
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"cell_type": "markdown",
|
| 429 |
+
"id": "18279bbf",
|
| 430 |
+
"metadata": {},
|
| 431 |
+
"source": [
|
| 432 |
+
"### FINAL NOTES\n",
|
| 433 |
+
"- The model has been exported to a joblib file which can be used to deploy the model to a production environment.\n",
|
| 434 |
+
"- We have cross-checked the exported model and it is giving 96% accuracy on the test data."
|
| 435 |
+
]
|
| 436 |
+
}
|
| 437 |
+
],
|
| 438 |
+
"metadata": {
|
| 439 |
+
"kernelspec": {
|
| 440 |
+
"display_name": "Python 3 (ipykernel)",
|
| 441 |
+
"language": "python",
|
| 442 |
+
"name": "python3"
|
| 443 |
+
},
|
| 444 |
+
"language_info": {
|
| 445 |
+
"codemirror_mode": {
|
| 446 |
+
"name": "ipython",
|
| 447 |
+
"version": 3
|
| 448 |
+
},
|
| 449 |
+
"file_extension": ".py",
|
| 450 |
+
"mimetype": "text/x-python",
|
| 451 |
+
"name": "python",
|
| 452 |
+
"nbconvert_exporter": "python",
|
| 453 |
+
"pygments_lexer": "ipython3",
|
| 454 |
+
"version": "3.10.10"
|
| 455 |
+
}
|
| 456 |
+
},
|
| 457 |
+
"nbformat": 4,
|
| 458 |
+
"nbformat_minor": 5
|
| 459 |
+
}
|
Notebooks/raw_data.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8518b2905c0bb31fa4a15ee12505e6c4b2e375e4296a93b008044ecd81c047cf
|
| 3 |
+
size 24238491
|
app.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import base64

# Setting page config
st.set_page_config(page_title="Home", page_icon=":house:", layout="wide")

#-------------------------------- Background and custom CSS -------------------------------------#
# Implement background formatting
def set_bg_hack(main_bg):
    """Render a full-page background image behind the Streamlit app.

    Parameters
    ----------
    main_bg : str
        Path to a .jpg image file used as the page background.
    """
    # set bg name
    main_bg_ext = "jpg"
    # Read the image through a context manager so the file handle is closed
    # (the original called open(...).read() inside the f-string and leaked it).
    with open(main_bg, "rb") as bg_file:
        encoded_bg = base64.b64encode(bg_file.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{encoded_bg});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95% ;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )

set_bg_hack("images/dark_bg_home.jpg")

# Setting custom css: make the Streamlit header transparent so the
# background image shows through.
css = f"""
<style>

[data-testid="stHeader"] {{
background: rgba(0,0,0,0);
}}

</style>
"""
st.markdown(css, unsafe_allow_html=True)

#-------------------------------- Sidebar Modification -------------------------------------#
# Setting logo on sidebar
st.sidebar.image("images/logo.png", caption="About this app")
st.sidebar.markdown("##")
st.sidebar.markdown("##")
st.sidebar.markdown("##")
st.sidebar.markdown("[](https://github.com/prithush92)")
st.sidebar.markdown("[](https://www.linkedin.com/in/prithush92/)")

# Page title
st.title("Airline Passenger Feedback Portal ✈️")

# Description
st.markdown("""
Welcome to the Airline Passenger Feedback Portal! This app analyzes passenger ratings and reviews and predicts how likely is a passenger to recommend the airline. \n
""")

# Features
st.header("🛩 Features", divider="red")
st.markdown("""
- **Analyze Passenger Ratings**: A Machine Learning model is deployed to predict customer recommendation status based on Ratings.
- **Review Sentiment Analysis**: Customer Reviews are deeply analyzed and sentiment analysis is performed to determine the sentiment of the review.
- **Overall Recommendation Prediction**: Finally using both the Ratings and Review Sentiment, Overall Recommendation Status of the user is predicted.
- **Airline Reviews Dashboard**: For the convenience of Airline Companies, a dashboard is designed to easily visualize Passenger Ratings
and Important Keywords in both Positive and Negative Reviews.
""")

# Technologies Used
st.header("🚀 Technologies Used", divider="blue")
st.markdown("""
- **SQLite**: Database management system for storing and retrieving passenger feedback data.
- **Pandas**: Data manipulation and analysis library for handling datasets.
- **NumPy**: Numerical computing library for performing mathematical operations.
- **Matplotlib**: Visualization library for creating insightful plots and charts.
- **WordCloud**: Visualization tool for generating word clouds from textual data.
- **TensorFlow**: Deep learning framework for building and training machine learning models.
""")

# About the Developer
st.header("👨🏻‍💻 About the Developer", divider="green")
st.write("""
This app is developed by **Prithu Sharma**.
""")
st.markdown(
    """
    [](https://github.com/prithush92)
    [](https://www.linkedin.com/in/prithush92/)
    """
)

# Footer
st.markdown("---")
st.write("Explore the app and make informed decisions based on passenger feedback!")
|
data.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e54875f79f986d64329099ed965236b0cf77ceee9a4139c70c64339aaa7fd2e
|
| 3 |
+
size 40427520
|
images/air_white.jpeg
ADDED
|
images/air_white_flip.jpg
ADDED
|
images/dark_bg_home.jpg
ADDED
|
images/dashboard_bg.jpg
ADDED
|
images/form_bg.jpg
ADDED
|
Git LFS Details
|
images/logo.png
ADDED
|
nlp_model/fingerprint.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a50311c25f3ca296cb576f49c98420f5d1d944ec0729fd1365a70bb78fc6929
|
| 3 |
+
size 54
|
nlp_model/keras_metadata.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2161136a12f9a758f2283b7a7dcdea7a6304ee5b165b79218601b270e2d9d25
|
| 3 |
+
size 32086
|
nlp_model/saved_model.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37a501dc32d63688682a828760f2d109dec97473e0e39e761d83bc251c5e891f
|
| 3 |
+
size 11251034
|
nlp_model/variables/variables.data-00000-of-00001
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:719f227365b36776469b240ebe2e5a5a125b04a03a7af4691646a6e76eb05a41
|
| 3 |
+
size 1029836856
|
nlp_model/variables/variables.index
ADDED
|
Binary file (15.8 kB). View file
|
|
|
pages/2_βοΈ_Feedback.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importing Necessary Libraries
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import sqlite3
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import joblib
|
| 7 |
+
import tensorflow as tf
|
| 8 |
+
import re
|
| 9 |
+
import string
|
| 10 |
+
from nltk.tokenize import RegexpTokenizer
|
| 11 |
+
import spacy
|
| 12 |
+
import datetime
|
| 13 |
+
import base64
|
| 14 |
+
|
| 15 |
+
#-------------------------------- Setting Page Style -------------------------------------#
|
| 16 |
+
st.set_page_config(layout="wide", page_title="Customer Feedback", page_icon="βοΈ")
|
| 17 |
+
|
| 18 |
+
#impliment background formating
|
| 19 |
+
def set_bg_hack(main_bg):
    """Render a full-page background image behind the Streamlit app.

    Parameters
    ----------
    main_bg : str
        Path to a .jpg image file used as the page background.
    """
    # set bg name
    main_bg_ext = "jpg"
    # Read the image through a context manager so the file handle is closed
    # (the original called open(...).read() inside the f-string and leaked it).
    with open(main_bg, "rb") as bg_file:
        encoded_bg = base64.b64encode(bg_file.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{encoded_bg});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95% ;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )
|
| 36 |
+
|
| 37 |
+
set_bg_hack("images/form_bg.jpg")
|
| 38 |
+
|
| 39 |
+
css = f"""
|
| 40 |
+
<style>
|
| 41 |
+
|
| 42 |
+
[data-testid="stHeader"] {{
|
| 43 |
+
background: rgba(0,0,0,0);
|
| 44 |
+
}}
|
| 45 |
+
|
| 46 |
+
[data-testid="stExpander"] {{
|
| 47 |
+
background: rgba(0,0,0,0.4);
|
| 48 |
+
border: 2px solid #000071;
|
| 49 |
+
border-radius: 10px;
|
| 50 |
+
}}
|
| 51 |
+
|
| 52 |
+
</style>
|
| 53 |
+
"""
|
| 54 |
+
st.markdown(css, unsafe_allow_html=True)
|
| 55 |
+
|
| 56 |
+
# Setting logo on sidebar
|
| 57 |
+
st.sidebar.image("images/logo.png", caption="Passenger Feedback Form")
|
| 58 |
+
|
| 59 |
+
#-------------------------------- Connecting to database and loading data -------------------------------------#
|
| 60 |
+
connection = sqlite3.connect('data.db')
|
| 61 |
+
df = pd.read_sql(sql="SELECT * FROM airline_reviews", con=connection, parse_dates=['review_date'])
|
| 62 |
+
connection.close()
|
| 63 |
+
|
| 64 |
+
#-------------------------------- Function to clean reviews -------------------------------------#
|
| 65 |
+
|
| 66 |
+
# Defining acronyms and contractions
|
| 67 |
+
acronyms_dict = pd.read_json("stopwords/acronym.json", typ="series")
|
| 68 |
+
contractions_dict = pd.read_json("stopwords/contractions.json", typ="series")
|
| 69 |
+
|
| 70 |
+
# Defining stopwords
|
| 71 |
+
alphabets = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
|
| 72 |
+
others = ["Γ£", "Γ₯", "Γ¬", "Γ»", "Γ»Βͺm", "ûó", "ûò", "ìñ", "Γ»Βͺre", "Γ»Βͺve", "Γ»Βͺ", "Γ»Βͺs", "ûówe", "Γ―", "ûï", "Γ’β¬β’"]
|
| 73 |
+
common_words = ["flight", "fly", "airline", "via"]
|
| 74 |
+
airline_names = [airline.split()[0].lower() for airline in df['airline'].unique()]
|
| 75 |
+
stops = alphabets + others + common_words + airline_names
|
| 76 |
+
stops = list(set(stops))
|
| 77 |
+
|
| 78 |
+
# Defining Tokenizer
|
| 79 |
+
regexp = RegexpTokenizer("[\w']+")
|
| 80 |
+
nlp = spacy.load("en_core_web_sm")
|
| 81 |
+
|
| 82 |
+
# Defining function to clean reviews
|
| 83 |
+
def preprocess(text):
    """Clean a raw review string for the sentiment model.

    Pipeline: lowercase -> strip -> remove HTML tags, emoji, URLs, Twitter
    handles -> strip punctuation/digits -> expand acronyms and contractions
    (from the module-level ``acronyms_dict`` / ``contractions_dict``) ->
    lemmatize with spaCy -> remove stopwords (``stops``) -> drop noisy/short
    tokens -> drop certain POS tags.

    Parameters
    ----------
    text : str
        Raw review text as typed by the user.

    Returns
    -------
    str
        Space-separated cleaned tokens; may be "" if nothing survives.
    """
    # lowercase
    text = text.lower()

    # remove surrounding whitespace
    text = text.strip()

    # removing html tags
    html = re.compile(r'<.*?>')
    text = html.sub(r'', text)

    # removing emoji patterns
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags = re.UNICODE)
    text = emoji_pattern.sub(r'', text)

    # removing urls (raw string: the original used "\S"/"\." in a plain
    # string literal, which is an invalid escape sequence on modern Python)
    http = r"https?://\S+|www\.\S+"
    pattern = r"({})".format(http)
    text = re.sub(pattern, "", text)

    # removing twitter usernames if they exist
    pattern = r'@[\w_]+'
    text = re.sub(pattern, "", text)

    # Removing punctuations and numbers except ' and -
    punct_str = string.punctuation + string.digits
    punct_str = punct_str.replace("'", "")
    punct_str = punct_str.replace("-", "")
    text = text.translate(str.maketrans('', '', punct_str))

    # Replacing "-" in text with empty space
    text = text.replace("-", " ")

    # Substituting acronyms with their expansions
    words = []
    for word in regexp.tokenize(text):
        if word in acronyms_dict.index:
            words = words + acronyms_dict[word].split()
        else:
            words = words + word.split()
    text = ' '.join(words)  # acronyms

    # Substituting contractions with their expansions
    words = []
    for word in regexp.tokenize(text):
        if word in contractions_dict.index:
            words = words + contractions_dict[word].split()
        else:
            words = words + word.split()
    text = " ".join(words)

    # Removing punctuations again (expansions may reintroduce apostrophes)
    punct_str = string.punctuation
    text = text.translate(str.maketrans('', '', punct_str))

    # Lemmatization using spacy
    text = " ".join([token.lemma_ for token in nlp(text)])

    # Stopwords removal
    text = ' '.join([word for word in regexp.tokenize(text) if word not in stops])

    # Removing words with one alphabet occurring more than 3 times continuously
    pattern = r'\b\w*?(.)\1{2,}\w*\b'
    text = re.sub(pattern, "", text).strip()

    # Removing words with less than 3 characters
    pattern = r'\b\w{1,2}\b'
    text = re.sub(pattern, "", text).strip()

    # Removing all characters except alphabets and " " (space);
    # loop variable renamed so it no longer shadows the builtin chr()
    filter_text = string.ascii_letters + " "
    text = "".join([ch for ch in text if ch in filter_text])

    # Replacing multiple spaces with one single space
    pattern = r'\s+'
    text = re.sub(pattern, " ", text).strip()

    # Removing certain POS tags from the text
    remove_tags = ['PROPN', 'DET', 'CCONJ', 'PRON', 'AUX']
    text = " ".join([token.text for token in nlp(text) if token.pos_ not in remove_tags])

    # return final output
    return text
|
| 174 |
+
|
| 175 |
+
#================================== Web App Designing Begins ==================================#
|
| 176 |
+
|
| 177 |
+
#-------------------------------- Container 1 for Heading -------------------------------------#
|
| 178 |
+
container_1 = st.container()
|
| 179 |
+
with container_1:
|
| 180 |
+
empty1, head2, empty3 = st.columns(spec = [1.5,3,1.5], gap = 'medium')
|
| 181 |
+
with empty1:
|
| 182 |
+
st.empty()
|
| 183 |
+
with head2:
|
| 184 |
+
st.markdown("<h1><center>Welcome Aboard</center></h1>",unsafe_allow_html=True)
|
| 185 |
+
st.markdown("<h2><center>Tell us about your Experience βοΈ </center></h2>", unsafe_allow_html=True)
|
| 186 |
+
with empty3:
|
| 187 |
+
st.empty()
|
| 188 |
+
|
| 189 |
+
#-------------------------------- Container 2 for main_content --------------------------------#
|
| 190 |
+
container_2 = st.container()
|
| 191 |
+
with container_2:
|
| 192 |
+
col1, col2, col3, col4 = st.columns(spec = [1,3,3,1], gap = 'medium')
|
| 193 |
+
with col1:
|
| 194 |
+
st.empty()
|
| 195 |
+
|
| 196 |
+
with col2:
|
| 197 |
+
expander_1 = st.expander(label = "**Your Trip Info**", expanded = True)
|
| 198 |
+
with expander_1:
|
| 199 |
+
|
| 200 |
+
author = st.text_input(
|
| 201 |
+
label = "Please Enter your Name",
|
| 202 |
+
placeholder = "Enter your name"
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
+
airline = st.selectbox(
|
| 206 |
+
label = "Select your Airline",
|
| 207 |
+
options = tuple(sorted(df['airline'].unique())),
|
| 208 |
+
index = 0,
|
| 209 |
+
placeholder = "Choose an option..."
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
traveller_type = st.selectbox(
|
| 213 |
+
label = "Select your Trip type",
|
| 214 |
+
options = ("Business", "Solo Leisure", "Couple Leisure", "Family Leisure"),
|
| 215 |
+
index = 0,
|
| 216 |
+
placeholder = "Choose an option..."
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
cabin = st.selectbox(
|
| 220 |
+
label = "Select your Seat Class",
|
| 221 |
+
options = ("Economy Class", "Premium Economy", "Business Class", "First Class"),
|
| 222 |
+
index = 0,
|
| 223 |
+
placeholder = "Choose an option..."
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
type_of_flight = st.radio(
|
| 227 |
+
label = "Select your Flight Type",
|
| 228 |
+
options = ("Direct", 'Indirect'),
|
| 229 |
+
index = 0,
|
| 230 |
+
)
|
| 231 |
+
|
| 232 |
+
frequency = st.radio(
|
| 233 |
+
label = "How often do you fly?",
|
| 234 |
+
options = ('Often', 'Occasionally', 'Rarely'),
|
| 235 |
+
index = 1,
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
with col3:
|
| 239 |
+
expander_2 = st.expander(label = "Your Ratings", expanded = True)
|
| 240 |
+
with expander_2:
|
| 241 |
+
seat_comfort = st.slider(
|
| 242 |
+
label = "How comfortable are you with your seat?",
|
| 243 |
+
min_value = 1,
|
| 244 |
+
max_value = 5,
|
| 245 |
+
value = 3
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
cabin_service = st.slider(
|
| 249 |
+
label = "Please Rate your Cabin Service",
|
| 250 |
+
min_value = 1,
|
| 251 |
+
max_value = 5,
|
| 252 |
+
value = 3
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
food_bev = st.slider(
|
| 256 |
+
label = "Please rate the quality of food/beverages",
|
| 257 |
+
min_value = 1,
|
| 258 |
+
max_value = 5,
|
| 259 |
+
value = 3
|
| 260 |
+
)
|
| 261 |
+
|
| 262 |
+
entertainment = st.slider(
|
| 263 |
+
label = "Please rate the Entertainment Service",
|
| 264 |
+
min_value = 1,
|
| 265 |
+
max_value = 5,
|
| 266 |
+
value = 3
|
| 267 |
+
)
|
| 268 |
+
|
| 269 |
+
ground_service = st.slider(
|
| 270 |
+
label = "Please rate the Ground Service",
|
| 271 |
+
min_value = 1,
|
| 272 |
+
max_value = 5,
|
| 273 |
+
value = 3
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
+
value_for_money = st.slider(
|
| 277 |
+
label = "Value for Money Rating",
|
| 278 |
+
min_value = 1,
|
| 279 |
+
max_value = 5,
|
| 280 |
+
value = 3
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
with col4:
|
| 284 |
+
st.empty()
|
| 285 |
+
|
| 286 |
+
#-------------------------------- Container 3 for Final Rating Slider and Customer Review--------------------------------#
|
| 287 |
+
container_3 = st.container()
|
| 288 |
+
with container_3:
|
| 289 |
+
empty1, head2, empty3 = st.columns(spec = [1,3,1], gap = 'medium')
|
| 290 |
+
with empty1:
|
| 291 |
+
st.empty()
|
| 292 |
+
with head2:
|
| 293 |
+
overall = st.slider(
|
| 294 |
+
label = "How was your overall experience with the Airline?",
|
| 295 |
+
min_value = 1,
|
| 296 |
+
max_value = 10,
|
| 297 |
+
value = 7
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
review = st.text_area("Enter your review")
|
| 301 |
+
with empty3:
|
| 302 |
+
st.empty()
|
| 303 |
+
|
| 304 |
+
#-------------------------------- Creating DataFrame to pass into ML model --------------------------------#
|
| 305 |
+
temp_df = pd.DataFrame(
|
| 306 |
+
data = [[airline, traveller_type, cabin, type_of_flight, frequency,
|
| 307 |
+
seat_comfort, cabin_service, food_bev, entertainment,
|
| 308 |
+
ground_service, value_for_money, overall]],
|
| 309 |
+
columns = ['airline', 'traveller_type', 'cabin', 'type_of_flight', 'frequency',
|
| 310 |
+
'seat_comfort', 'cabin_service', 'food_bev', 'entertainment',
|
| 311 |
+
'ground_service', 'value_for_money', 'overall']
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
# Loading ML model using joblib file
|
| 315 |
+
model = joblib.load('ratings_model.joblib')
|
| 316 |
+
|
| 317 |
+
# Defining a function to store the nlp_model in streamlit cache memory
|
| 318 |
+
@st.cache_resource
|
| 319 |
+
def cache_model(model_name):
|
| 320 |
+
model = tf.keras.models.load_model(model_name)
|
| 321 |
+
return model
|
| 322 |
+
|
| 323 |
+
# Loading the nlp_model
|
| 324 |
+
nlp_model = cache_model("nlp_model")
|
| 325 |
+
|
| 326 |
+
#-------------------------------- Container 4 for Final Predictions --------------------------------#
|
| 327 |
+
|
| 328 |
+
container_4 = st.container()
with container_4:
    empty1, head2, empty3 = st.columns(spec = [1,3,1], gap = 'medium')
    with empty1:
        st.empty()

    with head2:

        # Creating a toggle button to save form to database
        save_to_db = st.toggle("Save to Database")

        # Creating a button to get prediction
        if st.button('Submit'):
            # Validate user input BEFORE predicting or saving: the original
            # code appended the row to the database first and only then
            # checked for empty fields, persisting invalid rows.
            if author=="":
                st.error("Please Enter your Name")
                st.stop()

            if review=="":
                st.error("Please write your Review")
                st.stop()

            clean_review = preprocess(review)
            if clean_review=="":
                st.error("Please write a proper review")
                st.stop()

            # Ratings-based model: class label and class probabilities
            y_pred = model.predict(temp_df)
            y_pred_prob = model.predict_proba(temp_df)

            # Sentiment model: positive-class probability, thresholded at 0.5
            review_pred_proba = nlp_model.predict([clean_review])
            review_pred = np.where(review_pred_proba > 0.5, 1, 0)[0][0]

            review_date = datetime.datetime.now()

            # if sum of probabilities of both ratings model and nlp_model >=1
            # then the author has recommended the airline
            # (single if/else instead of the original's two disjoint ifs)
            if y_pred_prob[:,1] + review_pred_proba >= 1:
                recommended = "yes"
            else:
                recommended = "no"

            append_df = pd.DataFrame(data = [[review_date, author, airline, traveller_type, cabin, type_of_flight, frequency,
                                              seat_comfort, cabin_service, food_bev, entertainment,
                                              ground_service, value_for_money, overall, review, clean_review, recommended]],
                                     columns = ['review_date', 'author', 'airline', 'traveller_type', 'cabin', 'type_of_flight', 'frequency',
                                                'seat_comfort', 'cabin_service', 'food_bev', 'entertainment',
                                                'ground_service', 'value_for_money', 'overall', 'customer_review', 'review_clean', 'recommended'])

            # If save_to_db toggle is True, then append to database
            if save_to_db:
                # Creating a connection to the database
                connection = sqlite3.connect('data.db')
                # Appending append_df to airline_reviews table in data.db
                append_df.to_sql(name='airline_reviews', con=connection, if_exists='append', index=False)
                # Closing the connection to the database
                connection.close()

            # Show the entered data
            st.dataframe(append_df)

            # Feedback message depends on the agreement between the two models
            if (y_pred[0] == 'yes') & (review_pred == 1):
                st.success("Thank you for your positive feedback! \nWe're delighted to hear that you had a great experience with our service.")
                st.balloons()
            elif (y_pred[0] == 'yes') & (review_pred == 0):
                st.warning("We appreciate your positive rating, but we're sorry to hear about your concerns in the review. \nPlease share more details so we can address them and enhance your experience.")
            elif (y_pred[0] == 'no') & (review_pred == 0):
                st.error("We apologize for falling short of your expectations. \nYour feedback is valuable, and we're committed to improving. \nPlease provide specific details about your experience for us to better understand and address the issues.")
            elif (y_pred[0] == 'no') & (review_pred == 1):
                st.error("We're sorry to hear about your negative rating, but we're glad to see your positive comments in the review. \nWe'd like to learn more about your concerns to ensure we address any issues and enhance your satisfaction.")

    with empty3:
        st.empty()
|
pages/3_π_Dashboard.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import sqlite3
|
| 4 |
+
import numpy as np
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
from wordcloud import WordCloud
|
| 7 |
+
from PIL import Image
|
| 8 |
+
import base64
|
| 9 |
+
|
| 10 |
+
#-------------------------------- Setting Page Style -------------------------------------#
|
| 11 |
+
st.set_option('deprecation.showPyplotGlobalUse', False)
|
| 12 |
+
|
| 13 |
+
st.set_page_config(page_title="Dashboard", page_icon=":bar_chart:", layout="wide")
|
| 14 |
+
# st.subheader("Dashboard")
|
| 15 |
+
# st.markdown("##")
|
| 16 |
+
|
| 17 |
+
#-------------------------------- Background and custom CSS -------------------------------------#
|
| 18 |
+
#impliment background formating
|
| 19 |
+
def set_bg_hack(main_bg):
|
| 20 |
+
# set bg name
|
| 21 |
+
main_bg_ext = "jpg"
|
| 22 |
+
st.markdown(
|
| 23 |
+
f"""
|
| 24 |
+
<style>
|
| 25 |
+
.stApp {{
|
| 26 |
+
background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
|
| 27 |
+
background-repeat: no-repeat;
|
| 28 |
+
background-position: right 50% bottom 95% ;
|
| 29 |
+
background-size: cover;
|
| 30 |
+
background-attachment: scroll;
|
| 31 |
+
}}
|
| 32 |
+
</style>
|
| 33 |
+
""",
|
| 34 |
+
unsafe_allow_html=True,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
set_bg_hack("images/dashboard_bg.jpg")
|
| 38 |
+
|
| 39 |
+
# Setting custom css
|
| 40 |
+
css = f"""
|
| 41 |
+
<style>
|
| 42 |
+
|
| 43 |
+
[data-testid="stHeader"] {{
|
| 44 |
+
background: rgba(0,0,0,0);
|
| 45 |
+
}}
|
| 46 |
+
|
| 47 |
+
</style>
|
| 48 |
+
"""
|
| 49 |
+
st.markdown(css, unsafe_allow_html=True)
|
| 50 |
+
|
| 51 |
+
#-------------------------------- Connecting to database and loading data -------------------------------------#
|
| 52 |
+
connection = sqlite3.connect('data.db')
|
| 53 |
+
df = pd.read_sql(sql="SELECT * FROM airline_reviews", con=connection, parse_dates=['review_date'])
|
| 54 |
+
|
| 55 |
+
# Setting logo on sidebar
|
| 56 |
+
st.sidebar.image("images/logo.png", caption='Airline Reviews Dashboard')
|
| 57 |
+
|
| 58 |
+
#-------------------------------- Creating Filters --------------------------------#
|
| 59 |
+
st.sidebar.header("Filters")
|
| 60 |
+
|
| 61 |
+
airline = st.sidebar.selectbox(
|
| 62 |
+
label = "Select your Airline",
|
| 63 |
+
options=df["airline"].unique(),
|
| 64 |
+
index=6
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
traveller_type = st.sidebar.multiselect(
|
| 68 |
+
label = "Select Traveller Type",
|
| 69 |
+
options=df["traveller_type"].unique(),
|
| 70 |
+
default=df["traveller_type"].unique()
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
cabin = st.sidebar.multiselect(
|
| 74 |
+
label = "Select Cabin",
|
| 75 |
+
options=df["cabin"].unique(),
|
| 76 |
+
default=df["cabin"].unique()
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
type_of_flight = st.sidebar.multiselect(
|
| 80 |
+
label = "Select flight type",
|
| 81 |
+
options=df["type_of_flight"].unique(),
|
| 82 |
+
default=df["type_of_flight"].unique()
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
frequency = st.sidebar.multiselect(
|
| 86 |
+
label = "Select Frequency",
|
| 87 |
+
options=df["frequency"].unique(),
|
| 88 |
+
default=df["frequency"].unique()
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
df_selection = df.query(
|
| 92 |
+
"airline == @airline & traveller_type == @traveller_type & cabin == @cabin & type_of_flight == @type_of_flight & frequency == @frequency"
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
#-------------------------------- Defining Function to show KPIs --------------------------------#
|
| 96 |
+
def KPI():
    """Render a raw-data expander and five headline KPI metrics for the
    currently filtered reviews (``df_selection``)."""
    with st.expander("View Data"):
        showdata = st.multiselect(label='Filter:', options=df_selection.columns,
                                  default=['review_date', 'author', 'airline', 'cabin', 'seat_comfort',
                                           'cabin_service', 'food_bev', 'entertainment',
                                           'ground_service', 'value_for_money',
                                           'overall', 'review_clean', 'recommended'])
        st.write(df_selection[showdata])

    # Designing KPIs
    total_reviews = len(df_selection)
    total_positive_reviews = len(df_selection[df_selection["recommended"] == 'yes'])
    total_negative_reviews = len(df_selection[df_selection["recommended"] == 'no'])
    # Guard against ZeroDivisionError when the sidebar filters exclude every row.
    if total_reviews > 0:
        positive_percentage = float((total_positive_reviews / total_reviews) * 100)
        negative_percentage = float((total_negative_reviews / total_reviews) * 100)
    else:
        positive_percentage = 0.0
        negative_percentage = 0.0

    # Creating columns for all KPIs
    col1, col2, col3, col4, col5 = st.columns(5, gap="medium")

    # Assigning KPIs to columns one by one
    with col1:
        st.info('Total Reviews', icon='📑')
        st.metric(label = 'Total Reviews', value=total_reviews)

    with col2:
        # NOTE(review): the icon glyph was mojibake in the source paste;
        # reconstructed as a checkmark — confirm against the original file.
        st.info('Total Positive Reviews', icon='✅')
        st.metric(label = 'Total Positive Reviews', value=total_positive_reviews)

    with col3:
        st.info('Total Negative Reviews', icon='❌')
        st.metric(label = 'Total Negative Reviews', value=total_negative_reviews)

    with col4:
        st.info('Positive Percentage', icon='📈')
        st.metric(label = 'Positive Percentage', value=f"{positive_percentage:,.2f}")

    with col5:
        st.info('Negative Percentage', icon='📉')
        st.metric(label = 'Negative Percentage', value=f"{negative_percentage:,.2f}")
|
| 135 |
+
|
| 136 |
+
# st.markdown("---")
|
| 137 |
+
|
| 138 |
+
#-------------------------------- Defining Function to Wordcloud --------------------------------#
|
| 139 |
+
def graphs():
|
| 140 |
+
|
| 141 |
+
positive_review_text = " ".join(review for review in df_selection[df_selection["recommended"] == 'yes']['review_clean'])
|
| 142 |
+
negative_review_text = " ".join(review for review in df_selection[df_selection["recommended"] == 'no']['review_clean'])
|
| 143 |
+
|
| 144 |
+
plot1, plot2 = st.columns(2)
|
| 145 |
+
|
| 146 |
+
with plot1:
|
| 147 |
+
air_mask = np.array(Image.open("images/air_white.jpeg"))
|
| 148 |
+
pos_wordcloud = WordCloud(max_words=50,
|
| 149 |
+
mask = air_mask,
|
| 150 |
+
colormap="summer",
|
| 151 |
+
min_word_length=3,
|
| 152 |
+
background_color="black").generate(positive_review_text)
|
| 153 |
+
plt.imshow(pos_wordcloud, interpolation="bilinear")
|
| 154 |
+
plt.gcf().set_facecolor("black")
|
| 155 |
+
plt.axis("off")
|
| 156 |
+
plt.title("Positive Reviews", fontsize=15, fontweight="bold", color="green")
|
| 157 |
+
st.pyplot()
|
| 158 |
+
|
| 159 |
+
with plot2:
|
| 160 |
+
air_mask_flip = np.array(Image.open("images/air_white_flip.jpg"))
|
| 161 |
+
neg_wordcloud = WordCloud(max_words=50,
|
| 162 |
+
mask = air_mask_flip,
|
| 163 |
+
colormap="autumn",
|
| 164 |
+
min_word_length=3,
|
| 165 |
+
background_color="black").generate(negative_review_text)
|
| 166 |
+
plt.imshow(neg_wordcloud, interpolation="bilinear")
|
| 167 |
+
plt.gcf().set_facecolor("black")
|
| 168 |
+
plt.axis("off")
|
| 169 |
+
plt.title("Negative Reviews", fontsize=15, fontweight="bold", color="red")
|
| 170 |
+
st.pyplot()
|
| 171 |
+
|
| 172 |
+
#-------------------------------- Defining Function to showcase Average Ratings --------------------------------#
|
| 173 |
+
def Ratings():
|
| 174 |
+
# Creating an expander showcasing average ratings for all amenities
|
| 175 |
+
with st.expander("View Average Ratings", expanded=True):
|
| 176 |
+
avg_seat_comfort = float(df_selection["seat_comfort"].mean())
|
| 177 |
+
avg_cabin_service = float(df_selection["cabin_service"].mean())
|
| 178 |
+
avg_food_bev = float(df_selection["food_bev"].mean())
|
| 179 |
+
avg_entertainment = float(df_selection["entertainment"].mean())
|
| 180 |
+
avg_ground_service = float(df_selection["ground_service"].mean())
|
| 181 |
+
avg_value_for_money = float(df_selection["value_for_money"].mean())
|
| 182 |
+
|
| 183 |
+
avg1, avg2, avg3, avg4, avg5, avg6 = st.columns(6, gap="medium")
|
| 184 |
+
|
| 185 |
+
with avg1:
|
| 186 |
+
st.info('Seat Comfort', icon='πΊ')
|
| 187 |
+
st.metric(label = 'Average', value=f"{avg_seat_comfort:,.1f}")
|
| 188 |
+
|
| 189 |
+
with avg2:
|
| 190 |
+
st.info('Cabin Service', icon='ποΈ')
|
| 191 |
+
st.metric(label = 'Average', value=f"{avg_cabin_service:,.1f}")
|
| 192 |
+
|
| 193 |
+
with avg3:
|
| 194 |
+
st.info('Food & Beverage', icon='π')
|
| 195 |
+
st.metric(label = 'Average', value=f"{avg_food_bev:,.1f}")
|
| 196 |
+
|
| 197 |
+
with avg4:
|
| 198 |
+
st.info('Entertainment', icon='π¬')
|
| 199 |
+
st.metric(label = 'Average', value=f"{avg_entertainment:,.1f}")
|
| 200 |
+
|
| 201 |
+
with avg5:
|
| 202 |
+
st.info('Ground Service', icon='π')
|
| 203 |
+
st.metric(label = 'Average', value=f"{avg_ground_service:,.1f}")
|
| 204 |
+
|
| 205 |
+
with avg6:
|
| 206 |
+
st.info('Value for Money', icon='π°')
|
| 207 |
+
st.metric(label = 'Average', value=f"{avg_value_for_money:,.1f}")
|
| 208 |
+
|
| 209 |
+
# st.markdown("---")
|
| 210 |
+
|
| 211 |
+
#--------------------------------------- Calling Functions ----------------------------------------#
|
| 212 |
+
KPI()
|
| 213 |
+
graphs()
|
| 214 |
+
Ratings()
|
ratings_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67f9e41a2389e5d3550d8d551dd4c3568d91044964c92738c712325da7c2ec0b
|
| 3 |
+
size 18548591
|
raw_data.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8518b2905c0bb31fa4a15ee12505e6c4b2e375e4296a93b008044ecd81c047cf
|
| 3 |
+
size 24238491
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.3.0
|
| 2 |
+
Pillow == 9.4.0
|
| 3 |
+
nltk == 3.8.1
|
| 4 |
+
numpy == 1.24.3
|
| 5 |
+
pandas == 2.0.3
|
| 6 |
+
tensorflow == 2.14.0
|
| 7 |
+
regex == 2022.7.9
|
| 8 |
+
joblib
|
| 9 |
+
spacy == 3.7.2
|
| 10 |
+
scikit-learn == 1.3.2
|
| 11 |
+
wordcloud == 1.9.3
|
| 12 |
+
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
stopwords/acronym.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"aka":"also known as","asap":"as soon as possible","brb":"be right back","btw":"by the way","dob":"date of birth","faq":"frequently asked questions","fyi":"for your information","idk":"i don't know","idc":"i don't care","iirc":"if i recall correctly","imo":"in my opinion","irl":"in real life","lmk":"let me know","lol":"laugh out loud","ngl":"not gonna lie","noyb":"none of your business","nvm":"never mind","ofc":"of course","omg":"oh my god","pfa":"please find attached","rofl":"rolling on the floor laughing","stfu":"shut the fuck up","tba":"to be announced","tbc":"to be continued","tbd":"to be determined","tbh":"to be honest","ttyl":"talk to you later","wtf":"what the fuck","wth":"what the heck"}
|
stopwords/contractions.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"'aight":"alright","ain't":"are not","amn't":"am not","arencha":"are not you","aren't":"are not","'bout":"about","can't":"cannot","cap'n":"captain","'cause":"because","'cept":"except","could've":"could have","couldn't":"could not","couldn't've":"could not have","dammit":"damn it","daren't":"dare not","daresn't":"dare not","dasn't":"dare not","didn't":"did not","didn":"did not","doesn't":"does not","doin'":"doing","don't":"do not","dunno":"do not know","d'ye":"do you","e'en":"even","e'er":"ever","'em":"them","everybody's":"everybody is","everyone's":"everyone is","fo'c'sle":"forecastle","finna":"fixing to","'gainst":"against","g'day":"good day","gimme":"give me","giv'n":"given","gonna":"going to","gon't":"go not","gotcha":"got you","gotta":"got to","gtg":"got to go","hadn't":"had not","had've":"had have","hasn't":"has not","haven't":"have not","he'd":"he had","he'll":"he shall","helluva":"hell of a","he's":"he is","here's":"here is","he've":"he have","how'd":"how would","howdy":"how do you do","how'll":"how will","how're":"how are","how's":"how is","i'd":"i had","i'd've":"i would have","i'll":"i shall","i'm":"i am","imma":"i am about to","i'm'a":"i am about to","i'm'o":"i am going to","innit":"is it not","ion":"i do not","i've":"i have","isn't":"is not","it'd":"it would","it'll":"it shall","it's":"it is","iunno":"i do not know","kinda":"kind of","let's":"let us","li'l":"little","ma'am":"madam","mayn't":"may not","may've":"may have","methinks":"me thinks","mightn't":"might not","might've":"might have","mustn't":"must not","mustn't've":"must not have","must've":"must have","'neath":"beneath","needn't":"need not","nal":"and all","ne'er":"never","o'clock":"of the clock","o'er":"over","ol'":"old","oughtn't":"ought not","'round":"around","'s":"is","shalln't":"shall not","shan't":"shall not","she'd":"she had","she'll":"she shall","she's":"she is","should've":"should have","shouldn't":"should not","shouldn't've":"should not have","somebody's":"somebody 
is","someone's":"someone is","something's":"something is","so're":"so are","so's":"so is","so've":"so have","that'll":"that shall","that're":"that are","that's":"that is","that'd":"that would","there'd":"there had","there'll":"there shall","there're":"there are","there's":"there is","these're":"these are","these've":"these have","they'd":"they had","they'll":"they shall","they're":"they are","they've":"they have","this's":"this is","those're":"those are","those've":"those have","'thout":"without","'til":"until","'tis":"it is","to've":"to have","'twas":"it was","'tween":"between","'twhere":"it were","wanna":"want to","wasn't":"was not","we'd":"we had","we'd've":"we would have","we'll":"we shall","we're":"we are","we've":"we have","weren't":"were not","whatcha":"what are you","what'd":"what did","what'll":"what shall","what're":"what are","what's":"what is","what've":"what have","when's":"when is","where'd":"where did","where'll":"where shall","where're":"where are","where's":"where is","where've":"where have","which'd":"which had","which'll":"which shall","which're":"which are","which's":"which is","which've":"which have","who'd":"who would","who'd've":"who would have","who'll":"who shall","who're":"who are","who's":"who is","who've":"who have","why'd":"why did","why're":"why are","why's":"why is","willn't":"will not","won't":"will not","wonnot":"will not","would've":"would have","wouldn't":"would not","wouldn't've":"would not have","y'all":"you all","y'all'd've":"you all would have","y'all'd'n't've":"you all would not have","y'all're":"you all are","y'all'ren't":"you all are not","y'at":"you at","yes'm":"yes madam","yessir":"yes sir","you'd":"you had","you'll":"you shall","you're":"you are","you've":"you have","aight":"alright","aint":"are not","amnt":"am not","arent":"are not","cant":"cannot","cause":"because","couldve":"could have","couldnt":"could not","couldntve":"could not have","darent":"dare not","daresnt":"dare not","dasnt":"dare not","didnt":"did 
not","doesnt":"does not","doin":"doing","dont":"do not","eer":"ever","everybodys":"everybody is","everyones":"everyone is","gday":"good day","givn":"given","gont":"go not","hadnt":"had not","hadve":"had have","hasnt":"has not","havent":"have not","hed":"he had","hell":"he shall","hes":"he is","heve":"he have","howd":"how did","howll":"how will","howre":"how are","hows":"how is","idve":"i would have","ill":"i shall","im":"i am","ima":"i am about to","imo":"i am going to","ive":"i have","isnt":"is not","itd":"it would","itll":"it shall","its":"it is","lets":"let us","lil":"little","maam":"madam","maynt":"may not","mayve":"may have","mightnt":"might not","mightve":"might have","mustnt":"must not","mustntve":"must not have","mustve":"must have","neednt":"need not","neer":"never","oclock":"of the clock","oer":"over","ol":"old","oughtnt":"ought not","shallnt":"shall not","shant":"shall not","shed":"she had","shell":"she shall","shes":"she is","shouldve":"should have","shouldnt":"should not","shouldntve":"should not have","somebodys":"somebody is","someones":"someone is","somethings":"something is","thatll":"that shall","thatre":"that are","thatd":"that would","thered":"there had","therell":"there shall","therere":"there are","theres":"there is","thesere":"these are","theseve":"these have","theyd":"they had","theyll":"they shall","theyre":"they are","theyve":"they have","thiss":"this is","thosere":"those are","thoseve":"those have","tis":"it is","tove":"to have","twas":"it was","wasnt":"was not","wed":"we had","wedve":"we would have","were":"we are","weve":"we have","werent":"were not","whatd":"what did","whatll":"what shall","whatre":"what are","whats":"what is","whatve":"what have","whens":"when is","whered":"where did","wherell":"where shall","wherere":"where are","wheres":"where is","whereve":"where have","whichd":"which had","whichll":"which shall","whichre":"which are","whichs":"which is","whichve":"which have","whod":"who would","whodve":"who would 
have","wholl":"who shall","whore":"who are","whos":"who is","whove":"who have","whyd":"why did","whyre":"why are","whys":"why is","wont":"will not","wouldve":"would have","wouldnt":"would not","wouldntve":"would not have","yall":"you all","yalldve":"you all would have","yallre":"you all are","youd":"you had","youll":"you shall","youre":"you are","youve":"you have","'re":"are","thats":"that is"}
|