prithush commited on
Commit
1cb54c7
Β·
verified Β·
1 Parent(s): 4aa113a

Upload 26 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Airline[[:space:]]Passenger[[:space:]]Feedback[[:space:]]Portal.pptx filter=lfs diff=lfs merge=lfs -text
37
+ data.db filter=lfs diff=lfs merge=lfs -text
38
+ images/form_bg.jpg filter=lfs diff=lfs merge=lfs -text
39
+ nlp_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
40
+ Notebooks/raw_data.xlsx filter=lfs diff=lfs merge=lfs -text
41
+ raw_data.xlsx filter=lfs diff=lfs merge=lfs -text
Airline Passenger Feedback Portal.pptx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a81066397780d7b50ce13a3c44ff1b6d1ed7ddc77d305c0b95ae5449b4594cc
3
+ size 6137901
Notebooks/Cleaning_&_Preprocessing_raw_data.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Notebooks/Cleaning_reviews.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Notebooks/Machine_Leaning_Model_using_Ratings.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Notebooks/Ratings_Model_Pipeline.ipynb ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "80096ce4",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Importing Necessary Libraries"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "ae548824",
15
+ "metadata": {
16
+ "tags": []
17
+ },
18
+ "outputs": [],
19
+ "source": [
20
+ "import sqlite3\n",
21
+ "\n",
22
+ "import numpy as np\n",
23
+ "import pandas as pd\n",
24
+ "\n",
25
+ "from sklearn.model_selection import train_test_split\n",
26
+ "from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder\n",
27
+ "from sklearn.ensemble import RandomForestClassifier\n",
28
+ "\n",
29
+ "from sklearn.compose import ColumnTransformer\n",
30
+ "from sklearn.pipeline import Pipeline\n",
31
+ "from sklearn.metrics import classification_report\n",
32
+ "\n",
33
+ "import joblib"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "markdown",
38
+ "id": "878bccd1",
39
+ "metadata": {},
40
+ "source": [
41
+ "# Reading the dataset"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 2,
47
+ "id": "70f7f30a",
48
+ "metadata": {
49
+ "tags": []
50
+ },
51
+ "outputs": [
52
+ {
53
+ "ename": "DatabaseError",
54
+ "evalue": "Execution failed on sql 'SELECT * FROM airline_reviews': no such table: airline_reviews",
55
+ "output_type": "error",
56
+ "traceback": [
57
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
58
+ "\u001b[1;31mOperationalError\u001b[0m Traceback (most recent call last)",
59
+ "File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2018\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2017\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 2018\u001b[0m cur\u001b[38;5;241m.\u001b[39mexecute(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 2019\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cur\n",
60
+ "\u001b[1;31mOperationalError\u001b[0m: no such table: airline_reviews",
61
+ "\nThe above exception was the direct cause of the following exception:\n",
62
+ "\u001b[1;31mDatabaseError\u001b[0m Traceback (most recent call last)",
63
+ "Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m connection \u001b[38;5;241m=\u001b[39m sqlite3\u001b[38;5;241m.\u001b[39mconnect(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata.db\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_sql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[38;5;124;43mSELECT * FROM airline_reviews\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcon\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreview_date\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m df\u001b[38;5;241m.\u001b[39mdrop(columns \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview_date\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mauthor\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcustomer_review\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview_clean\u001b[39m\u001b[38;5;124m'\u001b[39m], inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 4\u001b[0m df\u001b[38;5;241m.\u001b[39mhead()\n",
64
+ "File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:564\u001b[0m, in \u001b[0;36mread_sql\u001b[1;34m(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)\u001b[0m\n\u001b[0;32m 561\u001b[0m pandas_sql \u001b[38;5;241m=\u001b[39m pandasSQL_builder(con)\n\u001b[0;32m 563\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pandas_sql, SQLiteDatabase):\n\u001b[1;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpandas_sql\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex_col\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 567\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 568\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoerce_float\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoerce_float\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 569\u001b[0m \u001b[43m \u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparse_dates\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 570\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 571\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 573\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 574\u001b[0m _is_table_name \u001b[38;5;241m=\u001b[39m pandas_sql\u001b[38;5;241m.\u001b[39mhas_table(sql)\n",
65
+ "File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2078\u001b[0m, in \u001b[0;36mSQLiteDatabase.read_query\u001b[1;34m(self, sql, index_col, coerce_float, params, parse_dates, chunksize, dtype)\u001b[0m\n\u001b[0;32m 2066\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_query\u001b[39m(\n\u001b[0;32m 2067\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 2068\u001b[0m sql,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2074\u001b[0m dtype: DtypeArg \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 2075\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Iterator[DataFrame]:\n\u001b[0;32m 2077\u001b[0m args \u001b[38;5;241m=\u001b[39m _convert_params(sql, params)\n\u001b[1;32m-> 2078\u001b[0m cursor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2079\u001b[0m columns \u001b[38;5;241m=\u001b[39m [col_desc[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m col_desc \u001b[38;5;129;01min\u001b[39;00m cursor\u001b[38;5;241m.\u001b[39mdescription]\n\u001b[0;32m 2081\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
66
+ "File \u001b[1;32mc:\\Users\\prith\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\pandas\\io\\sql.py:2030\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minner_exc\u001b[39;00m\n\u001b[0;32m 2029\u001b[0m ex \u001b[38;5;241m=\u001b[39m DatabaseError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExecution failed on sql \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 2030\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n",
67
+ "\u001b[1;31mDatabaseError\u001b[0m: Execution failed on sql 'SELECT * FROM airline_reviews': no such table: airline_reviews"
68
+ ]
69
+ }
70
+ ],
71
+ "source": [
72
+ "connection = sqlite3.connect('data.db')\n",
73
+ "df = pd.read_sql(sql=\"\"\"SELECT * FROM airline_reviews\"\"\", con=connection, parse_dates=['review_date'])\n",
74
+ "df.drop(columns = ['review_date', 'author', 'customer_review', 'review_clean'], inplace=True)\n",
75
+ "df.head()"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "markdown",
80
+ "id": "26adeb4a",
81
+ "metadata": {},
82
+ "source": [
83
+ "# Splitting Data into training and test sets"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": 7,
89
+ "id": "429fdf53",
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "X = df.drop(columns = \"recommended\")\n",
94
+ "y = df[\"recommended\"]"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": 8,
100
+ "id": "214fbd25",
101
+ "metadata": {},
102
+ "outputs": [
103
+ {
104
+ "name": "stdout",
105
+ "output_type": "stream",
106
+ "text": [
107
+ "Shape of X_train: (18260, 12)\n",
108
+ "Shape of y_train: (18260,)\n",
109
+ "\n",
110
+ "Shape of X_test: (4566, 12)\n",
111
+ "Shape of y_test: (4566,)\n"
112
+ ]
113
+ }
114
+ ],
115
+ "source": [
116
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)\n",
117
+ "\n",
118
+ "# Printing shapes of train and test data\n",
119
+ "print(f'Shape of X_train: {X_train.shape}')\n",
120
+ "print(f'Shape of y_train: {y_train.shape}\\n')\n",
121
+ "print(f'Shape of X_test: {X_test.shape}')\n",
122
+ "print(f'Shape of y_test: {y_test.shape}')"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "markdown",
127
+ "id": "2fac7fe1",
128
+ "metadata": {},
129
+ "source": [
130
+ "# Column Transformer\n",
131
+ "- Column Transformer shall be used to encode categorical columns\n",
132
+ "- Ordinal Encoder:\n",
133
+ " - `cabin`, `type_of_flight`, `frequency` columns\n",
134
+ "- One Hot Encoder:\n",
135
+ " - `airline` and `traveller_type` columns"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": 9,
141
+ "id": "c1c76301",
142
+ "metadata": {},
143
+ "outputs": [],
144
+ "source": [
145
+ "ct_encoding = ColumnTransformer(transformers = [\n",
146
+ " ('ohe_enc', OneHotEncoder(handle_unknown = \"ignore\", sparse_output = False), [0,1]),\n",
147
+ " (\"ord_enc\", OrdinalEncoder(categories = [[\"Economy Class\", \"Premium Economy\", \"Business Class\", \"First Class\"], [\"Direct\", \"Indirect\"], [\"Rarely\", \"Occasionally\", \"Often\"]], encoded_missing_value = 0), [2,3,4]),\n",
148
+ "], remainder = 'passthrough')"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 10,
154
+ "id": "06fa281d",
155
+ "metadata": {},
156
+ "outputs": [
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "array([[ 0., 0., 0., ..., 4., 5., 8.],\n",
161
+ " [ 0., 0., 0., ..., 1., 1., 1.],\n",
162
+ " [ 0., 0., 0., ..., 1., 1., 1.],\n",
163
+ " ...,\n",
164
+ " [ 0., 0., 0., ..., 5., 5., 10.],\n",
165
+ " [ 0., 0., 0., ..., 5., 5., 8.],\n",
166
+ " [ 0., 0., 0., ..., 5., 3., 8.]])"
167
+ ]
168
+ },
169
+ "execution_count": 10,
170
+ "metadata": {},
171
+ "output_type": "execute_result"
172
+ }
173
+ ],
174
+ "source": [
175
+ "ct_encoding.fit_transform(X_train)"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 11,
181
+ "id": "a6aa28ce",
182
+ "metadata": {},
183
+ "outputs": [
184
+ {
185
+ "data": {
186
+ "text/plain": [
187
+ "(18260, 80)"
188
+ ]
189
+ },
190
+ "execution_count": 11,
191
+ "metadata": {},
192
+ "output_type": "execute_result"
193
+ }
194
+ ],
195
+ "source": [
196
+ "ct_encoding.fit_transform(X_train).shape"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "markdown",
201
+ "id": "83a0f447",
202
+ "metadata": {},
203
+ "source": [
204
+ "# Defining model\n",
205
+ "- We have already finalized our ML model to be applied by analyzing various models and performing extensive hyperparameter tuning.\n",
206
+ "- We will be using Random Forest Classifier as our ML model."
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": 12,
212
+ "id": "46c88017",
213
+ "metadata": {},
214
+ "outputs": [],
215
+ "source": [
216
+ "model = RandomForestClassifier(n_estimators=359, \n",
217
+ " criterion='gini', \n",
218
+ " max_depth=16, \n",
219
+ " max_features='log2', \n",
220
+ " min_samples_split=25, \n",
221
+ " random_state=42)"
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "markdown",
226
+ "id": "c3dcdf2f",
227
+ "metadata": {},
228
+ "source": [
229
+ "# Creating Pipeline"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": 13,
235
+ "id": "605e7f32",
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": [
239
+ "pipe = Pipeline(steps=[('encoding', ct_encoding), ('model_deploy', model)])"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 14,
245
+ "id": "04e7ad13",
246
+ "metadata": {},
247
+ "outputs": [
248
+ {
249
+ "data": {
250
+ "text/html": [
251
+ "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"β–Έ\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"β–Ύ\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;encoding&#x27;,\n",
252
+ " ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
253
+ " transformers=[(&#x27;ohe_enc&#x27;,\n",
254
+ " OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
255
+ " sparse_output=False),\n",
256
+ " [0, 1]),\n",
257
+ " (&#x27;ord_enc&#x27;,\n",
258
+ " OrdinalEncoder(categories=[[&#x27;Economy &#x27;\n",
259
+ " &#x27;Class&#x27;,\n",
260
+ " &#x27;Premium &#x27;\n",
261
+ " &#x27;Economy&#x27;,\n",
262
+ " &#x27;Business &#x27;\n",
263
+ " &#x27;Class&#x27;,\n",
264
+ " &#x27;First &#x27;\n",
265
+ " &#x27;Class&#x27;],\n",
266
+ " [&#x27;Direct&#x27;,\n",
267
+ " &#x27;Indirect&#x27;],\n",
268
+ " [&#x27;Rarely&#x27;,\n",
269
+ " &#x27;Occasionally&#x27;,\n",
270
+ " &#x27;Often&#x27;]],\n",
271
+ " encoded_missing_value=0),\n",
272
+ " [2, 3, 4])])),\n",
273
+ " (&#x27;model_deploy&#x27;,\n",
274
+ " RandomForestClassifier(max_depth=16, max_features=&#x27;log2&#x27;,\n",
275
+ " min_samples_split=25, n_estimators=359,\n",
276
+ " random_state=42))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;encoding&#x27;,\n",
277
+ " ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
278
+ " transformers=[(&#x27;ohe_enc&#x27;,\n",
279
+ " OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
280
+ " sparse_output=False),\n",
281
+ " [0, 1]),\n",
282
+ " (&#x27;ord_enc&#x27;,\n",
283
+ " OrdinalEncoder(categories=[[&#x27;Economy &#x27;\n",
284
+ " &#x27;Class&#x27;,\n",
285
+ " &#x27;Premium &#x27;\n",
286
+ " &#x27;Economy&#x27;,\n",
287
+ " &#x27;Business &#x27;\n",
288
+ " &#x27;Class&#x27;,\n",
289
+ " &#x27;First &#x27;\n",
290
+ " &#x27;Class&#x27;],\n",
291
+ " [&#x27;Direct&#x27;,\n",
292
+ " &#x27;Indirect&#x27;],\n",
293
+ " [&#x27;Rarely&#x27;,\n",
294
+ " &#x27;Occasionally&#x27;,\n",
295
+ " &#x27;Often&#x27;]],\n",
296
+ " encoded_missing_value=0),\n",
297
+ " [2, 3, 4])])),\n",
298
+ " (&#x27;model_deploy&#x27;,\n",
299
+ " RandomForestClassifier(max_depth=16, max_features=&#x27;log2&#x27;,\n",
300
+ " min_samples_split=25, n_estimators=359,\n",
301
+ " random_state=42))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">encoding: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
302
+ " transformers=[(&#x27;ohe_enc&#x27;,\n",
303
+ " OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
304
+ " sparse_output=False),\n",
305
+ " [0, 1]),\n",
306
+ " (&#x27;ord_enc&#x27;,\n",
307
+ " OrdinalEncoder(categories=[[&#x27;Economy Class&#x27;,\n",
308
+ " &#x27;Premium Economy&#x27;,\n",
309
+ " &#x27;Business Class&#x27;,\n",
310
+ " &#x27;First Class&#x27;],\n",
311
+ " [&#x27;Direct&#x27;,\n",
312
+ " &#x27;Indirect&#x27;],\n",
313
+ " [&#x27;Rarely&#x27;,\n",
314
+ " &#x27;Occasionally&#x27;,\n",
315
+ " &#x27;Often&#x27;]],\n",
316
+ " encoded_missing_value=0),\n",
317
+ " [2, 3, 4])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ohe_enc</label><div class=\"sk-toggleable__content\"><pre>[0, 1]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;, sparse_output=False)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ord_enc</label><div class=\"sk-toggleable__content\"><pre>[2, 3, 4]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(categories=[[&#x27;Economy Class&#x27;, &#x27;Premium Economy&#x27;,\n",
318
+ " &#x27;Business Class&#x27;, &#x27;First Class&#x27;],\n",
319
+ " [&#x27;Direct&#x27;, &#x27;Indirect&#x27;],\n",
320
+ " [&#x27;Rarely&#x27;, &#x27;Occasionally&#x27;, &#x27;Often&#x27;]],\n",
321
+ " encoded_missing_value=0)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">remainder</label><div class=\"sk-toggleable__content\"><pre>[&#x27;seat_comfort&#x27;, &#x27;cabin_service&#x27;, &#x27;food_bev&#x27;, &#x27;entertainment&#x27;, &#x27;ground_service&#x27;, &#x27;value_for_money&#x27;, &#x27;overall&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">passthrough</label><div class=\"sk-toggleable__content\"><pre>passthrough</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(max_depth=16, max_features=&#x27;log2&#x27;, min_samples_split=25,\n",
322
+ " n_estimators=359, random_state=42)</pre></div></div></div></div></div></div></div>"
323
+ ],
324
+ "text/plain": [
325
+ "Pipeline(steps=[('encoding',\n",
326
+ " ColumnTransformer(remainder='passthrough',\n",
327
+ " transformers=[('ohe_enc',\n",
328
+ " OneHotEncoder(handle_unknown='ignore',\n",
329
+ " sparse_output=False),\n",
330
+ " [0, 1]),\n",
331
+ " ('ord_enc',\n",
332
+ " OrdinalEncoder(categories=[['Economy '\n",
333
+ " 'Class',\n",
334
+ " 'Premium '\n",
335
+ " 'Economy',\n",
336
+ " 'Business '\n",
337
+ " 'Class',\n",
338
+ " 'First '\n",
339
+ " 'Class'],\n",
340
+ " ['Direct',\n",
341
+ " 'Indirect'],\n",
342
+ " ['Rarely',\n",
343
+ " 'Occasionally',\n",
344
+ " 'Often']],\n",
345
+ " encoded_missing_value=0),\n",
346
+ " [2, 3, 4])])),\n",
347
+ " ('model_deploy',\n",
348
+ " RandomForestClassifier(max_depth=16, max_features='log2',\n",
349
+ " min_samples_split=25, n_estimators=359,\n",
350
+ " random_state=42))])"
351
+ ]
352
+ },
353
+ "execution_count": 14,
354
+ "metadata": {},
355
+ "output_type": "execute_result"
356
+ }
357
+ ],
358
+ "source": [
359
+ "pipe.fit(X_train, y_train)"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "markdown",
364
+ "id": "641083dd",
365
+ "metadata": {},
366
+ "source": [
367
+ "# Exporting pipeline model as a joblib file"
368
+ ]
369
+ },
370
+ {
371
+ "cell_type": "code",
372
+ "execution_count": 16,
373
+ "id": "db7f494a",
374
+ "metadata": {},
375
+ "outputs": [
376
+ {
377
+ "data": {
378
+ "text/plain": [
379
+ "['ratings_model.joblib']"
380
+ ]
381
+ },
382
+ "execution_count": 16,
383
+ "metadata": {},
384
+ "output_type": "execute_result"
385
+ }
386
+ ],
387
+ "source": [
388
+ "# joblib.dump(pipe, 'ratings_model.joblib')"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "markdown",
393
+ "id": "3ebceee1",
394
+ "metadata": {},
395
+ "source": [
396
+ "# Loading and taking predictions using exported model"
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": 19,
402
+ "id": "d4219971",
403
+ "metadata": {},
404
+ "outputs": [
405
+ {
406
+ "name": "stdout",
407
+ "output_type": "stream",
408
+ "text": [
409
+ " precision recall f1-score support\n",
410
+ "\n",
411
+ " no 0.96 0.96 0.96 2333\n",
412
+ " yes 0.96 0.95 0.96 2233\n",
413
+ "\n",
414
+ " accuracy 0.96 4566\n",
415
+ " macro avg 0.96 0.96 0.96 4566\n",
416
+ "weighted avg 0.96 0.96 0.96 4566\n",
417
+ "\n"
418
+ ]
419
+ }
420
+ ],
421
+ "source": [
422
+ "load_model = joblib.load('ratings_model.joblib')\n",
423
+ "y_pred = load_model.predict(X_test)\n",
424
+ "print(classification_report(y_test, y_pred))"
425
+ ]
426
+ },
427
+ {
428
+ "cell_type": "markdown",
429
+ "id": "18279bbf",
430
+ "metadata": {},
431
+ "source": [
432
+ "### FINAL NOTES\n",
433
+ "- The model has been exported to a joblib file which can be used to deploy the model to a production environment.\n",
434
+ "- We have cross-checked the exported model and it is giving 96% accuracy on the test data."
435
+ ]
436
+ }
437
+ ],
438
+ "metadata": {
439
+ "kernelspec": {
440
+ "display_name": "Python 3 (ipykernel)",
441
+ "language": "python",
442
+ "name": "python3"
443
+ },
444
+ "language_info": {
445
+ "codemirror_mode": {
446
+ "name": "ipython",
447
+ "version": 3
448
+ },
449
+ "file_extension": ".py",
450
+ "mimetype": "text/x-python",
451
+ "name": "python",
452
+ "nbconvert_exporter": "python",
453
+ "pygments_lexer": "ipython3",
454
+ "version": "3.10.10"
455
+ }
456
+ },
457
+ "nbformat": 4,
458
+ "nbformat_minor": 5
459
+ }
Notebooks/raw_data.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8518b2905c0bb31fa4a15ee12505e6c4b2e375e4296a93b008044ecd81c047cf
3
+ size 24238491
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import base64

# Setting page config
st.set_page_config(page_title="Home", page_icon=":house:", layout="wide")

#-------------------------------- Background and custom CSS -------------------------------------#
# implement background formatting
def set_bg_hack(main_bg, main_bg_ext="jpg"):
    """Set a full-page background image for the Streamlit app.

    The image is read from disk, base64-encoded, and injected as an
    inline CSS ``background`` data URI on the ``.stApp`` container.

    Parameters
    ----------
    main_bg : str
        Path to the background image file.
    main_bg_ext : str, optional
        Image file extension used in the data URI (default ``"jpg"``).
    """
    # Read and encode the image; 'with' guarantees the handle is closed
    # (the original left the file object open on every script rerun).
    with open(main_bg, "rb") as bg_file:
        encoded_bg = base64.b64encode(bg_file.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{encoded_bg});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95% ;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )

set_bg_hack("images/dark_bg_home.jpg")

# Setting custom css: make the Streamlit header transparent so the
# background image shows through. Plain string — nothing to interpolate,
# so no f-string (and no doubled braces) is needed.
css = """
<style>

[data-testid="stHeader"] {
    background: rgba(0,0,0,0);
}

</style>
"""
st.markdown(css, unsafe_allow_html=True)

#-------------------------------- Sidebar Modification -------------------------------------#
# Setting logo on sidebar
st.sidebar.image("images/logo.png", caption="About this app")
st.sidebar.markdown("##")
st.sidebar.markdown("##")
st.sidebar.markdown("##")
st.sidebar.markdown("[![GitHub](https://img.shields.io/badge/GitHub-Profile-blue?style=for-the-badge&logo=github)](https://github.com/prithush92)")
st.sidebar.markdown("[![LinkedIn](https://img.shields.io/badge/LinkedIn-Profile-blue?style=for-the-badge&logo=linkedin)](https://www.linkedin.com/in/prithush92/)")

# Page title
st.title("Airline Passenger Feedback Portal ✈️")

# Description
st.markdown("""
Welcome to the Airline Passenger Feedback Portal! This app analyzes passenger ratings and reviews and predicts how likely is a passenger to recommend the airline. \n
""")

# Features
st.header("🚩 Features", divider="red")
st.markdown("""
- **Analyze Passenger Ratings**: A Machine Learning model is deployed to predict customer recommendation status based on Ratings.
- **Review Sentiment Analysis**: Customer Reviews are deeply analyzed and sentiment analysis is performed to determine the sentiment of the review.
- **Overall Recommendation Prediction**: Finally using both the Ratings and Review Sentiment, Overall Recommendation Status of the user is predicted.
- **Airline Reviews Dashboard**: For the convenience of Airline Companies, a dashboard is designed to easily visualize Passenger Ratings
and Important Keywords in both Positive and Negative Reviews.
""")

# Technologies Used
st.header("🌐 Technologies Used", divider="blue")
st.markdown("""
- **SQLite**: Database management system for storing and retrieving passenger feedback data.
- **Pandas**: Data manipulation and analysis library for handling datasets.
- **NumPy**: Numerical computing library for performing mathematical operations.
- **Matplotlib**: Visualization library for creating insightful plots and charts.
- **WordCloud**: Visualization tool for generating word clouds from textual data.
- **TensorFlow**: Deep learning framework for building and training machine learning models.
""")

# About the Developer
st.header("👨🏻‍💻 About the Developer", divider="green")
st.write("""
This app is developed by **Prithu Sharma**.
""")
st.markdown(
    """
    [![GitHub](https://img.shields.io/badge/GitHub-Profile-blue?style=for-the-badge&logo=github)](https://github.com/prithush92)
    [![LinkedIn](https://img.shields.io/badge/LinkedIn-Profile-blue?style=for-the-badge&logo=linkedin)](https://www.linkedin.com/in/prithush92/)
    """
)

# Footer
st.markdown("---")
st.write("Explore the app and make informed decisions based on passenger feedback!")
data.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e54875f79f986d64329099ed965236b0cf77ceee9a4139c70c64339aaa7fd2e
3
+ size 40427520
images/air_white.jpeg ADDED
images/air_white_flip.jpg ADDED
images/dark_bg_home.jpg ADDED
images/dashboard_bg.jpg ADDED
images/form_bg.jpg ADDED

Git LFS Details

  • SHA256: 90bf29c094e9ba1032ffde4e7b433839abc2a0f4858972038a445eda1d235609
  • Pointer size: 132 Bytes
  • Size of remote file: 1.44 MB
images/logo.png ADDED
nlp_model/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a50311c25f3ca296cb576f49c98420f5d1d944ec0729fd1365a70bb78fc6929
3
+ size 54
nlp_model/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2161136a12f9a758f2283b7a7dcdea7a6304ee5b165b79218601b270e2d9d25
3
+ size 32086
nlp_model/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a501dc32d63688682a828760f2d109dec97473e0e39e761d83bc251c5e891f
3
+ size 11251034
nlp_model/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:719f227365b36776469b240ebe2e5a5a125b04a03a7af4691646a6e76eb05a41
3
+ size 1029836856
nlp_model/variables/variables.index ADDED
Binary file (15.8 kB). View file
 
pages/2_✈️_Feedback.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing Necessary Libraries
2
+ import streamlit as st
3
+ import sqlite3
4
+ import numpy as np
5
+ import pandas as pd
6
+ import joblib
7
+ import tensorflow as tf
8
+ import re
9
+ import string
10
+ from nltk.tokenize import RegexpTokenizer
11
+ import spacy
12
+ import datetime
13
+ import base64
14
+
15
+ #-------------------------------- Setting Page Style -------------------------------------#
16
+ st.set_page_config(layout="wide", page_title="Customer Feedback", page_icon="✈️")
17
+
18
+ #impliment background formating
19
+ def set_bg_hack(main_bg):
20
+ # set bg name
21
+ main_bg_ext = "jpg"
22
+ st.markdown(
23
+ f"""
24
+ <style>
25
+ .stApp {{
26
+ background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
27
+ background-repeat: no-repeat;
28
+ background-position: right 50% bottom 95% ;
29
+ background-size: cover;
30
+ background-attachment: scroll;
31
+ }}
32
+ </style>
33
+ """,
34
+ unsafe_allow_html=True,
35
+ )
36
+
37
+ set_bg_hack("images/form_bg.jpg")
38
+
39
+ css = f"""
40
+ <style>
41
+
42
+ [data-testid="stHeader"] {{
43
+ background: rgba(0,0,0,0);
44
+ }}
45
+
46
+ [data-testid="stExpander"] {{
47
+ background: rgba(0,0,0,0.4);
48
+ border: 2px solid #000071;
49
+ border-radius: 10px;
50
+ }}
51
+
52
+ </style>
53
+ """
54
+ st.markdown(css, unsafe_allow_html=True)
55
+
56
+ # Setting logo on sidebar
57
+ st.sidebar.image("images/logo.png", caption="Passenger Feedback Form")
58
+
59
+ #-------------------------------- Connecting to database and loading data -------------------------------------#
60
+ connection = sqlite3.connect('data.db')
61
+ df = pd.read_sql(sql="SELECT * FROM airline_reviews", con=connection, parse_dates=['review_date'])
62
+ connection.close()
63
+
64
+ #-------------------------------- Function to clean reviews -------------------------------------#
65
+
66
+ # Defining acronyms and contractions
67
+ acronyms_dict = pd.read_json("stopwords/acronym.json", typ="series")
68
+ contractions_dict = pd.read_json("stopwords/contractions.json", typ="series")
69
+
70
+ # Defining stopwords
71
+ alphabets = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
72
+ others = ["Γ£", "Γ₯", "Γ¬", "Γ»", "Γ»Βͺm", "ûó", "ûò", "ìñ", "Γ»Βͺre", "Γ»Βͺve", "Γ»Βͺ", "Γ»Βͺs", "ûówe", "Γ―", "ûï", "Ò€ℒ"]
73
+ common_words = ["flight", "fly", "airline", "via"]
74
+ airline_names = [airline.split()[0].lower() for airline in df['airline'].unique()]
75
+ stops = alphabets + others + common_words + airline_names
76
+ stops = list(set(stops))
77
+
78
+ # Defining Tokenizer
79
+ regexp = RegexpTokenizer("[\w']+")
80
+ nlp = spacy.load("en_core_web_sm")
81
+
82
+ # Defining function to clean reviews
83
+ def preprocess(text):
84
+
85
+ # lowercase
86
+ text = text.lower()
87
+
88
+ # remove whitespaces
89
+ text = text.strip()
90
+
91
+ # removing html tags
92
+ html = re.compile(r'<.*?>')
93
+ text = html.sub(r'', text)
94
+
95
+ # removing emoji patterns
96
+ emoji_pattern = re.compile("["
97
+ u"\U0001F600-\U0001F64F" # emoticons
98
+ u"\U0001F300-\U0001F5FF" # symbols & pictographs
99
+ u"\U0001F680-\U0001F6FF" # transport & map symbols
100
+ u"\U0001F1E0-\U0001F1FF" # flags (iOS)
101
+ u"\U00002702-\U000027B0"
102
+ u"\U000024C2-\U0001F251"
103
+ "]+", flags = re.UNICODE)
104
+ text = emoji_pattern.sub(r'', text)
105
+
106
+ # removing urls
107
+ http = "https?://\S+|www\.\S+"
108
+ pattern = r"({})".format(http)
109
+ text = re.sub(pattern, "", text)
110
+
111
+ # removing twitter usernames if they exist
112
+ pattern = r'@[\w_]+'
113
+ text = re.sub(pattern, "", text)
114
+
115
+ # Removing punctuations and numbers except ' and -
116
+ punct_str = string.punctuation + string.digits
117
+ punct_str = punct_str.replace("'", "")
118
+ punct_str = punct_str.replace("-", "")
119
+ text = text.translate(str.maketrans('', '', punct_str))
120
+
121
+ # Replacing "-" in text with empty space
122
+ text = text.replace("-", " ")
123
+
124
+ # Substituting acronyms
125
+ words = []
126
+ for word in regexp.tokenize(text):
127
+ if word in acronyms_dict.index:
128
+ words = words + acronyms_dict[word].split()
129
+ else:
130
+ words = words + word.split()
131
+ text = ' '.join(words) # acronyms
132
+
133
+ # Substituting Contractions
134
+ words = []
135
+ for word in regexp.tokenize(text):
136
+ if word in contractions_dict.index:
137
+ words = words + contractions_dict[word].split()
138
+ else:
139
+ words = words + word.split()
140
+ text = " ".join(words)
141
+
142
+ # Removing punctuations again
143
+ punct_str = string.punctuation
144
+ text = text.translate(str.maketrans('', '', punct_str))
145
+
146
+ # Lemmatization using spacy
147
+ text = " ".join([token.lemma_ for token in nlp(text)])
148
+
149
+ # Stopwords removal
150
+ text = ' '.join([word for word in regexp.tokenize(text) if word not in stops])
151
+
152
+ # Removing words with one alphabet occuring more than 3 times continuously
153
+ pattern = r'\b\w*?(.)\1{2,}\w*\b'
154
+ text = re.sub(pattern, "", text).strip()
155
+
156
+ # Removing words with less than 3 characters
157
+ pattern = r'\b\w{1,2}\b'
158
+ text = re.sub(pattern, "", text).strip()
159
+
160
+ # Removing all characters except alphabets and " " (space)
161
+ filter_text = string.ascii_letters + " "
162
+ text = "".join([chr for chr in text if chr in filter_text])
163
+
164
+ # Replacing multiple spaces with one single space
165
+ pattern = r'\s+'
166
+ text = re.sub(pattern, " ", text).strip()
167
+
168
+ # Removing certain POS tags from the text
169
+ remove_tags = ['PROPN', 'DET', 'CCONJ', 'PRON', 'AUX']
170
+ text = " ".join([token.text for token in nlp(text) if token.pos_ not in remove_tags])
171
+
172
+ # return final output
173
+ return text
174
+
175
+ #================================== Web App Designing Begins ==================================#
176
+
177
+ #-------------------------------- Container 1 for Heading -------------------------------------#
178
+ container_1 = st.container()
179
+ with container_1:
180
+ empty1, head2, empty3 = st.columns(spec = [1.5,3,1.5], gap = 'medium')
181
+ with empty1:
182
+ st.empty()
183
+ with head2:
184
+ st.markdown("<h1><center>Welcome Aboard</center></h1>",unsafe_allow_html=True)
185
+ st.markdown("<h2><center>Tell us about your Experience ✈️ </center></h2>", unsafe_allow_html=True)
186
+ with empty3:
187
+ st.empty()
188
+
189
+ #-------------------------------- Container 2 for main_content --------------------------------#
190
+ container_2 = st.container()
191
+ with container_2:
192
+ col1, col2, col3, col4 = st.columns(spec = [1,3,3,1], gap = 'medium')
193
+ with col1:
194
+ st.empty()
195
+
196
+ with col2:
197
+ expander_1 = st.expander(label = "**Your Trip Info**", expanded = True)
198
+ with expander_1:
199
+
200
+ author = st.text_input(
201
+ label = "Please Enter your Name",
202
+ placeholder = "Enter your name"
203
+ )
204
+
205
+ airline = st.selectbox(
206
+ label = "Select your Airline",
207
+ options = tuple(sorted(df['airline'].unique())),
208
+ index = 0,
209
+ placeholder = "Choose an option..."
210
+ )
211
+
212
+ traveller_type = st.selectbox(
213
+ label = "Select your Trip type",
214
+ options = ("Business", "Solo Leisure", "Couple Leisure", "Family Leisure"),
215
+ index = 0,
216
+ placeholder = "Choose an option..."
217
+ )
218
+
219
+ cabin = st.selectbox(
220
+ label = "Select your Seat Class",
221
+ options = ("Economy Class", "Premium Economy", "Business Class", "First Class"),
222
+ index = 0,
223
+ placeholder = "Choose an option..."
224
+ )
225
+
226
+ type_of_flight = st.radio(
227
+ label = "Select your Flight Type",
228
+ options = ("Direct", 'Indirect'),
229
+ index = 0,
230
+ )
231
+
232
+ frequency = st.radio(
233
+ label = "How often do you fly?",
234
+ options = ('Often', 'Occasionally', 'Rarely'),
235
+ index = 1,
236
+ )
237
+
238
+ with col3:
239
+ expander_2 = st.expander(label = "Your Ratings", expanded = True)
240
+ with expander_2:
241
+ seat_comfort = st.slider(
242
+ label = "How comfortable are you with your seat?",
243
+ min_value = 1,
244
+ max_value = 5,
245
+ value = 3
246
+ )
247
+
248
+ cabin_service = st.slider(
249
+ label = "Please Rate your Cabin Service",
250
+ min_value = 1,
251
+ max_value = 5,
252
+ value = 3
253
+ )
254
+
255
+ food_bev = st.slider(
256
+ label = "Please rate the quality of food/beverages",
257
+ min_value = 1,
258
+ max_value = 5,
259
+ value = 3
260
+ )
261
+
262
+ entertainment = st.slider(
263
+ label = "Please rate the Entertainment Service",
264
+ min_value = 1,
265
+ max_value = 5,
266
+ value = 3
267
+ )
268
+
269
+ ground_service = st.slider(
270
+ label = "Please rate the Ground Service",
271
+ min_value = 1,
272
+ max_value = 5,
273
+ value = 3
274
+ )
275
+
276
+ value_for_money = st.slider(
277
+ label = "Value for Money Rating",
278
+ min_value = 1,
279
+ max_value = 5,
280
+ value = 3
281
+ )
282
+
283
+ with col4:
284
+ st.empty()
285
+
286
+ #-------------------------------- Container 3 for Final Rating Slider and Customer Review--------------------------------#
287
+ container_3 = st.container()
288
+ with container_3:
289
+ empty1, head2, empty3 = st.columns(spec = [1,3,1], gap = 'medium')
290
+ with empty1:
291
+ st.empty()
292
+ with head2:
293
+ overall = st.slider(
294
+ label = "How was your overall experience with the Airline?",
295
+ min_value = 1,
296
+ max_value = 10,
297
+ value = 7
298
+ )
299
+
300
+ review = st.text_area("Enter your review")
301
+ with empty3:
302
+ st.empty()
303
+
304
+ #-------------------------------- Creating DataFrame to pass into ML model --------------------------------#
305
+ temp_df = pd.DataFrame(
306
+ data = [[airline, traveller_type, cabin, type_of_flight, frequency,
307
+ seat_comfort, cabin_service, food_bev, entertainment,
308
+ ground_service, value_for_money, overall]],
309
+ columns = ['airline', 'traveller_type', 'cabin', 'type_of_flight', 'frequency',
310
+ 'seat_comfort', 'cabin_service', 'food_bev', 'entertainment',
311
+ 'ground_service', 'value_for_money', 'overall']
312
+ )
313
+
314
+ # Loading ML model using joblib file
315
+ model = joblib.load('ratings_model.joblib')
316
+
317
+ # Defining a function to store the nlp_model in streamlit cache memory
318
+ @st.cache_resource
319
+ def cache_model(model_name):
320
+ model = tf.keras.models.load_model(model_name)
321
+ return model
322
+
323
+ # Loading the nlp_model
324
+ nlp_model = cache_model("nlp_model")
325
+
326
+ #-------------------------------- Container 4 for Final Predictions --------------------------------#
327
+
328
+ container_4 = st.container()
329
+ with container_4:
330
+ empty1, head2, empty3 = st.columns(spec = [1,3,1], gap = 'medium')
331
+ with empty1:
332
+ st.empty()
333
+
334
+ with head2:
335
+
336
+ # Creating a toggle button to save form to database
337
+ save_to_db = st.toggle("Save to Database")
338
+
339
+ # Creating a button to get prediction
340
+ if st.button('Submit'):
341
+ y_pred = model.predict(temp_df)
342
+ y_pred_prob = model.predict_proba(temp_df)
343
+
344
+ clean_review = preprocess(review)
345
+ review_pred_proba = nlp_model.predict([clean_review])
346
+ review_pred = np.where(review_pred_proba > 0.5, 1, 0)[0][0]
347
+
348
+ review_date = datetime.datetime.now()
349
+
350
+ # if sum of probabilities of both ratings model and nlp_model >=1 then the author has recommended the airline
351
+ if y_pred_prob[:,1] + review_pred_proba >= 1:
352
+ recommended = "yes"
353
+ if y_pred_prob[:,1] + review_pred_proba < 1:
354
+ recommended = "no"
355
+
356
+ append_df = pd.DataFrame(data = [[review_date, author, airline, traveller_type, cabin, type_of_flight, frequency,
357
+ seat_comfort, cabin_service, food_bev, entertainment,
358
+ ground_service, value_for_money, overall, review, clean_review, recommended]],
359
+ columns = ['review_date', 'author', 'airline', 'traveller_type', 'cabin', 'type_of_flight', 'frequency',
360
+ 'seat_comfort', 'cabin_service', 'food_bev', 'entertainment',
361
+ 'ground_service', 'value_for_money', 'overall', 'customer_review', 'review_clean', 'recommended'])
362
+
363
+ # If save_to_db toggle is True, then append to database
364
+ if save_to_db:
365
+ # Creating a connection to the database
366
+ connection = sqlite3.connect('data.db')
367
+ # Appending append_df to airline_reviews table in data.db
368
+ append_df.to_sql(name='airline_reviews', con=connection, if_exists='append', index=False)
369
+ # Closing the connection to the database
370
+ connection.close()
371
+
372
+ if review=="":
373
+ st.error("Please write your Review")
374
+ st.stop()
375
+
376
+ if clean_review=="":
377
+ st.error("Please write a proper review")
378
+ st.stop()
379
+
380
+ if author=="":
381
+ st.error("Please Enter your Name")
382
+ st.stop()
383
+
384
+ # Show the entered data
385
+ st.dataframe(append_df)
386
+
387
+ if (y_pred[0] == 'yes') & (review_pred == 1):
388
+ st.success("Thank you for your positive feedback! \nWe're delighted to hear that you had a great experience with our service.")
389
+ st.balloons()
390
+ elif (y_pred[0] == 'yes') & (review_pred == 0):
391
+ st.warning("We appreciate your positive rating, but we're sorry to hear about your concerns in the review. \nPlease share more details so we can address them and enhance your experience.")
392
+ elif (y_pred[0] == 'no') & (review_pred == 0):
393
+ st.error("We apologize for falling short of your expectations. \nYour feedback is valuable, and we're committed to improving. \nPlease provide specific details about your experience for us to better understand and address the issues.")
394
+ elif (y_pred[0] == 'no') & (review_pred == 1):
395
+ st.error("We're sorry to hear about your negative rating, but we're glad to see your positive comments in the review. \nWe'd like to learn more about your concerns to ensure we address any issues and enhance your satisfaction.")
396
+
397
+ with empty3:
398
+ st.empty()
pages/3_πŸ“Š_Dashboard.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import sqlite3
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ from wordcloud import WordCloud
7
+ from PIL import Image
8
+ import base64
9
+
10
+ #-------------------------------- Setting Page Style -------------------------------------#
11
+ st.set_option('deprecation.showPyplotGlobalUse', False)
12
+
13
+ st.set_page_config(page_title="Dashboard", page_icon=":bar_chart:", layout="wide")
14
+ # st.subheader("Dashboard")
15
+ # st.markdown("##")
16
+
17
+ #-------------------------------- Background and custom CSS -------------------------------------#
18
+ #impliment background formating
19
+ def set_bg_hack(main_bg):
20
+ # set bg name
21
+ main_bg_ext = "jpg"
22
+ st.markdown(
23
+ f"""
24
+ <style>
25
+ .stApp {{
26
+ background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
27
+ background-repeat: no-repeat;
28
+ background-position: right 50% bottom 95% ;
29
+ background-size: cover;
30
+ background-attachment: scroll;
31
+ }}
32
+ </style>
33
+ """,
34
+ unsafe_allow_html=True,
35
+ )
36
+
37
+ set_bg_hack("images/dashboard_bg.jpg")
38
+
39
+ # Setting custom css
40
+ css = f"""
41
+ <style>
42
+
43
+ [data-testid="stHeader"] {{
44
+ background: rgba(0,0,0,0);
45
+ }}
46
+
47
+ </style>
48
+ """
49
+ st.markdown(css, unsafe_allow_html=True)
50
+
51
+ #-------------------------------- Connecting to database and loading data -------------------------------------#
52
+ connection = sqlite3.connect('data.db')
53
+ df = pd.read_sql(sql="SELECT * FROM airline_reviews", con=connection, parse_dates=['review_date'])
54
+
55
+ # Setting logo on sidebar
56
+ st.sidebar.image("images/logo.png", caption='Airline Reviews Dashboard')
57
+
58
+ #-------------------------------- Creating Filters --------------------------------#
59
+ st.sidebar.header("Filters")
60
+
61
+ airline = st.sidebar.selectbox(
62
+ label = "Select your Airline",
63
+ options=df["airline"].unique(),
64
+ index=6
65
+ )
66
+
67
+ traveller_type = st.sidebar.multiselect(
68
+ label = "Select Traveller Type",
69
+ options=df["traveller_type"].unique(),
70
+ default=df["traveller_type"].unique()
71
+ )
72
+
73
+ cabin = st.sidebar.multiselect(
74
+ label = "Select Cabin",
75
+ options=df["cabin"].unique(),
76
+ default=df["cabin"].unique()
77
+ )
78
+
79
+ type_of_flight = st.sidebar.multiselect(
80
+ label = "Select flight type",
81
+ options=df["type_of_flight"].unique(),
82
+ default=df["type_of_flight"].unique()
83
+ )
84
+
85
+ frequency = st.sidebar.multiselect(
86
+ label = "Select Frequency",
87
+ options=df["frequency"].unique(),
88
+ default=df["frequency"].unique()
89
+ )
90
+
91
+ df_selection = df.query(
92
+ "airline == @airline & traveller_type == @traveller_type & cabin == @cabin & type_of_flight == @type_of_flight & frequency == @frequency"
93
+ )
94
+
95
+ #-------------------------------- Defining Function to show KPIs --------------------------------#
96
+ def KPI():
97
+ with st.expander("View Data"):
98
+ showdata = st.multiselect(label='Filter:', options=df_selection.columns,
99
+ default=['review_date', 'author', 'airline', 'cabin', 'seat_comfort',
100
+ 'cabin_service', 'food_bev', 'entertainment',
101
+ 'ground_service', 'value_for_money',
102
+ 'overall', 'review_clean', 'recommended'])
103
+ st.write(df_selection[showdata])
104
+
105
+ # Designing KPIs
106
+ total_reviews = len(df_selection)
107
+ total_positive_reviews = len(df_selection[df_selection["recommended"] == 'yes'])
108
+ total_negative_reviews = len(df_selection[df_selection["recommended"] == 'no'])
109
+ positive_percentage = float((total_positive_reviews / total_reviews) * 100)
110
+ negative_percentage = float((total_negative_reviews / total_reviews) * 100)
111
+
112
+ # Creating columns for all KPIs
113
+ col1, col2, col3, col4, col5 = st.columns(5, gap="medium")
114
+
115
+ # Assigning KPIs to columns one by one
116
+ with col1:
117
+ st.info('Total Reviews', icon='πŸ”')
118
+ st.metric(label = 'Total Reviews', value=total_reviews)
119
+
120
+ with col2:
121
+ st.info('Total Positive Reviews', icon='βœ…')
122
+ st.metric(label = 'Total Positive Reviews', value=total_positive_reviews)
123
+
124
+ with col3:
125
+ st.info('Total Negative Reviews', icon='❌')
126
+ st.metric(label = 'Total Negative Reviews', value=total_negative_reviews)
127
+
128
+ with col4:
129
+ st.info('Positive Percentage', icon='πŸ‘')
130
+ st.metric(label = 'Positive Percentage', value=f"{positive_percentage:,.2f}")
131
+
132
+ with col5:
133
+ st.info('Negative Percentage', icon='πŸ‘Ž')
134
+ st.metric(label = 'Negative Percentage', value=f"{negative_percentage:,.2f}")
135
+
136
+ # st.markdown("---")
137
+
138
+ #-------------------------------- Defining Function to Wordcloud --------------------------------#
139
+ def graphs():
140
+
141
+ positive_review_text = " ".join(review for review in df_selection[df_selection["recommended"] == 'yes']['review_clean'])
142
+ negative_review_text = " ".join(review for review in df_selection[df_selection["recommended"] == 'no']['review_clean'])
143
+
144
+ plot1, plot2 = st.columns(2)
145
+
146
+ with plot1:
147
+ air_mask = np.array(Image.open("images/air_white.jpeg"))
148
+ pos_wordcloud = WordCloud(max_words=50,
149
+ mask = air_mask,
150
+ colormap="summer",
151
+ min_word_length=3,
152
+ background_color="black").generate(positive_review_text)
153
+ plt.imshow(pos_wordcloud, interpolation="bilinear")
154
+ plt.gcf().set_facecolor("black")
155
+ plt.axis("off")
156
+ plt.title("Positive Reviews", fontsize=15, fontweight="bold", color="green")
157
+ st.pyplot()
158
+
159
+ with plot2:
160
+ air_mask_flip = np.array(Image.open("images/air_white_flip.jpg"))
161
+ neg_wordcloud = WordCloud(max_words=50,
162
+ mask = air_mask_flip,
163
+ colormap="autumn",
164
+ min_word_length=3,
165
+ background_color="black").generate(negative_review_text)
166
+ plt.imshow(neg_wordcloud, interpolation="bilinear")
167
+ plt.gcf().set_facecolor("black")
168
+ plt.axis("off")
169
+ plt.title("Negative Reviews", fontsize=15, fontweight="bold", color="red")
170
+ st.pyplot()
171
+
172
+ #-------------------------------- Defining Function to showcase Average Ratings --------------------------------#
173
+ def Ratings():
174
+ # Creating an expander showcasing average ratings for all amenities
175
+ with st.expander("View Average Ratings", expanded=True):
176
+ avg_seat_comfort = float(df_selection["seat_comfort"].mean())
177
+ avg_cabin_service = float(df_selection["cabin_service"].mean())
178
+ avg_food_bev = float(df_selection["food_bev"].mean())
179
+ avg_entertainment = float(df_selection["entertainment"].mean())
180
+ avg_ground_service = float(df_selection["ground_service"].mean())
181
+ avg_value_for_money = float(df_selection["value_for_money"].mean())
182
+
183
+ avg1, avg2, avg3, avg4, avg5, avg6 = st.columns(6, gap="medium")
184
+
185
+ with avg1:
186
+ st.info('Seat Comfort', icon='πŸ’Ί')
187
+ st.metric(label = 'Average', value=f"{avg_seat_comfort:,.1f}")
188
+
189
+ with avg2:
190
+ st.info('Cabin Service', icon='πŸ›οΈ')
191
+ st.metric(label = 'Average', value=f"{avg_cabin_service:,.1f}")
192
+
193
+ with avg3:
194
+ st.info('Food & Beverage', icon='πŸ”')
195
+ st.metric(label = 'Average', value=f"{avg_food_bev:,.1f}")
196
+
197
+ with avg4:
198
+ st.info('Entertainment', icon='🎬')
199
+ st.metric(label = 'Average', value=f"{avg_entertainment:,.1f}")
200
+
201
+ with avg5:
202
+ st.info('Ground Service', icon='🚚')
203
+ st.metric(label = 'Average', value=f"{avg_ground_service:,.1f}")
204
+
205
+ with avg6:
206
+ st.info('Value for Money', icon='πŸ’°')
207
+ st.metric(label = 'Average', value=f"{avg_value_for_money:,.1f}")
208
+
209
+ # st.markdown("---")
210
+
211
+ #--------------------------------------- Calling Functions ----------------------------------------#
212
+ KPI()
213
+ graphs()
214
+ Ratings()
ratings_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67f9e41a2389e5d3550d8d551dd4c3568d91044964c92738c712325da7c2ec0b
3
+ size 18548591
raw_data.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8518b2905c0bb31fa4a15ee12505e6c4b2e375e4296a93b008044ecd81c047cf
3
+ size 24238491
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.3.0
2
+ Pillow == 9.4.0
3
+ nltk == 3.8.1
4
+ numpy == 1.24.3
5
+ pandas == 2.0.3
6
+ tensorflow == 2.14.0
7
+ regex == 2022.7.9
8
+ joblib
9
+ spacy == 3.7.2
10
+ scikit-learn == 1.3.2
11
+ wordcloud == 1.9.3
12
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
stopwords/acronym.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"aka":"also known as","asap":"as soon as possible","brb":"be right back","btw":"by the way","dob":"date of birth","faq":"frequently asked questions","fyi":"for your information","idk":"i don't know","idc":"i don't care","iirc":"if i recall correctly","imo":"in my opinion","irl":"in real life","lmk":"let me know","lol":"laugh out loud","ngl":"not gonna lie","noyb":"none of your business","nvm":"never mind","ofc":"of course","omg":"oh my god","pfa":"please find attached","rofl":"rolling on the floor laughing","stfu":"shut the fuck up","tba":"to be announced","tbc":"to be continued","tbd":"to be determined","tbh":"to be honest","ttyl":"talk to you later","wtf":"what the fuck","wth":"what the heck"}
stopwords/contractions.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"'aight":"alright","ain't":"are not","amn't":"am not","arencha":"are not you","aren't":"are not","'bout":"about","can't":"cannot","cap'n":"captain","'cause":"because","'cept":"except","could've":"could have","couldn't":"could not","couldn't've":"could not have","dammit":"damn it","daren't":"dare not","daresn't":"dare not","dasn't":"dare not","didn't":"did not","didn":"did not","doesn't":"does not","doin'":"doing","don't":"do not","dunno":"do not know","d'ye":"do you","e'en":"even","e'er":"ever","'em":"them","everybody's":"everybody is","everyone's":"everyone is","fo'c'sle":"forecastle","finna":"fixing to","'gainst":"against","g'day":"good day","gimme":"give me","giv'n":"given","gonna":"going to","gon't":"go not","gotcha":"got you","gotta":"got to","gtg":"got to go","hadn't":"had not","had've":"had have","hasn't":"has not","haven't":"have not","he'd":"he had","he'll":"he shall","helluva":"hell of a","he's":"he is","here's":"here is","he've":"he have","how'd":"how would","howdy":"how do you do","how'll":"how will","how're":"how are","how's":"how is","i'd":"i had","i'd've":"i would have","i'll":"i shall","i'm":"i am","imma":"i am about to","i'm'a":"i am about to","i'm'o":"i am going to","innit":"is it not","ion":"i do not","i've":"i have","isn't":"is not","it'd":"it would","it'll":"it shall","it's":"it is","iunno":"i do not know","kinda":"kind of","let's":"let us","li'l":"little","ma'am":"madam","mayn't":"may not","may've":"may have","methinks":"me thinks","mightn't":"might not","might've":"might have","mustn't":"must not","mustn't've":"must not have","must've":"must have","'neath":"beneath","needn't":"need not","nal":"and all","ne'er":"never","o'clock":"of the clock","o'er":"over","ol'":"old","oughtn't":"ought not","'round":"around","'s":"is","shalln't":"shall not","shan't":"shall not","she'd":"she had","she'll":"she shall","she's":"she is","should've":"should have","shouldn't":"should not","shouldn't've":"should not have","somebody's":"somebody 
is","someone's":"someone is","something's":"something is","so're":"so are","so's":"so is","so've":"so have","that'll":"that shall","that're":"that are","that's":"that is","that'd":"that would","there'd":"there had","there'll":"there shall","there're":"there are","there's":"there is","these're":"these are","these've":"these have","they'd":"they had","they'll":"they shall","they're":"they are","they've":"they have","this's":"this is","those're":"those are","those've":"those have","'thout":"without","'til":"until","'tis":"it is","to've":"to have","'twas":"it was","'tween":"between","'twhere":"it were","wanna":"want to","wasn't":"was not","we'd":"we had","we'd've":"we would have","we'll":"we shall","we're":"we are","we've":"we have","weren't":"were not","whatcha":"what are you","what'd":"what did","what'll":"what shall","what're":"what are","what's":"what is","what've":"what have","when's":"when is","where'd":"where did","where'll":"where shall","where're":"where are","where's":"where is","where've":"where have","which'd":"which had","which'll":"which shall","which're":"which are","which's":"which is","which've":"which have","who'd":"who would","who'd've":"who would have","who'll":"who shall","who're":"who are","who's":"who is","who've":"who have","why'd":"why did","why're":"why are","why's":"why is","willn't":"will not","won't":"will not","wonnot":"will not","would've":"would have","wouldn't":"would not","wouldn't've":"would not have","y'all":"you all","y'all'd've":"you all would have","y'all'd'n't've":"you all would not have","y'all're":"you all are","y'all'ren't":"you all are not","y'at":"you at","yes'm":"yes madam","yessir":"yes sir","you'd":"you had","you'll":"you shall","you're":"you are","you've":"you have","aight":"alright","aint":"are not","amnt":"am not","arent":"are not","cant":"cannot","cause":"because","couldve":"could have","couldnt":"could not","couldntve":"could not have","darent":"dare not","daresnt":"dare not","dasnt":"dare not","didnt":"did 
not","doesnt":"does not","doin":"doing","dont":"do not","eer":"ever","everybodys":"everybody is","everyones":"everyone is","gday":"good day","givn":"given","gont":"go not","hadnt":"had not","hadve":"had have","hasnt":"has not","havent":"have not","hed":"he had","hell":"he shall","hes":"he is","heve":"he have","howd":"how did","howll":"how will","howre":"how are","hows":"how is","idve":"i would have","ill":"i shall","im":"i am","ima":"i am about to","imo":"i am going to","ive":"i have","isnt":"is not","itd":"it would","itll":"it shall","its":"it is","lets":"let us","lil":"little","maam":"madam","maynt":"may not","mayve":"may have","mightnt":"might not","mightve":"might have","mustnt":"must not","mustntve":"must not have","mustve":"must have","neednt":"need not","neer":"never","oclock":"of the clock","oer":"over","ol":"old","oughtnt":"ought not","shallnt":"shall not","shant":"shall not","shed":"she had","shell":"she shall","shes":"she is","shouldve":"should have","shouldnt":"should not","shouldntve":"should not have","somebodys":"somebody is","someones":"someone is","somethings":"something is","thatll":"that shall","thatre":"that are","thatd":"that would","thered":"there had","therell":"there shall","therere":"there are","theres":"there is","thesere":"these are","theseve":"these have","theyd":"they had","theyll":"they shall","theyre":"they are","theyve":"they have","thiss":"this is","thosere":"those are","thoseve":"those have","tis":"it is","tove":"to have","twas":"it was","wasnt":"was not","wed":"we had","wedve":"we would have","were":"we are","weve":"we have","werent":"were not","whatd":"what did","whatll":"what shall","whatre":"what are","whats":"what is","whatve":"what have","whens":"when is","whered":"where did","wherell":"where shall","wherere":"where are","wheres":"where is","whereve":"where have","whichd":"which had","whichll":"which shall","whichre":"which are","whichs":"which is","whichve":"which have","whod":"who would","whodve":"who would 
have","wholl":"who shall","whore":"who are","whos":"who is","whove":"who have","whyd":"why did","whyre":"why are","whys":"why is","wont":"will not","wouldve":"would have","wouldnt":"would not","wouldntve":"would not have","yall":"you all","yalldve":"you all would have","yallre":"you all are","youd":"you had","youll":"you shall","youre":"you are","youve":"you have","'re":"are","thats":"that is"}