Spaces:
Runtime error
Runtime error
File size: 12,790 Bytes
5aa6b7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 |
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
import re
import string
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
# Page configuration goes first: Streamlit expects set_page_config to be the
# first Streamlit command executed by the script.
# The icon was previously a mis-decoded byte sequence; it is the airplane emoji.
st.set_page_config(layout="wide", page_title="AeroAdvisor", page_icon="✈️")


@st.cache_data
def _load_reviews(csv_path):
    """Read the cleaned airline reviews CSV.

    Cached so the file is not re-parsed on every widget-triggered rerun.
    """
    return pd.read_csv(csv_path)


# Review dataset; its 'airline' column feeds the airline dropdown further down.
df = _load_reviews("airline_reviews_cleaned.csv")

# Custom styling: full-page background image, transparent header and
# semi-transparent bordered expanders.
# NOTE(review): the background is a Discord attachment URL carrying expiry
# parameters (ex/is/hm); it may stop resolving - consider bundling the image.
css = """
<style>
[data-testid="stAppViewContainer"] > .main {
background-image: url("https://cdn.discordapp.com/attachments/1075699203046641687/1165351110828101673/PhotoReal_From_a_distance_the_plane_appears_as_a_tiny_speck_ag_1.jpg?ex=654688cb&is=653413cb&hm=096dcc994304b93afd210555607d563f725d74c1dddfd392176f11e15076bcfa&");
background-size: 120%;
background-position: top left;
background-repeat: repeat;
background-attachment: local;
}
[data-testid="stHeader"] {
background: rgba(0,0,0,0);
}
[data-testid="stExpander"] {
background: rgba(0,0,0,0.5);
border: 2px solid #000071;
border-radius: 10px;
}
</style>
"""
st.markdown(css, unsafe_allow_html=True)
#-------------------------------- Function to clean reviews -------------------------------------#
# Check if wordnet is installed.
# "popular" is a download collection rather than an installed resource, so
# probing for "corpora/popular.zip" would fail every time and trigger a download
# attempt on each rerun. Probe for the WordNet corpus (needed by the
# lemmatizer) instead and only fall back to downloading when it is missing.
try:
    nltk.data.find("corpora/wordnet")
except LookupError:
    nltk.download('popular')

# Defining acronyms (Series: acronym -> expansion)
acronyms_dict = pd.read_json('acronym.json', typ = 'series')

# Defining contractions (Series: contraction -> expansion)
contractions_dict = pd.read_json('contractions.json', typ = 'series')

# Defining stopwords: single letters plus stray accented fragments that appear
# when text was decoded with the wrong encoding. The accented entries had
# themselves been mis-decoded in this file and are restored here.
alphabets = list(string.ascii_lowercase)
others = ["ã", "å", "ì", "û", "ûªm", "ûó", "ûò", "ìñ", "ûªre", "ûªve", "ûª", "ûªs", "ûówe", "ï", "ûï"]
# A set gives de-duplication and constant-time membership tests.
stops = set(alphabets + others)

# Defining tokenizer: runs of word characters and apostrophes.
# Raw string so that \w is not treated as an (invalid) string escape.
regexp = RegexpTokenizer(r"[\w']+")
# Preprocessing
# Patterns and translation tables are built once at import time instead of on
# every call. Raw strings keep the regex escapes from being read as (invalid)
# string escapes; the compiled patterns are unchanged.
_HTML_TAG_RE = re.compile(r'<.*?>')
_EMOJI_RE = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "]+",
    flags=re.UNICODE,
)
# Strings beginning with http:// or https:// (but not just "http"), or www.
_URL_RE = re.compile(r"(https?://\S+|www\.\S+)")
_MENTION_RE = re.compile(r'@[\w_]+')
# Words in which any character repeats three or more times in a row.
_REPEATED_CHAR_WORD_RE = re.compile(r'\b\w*?(.)\1{2,}\w*\b')
# First pass keeps "'" (needed to recognise contractions) and "-" (turned into
# a space afterwards); everything else in punctuation + digits is deleted.
_PUNCT_DIGITS_TABLE = str.maketrans(
    '', '', (string.punctuation + string.digits).replace("'", "").replace("-", "")
)
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)
_ALLOWED_CHARS = frozenset(string.ascii_letters + " ")


def _expand_tokens(text, mapping):
    """Tokenize *text* and replace each token found in *mapping*'s index by its expansion."""
    words = []
    for word in regexp.tokenize(text):
        if word in mapping.index:
            words.extend(mapping[word].split())
        else:
            # Tokens never contain whitespace, so the token is kept as-is.
            words.append(word)
    return ' '.join(words)


def preprocess(text: str) -> str:
    """Normalize a raw review into the cleaned form passed to the NLP model.

    Steps, in order: lowercase and strip; drop HTML tags, emoji, URLs and
    @usernames; delete punctuation and digits (hyphens become spaces);
    expand acronyms and contractions; delete remaining punctuation;
    lemmatize; remove stopwords; keep only ASCII letters and spaces; remove
    words containing a character repeated three or more times in a row.

    Args:
        text: Raw review text.

    Returns:
        The cleaned text (possibly an empty string).
    """
    text = text.lower().strip()                              # lowercase, whitespaces
    text = _HTML_TAG_RE.sub('', text)                        # html tags
    text = _EMOJI_RE.sub('', text)                           # unicode char
    text = _URL_RE.sub("", text)                             # urls
    text = _MENTION_RE.sub("", text)                         # @twitter usernames
    text = text.translate(_PUNCT_DIGITS_TABLE)               # punctuation and numbers
    text = text.replace("-", " ")                            # "-"
    text = _expand_tokens(text, acronyms_dict)               # acronyms
    text = _expand_tokens(text, contractions_dict)           # contractions
    text = text.translate(_PUNCT_TABLE)                      # punctuation again to remove "'"

    # lemmatization
    lemmatizer = WordNetLemmatizer()
    text = " ".join(lemmatizer.lemmatize(word) for word in regexp.tokenize(text))

    # Stopwords Removal
    text = ' '.join(word for word in regexp.tokenize(text) if word not in stops)

    # Removing all characters except alphabets and " " (space)
    text = "".join(char for char in text if char in _ALLOWED_CHARS)

    # Removing words with one character occurring three or more times in a row
    text = _REPEATED_CHAR_WORD_RE.sub("", text).strip()

    # return final output
    return text
#-------------------------------- Container 1 for Heading -------------------------------------#
# Three columns (2:3:2); the outer two are empty spacers that centre the heading.
container_1 = st.container()
with container_1:
    left_pad, heading_col, right_pad = st.columns(spec=[2, 3, 2], gap="medium")

    with left_pad:
        st.empty()

    with heading_col:
        st.title("Welcome Aboard")
        st.write("## Tell us about your Experience :airplane:")

    with right_pad:
        st.empty()
#-------------------------------- Container 2 for main_content --------------------------------#
def _choice_box(label, options):
    """Render a select box that starts with no selection and a placeholder."""
    return st.selectbox(
        label=label,
        options=options,
        index=None,
        placeholder="Choose an option...",
    )


def _rating_slider(label):
    """Render a 1-5 rating slider that defaults to 3."""
    return st.slider(label=label, min_value=1, max_value=5, value=3)


# Four columns (1:3:3:1); the outer two are empty spacers around the two forms.
container_2 = st.container()
with container_2:
    col1, col2, col3, col4 = st.columns(spec=[1, 3, 3, 1], gap="medium")

    with col1:
        st.empty()

    # Left form: categorical details about the trip.
    with col2:
        expander_1 = st.expander(label="Your Trip Info", expanded=True)
        with expander_1:
            airline = _choice_box(
                "Enter your Airline",
                tuple(sorted(df['airline'].unique())),
            )
            traveller_type = _choice_box(
                "Enter your trip type",
                ("Business", "Solo Leisure", "Couple Leisure", "Family Leisure"),
            )
            cabin = _choice_box(
                "Enter your seat class",
                ("Economy Class", "Premium Economy", "Business Class", "First Class"),
            )
            type_of_flight = st.radio(
                label="Enter your flight type",
                options=("Direct", 'Indirect'),
                index=0,
            )
            frequency = st.radio(
                label="How often do you fly?",
                options=('Often', 'Occasionally', 'Rarely'),
                index=1,
            )

    # Right form: per-aspect ratings.
    with col3:
        expander_2 = st.expander(label="Your Ratings", expanded=True)
        with expander_2:
            seat_comfort = _rating_slider("How comfortable are you with your seat?")
            cabin_service = _rating_slider("Please Rate your Cabin Service")
            food_bev = _rating_slider("Please rate the quality of food/beverages")
            entertainment = _rating_slider("Please rate the Entertainment Service")
            ground_service = _rating_slider("Please rate the Ground Service")
            value_for_money = _rating_slider("Value for Money Rating")

    with col4:
        st.empty()
#-------------------------------- Container 3 for Final Rating Slider --------------------------------#
# Three columns (1:3:1); the middle one holds the overall rating and the review box.
container_3 = st.container()
with container_3:
    left_pad, input_col, right_pad = st.columns(spec=[1, 3, 1], gap="medium")

    with left_pad:
        st.empty()

    with input_col:
        # Overall score on a 1-10 scale, defaulting to 7.
        overall = st.slider(
            label="How was your overall experience with the Airline?",
            min_value=1,
            max_value=10,
            value=7,
        )
        # Free-text review.
        review = st.text_area("Enter your review")

    with right_pad:
        st.empty()
#-----------------------------------------------------------------------------------------------------#
# Creating DataFrame using values input by user.
# Single-row frame; insertion order of the mapping fixes the column order.
feature_values = {
    'airline': airline,
    'traveller_type': traveller_type,
    'cabin': cabin,
    'type_of_flight': type_of_flight,
    'frequency': frequency,
    'seat_comfort': seat_comfort,
    'cabin_service': cabin_service,
    'food_bev': food_bev,
    'entertainment': entertainment,
    'ground_service': ground_service,
    'value_for_money': value_for_money,
    'overall': overall,
}
temp_df = pd.DataFrame(
    data=[list(feature_values.values())],
    columns=list(feature_values),
)
# Loading Model using joblib file.
# Cached like the NLP model below so the file is not re-read on every rerun.
# NOTE: joblib.load unpickles the file - only load artifacts you trust.
@st.cache_resource
def _load_recommender(path):
    """Load the joblib-serialized recommendation model stored at *path*."""
    return joblib.load(path)


model = _load_recommender('airline_recommend.joblib')


# Defining a function to store the nlp_model in streamlit cache memory
@st.cache_resource
def cache_model(model_name):
    """Load the Keras model saved at *model_name*; cached across reruns."""
    return tf.keras.models.load_model(model_name)


# Loading the nlp_model
nlp_model = cache_model("nlp_model")
#-------------------------------- Container 4 for Final Predictions --------------------------------#
# Three columns (1.2:2:1.5); the middle one holds the button and the feedback message.
container_4 = st.container()
with container_4:
    empty1, head2, empty3 = st.columns(spec=[1.2, 2, 1.5], gap='medium')

    with empty1:
        st.empty()

    with head2:
        # Creating a button to get prediction
        if st.button('Get Prediction'):
            # Validate inputs before running either model: the select boxes
            # start unselected (None), and a blank review carries no signal.
            if airline is None or traveller_type is None or cabin is None:
                st.warning("Please select your airline, trip type and seat class")
                st.stop()
            if not review.strip():
                st.warning("Please enter your review")
                st.stop()

            # Recommendation predicted from the trip info and ratings.
            y_pred = model.predict(temp_df)

            # Sentiment predicted from the cleaned review text; a probability
            # above 0.5 is read as positive (1), otherwise negative (0).
            clean_review = preprocess(review)
            review_pred_proba = nlp_model.predict([clean_review])
            review_pred = np.where(review_pred_proba > 0.5, 1, 0)[0][0]

            recommended = y_pred[0]
            if recommended == 'yes' and review_pred == 1:
                st.success("Thank you for your positive feedback! \nWe're delighted to hear that you had a great experience with our service.")
                st.balloons()
            elif recommended == 'yes' and review_pred == 0:
                st.warning("We appreciate your positive rating, but we're sorry to hear about your concerns in the review. \nPlease share more details so we can address them and enhance your experience.")
            elif recommended == 'no' and review_pred == 0:
                st.error("We apologize for falling short of your expectations. \nYour feedback is valuable, and we're committed to improving. \nPlease provide specific details about your experience for us to better understand and address the issues.")
            elif recommended == 'no' and review_pred == 1:
                st.error("We're sorry to hear about your negative rating, but we're glad to see your positive comments in the review. \nWe'd like to learn more about your concerns to ensure we address any issues and enhance your satisfaction.")

    with empty3:
        st.empty()