|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import joblib |
|
|
import tensorflow as tf |
|
|
import re |
|
|
import string |
|
|
import nltk |
|
|
from nltk.tokenize import RegexpTokenizer |
|
|
from nltk.stem import WordNetLemmatizer |
|
|
|
|
|
df = pd.read_csv("airline_reviews_cleaned.csv") |
|
|
st.set_page_config(layout="wide", page_title="AeroAdvisor", page_icon="鉁堬笍") |
|
|
|
|
|
css = f""" |
|
|
<style> |
|
|
[data-testid="stAppViewContainer"] > .main {{ |
|
|
background-image: url("https://cdn.discordapp.com/attachments/1075699203046641687/1165351110828101673/PhotoReal_From_a_distance_the_plane_appears_as_a_tiny_speck_ag_1.jpg?ex=654688cb&is=653413cb&hm=096dcc994304b93afd210555607d563f725d74c1dddfd392176f11e15076bcfa&"); |
|
|
background-size: 120%; |
|
|
background-position: top left; |
|
|
background-repeat: repeat; |
|
|
background-attachment: local; |
|
|
}} |
|
|
|
|
|
[data-testid="stHeader"] {{ |
|
|
background: rgba(0,0,0,0); |
|
|
}} |
|
|
|
|
|
[data-testid="stExpander"] {{ |
|
|
background: rgba(0,0,0,0.5); |
|
|
border: 2px solid #000071; |
|
|
border-radius: 10px; |
|
|
}} |
|
|
</style> |
|
|
""" |
|
|
st.markdown(css, unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
nltk.find("corpora/popular.zip") |
|
|
except LookupError: |
|
|
nltk.download('popular') |
|
|
|
|
|
|
|
|
|
|
|
acronyms_dict = pd.read_json('acronym.json', typ = 'series') |
|
|
|
|
|
|
|
|
contractions_dict = pd.read_json('contractions.json', typ = 'series') |
|
|
|
|
|
|
|
|
|
|
|
alphabets = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"] |
|
|
others = ["茫", "氓", "矛", "没", "没陋m", "没贸", "没貌", "矛帽", "没陋re", "没陋ve", "没陋", "没陋s", "没贸we", "茂", "没茂"] |
|
|
stops = alphabets + others |
|
|
stops = list(set(stops)) |
|
|
|
|
|
|
|
|
regexp = RegexpTokenizer("[\w']+") |
|
|
|
|
|
|
|
|
def preprocess(text): |
|
|
|
|
|
text = text.lower() |
|
|
text = text.strip() |
|
|
|
|
|
|
|
|
html = re.compile(r'<.*?>') |
|
|
text = html.sub(r'', text) |
|
|
|
|
|
|
|
|
emoji_pattern = re.compile("[" |
|
|
u"\U0001F600-\U0001F64F" |
|
|
u"\U0001F300-\U0001F5FF" |
|
|
u"\U0001F680-\U0001F6FF" |
|
|
u"\U0001F1E0-\U0001F1FF" |
|
|
u"\U00002702-\U000027B0" |
|
|
u"\U000024C2-\U0001F251" |
|
|
"]+", flags = re.UNICODE) |
|
|
text = emoji_pattern.sub(r'', text) |
|
|
|
|
|
|
|
|
http = "https?://\S+|www\.\S+" |
|
|
pattern = r"({})".format(http) |
|
|
text = re.sub(pattern, "", text) |
|
|
|
|
|
|
|
|
pattern = r'@[\w_]+' |
|
|
text = re.sub(pattern, "", text) |
|
|
|
|
|
|
|
|
punct_str = string.punctuation + string.digits |
|
|
punct_str = punct_str.replace("'", "") |
|
|
punct_str = punct_str.replace("-", "") |
|
|
text = text.translate(str.maketrans('', '', punct_str)) |
|
|
|
|
|
|
|
|
text = text.replace("-", " ") |
|
|
|
|
|
|
|
|
words = [] |
|
|
for word in regexp.tokenize(text): |
|
|
if word in acronyms_dict.index: |
|
|
words = words + acronyms_dict[word].split() |
|
|
else: |
|
|
words = words + word.split() |
|
|
text = ' '.join(words) |
|
|
|
|
|
|
|
|
words = [] |
|
|
for word in regexp.tokenize(text): |
|
|
if word in contractions_dict.index: |
|
|
words = words + contractions_dict[word].split() |
|
|
else: |
|
|
words = words + word.split() |
|
|
text = " ".join(words) |
|
|
|
|
|
punct_str = string.punctuation |
|
|
text = text.translate(str.maketrans('', '', punct_str)) |
|
|
|
|
|
|
|
|
lemmatizer = WordNetLemmatizer() |
|
|
text = " ".join([lemmatizer.lemmatize(word) for word in regexp.tokenize(text)]) |
|
|
|
|
|
|
|
|
text = ' '.join([word for word in regexp.tokenize(text) if word not in stops]) |
|
|
|
|
|
|
|
|
filter = string.ascii_letters + " " |
|
|
text = "".join([chr for chr in text if chr in filter]) |
|
|
|
|
|
|
|
|
pattern = r'\b\w*?(.)\1{2,}\w*\b' |
|
|
text = re.sub(pattern, "", text).strip() |
|
|
|
|
|
|
|
|
return text |
|
|
|
|
|
|
|
|
container_1 = st.container() |
|
|
with container_1: |
|
|
empty1, head2, empty3 = st.columns(spec = [2,3,2], gap = 'medium') |
|
|
with empty1: |
|
|
st.empty() |
|
|
with head2: |
|
|
st.title("Welcome Aboard") |
|
|
st.write("## Tell us about your Experience :airplane:") |
|
|
with empty3: |
|
|
st.empty() |
|
|
|
|
|
|
|
|
container_2 = st.container() |
|
|
with container_2: |
|
|
col1, col2, col3, col4 = st.columns(spec = [1,3,3,1], gap = 'medium') |
|
|
with col1: |
|
|
st.empty() |
|
|
|
|
|
with col2: |
|
|
expander_1 = st.expander(label = "Your Trip Info", expanded = True) |
|
|
with expander_1: |
|
|
airline = st.selectbox( |
|
|
label = "Enter your Airline", |
|
|
options = tuple(sorted(df['airline'].unique())), |
|
|
index = None, |
|
|
|
|
|
) |
|
|
|
|
|
traveller_type = st.selectbox( |
|
|
label = "Enter your trip type", |
|
|
options = ("Business", "Solo Leisure", "Couple Leisure", "Family Leisure"), |
|
|
index = None, |
|
|
|
|
|
) |
|
|
|
|
|
cabin = st.selectbox( |
|
|
label = "Enter your seat class", |
|
|
options = ("Economy Class", "Premium Economy", "Business Class", "First Class"), |
|
|
index = None, |
|
|
|
|
|
) |
|
|
|
|
|
type_of_flight = st.radio( |
|
|
label = "Enter your flight type", |
|
|
options = ("Direct", 'Indirect'), |
|
|
index = 0, |
|
|
) |
|
|
|
|
|
frequency = st.radio( |
|
|
label = "How often do you fly?", |
|
|
options = ('Often', 'Occasionally', 'Rarely'), |
|
|
index = 1, |
|
|
) |
|
|
|
|
|
with col3: |
|
|
expander_2 = st.expander(label = "Your Ratings", expanded = True) |
|
|
with expander_2: |
|
|
seat_comfort = st.slider( |
|
|
label = "How comfortable are you with your seat?", |
|
|
min_value = 1, |
|
|
max_value = 5, |
|
|
value = 3 |
|
|
) |
|
|
|
|
|
cabin_service = st.slider( |
|
|
label = "Please Rate your Cabin Service", |
|
|
min_value = 1, |
|
|
max_value = 5, |
|
|
value = 3 |
|
|
) |
|
|
|
|
|
food_bev = st.slider( |
|
|
label = "Please rate the quality of food/beverages", |
|
|
min_value = 1, |
|
|
max_value = 5, |
|
|
value = 3 |
|
|
) |
|
|
|
|
|
entertainment = st.slider( |
|
|
label = "Please rate the Entertainment Service", |
|
|
min_value = 1, |
|
|
max_value = 5, |
|
|
value = 3 |
|
|
) |
|
|
|
|
|
ground_service = st.slider( |
|
|
label = "Please rate the Ground Service", |
|
|
min_value = 1, |
|
|
max_value = 5, |
|
|
value = 3 |
|
|
) |
|
|
|
|
|
value_for_money = st.slider( |
|
|
label = "Value for Money Rating", |
|
|
min_value = 1, |
|
|
max_value = 5, |
|
|
value = 3 |
|
|
) |
|
|
|
|
|
with col4: |
|
|
st.empty() |
|
|
|
|
|
|
|
|
container_3 = st.container() |
|
|
with container_3: |
|
|
empty1, head2, empty3 = st.columns(spec = [1,3,1], gap = 'medium') |
|
|
with empty1: |
|
|
st.empty() |
|
|
with head2: |
|
|
overall = st.slider( |
|
|
label = "How was your overall experience with the Airline?", |
|
|
min_value = 1, |
|
|
max_value = 10, |
|
|
value = 7 |
|
|
) |
|
|
|
|
|
review = st.text_area("Enter your review") |
|
|
with empty3: |
|
|
st.empty() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
temp_df = pd.DataFrame( |
|
|
data = [[airline, traveller_type, cabin, type_of_flight, frequency, |
|
|
seat_comfort, cabin_service, food_bev, entertainment, |
|
|
ground_service, value_for_money, overall]], |
|
|
columns = ['airline', 'traveller_type', 'cabin', 'type_of_flight', 'frequency', |
|
|
'seat_comfort', 'cabin_service', 'food_bev', 'entertainment', |
|
|
'ground_service', 'value_for_money', 'overall'] |
|
|
) |
|
|
|
|
|
|
|
|
model = joblib.load('ml_model.joblib') |
|
|
|
|
|
|
|
|
@st.cache_resource |
|
|
def cache_model(model_name): |
|
|
model = tf.keras.models.load_model(model_name) |
|
|
return model |
|
|
|
|
|
|
|
|
nlp_model = cache_model("nlp_model/nlp_model") |
|
|
|
|
|
|
|
|
|
|
|
container_4 = st.container() |
|
|
with container_4: |
|
|
empty1, head2, empty3 = st.columns(spec = [1.2,2,1.5], gap = 'medium') |
|
|
with empty1: |
|
|
st.empty() |
|
|
|
|
|
with head2: |
|
|
|
|
|
if st.button('Get Prediction'): |
|
|
y_pred = model.predict(temp_df) |
|
|
y_pred_prob = model.predict_proba(temp_df) |
|
|
|
|
|
if review=="": |
|
|
st.warning("Please enter your review") |
|
|
st.stop() |
|
|
|
|
|
clean_review = preprocess(review) |
|
|
review_pred_proba = nlp_model.predict([clean_review]) |
|
|
review_pred = np.where(review_pred_proba > 0.5, 1, 0)[0][0] |
|
|
|
|
|
if (y_pred[0] == 1) & (review_pred == 1): |
|
|
st.success("Thank you for your positive feedback! \nWe're delighted to hear that you had a great experience with our service.") |
|
|
st.balloons() |
|
|
elif (y_pred[0] == 1) & (review_pred == 0): |
|
|
st.warning("We appreciate your positive rating, but we're sorry to hear about your concerns in the review. \nPlease share more details so we can address them and enhance your experience.") |
|
|
elif (y_pred[0] == 0) & (review_pred == 0): |
|
|
st.error("We apologize for falling short of your expectations. \nYour feedback is valuable, and we're committed to improving. \nPlease provide specific details about your experience for us to better understand and address the issues.") |
|
|
elif (y_pred[0] == 0) & (review_pred == 1): |
|
|
st.error("We're sorry to hear about your negative rating, but we're glad to see your positive comments in the review. \nWe'd like to learn more about your concerns to ensure we address any issues and enhance your satisfaction.") |
|
|
|
|
|
with empty3: |
|
|
st.empty() |