|
|
|
|
|
import streamlit as st
|
|
|
import re
|
|
|
import pickle
|
|
|
import joblib
|
|
|
import nltk
|
|
|
import os
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
|
from tensorflow import keras
|
|
|
from nltk.corpus import stopwords
|
|
|
from nltk.tokenize import word_tokenize
|
|
|
from nltk.stem import PorterStemmer
|
|
|
|
|
|
|
|
|
# Point NLTK at a writable data directory (/tmp — presumably the only
# writable location on the deployment host; confirm for your platform)
# and fetch the corpora required by the preprocessing pipeline.
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)

# "stopwords" feeds stopword removal; "punkt" backs word_tokenize.
for resource in ("stopwords", "punkt"):
    nltk.download(resource, download_dir=nltk_data_path)
|
|
|
|
|
|
|
|
|
# One-time notice shown while the cached models below are being loaded.
_loading_notice_html = (
    '<p style="color:gray; font-size:14px; font-style:italic;">'
    'Loading models and resources from local storage... '
    'Please be patient and DO NOT refresh the page :)'
    '</p>'
)
st.markdown(_loading_notice_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_sentiment_model():
    """Load the trained Keras sentiment classifier from local storage.

    Cached by Streamlit so the model is deserialized only once per
    server process.

    Returns:
        A compiled ``keras.Model`` loaded from ``./src/best_model.keras``.
    """
    return keras.models.load_model("./src/best_model.keras")
|
|
|
|
|
|
@st.cache_resource
def load_tokenizer_params():
    """Load the fitted tokenizer and its preprocessing parameters.

    Cached by Streamlit; both artifacts are unpickled once per process.

    Returns:
        tuple: ``(tokenizer, params)`` where ``params`` is a dict that
        includes at least ``"max_len"`` (used for padding at inference).
    """
    def _unpickle(path):
        # NOTE(review): pickle.load on local model artifacts only —
        # never feed this untrusted data.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    return _unpickle("./src/tokenizer.pkl"), _unpickle("./src/params.pkl")
|
|
|
|
|
|
@st.cache_resource
def load_topic_models():
    """Load the topic models for negative and positive reviews.

    Cached by Streamlit so the joblib artifacts are read only once.

    Returns:
        tuple: ``(negative_model, positive_model)`` — one FASTopic model
        per sentiment class, each trained with 10 topics (per filename).
    """
    artifact_paths = (
        "./src/fastopic_negative_model_10.pkl",
        "./src/fastopic_positive_model_10.pkl",
    )
    return tuple(joblib.load(path) for path in artifact_paths)
|
|
|
|
|
|
|
|
|
# Materialize all cached resources at import time so the first user
# interaction does not pay the deserialization cost.
sentiment_model = load_sentiment_model()

tokenizer, params = load_tokenizer_params()

topic_model_neg, topic_model_pos = load_topic_models()

# Sequence length the model was trained with; inference inputs are
# padded/truncated to this length before prediction.
max_len = params["max_len"]
|
|
|
|
|
|
|
|
|
# Negation words are deliberately kept out of the stopword list because
# they can invert sentiment ("not good" vs "good").
negations = {"not", "no", "never"}
stpwrds_en = {w for w in stopwords.words("english") if w not in negations}
stemmer = PorterStemmer()
|
|
|
|
|
|
# Manual token-normalization map applied before stopword removal and
# stemming: collapses airline-domain synonyms and irregular verb forms
# onto a single token so they are counted together downstream.
replacements = {
    "sia": "sq",          # "SIA" and "SQ" both refer to Singapore Airlines
    "flown": "fly",       # irregular verb forms the Porter stemmer won't merge
    "flew": "fly",
    "alway": "always",    # undo an over-aggressive stem
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",       # treat as sentiment synonym
    "aircraft": "plane"
}
|
|
|
|
|
|
def text_preprocessing(text):
    """Normalize a raw review into a stemmed, space-joined token string.

    Pipeline: lowercase -> strip newlines -> drop non-alphanumerics ->
    tokenize -> apply domain replacements -> remove stopwords (negations
    kept) -> Porter-stem.

    Args:
        text (str): Raw customer review text.

    Returns:
        str: Space-joined preprocessed tokens, or the sentinel
        ``"emptytext"`` when nothing survives preprocessing — keeps the
        downstream tokenizer from receiving an empty string.
    """
    text = text.lower()
    # Bug fix: the original pattern r"\\n" only matched the literal
    # two-character sequence backslash+'n' (escaped newlines in scraped
    # text), never a real newline. Match both forms.
    text = re.sub(r"\\n|\n", " ", text)
    text = text.strip()
    # Keep only lowercase alphanumerics and whitespace.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    # Normalize domain synonyms / irregular forms before filtering.
    tokens = [replacements.get(word, word) for word in tokens]
    tokens = [word for word in tokens if word not in stpwrds_en]
    tokens = [stemmer.stem(word) for word in tokens]
    if len(tokens) == 0:
        return "emptytext"
    return ' '.join(tokens)
|
|
|
|
|
|
|
|
|
# Human-readable names for the 10 topics of the negative-review FASTopic
# model; keys are the topic ids returned by argmax over the model's
# topic distribution.
topic_labels_neg = {
    0: "Service Attitude",
    1: "Ticket Price",
    2: "In-Flight Accommodation",
    3: "Boarding & Luggage Issues",
    4: "Refund & Payment Difficulties",
    5: "Meal Quality",
    6: "Accessibility & Assistance",
    7: "Safety & Hygiene",
    8: "Seat Comfort",
    9: "Quality of Amenities"
}
|
|
|
|
|
|
# Human-readable names for the 10 topics of the positive-review FASTopic
# model. Several ids map to the same label — presumably the model found
# multiple clusters that were manually judged to be the same theme
# (TODO confirm against the topic-word inspection that produced these).
topic_labels_pos = {
    0: "Destination-based compliment",
    1: "Seat & cabin comfort",
    2: "Destination-based compliment",
    3: "Transit accommodation",
    4: "Meals & in-flight services",
    5: "Meals & in-flight services",
    6: "Seat & cabin comfort / Aircraft condition",
    7: "Destination-based compliment",
    8: "Miscellaneous experiences",
    9: "Destination-based compliment"
}
|
|
|
|
|
|
|
|
|
def run():
    """Render the sentiment & topic prediction page.

    Flow: collect a review via a form; on submit, preprocess the review
    text, predict sentiment with the cached Keras model, then route the
    raw review to the matching (negative/positive) topic model and
    display the winning topic with its probability distribution.
    """
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")

    st.markdown(
        """
This section will help you understand how the **ACRE** system works.
Simply fill in the form below with either a dummy or real customer review, and the system will:

1. **Preprocess** your review text (cleaning, tokenization, and stemming).
2. **Predict sentiment** (Positive or Negative) along with a confidence score.
3. **Identify the most relevant topic** associated with the review, based on the predicted sentiment.

Use this tool to simulate how Singapore Airlines can transform raw customer feedback into **structured, data-driven insights**.
"""
    )

    # Input form: only `text` feeds the models; the remaining fields are
    # echoed back in the "Input Data" table to mimic a real review record.
    with st.form(key='SQ-sentiment-analysis'):
        date = st.date_input("Review Date")
        platform = st.selectbox('Review Platform', ('Mobile', 'Desktop'), index=0)
        rating = st.number_input('Rating', min_value=0, max_value=5, value=3, step=1)
        st.markdown('---')
        text = st.text_input('Customer Review', value='--customer review--')
        title = st.text_input('Review Title', value='--review title--')
        vote = st.slider('Helpful Vote', min_value=0, max_value=200, value=50, step=1)
        st.markdown('---')
        submitted = st.form_submit_button('Predict')

    if submitted:
        st.markdown("---")
        st.write("### Input Data")
        # Echo the submitted fields as a one-row dataframe.
        data_inf = {
            'published_date': date,
            'published_platform': platform,
            'rating': rating,
            'type': 'Review',
            'text': text,
            'title': title,
            'helpful_votes': vote
        }
        st.dataframe(pd.DataFrame([data_inf]))

        # Clean -> integer-encode -> pad to the training-time length.
        processed = text_preprocessing(text)
        seq = tokenizer.texts_to_sequences([processed])
        padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")

        pred_probs = sentiment_model.predict(padded)

        # Handle both output-head shapes: a single sigmoid unit
        # (shape (1, 1), P(positive)) or a two-class softmax.
        if pred_probs.shape[1] == 1:
            p_pos = float(pred_probs[0][0])
            p_neg = 1 - p_pos
            if p_pos >= 0.5:
                sentiment_label = "Positive"
                confidence = p_pos
            else:
                sentiment_label = "Negative"
                confidence = p_neg
        else:
            pred_class = np.argmax(pred_probs, axis=1)[0]
            # assumes class index 0 = Negative, 1 = Positive — matches
            # how the model was trained; TODO confirm with training code
            label_map = {0: "Negative", 1: "Positive"}
            sentiment_label = label_map[pred_class]
            confidence = float(pred_probs[0][pred_class])

        color = "green" if sentiment_label == "Positive" else "red"
        st.markdown(
            f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
            f"Predicted Sentiment: {sentiment_label} "
            f"(Confidence: {confidence:.2f})</p>",
            unsafe_allow_html=True
        )

        st.write("### Topic Modeling")
        # Route to the sentiment-specific topic model. NOTE(review): the
        # RAW review `text` is passed here, not `processed` — presumably
        # the topic models apply their own preprocessing; confirm this
        # is intentional.
        if sentiment_label == "Negative":
            probs = topic_model_neg.transform([text])[0]
            topic_id = int(np.argmax(probs))
            topic_name = topic_labels_neg.get(topic_id, "Unknown Topic")
            st.write("**Using Negative Model**")
        else:
            probs = topic_model_pos.transform([text])[0]
            topic_id = int(np.argmax(probs))
            topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
            st.write("**Using Positive Model**")

        st.markdown(
            f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
            f"Topic {topic_id}: {topic_name}</p>",
            unsafe_allow_html=True
        )

        # Full per-topic probability distribution for transparency.
        st.write("**Probabilities:**", probs.tolist())