# Source: Hugging Face upload by user "hanantonio" (commit 293f9ac, verified) — "Upload 2 files"
# Import Libraries
import streamlit as st
import re
import pickle
import joblib
import nltk
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
# --- Setup NLTK ---
# Use /tmp because it is writable in containerized deployments (e.g. HF Spaces).
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)
nltk.download("stopwords", download_dir=nltk_data_path)
nltk.download("punkt", download_dir=nltk_data_path)
# NLTK >= 3.8.2 renamed the word_tokenize model to "punkt_tab"; fetch it as
# well so tokenization works on both old and new NLTK versions. Requesting an
# unknown resource name only logs an error and returns False, so this is safe
# on older NLTK releases.
nltk.download("punkt_tab", download_dir=nltk_data_path)
# --- Loading Info ---
# --- Loading Info ---
# Small gray italic banner shown while models load on first page hit.
_loading_note = (
    '<p style="color:gray; font-size:14px; font-style:italic;">'
    'Loading models and resources from local storage... '
    'Please be patient and DO NOT refresh the page :)'
    '</p>'
)
st.markdown(_loading_note, unsafe_allow_html=True)
# --- Cached Loading Functions ---
@st.cache_resource
def load_sentiment_model():
    """Load the trained Keras sentiment model from disk (cached per session)."""
    return keras.models.load_model("./src/best_model.keras")
@st.cache_resource
def load_tokenizer_params():
    """Load the fitted Keras tokenizer and preprocessing params (cached).

    Returns a (tokenizer, params) tuple unpickled from ./src/.
    """
    def _unpickle(path):
        # Pickles are trusted local artifacts shipped with the app.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    return _unpickle("./src/tokenizer.pkl"), _unpickle("./src/params.pkl")
@st.cache_resource
def load_topic_models():
    """Load the FASTopic models for negative and positive reviews (cached).

    Returns (negative_model, positive_model).
    """
    negative = joblib.load("./src/fastopic_negative_model_10.pkl")
    positive = joblib.load("./src/fastopic_positive_model_10.pkl")
    return negative, positive
# --- Load all resources once ---
# Module-level so every Streamlit rerun reuses the cached objects; run() below
# reads these globals directly.
sentiment_model = load_sentiment_model()
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()
# Sequence length the model was trained with; used to pad/truncate inputs.
max_len = params["max_len"]
# --- Preprocessing resources (NLTK) ---
# Keep negation words out of the stopword list so sentiment cues survive.
negations = {"not", "no", "never"}
stpwrds_en = {w for w in stopwords.words("english") if w not in negations}
stemmer = PorterStemmer()
# Domain-specific token normalization applied before stopword removal/stemming.
replacements = dict(
    sia="sq",
    flown="fly",
    flew="fly",
    alway="always",
    boarding="board",
    told="tell",
    said="say",
    booked="book",
    paid="pay",
    well="good",
    aircraft="plane",
)
def text_preprocessing(text):
    """Clean a raw review string for the sentiment model.

    Steps: lowercase, replace newlines with spaces, drop non-alphanumeric
    characters, tokenize, apply domain replacements, remove stopwords
    (negations kept), and stem. Returns the space-joined tokens, or the
    sentinel "emptytext" when nothing survives so downstream tokenization
    always receives at least one token.
    """
    text = text.lower()
    # Bug fix: the original pattern r"\\n" matched a literal backslash
    # followed by "n", not a newline character. Match both the escaped
    # two-character sequence and real newlines.
    text = re.sub(r"\\n|\n", " ", text)
    text = text.strip()
    # Keep only lowercase letters, digits and whitespace.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    tokens = [replacements.get(word, word) for word in tokens]
    tokens = [word for word in tokens if word not in stpwrds_en]
    tokens = [stemmer.stem(word) for word in tokens]
    if len(tokens) == 0:
        return "emptytext"
    return ' '.join(tokens)
# --- Topic Labels ---
# Human-readable names for the 10 negative-review FASTopic topics, keyed by id.
topic_labels_neg = dict(enumerate((
    "Service Attitude",
    "Ticket Price",
    "In-Flight Accommodation",
    "Boarding & Luggage Issues",
    "Refund & Payment Difficulties",
    "Meal Quality",
    "Accessibility & Assistance",
    "Safety & Hygiene",
    "Seat Comfort",
    "Quality of Amenities",
)))
# Names for the 10 positive-review topics; several ids share one label
# because the underlying topics were judged semantically equivalent.
topic_labels_pos = dict(enumerate((
    "Destination-based compliment",
    "Seat & cabin comfort",
    "Destination-based compliment",
    "Transit accommodation",
    "Meals & in-flight services",
    "Meals & in-flight services",
    "Seat & cabin comfort / Aircraft condition",
    "Destination-based compliment",
    "Miscellaneous experiences",
    "Destination-based compliment",
)))
# --- Streamlit App ---
def run():
    """Render the review-analysis page.

    Collects a customer review through a Streamlit form, predicts its
    sentiment with the cached Keras model, then selects the most likely
    topic from the FASTopic model matching that sentiment.

    Reads module-level globals: sentiment_model, tokenizer, params/max_len,
    topic_model_neg, topic_model_pos, and the topic label dicts.
    """
    # st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(
        """
This section will help you understand how the **ACRE** system works.
Simply fill in the form below with either a dummy or real customer review, and the system will:
1. **Preprocess** your review text (cleaning, tokenization, and stemming).
2. **Predict sentiment** (Positive or Negative) along with a confidence score.
3. **Identify the most relevant topic** associated with the review, based on the predicted sentiment.
Use this tool to simulate how Singapore Airlines can transform raw customer feedback into **structured, data-driven insights**.
"""
    )
    with st.form(key='SQ-sentiment-analysis'):
        date = st.date_input("Review Date")
        platform = st.selectbox('Review Platform', ('Mobile', 'Desktop'), index=0)
        rating = st.number_input('Rating', min_value=0, max_value=5, value=3, step=1)
        st.markdown('---')
        text = st.text_input('Customer Review', value='--customer review--')
        title = st.text_input('Review Title', value='--review title--')
        vote = st.slider('Helpful Vote', min_value=0, max_value=200, value=50, step=1)
        st.markdown('---')
        submitted = st.form_submit_button('Predict')
    if submitted:
        st.markdown("---")
        st.write("### Input Data")
        # Echo the submitted fields back to the user as a one-row table.
        # Only 'text' feeds the models; the other fields are display-only here.
        data_inf = {
            'published_date': date,
            'published_platform': platform,
            'rating': rating,
            'type': 'Review',
            'text': text,
            'title': title,
            'helpful_votes': vote
        }
        st.dataframe(pd.DataFrame([data_inf]))
        # Preprocess (uses the 'text' field)
        processed = text_preprocessing(text)
        seq = tokenizer.texts_to_sequences([processed])
        padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
        # Sentiment Prediction — handles both output-layer shapes: a single
        # sigmoid unit, or a multi-unit softmax.
        pred_probs = sentiment_model.predict(padded)
        if pred_probs.shape[1] == 1:
            # Binary sigmoid: one probability for "Positive"; threshold at 0.5.
            p_pos = float(pred_probs[0][0])
            p_neg = 1 - p_pos
            if p_pos >= 0.5:
                sentiment_label = "Positive"
                confidence = p_pos
            else:
                sentiment_label = "Negative"
                confidence = p_neg
        else:
            # Softmax: take the argmax class; index 0 = Negative, 1 = Positive.
            pred_class = np.argmax(pred_probs, axis=1)[0]
            label_map = {0: "Negative", 1: "Positive"}
            sentiment_label = label_map[pred_class]
            confidence = float(pred_probs[0][pred_class])
        # --- Sentiment Output with Color ---
        color = "green" if sentiment_label == "Positive" else "red"
        st.markdown(
            f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
            f"Predicted Sentiment: {sentiment_label} "
            f"(Confidence: {confidence:.2f})</p>",
            unsafe_allow_html=True
        )
        # Topic Prediction — routed to the model trained on reviews of the
        # predicted sentiment.
        # NOTE(review): the topic models receive the RAW review text, not the
        # preprocessed string — presumably FASTopic applies its own
        # preprocessing; confirm this matches how the models were trained.
        st.write("### Topic Modeling")
        if sentiment_label == "Negative":
            probs = topic_model_neg.transform([text])[0]
            topic_id = int(np.argmax(probs))
            topic_name = topic_labels_neg.get(topic_id, "Unknown Topic")
            st.write("**Using Negative Model**")
        else:
            probs = topic_model_pos.transform([text])[0]
            topic_id = int(np.argmax(probs))
            topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
            st.write("**Using Positive Model**")
        # --- Topic Output with Color ---
        st.markdown(
            f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
            f"Topic {topic_id}: {topic_name}</p>",
            unsafe_allow_html=True
        )
        # Probabilities are still displayed
        st.write("**Probabilities:**", probs.tolist())