| |
|
| |
|
| |
|
| | import streamlit as st
|
| | import re
|
| | import pickle
|
| | import joblib
|
| | import nltk
|
| | import os
|
| | import numpy as np
|
| | import pandas as pd
|
| | from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| | from tensorflow import keras
|
| | from nltk.corpus import stopwords
|
| | from nltk.tokenize import word_tokenize
|
| | from nltk.stem import PorterStemmer
|
| | from huggingface_hub import hf_hub_download
|
| |
|
| |
|
| |
|
| |
|
# Keep NLTK data under /tmp so the app also works on read-only / ephemeral
# hosts (Streamlit Cloud, HF Spaces) where the default download location
# may not be writable.
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)
nltk.download("stopwords", download_dir=nltk_data_path)
nltk.download("punkt", download_dir=nltk_data_path)
# NLTK >= 3.8.2 ships the Punkt tokenizer tables as a separate "punkt_tab"
# resource; word_tokenize raises LookupError without it on newer versions.
nltk.download("punkt_tab", download_dir=nltk_data_path)
|
| |
|
| |
|
| |
|
| |
|
# One-time UX notice: the cached loaders below pull ~200 MB of model
# artifacts from the Hugging Face Hub, so warn the user up front.
st.markdown(
    '<p style="color:gray; font-size:14px; font-style:italic;">'
    'Loading models (≈200 MB) and resources... this may take a while on first run. '
    'Please be patient and DO NOT refresh the page :)'
    '</p>',
    unsafe_allow_html=True
)

# Hugging Face Hub repository hosting every model artifact used below.
repo_id = "BesottenJenny/acre-sentiment-models"
|
| |
|
| |
|
| |
|
| |
|
@st.cache_resource
def load_sentiment_model():
    """Download the Keras sentiment classifier from the Hub and load it.

    Cached by Streamlit so the download/deserialization happens once per
    server process.
    """
    model_file = hf_hub_download(repo_id=repo_id, filename="best_model.keras")
    model = keras.models.load_model(model_file)
    return model
|
| |
|
@st.cache_resource
def load_tokenizer_params():
    """Fetch and unpickle the fitted tokenizer and its training parameters.

    Returns (tokenizer, params); cached by Streamlit per server process.
    """
    def _unpickle(filename):
        # Resolve the artifact on the Hub, then deserialize it.
        local_path = hf_hub_download(repo_id=repo_id, filename=filename)
        with open(local_path, "rb") as fh:
            return pickle.load(fh)

    return _unpickle("tokenizer.pkl"), _unpickle("params.pkl")
|
| |
|
@st.cache_resource
def load_topic_models():
    """Fetch the per-polarity FASTopic models from the Hub.

    Returns (negative_model, positive_model); cached by Streamlit.
    """
    artifact_names = ("fastopic_negative_model.pkl", "fastopic_positive_model.pkl")
    negative, positive = (
        joblib.load(hf_hub_download(repo_id=repo_id, filename=name))
        for name in artifact_names
    )
    return negative, positive
|
| |
|
| |
|
| |
|
| |
|
# Materialize all cached resources at import time so the first user
# interaction does not pay the download cost mid-request.
sentiment_model = load_sentiment_model()
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()

# Sequence length the sentiment model was trained with; inputs are
# padded/truncated to this before prediction.
max_len = params["max_len"]
|
| |
|
| |
|
| |
|
| |
|
# Negation words are deliberately kept OUT of the stopword list:
# "not good" must not collapse to "good" before sentiment scoring.
negations = {"not", "no", "never"}
stpwrds_en = set(stopwords.words("english")) - negations
stemmer = PorterStemmer()

# Domain-specific token normalizations applied before stemming: collapse
# airline jargon and irregular inflections the Porter stemmer misses
# (e.g. "flown"/"flew" -> "fly", "sia" -> "sq", "aircraft" -> "plane").
replacements = {
    "sia": "sq",
    "flown": "fly",
    "flew": "fly",
    "alway": "always",
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",
    "aircraft": "plane"
}
|
| |
|
def text_preprocessing(text):
    """Normalize a raw review into a stemmed, space-joined token string.

    Pipeline: lowercase, strip literal "\\n" escape sequences, drop any
    character outside [a-z0-9] and whitespace, tokenize, apply the domain
    replacement map, remove stopwords (negations retained), Porter-stem.

    Returns the sentinel "emptytext" when nothing survives filtering, so
    the downstream Keras tokenizer never receives an empty string.
    """
    cleaned = re.sub(r"\\n", " ", text.lower()).strip()
    cleaned = re.sub(r'[^a-z0-9\s]', ' ', cleaned)
    stemmed = [
        stemmer.stem(replacements.get(tok, tok))
        for tok in word_tokenize(cleaned)
        if replacements.get(tok, tok) not in stpwrds_en
    ]
    return ' '.join(stemmed) if stemmed else "emptytext"
|
| |
|
| |
|
| |
|
| |
|
def run():
    """Render the ACRE demo page: review form -> sentiment -> topic."""
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(
        """
This section will help you understand how the **ACRE** system works.
Simply fill in the form below with either a dummy or real customer review, and the system will:

1. **Preprocess** your review text (cleaning, tokenization, and stemming).
2. **Predict sentiment** (Positive or Negative) along with a confidence score.
3. **Identify the most relevant topic** associated with the review, based on the predicted sentiment.

Use this tool to simulate how Singapore Airlines can transform raw customer feedback into **structured, data-driven insights**.
"""
    )

    with st.form(key='SQ-sentiment-analysis'):
        review_date = st.date_input("Review Date")
        platform = st.selectbox('Review Platform', ('Mobile', 'Desktop'), index=0)
        rating = st.number_input('Rating', min_value=0, max_value=5, value=3, step=1)
        st.markdown('---')
        text = st.text_input('Customer Review', value='--customer review--')
        title = st.text_input('Review Title', value='--review title--')
        vote = st.slider('Helpful Vote', min_value=0, max_value=200, value=50, step=1)
        st.markdown('---')
        submitted = st.form_submit_button('Predict')

    if not submitted:
        return

    st.markdown("---")
    st.write("### Input Data")
    # Echo the raw submission back so users see exactly what was analyzed.
    record = {
        'published_date': review_date,
        'published_platform': platform,
        'rating': rating,
        'type': 'Review',
        'text': text,
        'title': title,
        'helpful_votes': vote
    }
    st.dataframe(pd.DataFrame([record]))

    # Run the review through the same preprocessing + tokenization pipeline
    # the sentiment model was trained on.
    clean_text = text_preprocessing(text)
    sequences = tokenizer.texts_to_sequences([clean_text])
    model_input = pad_sequences(sequences, maxlen=max_len, padding="post", truncating="post")

    scores = sentiment_model.predict(model_input)
    class_idx = np.argmax(scores, axis=1)[0]
    confidence = float(np.max(scores))

    sentiment_label = {0: "Negative", 1: "Positive"}[class_idx]
    color = "red" if sentiment_label == "Negative" else "green"

    st.write("### Sentiment Prediction")
    st.markdown(f"<h3 style='color:{color};'>Predicted Sentiment: {sentiment_label}</h3>", unsafe_allow_html=True)
    st.write(f"**Confidence:** {confidence:.2f}")

    st.write("### Topic Modeling")
    # NOTE(review): topics are inferred from the RAW text, not the
    # preprocessed text used for sentiment — presumably the FASTopic models
    # were fit on raw reviews; confirm against training code.
    topic_model = topic_model_neg if sentiment_label == "Negative" else topic_model_pos
    topics, probs = topic_model.transform([text])
    st.write(f"**Using {sentiment_label} Model**")
    st.markdown(f"<p style='color:{color};'>Topic ID(s): {topics}</p>", unsafe_allow_html=True)

    st.write(f"**Probabilities:** {probs.tolist()}")
|
| |
|
| |
|
| |
|
| |
|
# Script entry point: render the Streamlit page.
if __name__ == "__main__":
    run()