# ============================================
# Import Libraries
# ============================================
import streamlit as st
import re
import pickle
import joblib
import nltk
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from huggingface_hub import hf_hub_download

# ============================================
# Setup NLTK
# ============================================
# Download NLTK corpora into /tmp so the app also works on deployments
# where the default nltk_data location is not writable.
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)
nltk.download("stopwords", download_dir=nltk_data_path)
nltk.download("punkt", download_dir=nltk_data_path)

# ============================================
# Loading Info
# ============================================
# NOTE(review): the original HTML wrapper around this banner was garbled in
# the source file; a simple styled <div> is reconstructed here — confirm
# against the intended design.
st.markdown(
    '<div style="padding: 0.75em; background-color: #fff3cd; '
    'border-radius: 6px; color: #664d03;">'
    'Loading models (≈200 MB) and resources... this may take a while on first run. '
    'Please be patient and DO NOT refresh the page :)'
    '</div>',
    unsafe_allow_html=True
)

# ============================================
# Hugging Face Hub Repo
# ============================================
repo_id = "BesottenJenny/acre-sentiment-models"

# ============================================
# Cached Loading Functions
# ============================================
# st.cache_resource ensures each artifact is downloaded and deserialized
# only once per server process, not on every Streamlit rerun.

@st.cache_resource
def load_sentiment_model():
    """Download and load the Keras sentiment-classification model."""
    path = hf_hub_download(
        repo_id=repo_id,
        filename="best_model.keras"
    )
    return keras.models.load_model(path)


@st.cache_resource
def load_tokenizer_params():
    """Download and unpickle the fitted tokenizer and its parameters.

    Returns:
        tuple: (tokenizer, params) where params is a dict containing at
        least "max_len" (used to pad input sequences).
    """
    tokenizer_path = hf_hub_download(
        repo_id=repo_id,
        filename="tokenizer.pkl"
    )
    params_path = hf_hub_download(
        repo_id=repo_id,
        filename="params.pkl"
    )
    # SECURITY NOTE: pickle.load executes arbitrary code from the file.
    # Acceptable only because the artifacts come from our own trusted
    # Hugging Face repo; never point repo_id at untrusted sources.
    with open(tokenizer_path, "rb") as f:
        tokenizer = pickle.load(f)
    with open(params_path, "rb") as f:
        params = pickle.load(f)
    return tokenizer, params


@st.cache_resource
def load_topic_models():
    """Download and load the FASTopic models for each sentiment class.

    Returns:
        tuple: (negative_model, positive_model).
    """
    neg_path = hf_hub_download(
        repo_id=repo_id,
        filename="fastopic_negative_model.pkl"
    )
    pos_path = hf_hub_download(
        repo_id=repo_id,
        filename="fastopic_positive_model.pkl"
    )
    neg_model = joblib.load(neg_path)
    pos_model = joblib.load(pos_path)
    return neg_model, pos_model


# ============================================
# Load all resources once
# ============================================
sentiment_model = load_sentiment_model()
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()
max_len = params["max_len"]

# ============================================
# Preprocessing Function (NLTK)
# ============================================
# Keep negation words: they flip sentiment and must survive stopword removal.
negations = {"not", "no", "never"}
stpwrds_en = set(stopwords.words("english")) - negations
stemmer = PorterStemmer()

# Domain-specific normalizations applied before stemming (SQ = Singapore
# Airlines; verb forms collapsed to match the training vocabulary).
replacements = {
    "sia": "sq",
    "flown": "fly",
    "flew": "fly",
    "alway": "always",
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",
    "aircraft": "plane"
}


def text_preprocessing(text):
    """Clean, tokenize, normalize, and stem a raw review string.

    Pipeline: lowercase -> strip newline artifacts -> drop non-alphanumeric
    characters -> tokenize -> apply domain replacements -> remove stopwords
    (negations kept) -> Porter-stem. Returns the sentinel "emptytext" if
    nothing survives, so downstream tokenization never sees an empty string.
    """
    text = text.lower()
    # BUG FIX: the original pattern r"\\n" only matched the literal
    # two-character sequence backslash-n (common in scraped text); also
    # match real newline characters so both are flattened to spaces.
    text = re.sub(r"\\n|\n", " ", text)
    text = text.strip()
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    tokens = [replacements.get(word, word) for word in tokens]
    tokens = [word for word in tokens if word not in stpwrds_en]
    tokens = [stemmer.stem(word) for word in tokens]
    if len(tokens) == 0:
        return "emptytext"
    return ' '.join(tokens)


# ============================================
# Streamlit App
# ============================================
def run():
    """Render the ACRE form and run sentiment + topic prediction on submit."""
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")

    st.markdown(
        """
        This section will help you understand how the **ACRE** system works.

        Simply fill in the form below with either a dummy or real customer review,
        and the system will:

        1. **Preprocess** your review text (cleaning, tokenization, and stemming).
        2. **Predict sentiment** (Positive or Negative) along with a confidence score.
        3. **Identify the most relevant topic** associated with the review,
           based on the predicted sentiment.

        Use this tool to simulate how Singapore Airlines can transform raw customer
        feedback into **structured, data-driven insights**.
        """
    )

    with st.form(key='SQ-sentiment-analysis'):
        date = st.date_input("Review Date")
        platform = st.selectbox('Review Platform', ('Mobile', 'Desktop'), index=0)
        rating = st.number_input('Rating', min_value=0, max_value=5, value=3, step=1)
        st.markdown('---')
        text = st.text_input('Customer Review', value='--customer review--')
        title = st.text_input('Review Title', value='--review title--')
        vote = st.slider('Helpful Vote', min_value=0, max_value=200, value=50, step=1)
        st.markdown('---')
        submitted = st.form_submit_button('Predict')

    if submitted:
        st.markdown("---")
        st.write("### Input Data")
        data_inf = {
            'published_date': date,
            'published_platform': platform,
            'rating': rating,
            'type': 'Review',
            'text': text,
            'title': title,
            'helpful_votes': vote
        }
        st.dataframe(pd.DataFrame([data_inf]))

        # Preprocess the free-text review into a padded token-id sequence.
        processed = text_preprocessing(text)
        seq = tokenizer.texts_to_sequences([processed])
        padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")

        # Sentiment Prediction (2-class softmax: 0 = Negative, 1 = Positive).
        pred_probs = sentiment_model.predict(padded)
        pred_class = np.argmax(pred_probs, axis=1)[0]
        confidence = float(np.max(pred_probs))
        label_map = {0: "Negative", 1: "Positive"}
        sentiment_label = label_map[pred_class]

        st.write("### Sentiment Prediction")
        # NOTE(review): the result display line was lost in the garbled
        # source; `confidence` was computed but never shown. Reconstructed
        # here — confirm the intended presentation.
        st.write(f"**Sentiment:** {sentiment_label} (confidence: {confidence:.2%})")

        # Topic Prediction: route the raw review text to the topic model
        # trained on the matching sentiment class.
        st.write("### Topic Prediction")
        if sentiment_label == "Negative":
            # BUG FIX: the original referenced `topics` before assignment in
            # this branch; mirror the positive branch with the negative model.
            topics, probs = topic_model_neg.transform([text])
            st.write("**Using Negative Model**")
            st.markdown(f"Topic ID(s): {topics}", unsafe_allow_html=True)
            st.write(f"**Probabilities:** {probs.tolist()}")
        else:
            topics, probs = topic_model_pos.transform([text])
            st.write("**Using Positive Model**")
            st.markdown(f"Topic ID(s): {topics}", unsafe_allow_html=True)
            st.write(f"**Probabilities:** {probs.tolist()}")


# ============================================
# Run App
# ============================================
if __name__ == "__main__":
    run()