# NOTE(review): the three lines below were Hugging Face Spaces page residue
# ("Spaces: Sleeping") captured by the scrape — kept as a comment so the file parses.
# ============================================
# Import Libraries
# ============================================
import os
import pickle
import re

import joblib
import nltk
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
# ============================================
# Setup NLTK
# ============================================
# Download NLTK data into a writable tmp dir (Spaces/Streamlit containers
# commonly have a read-only home directory).
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)
# quiet=True keeps download progress bars out of the app logs on reruns;
# nltk.download skips the fetch when the resource already exists in download_dir.
nltk.download("stopwords", download_dir=nltk_data_path, quiet=True)
nltk.download("punkt", download_dir=nltk_data_path, quiet=True)
# NLTK >= 3.8.2 resolves word_tokenize via the "punkt_tab" resource; without
# it, word_tokenize raises LookupError on newer NLTK versions.
nltk.download("punkt_tab", download_dir=nltk_data_path, quiet=True)
# ============================================
# Loading Info
# ============================================
# Shown immediately so users see feedback while the heavy artifacts download.
_loading_banner = (
    '<p style="color:gray; font-size:14px; font-style:italic;">'
    'Loading models (≈200 MB) and resources... this may take a while on first run. '
    'Please be patient and DO NOT refresh the page :)'
    '</p>'
)
st.markdown(_loading_banner, unsafe_allow_html=True)
# ============================================
# Hugging Face Hub Repo
# ============================================
# Single source of truth for where every model artifact is downloaded from.
repo_id = "BesottenJenny/acre-sentiment-models"
# ============================================
# Cached Loading Functions
# ============================================
@st.cache_resource
def load_sentiment_model():
    """Download and load the Keras sentiment model from the Hugging Face Hub.

    Decorated with st.cache_resource so the ~200 MB model is fetched and
    deserialized once per server process instead of on every Streamlit rerun
    (the section header promises caching, but the original function was not
    actually cached).

    Returns:
        keras.Model: the trained sentiment classifier.
    """
    path = hf_hub_download(
        repo_id=repo_id,
        filename="best_model.keras"
    )
    return keras.models.load_model(path)
@st.cache_resource
def load_tokenizer_params():
    """Download and unpickle the fitted tokenizer and training parameters.

    Cached with st.cache_resource so the artifacts are downloaded and
    deserialized only once per server process.

    Returns:
        tuple: (tokenizer, params) — params is a dict that downstream code
        reads "max_len" from for sequence padding.
    """
    tokenizer_path = hf_hub_download(
        repo_id=repo_id,
        filename="tokenizer.pkl"
    )
    params_path = hf_hub_download(
        repo_id=repo_id,
        filename="params.pkl"
    )
    # NOTE(review): pickle.load executes arbitrary code from the payload;
    # acceptable only because this Hub repo is first-party/trusted.
    with open(tokenizer_path, "rb") as f:
        tokenizer = pickle.load(f)
    with open(params_path, "rb") as f:
        params = pickle.load(f)
    return tokenizer, params
@st.cache_resource
def load_topic_models():
    """Download and load the per-sentiment FASTopic models.

    Cached with st.cache_resource so both pickled models are fetched and
    loaded once per server process, not on every rerun.

    Returns:
        tuple: (neg_model, pos_model) — topic models applied to negative
        and positive reviews respectively.
    """
    neg_path = hf_hub_download(
        repo_id=repo_id,
        filename="fastopic_negative_model.pkl"
    )
    pos_path = hf_hub_download(
        repo_id=repo_id,
        filename="fastopic_positive_model.pkl"
    )
    # NOTE(review): joblib.load unpickles — same trust caveat as the
    # tokenizer artifacts; repo is first-party/trusted.
    neg_model = joblib.load(neg_path)
    pos_model = joblib.load(pos_path)
    return neg_model, pos_model
# ============================================
# Load all resources once
# ============================================
# Materialize every artifact at import time so the first user interaction
# does not pay the download/deserialization cost mid-request. The three
# loads are independent of each other.
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()
sentiment_model = load_sentiment_model()
# Sequence length the tokenizer/model pipeline was trained with.
max_len = params["max_len"]
# ============================================
# Preprocessing Function (NLTK)
# ============================================
# Negation words are kept OUT of the stopword list: dropping them would
# flip the polarity of phrases like "not good".
negations = {"not", "no", "never"}
stpwrds_en = set(stopwords.words("english")).difference(negations)
stemmer = PorterStemmer()
# Domain normalization map applied token-by-token before stemming:
# airline synonyms plus irregular verb forms the Porter stemmer cannot fold.
replacements = {
    "sia": "sq",
    "flown": "fly",
    "flew": "fly",
    "alway": "always",
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",
    "aircraft": "plane",
}
def text_preprocessing(text):
    """Normalize a raw review into a stemmed, space-joined token string.

    Pipeline: lowercase -> newline cleanup -> strip -> drop non-alphanumerics
    -> tokenize -> apply domain replacements -> remove stopwords -> stem.

    Args:
        text (str): raw review text.

    Returns:
        str: space-joined preprocessed tokens, or the sentinel "emptytext"
        when nothing survives filtering (keeps the downstream tokenizer
        from seeing an empty string).
    """
    text = text.lower()
    # Bug fix: the original pattern r"\\n" matched only a LITERAL
    # backslash-n two-character sequence; also collapse real newlines.
    text = re.sub(r"\\n|\n", " ", text)
    text = text.strip()
    # Keep only lowercase letters, digits and whitespace.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    tokens = [replacements.get(word, word) for word in tokens]
    tokens = [word for word in tokens if word not in stpwrds_en]
    tokens = [stemmer.stem(word) for word in tokens]
    if not tokens:
        return "emptytext"
    return ' '.join(tokens)
# ============================================
# Streamlit App
# ============================================
def run():
    """Render the ACRE demo page.

    Collects a review through a form, runs the text through preprocessing,
    predicts sentiment with the Keras model, then reports topics from the
    FASTopic model matching the predicted sentiment.
    """
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(
        """
    This section will help you understand how the **ACRE** system works.
    Simply fill in the form below with either a dummy or real customer review, and the system will:
    1. **Preprocess** your review text (cleaning, tokenization, and stemming).
    2. **Predict sentiment** (Positive or Negative) along with a confidence score.
    3. **Identify the most relevant topic** associated with the review, based on the predicted sentiment.
    Use this tool to simulate how Singapore Airlines can transform raw customer feedback into **structured, data-driven insights**.
    """
    )
    # Form batches all inputs so the models run only on explicit submit.
    with st.form(key='SQ-sentiment-analysis'):
        date = st.date_input("Review Date")
        platform = st.selectbox('Review Platform', ('Mobile', 'Desktop'), index=0)
        rating = st.number_input('Rating', min_value=0, max_value=5, value=3, step=1)
        st.markdown('---')
        text = st.text_input('Customer Review', value='--customer review--')
        title = st.text_input('Review Title', value='--review title--')
        vote = st.slider('Helpful Vote', min_value=0, max_value=200, value=50, step=1)
        st.markdown('---')
        submitted = st.form_submit_button('Predict')
    # NOTE(review): source formatting was lost; the submit branch is placed
    # after the form block (standard Streamlit pattern) — confirm against
    # the original layout.
    if submitted:
        st.markdown("---")
        st.write("### Input Data")
        # Echo the raw inputs back as a one-row table. Only `text` feeds the
        # models; the other fields are display-only context.
        data_inf = {
            'published_date': date,
            'published_platform': platform,
            'rating': rating,
            'type': 'Review',
            'text': text,
            'title': title,
            'helpful_votes': vote
        }
        st.dataframe(pd.DataFrame([data_inf]))
        # Preprocess: clean/stem, map to the trained vocabulary, pad to the
        # fixed length the model was trained with.
        processed = text_preprocessing(text)
        seq = tokenizer.texts_to_sequences([processed])
        padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
        # Sentiment Prediction: argmax over class probabilities;
        # assumes a 2-class softmax output ordered [Negative, Positive]
        # per label_map — TODO confirm against training code.
        pred_probs = sentiment_model.predict(padded)
        pred_class = np.argmax(pred_probs, axis=1)[0]
        confidence = float(np.max(pred_probs))
        label_map = {0: "Negative", 1: "Positive"}
        sentiment_label = label_map[pred_class]
        st.write("### Sentiment Prediction")
        # Red for negative, green for positive.
        if sentiment_label == "Negative":
            st.markdown(f"<h3 style='color:red;'>Predicted Sentiment: {sentiment_label}</h3>", unsafe_allow_html=True)
        else:
            st.markdown(f"<h3 style='color:green;'>Predicted Sentiment: {sentiment_label}</h3>", unsafe_allow_html=True)
        st.write(f"**Confidence:** {confidence:.2f}")
        # Topic Prediction: route the RAW text (not the preprocessed form)
        # to the topic model matching the predicted sentiment.
        st.write("### Topic Modeling")
        if sentiment_label == "Negative":
            topics, probs = topic_model_neg.transform([text])
            st.write("**Using Negative Model**")
            st.markdown(f"<p style='color:red;'>Topic ID(s): {topics}</p>", unsafe_allow_html=True)
        else:
            topics, probs = topic_model_pos.transform([text])
            st.write("**Using Positive Model**")
            st.markdown(f"<p style='color:green;'>Topic ID(s): {topics}</p>", unsafe_allow_html=True)
        st.write(f"**Probabilities:** {probs.tolist()}")
# ============================================
# Run App
# ============================================
# Standard script entry point: render the app when executed directly.
if __name__ == "__main__":
    run()