"""Streamlit page: sentiment and topic prediction for SQ customer reviews.

Importing this module downloads the required NLTK resources, loads the
sentiment model, tokenizer, and the two topic models from ``./src``, and
exposes :func:`run`, which renders the prediction form.
"""

# Import Libraries
import os
import pickle
import re

import joblib
import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences

# --- Setup NLTK ---
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)
# "punkt_tab" is what word_tokenize looks up in recent NLTK releases;
# "punkt" is kept for older releases.
for _resource in ("stopwords", "punkt", "punkt_tab"):
    nltk.download(_resource, download_dir=nltk_data_path)

# --- Loading Info ---
# NOTE(review): the first and last fragments contain only a newline while
# unsafe_allow_html is enabled, so wrapper markup may have been intended
# here -- confirm.
st.markdown(
    "\n"
    "Loading models and resources from local storage... "
    "Please be patient and DO NOT refresh the page :)"
    "\n",
    unsafe_allow_html=True,
)


# --- Cached Loading Functions ---
@st.cache_resource
def load_sentiment_model():
    """Load the Keras sentiment model from ``./src/best_model.keras``."""
    path = "./src/best_model.keras"
    return keras.models.load_model(path)


@st.cache_resource
def load_tokenizer_params():
    """Load the fitted tokenizer and the parameter dict from ``./src``.

    Returns:
        A ``(tokenizer, params)`` tuple, in that order.
    """
    tokenizer_path = "./src/tokenizer.pkl"
    params_path = "./src/params.pkl"
    # SECURITY: unpickling executes arbitrary code; these files must only
    # ever come from trusted local storage.
    with open(tokenizer_path, "rb") as f:
        tokenizer = pickle.load(f)
    with open(params_path, "rb") as f:
        params = pickle.load(f)
    return tokenizer, params


@st.cache_resource
def load_topic_models():
    """Load the negative and positive topic models from ``./src``.

    Returns:
        A ``(neg_model, pos_model)`` tuple, in that order.
    """
    neg_path = "./src/fastopic_negative_model_10.pkl"
    pos_path = "./src/fastopic_positive_model_10.pkl"
    # SECURITY: joblib.load unpickles; same trust requirement as above.
    neg_model = joblib.load(neg_path)
    pos_model = joblib.load(pos_path)
    return neg_model, pos_model


# --- Load all resources once ---
sentiment_model = load_sentiment_model()
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()
max_len = params["max_len"]

# --- Preprocessing Function (NLTK) ---
# Negation words are removed from the stopword set so they stay in the text.
negations = {"not", "no", "never"}
stpwrds_en = set(stopwords.words("english")) - negations
stemmer = PorterStemmer()
# Token-level substitutions applied before stopword removal and stemming.
replacements = {
    "sia": "sq",
    "flown": "fly",
    "flew": "fly",
    "alway": "always",
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",
    "aircraft": "plane",
}


def text_preprocessing(text: str) -> str:
    """Normalise a review into a space-joined string of stemmed tokens.

    Steps: lowercase, replace literal backslash-n sequences with a space,
    strip, replace every character other than ``a-z``, ``0-9`` and
    whitespace with a space, tokenize, apply ``replacements``, drop
    stopwords (negations are kept), and stem.

    Args:
        text: Raw review text.

    Returns:
        The processed tokens joined by single spaces, or the placeholder
        ``"emptytext"`` when no token survives.
    """
    text = text.lower()
    # The pattern matches a backslash followed by "n" (an escaped newline
    # in the text), not an actual newline character.
    text = re.sub(r"\\n", " ", text)
    text = text.strip()
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    tokens = [replacements.get(word, word) for word in tokens]
    tokens = [word for word in tokens if word not in stpwrds_en]
    tokens = [stemmer.stem(word) for word in tokens]
    if not tokens:
        return "emptytext"
    return ' '.join(tokens)


# --- Topic Labels ---
topic_labels_neg = {
    0: "Service Attitude",
    1: "Ticket Price",
    2: "In-Flight Accommodation",
    3: "Boarding & Luggage Issues",
    4: "Refund & Payment Difficulties",
    5: "Meal Quality",
    6: "Accessibility & Assistance",
    7: "Safety & Hygiene",
    8: "Seat Comfort",
    9: "Quality of Amenities",
}

topic_labels_pos = {
    0: "Destination-based compliment",
    1: "Seat & cabin comfort",
    2: "Destination-based compliment",
    3: "Transit accommodation",
    4: "Meals & in-flight services",
    5: "Meals & in-flight services",
    6: "Seat & cabin comfort / Aircraft condition",
    7: "Destination-based compliment",
    8: "Miscellaneous experiences",
    9: "Destination-based compliment",
}

# Introductory copy shown above the form. Kept flush-left so that no line
# can be read as an indented Markdown code block.
_INTRO_MARKDOWN = """
This section will help you understand how the **ACRE** system works.
Simply fill in the form below with either a dummy or real customer review, and the system will:

1. **Preprocess** your review text (cleaning, tokenization, and stemming).
2. **Predict sentiment** (Positive or Negative) along with a confidence score.
3. **Identify the most relevant topic** associated with the review, based on the predicted sentiment.

Use this tool to simulate how Singapore Airlines can transform raw customer feedback into **structured, data-driven insights**.
"""


def _predict_sentiment(padded):
    """Return ``(label, confidence)`` for one padded token sequence.

    A single output column is read as the positive-class probability with
    a 0.5 threshold; otherwise the arg-max column is mapped through
    ``{0: "Negative", 1: "Positive"}``.
    """
    pred_probs = sentiment_model.predict(padded)

    if pred_probs.shape[1] == 1:  # Binary sigmoid
        p_pos = float(pred_probs[0][0])
        if p_pos >= 0.5:
            return "Positive", p_pos
        return "Negative", 1 - p_pos

    # Softmax
    pred_class = int(np.argmax(pred_probs, axis=1)[0])
    label_map = {0: "Negative", 1: "Positive"}
    return label_map[pred_class], float(pred_probs[0][pred_class])


def _predict_topic(text, sentiment_label):
    """Return ``(topic_id, topic_name, probs, banner)`` for a review.

    The negative topic model and labels are used when ``sentiment_label``
    is ``"Negative"``; the positive ones are used otherwise.
    """
    if sentiment_label == "Negative":
        model, labels = topic_model_neg, topic_labels_neg
        banner = "**Using Negative Model**"
    else:
        model, labels = topic_model_pos, topic_labels_pos
        banner = "**Using Positive Model**"

    # NOTE(review): the topic model receives the raw review, not the
    # output of text_preprocessing -- confirm this matches how the topic
    # models were trained.
    probs = model.transform([text])[0]
    topic_id = int(np.argmax(probs))
    topic_name = labels.get(topic_id, "Unknown Topic")
    return topic_id, topic_name, probs, banner


# --- Streamlit App ---
def run():
    """Render the review form and, on submit, the sentiment and topic results."""
    # st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(_INTRO_MARKDOWN)

    with st.form(key='SQ-sentiment-analysis'):
        date = st.date_input("Review Date")
        platform = st.selectbox('Review Platform', ('Mobile', 'Desktop'), index=0)
        rating = st.number_input('Rating', min_value=0, max_value=5, value=3, step=1)
        st.markdown('---')

        text = st.text_input('Customer Review', value='--customer review--')
        title = st.text_input('Review Title', value='--review title--')
        vote = st.slider('Helpful Vote', min_value=0, max_value=200, value=50, step=1)
        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    if not submitted:
        return

    st.markdown("---")
    st.write("### Input Data")

    data_inf = {
        'published_date': date,
        'published_platform': platform,
        'rating': rating,
        'type': 'Review',
        'text': text,
        'title': title,
        'helpful_votes': vote,
    }
    st.dataframe(pd.DataFrame([data_inf]))

    # Preprocess (only the 'text' field is used for prediction)
    processed = text_preprocessing(text)
    seq = tokenizer.texts_to_sequences([processed])
    padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")

    # Sentiment Prediction
    sentiment_label, confidence = _predict_sentiment(padded)

    # --- Sentiment Output with Color ---
    # The colour is applied through inline HTML, which is why
    # unsafe_allow_html is required; only model-derived values are
    # interpolated, never the user's text.
    color = "green" if sentiment_label == "Positive" else "red"
    st.markdown(
        f"<span style='color:{color};'>"
        f"Predicted Sentiment: {sentiment_label} "
        f"(Confidence: {confidence:.2f})"
        "</span>\n",
        unsafe_allow_html=True,
    )

    # Topic Prediction
    st.write("### Topic Modeling")
    topic_id, topic_name, probs, banner = _predict_topic(text, sentiment_label)
    st.write(banner)

    # --- Topic Output ---
    st.markdown(
        f"Topic {topic_id}: {topic_name}\n",
        unsafe_allow_html=True,
    )

    # The full probability vector is still displayed
    st.write("**Probabilities:**", probs.tolist())