# prediction_compile.py
"""Streamlit page: sentiment & topic prediction for Singapore Airlines (SQ)
customer reviews.

Pipeline:
    1. Preprocess the raw review text (lowercase, strip punctuation,
       tokenize, normalize common variants, remove stopwords, stem).
    2. Predict sentiment with a Keras model (binary sigmoid or 2-class
       softmax output are both handled).
    3. Route the *raw* review text to a FASTopic model fitted on negative
       or positive reviews, and report the most probable topic.

NOTE(review): this file was recovered from a whitespace-mangled copy in
which the HTML fragments passed to ``st.markdown`` had been stripped.
The ``<div>`` wrappers below are reconstructions — confirm the exact
styling against the deployed app.
"""

# Import Libraries
import streamlit as st
import re
import pickle
import joblib
import nltk
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# --- Setup NLTK ---
# Download corpora into /tmp so the app also works on read-only /
# ephemeral deployments (e.g. Streamlit Cloud containers).
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)
nltk.download("stopwords", download_dir=nltk_data_path)
nltk.download("punkt", download_dir=nltk_data_path)

# --- Loading Info ---
st.markdown(
    '<div style="text-align: center;">'
    'Loading models and resources from local storage... '
    'Please be patient and DO NOT refresh the page :)'
    '</div>',
    unsafe_allow_html=True
)


# --- Cached Loading Functions ---
@st.cache_resource
def load_sentiment_model():
    """Load the Keras sentiment model once per session (cached)."""
    path = "./src/best_model.keras"
    return keras.models.load_model(path)


@st.cache_resource
def load_tokenizer_params():
    """Load the fitted Keras tokenizer and its preprocessing params.

    Returns:
        tuple: (tokenizer, params) where ``params`` is a dict containing
        at least ``"max_len"`` (padding length used at training time).
    """
    tokenizer_path = "./src/tokenizer.pkl"
    params_path = "./src/params.pkl"
    with open(tokenizer_path, "rb") as f:
        tokenizer = pickle.load(f)
    with open(params_path, "rb") as f:
        params = pickle.load(f)
    return tokenizer, params


@st.cache_resource
def load_topic_models():
    """Load the FASTopic models for negative and positive reviews.

    Returns:
        tuple: (negative_model, positive_model).
    """
    neg_path = "./src/fastopic_negative_model.pkl"
    pos_path = "./src/fastopic_positive_model.pkl"
    neg_model = joblib.load(neg_path)
    pos_model = joblib.load(pos_path)
    return neg_model, pos_model


# --- Load all resources once ---
sentiment_model = load_sentiment_model()
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()
max_len = params["max_len"]

# --- Preprocessing Function ---
# Keep negations: "not good" and "good" must stay distinguishable.
negations = {"not", "no", "never"}
stpwrds_en = set(stopwords.words("english")) - negations
stemmer = PorterStemmer()
# Domain-specific token normalization applied before stemming.
replacements = {
    "sia": "sq",
    "flown": "fly",
    "flew": "fly",
    "alway": "always",
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",
    "aircraft": "plane",
}


def text_preprocessing(text):
    """Normalize a raw review into a stemmed, stopword-free token string.

    Args:
        text: Raw review text.

    Returns:
        str: Space-joined stemmed tokens, or the sentinel ``"emptytext"``
        when nothing survives preprocessing (keeps the tokenizer input
        non-empty).
    """
    text = text.lower()
    # BUG FIX: original used r"\\n", which matches a literal
    # backslash-n pair, not a newline; r"\n" collapses real newlines.
    text = re.sub(r"\n", " ", text)
    text = text.strip()
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    tokens = [replacements.get(word, word) for word in tokens]
    tokens = [word for word in tokens if word not in stpwrds_en]
    tokens = [stemmer.stem(word) for word in tokens]
    return "emptytext" if len(tokens) == 0 else ' '.join(tokens)


# --- Topic Labels ---
# Keys are 1-based: topic_id = argmax(probs) + 1 below.
topic_labels_neg = {
    1: "meal and entertainment service",
    2: "refund, cancellation, and booking tickets policy",
    3: "business class/premium facility",
    4: "baggage limits and price",
    5: "hidden charges",
}
topic_labels_pos = {
    1: "good food and crew service",
    2: "excellent economy seat",
    3: "refund and cancellation policy",
    4: "meals quality",
    5: "accommodation and assistance",
}


# --- Streamlit App ---
def run():
    """Render the prediction form and, on submit, show sentiment + topic."""
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(
        """
        Enter a customer review below to predict sentiment and topic.
        """
    )
    with st.form(key='SQ-sentiment-analysis'):
        text = st.text_input('Customer Review', value='--customer review--')
        submitted = st.form_submit_button('Predict')

    if submitted:
        # Preprocess
        processed = text_preprocessing(text)
        seq = tokenizer.texts_to_sequences([processed])
        padded = pad_sequences(seq, maxlen=max_len,
                               padding="post", truncating="post")

        # Sentiment Prediction — support both output-head shapes.
        pred_probs = sentiment_model.predict(padded)
        if pred_probs.shape[1] == 1:
            # Binary sigmoid: single probability of the positive class.
            p_pos = float(pred_probs[0][0])
            p_neg = 1 - p_pos
            sentiment_label = "Positive" if p_pos >= 0.5 else "Negative"
            confidence = max(p_pos, p_neg)
        else:
            # Softmax: two-class output, index 0 = Negative, 1 = Positive.
            pred_class = np.argmax(pred_probs, axis=1)[0]
            label_map = {0: "Negative", 1: "Positive"}
            sentiment_label = label_map[pred_class]
            confidence = float(pred_probs[0][pred_class])

        color = "green" if sentiment_label == "Positive" else "red"
        st.markdown(
            f'<div style="color: {color}; font-weight: bold;">'
            f"Predicted Sentiment: {sentiment_label} "
            f"(Confidence: {confidence:.2f})</div>",
            unsafe_allow_html=True
        )

        # Topic Prediction — note: topic models consume the RAW text,
        # not the preprocessed string (each model has its own pipeline).
        st.write("### Topic Modeling")
        if sentiment_label == "Negative":
            probs = topic_model_neg.transform([text])[0]
            topic_id = int(np.argmax(probs)) + 1
            topic_name = topic_labels_neg.get(topic_id, "Unknown Topic")
            st.write("**Using Negative Model**")
        else:
            probs = topic_model_pos.transform([text])[0]
            topic_id = int(np.argmax(probs)) + 1
            topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
            st.write("**Using Positive Model**")

        # Output
        st.markdown(
            f'<div style="font-weight: bold;">'
            f"Topic {topic_id}: {topic_name}</div>",
            unsafe_allow_html=True
        )
        st.write("**Probabilities:**", probs.tolist())


if __name__ == "__main__":
    run()