| import streamlit as st
|
| import os
|
| import re
|
| import pickle
|
| import joblib
|
| import nltk
|
| import numpy as np
|
| import pandas as pd
|
| from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| from tensorflow import keras
|
| from nltk.corpus import stopwords
|
| from nltk.tokenize import word_tokenize
|
| from nltk.stem import PorterStemmer
|
| from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
|
|
# --- One-time NLTK setup ---
# Streamlit re-executes this script on every user interaction, so the corpora
# live under /tmp and the download path is registered before any download.
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)

# quiet=True suppresses per-rerun download log spam; nltk.download is
# idempotent and skips packages already present in download_dir.
nltk.download("stopwords", download_dir=nltk_data_path, quiet=True)
nltk.download("punkt", download_dir=nltk_data_path, quiet=True)
|
|
|
|
|
|
|
|
|
| repo_id = "BesottenJenny/acre-sentiment-models"
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_sentiment_model():
    """Download the trained Keras sentiment classifier from the Hub and load it.

    Cached by Streamlit, so the download and deserialization happen once
    per server process rather than on every script rerun.
    """
    model_path = hf_hub_download(repo_id=repo_id, filename="best_model.keras")
    model = keras.models.load_model(model_path)
    return model
|
|
|
@st.cache_resource
def load_tokenizer_params():
    """Fetch and unpickle the fitted text tokenizer and preprocessing params.

    Returns:
        tuple: (tokenizer, params) — params is a dict of preprocessing
        settings (e.g. "max_len", read by the module-level code below).

    NOTE(review): pickle.load on downloaded artifacts is only acceptable
    because the Hub repo is trusted; never point repo_id at untrusted data.
    """
    paths = {
        name: hf_hub_download(repo_id=repo_id, filename=f"{name}.pkl")
        for name in ("tokenizer", "params")
    }
    loaded = {}
    for name, path in paths.items():
        with open(path, "rb") as fh:
            loaded[name] = pickle.load(fh)
    return loaded["tokenizer"], loaded["params"]
|
|
|
@st.cache_resource
def load_topic_models():
    """Fetch and load the two FASTopic models, one per sentiment polarity.

    Returns:
        tuple: (negative_topic_model, positive_topic_model)
    """
    def _fetch(filename):
        # joblib deserializes the pickled topic-model payload.
        return joblib.load(hf_hub_download(repo_id=repo_id, filename=filename))

    return (
        _fetch("fastopic_negative_model.pkl"),
        _fetch("fastopic_positive_model.pkl"),
    )
|
|
|
|
|
# Load every heavy artifact at import time; st.cache_resource makes these
# calls no-ops on Streamlit reruns after the first.
sentiment_model = load_sentiment_model()

tokenizer, params = load_tokenizer_params()

topic_model_neg, topic_model_pos = load_topic_models()

# Sequence length the sentiment model was trained with; used to pad inputs.
max_len = params["max_len"]
|
|
|
|
|
|
|
|
|
# Keep negation words out of the stopword list — they flip sentiment polarity.
negations = {"not", "no", "never"}

stpwrds_en = set(stopwords.words("english")) - negations

stemmer = PorterStemmer()



# Domain-specific token normalizations applied before stopword removal and
# stemming: airline jargon ("sia" is presumably an alternate abbreviation for
# "sq" — confirm) plus irregular word forms the Porter stemmer cannot conflate.
replacements = {

    "sia": "sq",

    "flown": "fly",

    "flew": "fly",

    "alway": "always",

    "boarding": "board",

    "told": "tell",

    "said": "say",

    "booked": "book",

    "paid": "pay",

    "well": "good",

    "aircraft": "plane"

}
|
|
|
def text_preprocessing(text):
    """Normalize a raw review into the stemmed token string the model expects.

    Pipeline: lowercase, collapse newlines, keep only [a-z0-9] and whitespace,
    tokenize, apply domain replacements, drop stopwords (negations kept),
    Porter-stem, and re-join with spaces. Returns the sentinel "emptytext"
    when no token survives, so downstream tokenization never sees "".
    """
    cleaned = re.sub(r"\n", " ", text.lower()).strip()
    cleaned = re.sub(r'[^a-z0-9\s]', ' ', cleaned)

    stemmed = [
        stemmer.stem(tok)
        for tok in (replacements.get(word, word) for word in word_tokenize(cleaned))
        if tok not in stpwrds_en
    ]
    return ' '.join(stemmed) if stemmed else "emptytext"
|
|
|
|
|
|
|
|
|
def run():
    """Render the ACRE page: collect a review, predict sentiment, then topics."""
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(
        "Enter a customer review below to predict sentiment and topic."
    )

    with st.form(key='review_form'):
        review_text = st.text_area("Customer Review", value="--customer review--")
        submitted = st.form_submit_button("Predict")

    if not submitted:
        return

    # Sentiment: preprocess -> integer sequence -> fixed-length pad -> predict.
    cleaned = text_preprocessing(review_text)
    padded = pad_sequences(
        tokenizer.texts_to_sequences([cleaned]),
        maxlen=max_len,
        padding="post",
        truncating="post",
    )

    # NOTE(review): argmax over axis 1 assumes a 2-unit softmax output head;
    # a single-sigmoid model would need a 0.5 threshold instead — confirm.
    pred_probs = sentiment_model.predict(padded)
    pred_class = int(np.argmax(pred_probs, axis=1)[0])
    confidence = float(np.max(pred_probs))
    sentiment_label = {0: "Negative", 1: "Positive"}[pred_class]

    st.write(f"**Sentiment:** {sentiment_label} (Confidence: {confidence:.2f})")

    # Topic modelling runs on the raw review text (not the preprocessed form),
    # routed to the model matching the predicted polarity.
    topic_model = topic_model_neg if sentiment_label == "Negative" else topic_model_pos
    topics, probs = topic_model.transform([review_text])

    st.write(f"**Topic ID(s):** {topics}")
    st.write(f"**Probabilities:** {probs.tolist()}")
|
|
|
# Entry point when executed directly (e.g. via `streamlit run <file>`).
if __name__ == "__main__":

    run()
|
|
|