Sentiment_Analysis_And_Topic_Modelling

Sleeping

App Files Files Community

hanantonio committed on Sep 10, 2025

Commit

1dffecd

verified ·

1 Parent(s): 6a2eee4

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +2 -0
src/Negative - Wordcloud.png +3 -0
src/Positive - Wordcloud.png +3 -0
src/prediction_compile.py +89 -48

.gitattributes CHANGED Viewed

@@ -39,3 +39,5 @@ src/Positive[[:space:]]-[[:space:]]Topic[[:space:]]Activities[[:space:]]Over[[:s
 src/src/best_model.keras filter=lfs diff=lfs merge=lfs -text
 src/src/Negative[[:space:]]-[[:space:]]Topic[[:space:]]Activities[[:space:]]Over[[:space:]]Time.png filter=lfs diff=lfs merge=lfs -text
 src/src/Positive[[:space:]]-[[:space:]]Topic[[:space:]]Activities[[:space:]]Over[[:space:]]Time.png filter=lfs diff=lfs merge=lfs -text

 src/src/best_model.keras filter=lfs diff=lfs merge=lfs -text
 src/src/Negative[[:space:]]-[[:space:]]Topic[[:space:]]Activities[[:space:]]Over[[:space:]]Time.png filter=lfs diff=lfs merge=lfs -text
 src/src/Positive[[:space:]]-[[:space:]]Topic[[:space:]]Activities[[:space:]]Over[[:space:]]Time.png filter=lfs diff=lfs merge=lfs -text
+src/Negative[[:space:]]-[[:space:]]Wordcloud.png filter=lfs diff=lfs merge=lfs -text
+src/Positive[[:space:]]-[[:space:]]Wordcloud.png filter=lfs diff=lfs merge=lfs -text

src/Negative - Wordcloud.png ADDED Viewed

Git LFS Details

SHA256: 7a899012e1cee65f0d4f7a137f8b5e25c3d3cc8fc09bb814a768bfc6c3bf68ea
Pointer size: 131 Bytes
Size of remote file: 282 kB

src/Positive - Wordcloud.png ADDED Viewed

Git LFS Details

SHA256: a0e0d0c36a41c744070fe168978381c744e40e717efa1d69926b48f1e89548b0
Pointer size: 131 Bytes
Size of remote file: 308 kB

src/prediction_compile.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import streamlit as st
-import os
 import re
 import pickle
 import joblib
 import nltk
 import numpy as np
 import pandas as pd
 from tensorflow.keras.preprocessing.sequence import pad_sequences
@@ -11,34 +13,33 @@ from tensorflow import keras
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import PorterStemmer
-from huggingface_hub import hf_hub_download
-# =============================================
-# Setup NLTK with container-safe path
-# =============================================
 nltk_data_path = os.path.join("/tmp", "nltk_data")
 os.makedirs(nltk_data_path, exist_ok=True)
 nltk.data.path.append(nltk_data_path)
 nltk.download("stopwords", download_dir=nltk_data_path)
 nltk.download("punkt", download_dir=nltk_data_path)
-# =============================================
-# HF Hub repo
-# =============================================
-repo_id = "BesottenJenny/acre-sentiment-models"
-# =============================================
-# Cached loading functions
-# =============================================
 @st.cache_resource
 def load_sentiment_model():
-    path = hf_hub_download(repo_id=repo_id, filename="best_model.keras")
     return keras.models.load_model(path)
 @st.cache_resource
 def load_tokenizer_params():
-    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.pkl")
-    params_path = hf_hub_download(repo_id=repo_id, filename="params.pkl")
     with open(tokenizer_path, "rb") as f:
         tokenizer = pickle.load(f)
     with open(params_path, "rb") as f:
@@ -47,21 +48,20 @@ def load_tokenizer_params():
 @st.cache_resource
 def load_topic_models():
-    neg_path = hf_hub_download(repo_id=repo_id, filename="fastopic_negative_model.pkl")
-    pos_path = hf_hub_download(repo_id=repo_id, filename="fastopic_positive_model.pkl")
     neg_model = joblib.load(neg_path)
     pos_model = joblib.load(pos_path)
     return neg_model, pos_model
-# Load models
 sentiment_model = load_sentiment_model()
 tokenizer, params = load_tokenizer_params()
 topic_model_neg, topic_model_pos = load_topic_models()
 max_len = params["max_len"]
-# =============================================
-# Text preprocessing
-# =============================================
 negations = {"not", "no", "never"}
 stpwrds_en = set(stopwords.words("english")) - negations
 stemmer = PorterStemmer()
@@ -82,7 +82,7 @@ replacements = {
 def text_preprocessing(text):
     text = text.lower()
-    text = re.sub(r"\n", " ", text)
     text = text.strip()
     text = re.sub(r'[^a-z0-9\s]', ' ', text)
     tokens = word_tokenize(text)
@@ -91,45 +91,86 @@ def text_preprocessing(text):
     tokens = [stemmer.stem(word) for word in tokens]
     return "emptytext" if len(tokens) == 0 else ' '.join(tokens)
-# =============================================
-# Streamlit App
-# =============================================
 def run():
-    st.title("ACRE - Automated Customer Review Analysis")
     st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
     st.markdown(
-        "Enter a customer review below to predict sentiment and topic."
     )
-    with st.form(key='review_form'):
-        text = st.text_area("Customer Review", value="--customer review--")
-        submitted = st.form_submit_button("Predict")
     if submitted:
         processed = text_preprocessing(text)
         seq = tokenizer.texts_to_sequences([processed])
         padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
-        # Sentiment
         pred_probs = sentiment_model.predict(padded)
-        pred_class = np.argmax(pred_probs, axis=1)[0]
-        confidence = float(np.max(pred_probs))
-        label_map = {0: "Negative", 1: "Positive"}
-        sentiment_label = label_map[pred_class]
-        st.write(f"**Sentiment:** {sentiment_label} (Confidence: {confidence:.2f})")
-        # Topic Modeling
-        result = topic_model_neg.transform([text]) if sentiment_label == "Negative" else topic_model_pos.transform([text])
-        if isinstance(result, tuple) and len(result) == 2:
-            topics, probs = result
-            st.write(f"**Topic ID(s):** {topics}")
-            st.write(f"**Probabilities:** {probs.tolist()}")
         else:
-            topics = result
-            st.write(f"**Topic ID(s):** {topics}")
-            st.write("**Probabilities:** Not available")
 if __name__ == "__main__":
     run()

+# prediction_compile.py
+# Import Libraries
 import streamlit as st
 import re
 import pickle
 import joblib
 import nltk
+import os
 import numpy as np
 import pandas as pd
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import PorterStemmer
+# --- Setup NLTK ---
 # Store NLTK data under /tmp/nltk_data: create the directory if needed and
 # add it to NLTK's search path so the downloads below can be found.
 nltk_data_path = os.path.join("/tmp", "nltk_data")
 os.makedirs(nltk_data_path, exist_ok=True)
 nltk.data.path.append(nltk_data_path)
 # These downloads run at import time, i.e. every time the module is executed.
 # "stopwords" backs stopwords.words("english") used further down.
 # NOTE(review): "punkt" is presumably for word_tokenize; newer NLTK releases
 # look up "punkt_tab" instead -- confirm against the pinned NLTK version.
 nltk.download("stopwords", download_dir=nltk_data_path)
 nltk.download("punkt", download_dir=nltk_data_path)
+# --- Loading Info ---
 # Module-level notice written with raw HTML (unsafe_allow_html=True). The
 # markup is a fixed literal, so no user input reaches the HTML here.
 # NOTE(review): nothing visible in this file removes the notice once the
 # models have loaded -- confirm that is intended.
+st.markdown(
+    '<p style="color:gray; font-size:14px; font-style:italic;">'
+    'Loading models and resources from local storage... '
+    'Please be patient and DO NOT refresh the page :)'
+    '</p>',
+    unsafe_allow_html=True
+)
+# --- Cached Loading Functions ---
 # Load the Keras sentiment model from a file inside ./src and return it.
 # Wrapped in st.cache_resource (Streamlit's resource cache).
 # NOTE(review): the path is relative, so it resolves against the process
 # working directory -- confirm the app is always launched from the repo root.
 # NOTE(review): `keras` is not imported in the lines shown on this side of the
 # diff; the removed side references `from tensorflow import keras` -- confirm
 # that import is still present in the file.
 @st.cache_resource
 def load_sentiment_model():
+    path = "./src/best_model.keras"
     return keras.models.load_model(path)
 @st.cache_resource
 def load_tokenizer_params():
+    tokenizer_path = "./src/tokenizer.pkl"
+    params_path = "./src/params.pkl"
     with open(tokenizer_path, "rb") as f:
         tokenizer = pickle.load(f)
     with open(params_path, "rb") as f:
 # Load the two topic models (negative and positive) from ./src and return
 # them as a (neg_model, pos_model) tuple, in that order.
 # NOTE(review): joblib.load is pickle-based and can execute arbitrary code
 # while loading -- only ship model files from a trusted source.
 @st.cache_resource
 def load_topic_models():
+    neg_path = "./src/fastopic_negative_model.pkl"
+    pos_path = "./src/fastopic_positive_model.pkl"
     neg_model = joblib.load(neg_path)
     pos_model = joblib.load(pos_path)
     return neg_model, pos_model
+# --- Load all resources once ---
 # Module-level globals used by run(); the loaders are called at import time.
 sentiment_model = load_sentiment_model()
 tokenizer, params = load_tokenizer_params()
 topic_model_neg, topic_model_pos = load_topic_models()
 # params must provide a "max_len" entry; it is used as the padding length
 # passed to pad_sequences in run().
 max_len = params["max_len"]
+# --- Preprocessing Function ---
 # English stopword set with the negation words removed from it, so that
 # "not", "no" and "never" are not treated as stopwords.
 negations = {"not", "no", "never"}
 stpwrds_en = set(stopwords.words("english")) - negations
 stemmer = PorterStemmer()
 # NOTE(review): this diff view elides lines between hunks (a `replacements`
 # dict before this function, and some lines between tokenizing and stemming),
 # so the steps documented below are only the ones visible here.
 #
 # text_preprocessing(text): normalise a review string into a space-joined
 # string of stemmed tokens, or the sentinel "emptytext" when no tokens remain.
 def text_preprocessing(text):
     text = text.lower()
     # NOTE(review): in a raw string, r"\\n" matches a literal backslash followed
     # by "n", not a newline character; the removed line used r"\n". Confirm
     # which one is intended.
+    text = re.sub(r"\\n", " ", text)
     text = text.strip()
     # Replace every character that is not a-z, 0-9 or whitespace with a space.
     text = re.sub(r'[^a-z0-9\s]', ' ', text)
     tokens = word_tokenize(text)
     tokens = [stemmer.stem(word) for word in tokens]
     return "emptytext" if len(tokens) == 0 else ' '.join(tokens)
+# --- Topic Labels ---
 # Display names for topic ids. Keys are 1-based: run() looks them up with
 # argmax index + 1 and falls back to "Unknown Topic" for any other id.
 # NOTE(review): the id-to-label mapping cannot be checked against the topic
 # models from this file -- confirm it matches the trained models.
+topic_labels_neg = {
+    1: "meal and entertainment service",
+    2: "refund, cancellation, and booking tickets policy",
+    3: "business class/premium facility",
+    4: "baggage limits and price",
+    5: "hidden charges"
+}
+topic_labels_pos = {
+    1: "good food and crew service",
+    2: "excellent economy seat",
+    3: "refund and cancellation policy",
+    4: "meals quality",
+    5: "accommodation and assistance"
+}
+# --- Streamlit App ---
 # Render the prediction page: a one-field form, then (after submit) the
 # predicted sentiment with its confidence and the top topic from the topic
 # model that matches the predicted sentiment.
 def run():
     st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
     st.markdown(
+    """
+    Enter a customer review below to predict sentiment and topic.
+    """
     )
     # NOTE(review): the placeholder '--customer review--' is a real default
     # value; submitting without editing scores the placeholder itself.
+    with st.form(key='SQ-sentiment-analysis'):
+        text = st.text_input('Customer Review', value='--customer review--')
+        submitted = st.form_submit_button('Predict')
     if submitted:
+        # Preprocess
         processed = text_preprocessing(text)
         seq = tokenizer.texts_to_sequences([processed])
         padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
+        # Sentiment Prediction
         pred_probs = sentiment_model.predict(padded)
         # Two output layouts are handled: a single column is read as
         # P(positive) with a 0.5 threshold; otherwise the argmax column is
         # mapped through label_map (0 -> Negative, 1 -> Positive).
         # NOTE(review): label_map has only keys 0 and 1, so a model with more
         # than two output columns could raise KeyError -- confirm output width.
+        if pred_probs.shape[1] == 1:
+            # Binary sigmoid
+            p_pos = float(pred_probs[0][0])
+            p_neg = 1 - p_pos
+            sentiment_label = "Positive" if p_pos >= 0.5 else "Negative"
+            confidence = max(p_pos, p_neg)
+        else:
+            # Softmax
+            pred_class = np.argmax(pred_probs, axis=1)[0]
+            label_map = {0: "Negative", 1: "Positive"}
+            sentiment_label = label_map[pred_class]
+            confidence = float(pred_probs[0][pred_class])
         # Only fixed strings and a formatted float are interpolated into the
         # HTML below; the user's review text is never embedded in it.
+        color = "green" if sentiment_label == "Positive" else "red"
+        st.markdown(
+            f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
+            f"Predicted Sentiment: {sentiment_label} "
+            f"(Confidence: {confidence:.2f})</p>",
+            unsafe_allow_html=True
+        )
+        # Topic Prediction
+        st.write("### Topic Modeling")
         # NOTE(review): the raw `text`, not `processed`, is passed to the topic
         # models -- confirm they were trained on unprocessed text.
         # argmax is 0-based; + 1 converts it to the 1-based label keys.
+        if sentiment_label == "Negative":
+            probs = topic_model_neg.transform([text])[0]
+            topic_id = int(np.argmax(probs)) + 1
+            topic_name = topic_labels_neg.get(topic_id, "Unknown Topic")
+            st.write("**Using Negative Model**")
         else:
+            probs = topic_model_pos.transform([text])[0]
+            topic_id = int(np.argmax(probs)) + 1
+            topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
+            st.write("**Using Positive Model**")
+        # Output
+        st.markdown(
+            f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
+            f"Topic {topic_id}: {topic_name}</p>",
+            unsafe_allow_html=True
+        )
         # assumes probs exposes .tolist() (e.g. a NumPy array) -- TODO confirm
+        st.write("**Probabilities:**", probs.tolist())
 if __name__ == "__main__":
     run()