Sentiment_Analysis_And_Topic_Modelling

Sleeping

App Files Files Community

hanantonio committed on Sep 11, 2025

Commit

d6da013

verified ·

1 Parent(s): d82d8c2

Upload 3 files

Browse files

Files changed (1) hide show

src/prediction_compile.py +26 -12

src/prediction_compile.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# prediction_compile.py
 # Import Libraries
 import streamlit as st
 import re
@@ -61,7 +60,7 @@ topic_model_neg, topic_model_pos = load_topic_models()
 max_len = params["max_len"]
-# --- Preprocessing Function ---
 negations = {"not", "no", "never"}
 stpwrds_en = set(stopwords.words("english")) - negations
 stemmer = PorterStemmer()
@@ -89,7 +88,9 @@ def text_preprocessing(text):
     tokens = [replacements.get(word, word) for word in tokens]
     tokens = [word for word in tokens if word not in stpwrds_en]
     tokens = [stemmer.stem(word) for word in tokens]
-    return "emptytext" if len(tokens) == 0 else ' '.join(tokens)
 # --- Topic Labels ---
 topic_labels_neg = {
@@ -110,11 +111,19 @@ topic_labels_pos = {
 # --- Streamlit App ---
 def run():
     st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
     st.markdown(
     """
-    Enter a customer review below to predict sentiment and topic.
     """
     )
@@ -143,20 +152,24 @@ def run():
         }
         st.dataframe(pd.DataFrame([data_inf]))
-        # Preprocess
         processed = text_preprocessing(text)
         seq = tokenizer.texts_to_sequences([processed])
         padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
         # Sentiment Prediction
         pred_probs = sentiment_model.predict(padded)
-        if pred_probs.shape[1] == 1:
             # Binary sigmoid
             p_pos = float(pred_probs[0][0])
             p_neg = 1 - p_pos
-            sentiment_label = "Positive" if p_pos >= 0.5 else "Negative"
-            confidence = max(p_pos, p_neg)
         else:
             # Softmax
             pred_class = np.argmax(pred_probs, axis=1)[0]
@@ -164,6 +177,7 @@ def run():
             sentiment_label = label_map[pred_class]
             confidence = float(pred_probs[0][pred_class])
         color = "green" if sentiment_label == "Positive" else "red"
         st.markdown(
             f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
@@ -185,12 +199,12 @@ def run():
             topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
             st.write("**Using Positive Model**")
-        # Output
         st.markdown(
             f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
             f"Topic {topic_id}: {topic_name}</p>",
             unsafe_allow_html=True
         )
-        st.write("**Probabilities:**", probs.tolist())

 # Import Libraries
 import streamlit as st
 import re
 max_len = params["max_len"]
+# --- Preprocessing Function (NLTK) ---
 negations = {"not", "no", "never"}
 stpwrds_en = set(stopwords.words("english")) - negations
 stemmer = PorterStemmer()
     tokens = [replacements.get(word, word) for word in tokens]
     tokens = [word for word in tokens if word not in stpwrds_en]
     tokens = [stemmer.stem(word) for word in tokens]
+    if len(tokens) == 0:
+        return "emptytext"
+    return ' '.join(tokens)
 # --- Topic Labels ---
 topic_labels_neg = {
 # --- Streamlit App ---
 def run():
+    # st.title("ACRE - Automated Customer Review Analysis")
     st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
     st.markdown(
     """
+    This section will help you understand how the **ACRE** system works.
+    Simply fill in the form below with either a dummy or real customer review, and the system will:
+    1. **Preprocess** your review text (cleaning, tokenization, and stemming).
+    2. **Predict sentiment** (Positive or Negative) along with a confidence score.
+    3. **Identify the most relevant topic** associated with the review, based on the predicted sentiment.
+    Use this tool to simulate how Singapore Airlines can transform raw customer feedback into **structured, data-driven insights**.
     """
     )
         }
         st.dataframe(pd.DataFrame([data_inf]))
+        # Preprocess (uses the 'text' column)
         processed = text_preprocessing(text)
         seq = tokenizer.texts_to_sequences([processed])
         padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
         # Sentiment Prediction
         pred_probs = sentiment_model.predict(padded)
+        if pred_probs.shape[1] == 1:
             # Binary sigmoid
             p_pos = float(pred_probs[0][0])
             p_neg = 1 - p_pos
+            if p_pos >= 0.5:
+                sentiment_label = "Positive"
+                confidence = p_pos
+            else:
+                sentiment_label = "Negative"
+                confidence = p_neg
         else:
             # Softmax
             pred_class = np.argmax(pred_probs, axis=1)[0]
             sentiment_label = label_map[pred_class]
             confidence = float(pred_probs[0][pred_class])
+        # --- Sentiment Output with Color ---
         color = "green" if sentiment_label == "Positive" else "red"
         st.markdown(
             f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
             topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
             st.write("**Using Positive Model**")
+        # --- Topic Output with Color ---
         st.markdown(
             f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
             f"Topic {topic_id}: {topic_name}</p>",
             unsafe_allow_html=True
         )
+        # Probabilities are still displayed
+        st.write("**Probabilities:**", probs.tolist())