hanantonio committed on
Commit
d6da013
·
verified ·
1 Parent(s): d82d8c2

Upload 3 files

Browse files
Files changed (1) hide show
  1. src/prediction_compile.py +26 -12
src/prediction_compile.py CHANGED
@@ -1,4 +1,3 @@
1
- # prediction_compile.py
2
  # Import Libraries
3
  import streamlit as st
4
  import re
@@ -61,7 +60,7 @@ topic_model_neg, topic_model_pos = load_topic_models()
61
 
62
  max_len = params["max_len"]
63
 
64
- # --- Preprocessing Function ---
65
  negations = {"not", "no", "never"}
66
  stpwrds_en = set(stopwords.words("english")) - negations
67
  stemmer = PorterStemmer()
@@ -89,7 +88,9 @@ def text_preprocessing(text):
89
  tokens = [replacements.get(word, word) for word in tokens]
90
  tokens = [word for word in tokens if word not in stpwrds_en]
91
  tokens = [stemmer.stem(word) for word in tokens]
92
- return "emptytext" if len(tokens) == 0 else ' '.join(tokens)
 
 
93
 
94
  # --- Topic Labels ---
95
  topic_labels_neg = {
@@ -110,11 +111,19 @@ topic_labels_pos = {
110
 
111
  # --- Streamlit App ---
112
  def run():
 
113
  st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
114
 
115
  st.markdown(
116
  """
117
- Enter a customer review below to predict sentiment and topic.
 
 
 
 
 
 
 
118
  """
119
  )
120
 
@@ -143,20 +152,24 @@ def run():
143
  }
144
  st.dataframe(pd.DataFrame([data_inf]))
145
 
146
-
147
- # Preprocess
148
  processed = text_preprocessing(text)
149
  seq = tokenizer.texts_to_sequences([processed])
150
  padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
151
 
152
  # Sentiment Prediction
153
  pred_probs = sentiment_model.predict(padded)
154
- if pred_probs.shape[1] == 1:
 
155
  # Binary sigmoid
156
  p_pos = float(pred_probs[0][0])
157
  p_neg = 1 - p_pos
158
- sentiment_label = "Positive" if p_pos >= 0.5 else "Negative"
159
- confidence = max(p_pos, p_neg)
 
 
 
 
160
  else:
161
  # Softmax
162
  pred_class = np.argmax(pred_probs, axis=1)[0]
@@ -164,6 +177,7 @@ def run():
164
  sentiment_label = label_map[pred_class]
165
  confidence = float(pred_probs[0][pred_class])
166
 
 
167
  color = "green" if sentiment_label == "Positive" else "red"
168
  st.markdown(
169
  f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
@@ -185,12 +199,12 @@ def run():
185
  topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
186
  st.write("**Using Positive Model**")
187
 
188
- # Output
189
  st.markdown(
190
  f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
191
  f"Topic {topic_id}: {topic_name}</p>",
192
  unsafe_allow_html=True
193
  )
194
- st.write("**Probabilities:**", probs.tolist())
195
-
196
 
 
 
 
 
1
  # Import Libraries
2
  import streamlit as st
3
  import re
 
60
 
61
  max_len = params["max_len"]
62
 
63
+ # --- Preprocessing Function (NLTK) ---
64
  negations = {"not", "no", "never"}
65
  stpwrds_en = set(stopwords.words("english")) - negations
66
  stemmer = PorterStemmer()
 
88
  tokens = [replacements.get(word, word) for word in tokens]
89
  tokens = [word for word in tokens if word not in stpwrds_en]
90
  tokens = [stemmer.stem(word) for word in tokens]
91
+ if len(tokens) == 0:
92
+ return "emptytext"
93
+ return ' '.join(tokens)
94
 
95
  # --- Topic Labels ---
96
  topic_labels_neg = {
 
111
 
112
  # --- Streamlit App ---
113
  def run():
114
+ # st.title("ACRE - Automated Customer Review Analysis")
115
  st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
116
 
117
  st.markdown(
118
  """
119
+ This section will help you understand how the **ACRE** system works.
120
+ Simply fill in the form below with either a dummy or real customer review, and the system will:
121
+
122
+ 1. **Preprocess** your review text (cleaning, tokenization, and stemming).
123
+ 2. **Predict sentiment** (Positive or Negative) along with a confidence score.
124
+ 3. **Identify the most relevant topic** associated with the review, based on the predicted sentiment.
125
+
126
+ Use this tool to simulate how Singapore Airlines can transform raw customer feedback into **structured, data-driven insights**.
127
  """
128
  )
129
 
 
152
  }
153
  st.dataframe(pd.DataFrame([data_inf]))
154
 
155
+ # Preprocess (uses the 'text' column)
 
156
  processed = text_preprocessing(text)
157
  seq = tokenizer.texts_to_sequences([processed])
158
  padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
159
 
160
  # Sentiment Prediction
161
  pred_probs = sentiment_model.predict(padded)
162
+
163
+ if pred_probs.shape[1] == 1:
164
  # Binary sigmoid
165
  p_pos = float(pred_probs[0][0])
166
  p_neg = 1 - p_pos
167
+ if p_pos >= 0.5:
168
+ sentiment_label = "Positive"
169
+ confidence = p_pos
170
+ else:
171
+ sentiment_label = "Negative"
172
+ confidence = p_neg
173
  else:
174
  # Softmax
175
  pred_class = np.argmax(pred_probs, axis=1)[0]
 
177
  sentiment_label = label_map[pred_class]
178
  confidence = float(pred_probs[0][pred_class])
179
 
180
+ # --- Sentiment Output with Color ---
181
  color = "green" if sentiment_label == "Positive" else "red"
182
  st.markdown(
183
  f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
 
199
  topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
200
  st.write("**Using Positive Model**")
201
 
202
+ # --- Topic Output with Color ---
203
  st.markdown(
204
  f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
205
  f"Topic {topic_id}: {topic_name}</p>",
206
  unsafe_allow_html=True
207
  )
 
 
208
 
209
+ # Probabilities are still displayed
210
+ st.write("**Probabilities:**", probs.tolist())