Hamusssss12 committed on
Commit
54ccd58
·
verified ·
1 Parent(s): 5b14cd6

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +394 -0
  2. requirements.txt +7 -3
app.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ import re
4
+ import numpy as np
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+
9
# ==========================================
# ⚙️ 1. PAGE SETUP
# ==========================================
# Configure the browser tab (title, icon) and use the wide layout.
# page_icon is a single emoji character.
st.set_page_config(page_title="Spotify ABSA Analyzer", page_icon="🎵", layout="wide")

# Custom CSS for clear visualization.
# The class names defined here (segment-box, badge-pos/neg, trigger-badge,
# trigger-word, border-pos/neg) are referenced by the HTML rendered in main().
st.markdown(
    """
    <style>
    .main { background-color: #f8f9fa; }

    /* Segment Box Styling */
    .segment-box {
        background-color: white;
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 2px 5px rgba(0,0,0,0.05);
        margin-bottom: 15px;
        border-left: 6px solid #ccc;
    }

    /* Text inside the box */
    .segment-text {
        font-size: 1.15em;
        font-family: sans-serif;
        color: #212529;
        margin-bottom: 12px;
    }

    /* Sentiment Badges */
    .badge-pos {
        background-color: #d4edda;
        color: #155724;
        padding: 4px 8px;
        border-radius: 4px;
        font-weight: bold;
        font-size: 0.85em;
        border: 1px solid #c3e6cb;
    }
    .badge-neg {
        background-color: #f8d7da;
        color: #721c24;
        padding: 4px 8px;
        border-radius: 4px;
        font-weight: bold;
        font-size: 0.85em;
        border: 1px solid #f5c6cb;
    }

    /* Aspect Trigger Badges */
    .trigger-badge {
        display: inline-block;
        background-color: #e2e6ea;
        color: #495057;
        padding: 4px 10px;
        border-radius: 15px;
        font-size: 0.85em;
        margin-right: 8px;
        margin-bottom: 5px;
        border: 1px solid #ced4da;
    }
    .trigger-word {
        background-color: #fff3cd;
        padding: 0 3px;
        border-radius: 3px;
        font-weight: bold;
        border-bottom: 1px solid #ffeeba;
    }

    /* Border Colors */
    .border-pos { border-left-color: #28a745 !important; }
    .border-neg { border-left-color: #dc3545 !important; }
    </style>
    """,
    unsafe_allow_html=True,
)
85
+
86
# ==========================================
# 🧠 2. ASPECT DICTIONARY (Standard Structure)
# ==========================================

# English trigger keywords, grouped by aspect category.
_ASPECTS_EN = {
    "Audio Quality": [
        "audio", "sound", "bass", "treble", "voice",
        "music quality", "volume", "noise", "hifi", "dolby",
    ],
    "Price & Premium": [
        "price", "cost", "premium", "subscription", "expensive",
        "cheap", "worth", "pay", "money", "billing",
    ],
    "Ads (Iklan)": [
        "ads", "advertisement", "commercial", "interrupt", "sponsor", "unskippable",
    ],
    "App Stability": [
        "crash", "bug", "error", "slow", "loading",
        "lag", "force close", "glitch", "stuck", "freeze",
    ],
    "Content/Library": [
        "song", "playlist", "library", "genre", "podcast",
        "lyrics", "collection", "track", "album",
    ],
}

# Indonesian trigger keywords, using the same aspect categories as English.
_ASPECTS_ID = {
    "Audio Quality": [
        "suara", "audio", "bass", "bunyi", "kualitas", "jernih",
        "cempreng", "kresek", "volume", "vokal", "dolby",
    ],
    "Price & Premium": [
        "harga", "bayar", "mahal", "murah", "premium", "langganan",
        "boros", "tagihan", "uang", "beli", "berbayar",
    ],
    "Ads (Iklan)": ["iklan", "ads", "promosi", "tonton", "komersial", "ganggu"],
    "App Stability": [
        "crash", "bug", "error", "lemot", "keluar sendiri", "macet",
        "lag", "lelet", "berat", "rusak", "gagal", "force close",
    ],
    "Content/Library": [
        "lagu", "musik", "playlist", "koleksi", "podcast",
        "lirik", "genre", "album", "artis", "katalog",
    ],
}

# Language code -> aspect category -> list of lowercase trigger keywords.
ASPECT_KEYWORDS = {
    "en": _ASPECTS_EN,
    "id": _ASPECTS_ID,
}
204
+
205
+
206
def get_aspects_detailed(text, lang="en"):
    """
    Find every aspect keyword that occurs in ``text`` as a whole word.

    The text is lower-cased before matching. The keyword table for ``lang``
    is used, falling back to the English table for an unknown language code.

    Returns a list with one dict per matching keyword, in dictionary order:
    [{'aspect': 'Audio Quality', 'trigger': 'bass'}, ...]
    """
    haystack = text.lower()
    lexicon = ASPECT_KEYWORDS.get(lang, ASPECT_KEYWORDS["en"])

    # \b on both sides keeps a keyword from matching inside a longer word.
    return [
        {"aspect": category, "trigger": term}
        for category, terms in lexicon.items()
        for term in terms
        if re.search(rf"\b{re.escape(term)}\b", haystack)
    ]
222
+
223
+
224
# ==========================================
# 🛠️ 3. MODEL LOADER
# ==========================================


@st.cache_resource
def load_model_safe(lang_code):
    """Load the sentiment model and tokenizer for a language.

    A locally stored fine-tuned checkpoint is tried first; if it cannot be
    loaded for any reason, a public Hugging Face checkpoint is loaded instead.
    The result is cached by Streamlit via ``st.cache_resource``.

    Args:
        lang_code: ``"en"`` selects the English model; any other value
            selects the Indonesian model.

    Returns:
        A tuple ``(model, tokenizer, device, msg, is_custom)`` where ``device``
        is CUDA when available (else CPU), ``msg`` is a human-readable status
        line, and ``is_custom`` is True only when the local checkpoint loaded.

    Raises:
        Whatever the fallback load raises if it also fails.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if lang_code == "en":
        local_path = "./models/transformer/english"
        fallback_repo = "nlptown/bert-base-multilingual-uncased-sentiment"
    else:
        local_path = "./models/transformer/indonesian"
        # NOTE(review): this looks like a base (not sentiment-fine-tuned)
        # checkpoint, so its classification head may be untrained — confirm.
        fallback_repo = "indobenchmark/indobert-base-p1"

    def _load(source):
        """Load tokenizer and classifier from ``source`` and move the model to ``device``."""
        tokenizer = AutoTokenizer.from_pretrained(source)
        model = AutoModelForSequenceClassification.from_pretrained(source).to(device)
        return model, tokenizer

    try:
        # Try loading local fine-tuned model
        model, tokenizer = _load(local_path)
    except Exception:
        # Deliberate best-effort fallback: any load failure (missing folder,
        # corrupt files, ...) switches to the hub checkpoint. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        model, tokenizer = _load(fallback_repo)
        msg = f"Status: Using Generic Base Model ({fallback_repo})"
        is_custom = False
    else:
        msg = "Status: Using Local Fine-Tuned Model"
        is_custom = True

    return model, tokenizer, device, msg, is_custom
258
+
259
+
260
def predict_sentiment(text, model, tokenizer, device):
    """Score ``text`` with a sequence classifier and return a value in [0, 1].

    The class probabilities (softmax over the logits) are combined with
    evenly spaced weights from 0.0 (first class) to 1.0 (last class):

    * 2 classes  -> weights [0, 1], i.e. the probability of class index 1;
    * 5 classes  -> weights [0, 0.25, 0.5, 0.75, 1.0] (1-5 star models);
    * N classes  -> ``np.linspace(0, 1, N)``, so models with another number
      of labels no longer fail on a shape mismatch.

    Assumes the class indices are ordered from most negative to most
    positive — TODO confirm for each checkpoint used.

    Args:
        text: The text to classify; tokenized with truncation at 128 tokens.
        model: Callable returning an object with a ``logits`` tensor of
            shape (1, num_classes) when called with the tokenizer output.
        tokenizer: Callable producing the model inputs; its result must
            support ``.to(device)``.
        device: Device the inputs are moved to before inference.

    Returns:
        The weighted score as a Python float.
    """
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Only one text is scored per call, so take the first (only) row.
    probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    class_weights = np.linspace(0.0, 1.0, num=probs.shape[0])
    return float(np.dot(probs, class_weights))
277
+
278
+
279
# ==========================================
# 🖥️ 4. APP UI
# ==========================================

# Scores strictly above this value are labelled POSITIVE.
_POSITIVE_THRESHOLD = 0.55

# Segment boundaries: sentence punctuation or a contrast word. Matching is
# case-insensitive so a capitalised "But"/"However"/"Tapi" also splits.
_SPLIT_PATTERNS = {
    "id": re.compile(r"[.!?;]|\btapi\b|\bnamun\b|\bsedangkan\b", re.IGNORECASE),
    "en": re.compile(r"[.!?;]|\bbut\b|\bhowever\b|\bwhile\b", re.IGNORECASE),
}


def _split_segments(text, lang_code):
    """Split a review into stripped, non-empty segments; fall back to the whole text."""
    pattern = _SPLIT_PATTERNS["id"] if lang_code == "id" else _SPLIT_PATTERNS["en"]
    segments = [part.strip() for part in pattern.split(text) if part.strip()]
    return segments or [text]


def _build_card_html(index, segment, score, details):
    """Return the complete HTML for one result card (header, text, trigger badges)."""
    import html

    is_positive = score > _POSITIVE_THRESHOLD
    sentiment_label = "POSITIVE" if is_positive else "NEGATIVE"
    border_class = "border-pos" if is_positive else "border-neg"
    badge_class = "badge-pos" if is_positive else "badge-neg"

    # The segment is user input: escape it so it cannot inject markup, and
    # collapse whitespace so an embedded blank line cannot end the HTML block.
    safe_segment = html.escape(" ".join(segment.split()))

    if details:
        badges = "".join(
            '<div class="trigger-badge">'
            f"<span>🏷️ {html.escape(det['aspect'])}</span>"
            '<span style="font-size:0.8em; color:#666; margin-left:5px;">'
            f"(trigger: <span class=\"trigger-word\">{html.escape(det['trigger'])}</span>)"
            "</span>"
            "</div>"
            for det in details
        )
        footer = f"<div>{badges}</div>"
    else:
        footer = (
            "<small style='color:#999; font-style:italic;'>"
            "No specific aspect keywords detected (General Sentiment)"
            "</small>"
        )

    # Built as one string without line breaks or indentation, so the whole
    # card is a single HTML block and is never parsed as Markdown.
    return (
        f'<div class="segment-box {border_class}">'
        '<div style="display:flex; align-items:center; margin-bottom:8px;">'
        f'<strong style="color:#888; margin-right:10px;">Segment {index}</strong>'
        f'<span class="{badge_class}">{sentiment_label} ({score:.1%})</span>'
        "</div>"
        f'<div class="segment-text">"{safe_segment}"</div>'
        f"{footer}"
        "</div>"
    )


def main():
    """Render the Streamlit page: sidebar configuration, review input, results.

    The sidebar selects the language, loads the matching model through
    ``load_model_safe`` and shows the active keyword dictionary. When the
    "Analyze Sentiment" button is pressed, the review is split into segments
    and each segment is scored with ``predict_sentiment``, scanned with
    ``get_aspects_detailed`` and rendered as one card.
    """
    st.title("🎵 Spotify Review Inspector")
    st.markdown("Analyze reviews to identify sentiment and aspect triggers.")

    # --- SIDEBAR ---
    with st.sidebar:
        st.header("⚙️ Configuration")
        lang = st.selectbox("Select Language", ["Indonesian", "English"], index=0)
        lang_code = "id" if lang == "Indonesian" else "en"

        st.divider()

        # Load Model
        with st.spinner("Initializing AI Engine..."):
            model, tokenizer, device, msg, is_custom = load_model_safe(lang_code)

        if is_custom:
            st.success(msg)
        else:
            st.warning(msg)
            st.caption(
                "Tip: Ensure your `models` folder contains the extracted zip files for best results."
            )

        st.divider()

        # DEBUG SECTION
        with st.expander("📖 View Dictionary (Debug)"):
            st.write(f"**Current Dictionary ({lang_code.upper()}):**")
            st.json(ASPECT_KEYWORDS[lang_code])

    # --- MAIN INPUT ---
    default_text = (
        "Suaranya jernih banget enak didenger, tapi sayang harga premiumnya kemahalan buat pelajar."
        if lang_code == "id"
        else "The audio is crystal clear, but the premium price is too expensive."
    )

    user_input = st.text_area("Enter Review Text:", value=default_text, height=100)

    if not st.button("Analyze Sentiment", type="primary"):
        return

    # Nothing to score: avoid sending an empty string to the model.
    if not user_input.strip():
        st.warning("Please enter some review text to analyze.")
        return

    st.markdown("### 📊 Analysis Results")

    for position, segment in enumerate(_split_segments(user_input, lang_code), start=1):
        score = predict_sentiment(segment, model, tokenizer, device)
        details = get_aspects_detailed(segment, lang_code)

        # One st.markdown call per card: markup opened in one call cannot be
        # closed by a later call, so the card must be emitted as a whole.
        st.markdown(
            _build_card_html(position, segment, score, details),
            unsafe_allow_html=True,
        )


if __name__ == "__main__":
    main()
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ numpy
5
+ pandas
6
+ sastrawi
7
+ nltk