# Spaces:
# Sleeping
# Sleeping
import html
import logging
import re
import time

import numpy as np
import streamlit as st
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ==========================================
# ⚙️ 1. PAGE SETUP
# ==========================================
# The page icon is a real emoji again (it had been mis-encoded into "π΅").
st.set_page_config(page_title="Spotify ABSA Analyzer", page_icon="🎵", layout="wide")

# Custom CSS for clear visualization: one card per review segment, coloured
# sentiment badges, and pill-shaped badges for the aspect keywords that fired.
st.markdown(
    """
<style>
.main { background-color: #f8f9fa; }
/* Segment Box Styling */
.segment-box {
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
margin-bottom: 15px;
border-left: 6px solid #ccc;
}
/* Text inside the box */
.segment-text {
font-size: 1.15em;
font-family: sans-serif;
color: #212529;
margin-bottom: 12px;
}
/* Sentiment Badges */
.badge-pos {
background-color: #d4edda;
color: #155724;
padding: 4px 8px;
border-radius: 4px;
font-weight: bold;
font-size: 0.85em;
border: 1px solid #c3e6cb;
}
.badge-neg {
background-color: #f8d7da;
color: #721c24;
padding: 4px 8px;
border-radius: 4px;
font-weight: bold;
font-size: 0.85em;
border: 1px solid #f5c6cb;
}
/* Aspect Trigger Badges */
.trigger-badge {
display: inline-block;
background-color: #e2e6ea;
color: #495057;
padding: 4px 10px;
border-radius: 15px;
font-size: 0.85em;
margin-right: 8px;
margin-bottom: 5px;
border: 1px solid #ced4da;
}
.trigger-word {
background-color: #fff3cd;
padding: 0 3px;
border-radius: 3px;
font-weight: bold;
border-bottom: 1px solid #ffeeba;
}
/* Border Colors */
.border-pos { border-left-color: #28a745 !important; }
.border-neg { border-left-color: #dc3545 !important; }
</style>
""",
    unsafe_allow_html=True,
)
# ==========================================
# 🧠 2. ASPECT DICTIONARY (Standard Structure)
# ==========================================
# Layout: language code -> aspect category -> trigger keywords.
# Both languages expose the same five categories, in the same order; the
# order of categories and keywords is the order in which matches are reported.
ASPECT_KEYWORDS = {
    # English triggers
    "en": {
        "Audio Quality": [
            "audio", "sound", "bass", "treble", "voice",
            "music quality", "volume", "noise", "hifi", "dolby",
        ],
        "Price & Premium": [
            "price", "cost", "premium", "subscription", "expensive",
            "cheap", "worth", "pay", "money", "billing",
        ],
        "Ads (Iklan)": [
            "ads", "advertisement", "commercial",
            "interrupt", "sponsor", "unskippable",
        ],
        "App Stability": [
            "crash", "bug", "error", "slow", "loading",
            "lag", "force close", "glitch", "stuck", "freeze",
        ],
        "Content/Library": [
            "song", "playlist", "library", "genre", "podcast",
            "lyrics", "collection", "track", "album",
        ],
    },
    # Indonesian triggers
    "id": {
        "Audio Quality": [
            "suara", "audio", "bass", "bunyi", "kualitas", "jernih",
            "cempreng", "kresek", "volume", "vokal", "dolby",
        ],
        "Price & Premium": [
            "harga", "bayar", "mahal", "murah", "premium", "langganan",
            "boros", "tagihan", "uang", "beli", "berbayar",
        ],
        "Ads (Iklan)": [
            "iklan", "ads", "promosi",
            "tonton", "komersial", "ganggu",
        ],
        "App Stability": [
            "crash", "bug", "error", "lemot", "keluar sendiri", "macet",
            "lag", "lelet", "berat", "rusak", "gagal", "force close",
        ],
        "Content/Library": [
            "lagu", "musik", "playlist", "koleksi", "podcast",
            "lirik", "genre", "album", "artis", "katalog",
        ],
    },
}
def get_aspects_detailed(text, lang="en"):
    """Return every aspect keyword that occurs in *text* as a whole word.

    The text is lower-cased and checked against each keyword of the selected
    language in ``ASPECT_KEYWORDS``; a language code that is not in the table
    falls back to the English keywords.

    Returns:
        A list of ``{"aspect": <category>, "trigger": <keyword>}`` dicts, in
        dictionary order (category first, then keyword). Empty if nothing
        matched.
    """
    haystack = text.lower()
    table = ASPECT_KEYWORDS.get(lang, ASPECT_KEYWORDS["en"])
    return [
        {"aspect": category, "trigger": word}
        for category, words in table.items()
        for word in words
        # \b on both sides so a keyword only matches as a standalone word.
        if re.search(r"\b" + re.escape(word) + r"\b", haystack)
    ]
# ==========================================
# 🛠️ 3. MODEL LOADER
# ==========================================
def _load_classifier(repo_id, device):
    """Load the tokenizer and sequence-classification model for *repo_id* onto *device*."""
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSequenceClassification.from_pretrained(repo_id).to(device)
    return model, tokenizer


@st.cache_resource(show_spinner=False)
def load_model_safe(lang_code):
    """Load the sentiment model for *lang_code*, falling back to a generic one.

    The fine-tuned checkpoint is tried first. If loading it fails for any
    ordinary reason, the failure is logged and a generic base model is loaded
    instead so the app stays usable.

    The result is cached with ``st.cache_resource`` (one entry per
    ``lang_code``), so Streamlit reruns reuse the loaded model instead of
    loading it again on every interaction. Streamlit's own spinner is
    disabled because the caller already shows one.

    Args:
        lang_code: ``"en"`` selects the English models; any other value
            selects the Indonesian ones.

    Returns:
        ``(model, tokenizer, device, msg, is_custom)`` where ``msg`` is a
        status line for the UI and ``is_custom`` is True only when the
        fine-tuned checkpoint was loaded.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if lang_code == "en":
        primary_repo = "Hamusssss12/spotify-absa-english"
        fallback_repo = "nlptown/bert-base-multilingual-uncased-sentiment"
    else:
        primary_repo = "Hamusssss12/spotify-absa-indonesian"
        fallback_repo = "indobenchmark/indobert-base-p1"

    try:
        model, tokenizer = _load_classifier(primary_repo, device)
    except Exception:
        # `Exception`, not a bare `except`, so Ctrl-C / SystemExit still
        # propagate. Log the cause instead of dropping it silently.
        logging.getLogger(__name__).warning(
            "Could not load %s; falling back to %s",
            primary_repo,
            fallback_repo,
            exc_info=True,
        )
        model, tokenizer = _load_classifier(fallback_repo, device)
        msg = f"Status: Using Generic Base Model ({fallback_repo})"
        is_custom = False
    else:
        msg = "Status: Using Local Fine-Tuned Model"
        is_custom = True

    return model, tokenizer, device, msg, is_custom
def predict_sentiment(text, model, tokenizer, device):
    """Score the sentiment of *text* as a positivity value in ``[0, 1]``.

    The text is tokenized (truncated to 128 tokens), run through *model*
    without gradient tracking, and the logits are reduced to one number:

    * 2 or more labels: softmax probabilities weighted by evenly spaced
      values from 0 (first label) to 1 (last label). For 2 labels this is
      the probability of label 1; for 5 labels the weights are
      0, 0.25, 0.5, 0.75, 1.0 (the 1-5 star case).
    * 1 label: the single logit is squashed with a sigmoid.
      NOTE(review): assumes a lone output is a positive-class logit.

    Args:
        text: The review segment to classify.
        model: Callable returning an object with a ``logits`` tensor of
            shape ``(batch, num_labels)``.
        tokenizer: Callable producing the model inputs; its result must
            support ``.to(device)`` and ``**`` unpacking.
        device: Torch device the inputs are moved to.

    Returns:
        A Python ``float``; higher means more positive.
    """
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits

    num_labels = logits.shape[1]
    if num_labels == 1:
        # Softmax over one logit is always 1.0, so it carries no information.
        return float(torch.sigmoid(logits[0, 0]))

    probs = F.softmax(logits, dim=1)[0].cpu().numpy()
    # Evenly spaced weights work for any label count, not just 2 or 5.
    weights = np.linspace(0.0, 1.0, num_labels)
    return float(np.dot(probs, weights))
# ==========================================
# 🖥️ 4. APP UI
# ==========================================
# Scores strictly above this value are shown as POSITIVE, the rest as NEGATIVE.
_POSITIVE_THRESHOLD = 0.55

# Segment boundaries: sentence punctuation or a contrast word. Case-insensitive
# so that "But ..." / "Tapi ..." split the same way as their lower-case forms.
_SEGMENT_SPLITTERS = {
    "id": re.compile(r"[.!?;]|\btapi\b|\bnamun\b|\bsedangkan\b", re.IGNORECASE),
    "en": re.compile(r"[.!?;]|\bbut\b|\bhowever\b|\bwhile\b", re.IGNORECASE),
}


def _split_segments(text, lang_code):
    """Split *text* into non-empty, stripped segments for *lang_code*."""
    splitter = _SEGMENT_SPLITTERS.get(lang_code, _SEGMENT_SPLITTERS["en"])
    segments = [part.strip() for part in splitter.split(text)]
    segments = [part for part in segments if part]
    # Nothing left (e.g. punctuation only): analyse the input as one segment.
    return segments or [text.strip()]


def _build_segment_card(index, segment, score, details):
    """Return the complete HTML card for one analysed segment."""
    is_positive = score > _POSITIVE_THRESHOLD
    sentiment_label = "POSITIVE" if is_positive else "NEGATIVE"
    border_class = "border-pos" if is_positive else "border-neg"
    badge_class = "badge-pos" if is_positive else "badge-neg"

    # The segment is user input rendered with unsafe_allow_html=True, so it
    # must be escaped. Whitespace is collapsed so an embedded blank line
    # cannot split the HTML block when it is rendered as Markdown.
    safe_segment = html.escape(" ".join(segment.split()))

    if details:
        badges = "".join(
            '<div class="trigger-badge">'
            f"<span>🏷️ {html.escape(det['aspect'])}</span>"
            '<span style="font-size:0.8em; color:#666; margin-left:5px;">'
            f'(trigger: <span class="trigger-word">{html.escape(det["trigger"])}</span>)'
            "</span>"
            "</div>"
            for det in details
        )
        footer = f"<div>{badges}</div>"
    else:
        footer = (
            "<small style='color:#999; font-style:italic;'>"
            "No specific aspect keywords detected (General Sentiment)</small>"
        )

    # Built without leading indentation or newlines: the card is passed to
    # st.markdown, where indented lines can be interpreted as code blocks.
    return (
        f'<div class="segment-box {border_class}">'
        '<div style="display:flex; align-items:center; margin-bottom:8px;">'
        f'<strong style="color:#888; margin-right:10px;">Segment {index + 1}</strong>'
        f'<span class="{badge_class}">{sentiment_label} ({score:.1%})</span>'
        "</div>"
        f'<div class="segment-text">"{safe_segment}"</div>'
        f"{footer}"
        "</div>"
    )


def main():
    """Render the Streamlit UI: sidebar configuration, input box, and results.

    The sidebar selects the language, loads the matching model and shows the
    keyword dictionary. When the button is pressed, the review is split into
    segments and each one is scored with ``predict_sentiment``, matched
    against the aspect keywords, and rendered as a single HTML card.
    """
    st.title("🎵 Spotify Review Inspector")
    st.markdown("Analyze reviews to identify sentiment and aspect triggers.")

    # --- SIDEBAR ---
    with st.sidebar:
        st.header("⚙️ Configuration")
        lang = st.selectbox("Select Language", ["Indonesian", "English"], index=0)
        lang_code = "id" if lang == "Indonesian" else "en"
        st.divider()

        # Load Model
        with st.spinner("Initializing AI Engine..."):
            model, tokenizer, device, msg, is_custom = load_model_safe(lang_code)
        if is_custom:
            st.success(msg)
        else:
            st.warning(msg)
            st.caption(
                "Tip: Ensure your `models` folder contains the extracted zip files for best results."
            )
        st.divider()

        # DEBUG SECTION
        # NOTE(review): the leading "π" here (and in the results heading
        # below) looks like a mis-encoded emoji whose original glyph could
        # not be recovered; left untouched.
        with st.expander("π View Dictionary (Debug)"):
            st.write(f"**Current Dictionary ({lang_code.upper()}):**")
            st.json(ASPECT_KEYWORDS[lang_code])

    # --- MAIN INPUT ---
    default_text = (
        "Suaranya jernih banget enak didenger, tapi sayang harga premiumnya kemahalan buat pelajar."
        if lang_code == "id"
        else "The audio is crystal clear, but the premium price is too expensive."
    )
    user_input = st.text_area("Enter Review Text:", value=default_text, height=100)

    if not st.button("Analyze Sentiment", type="primary"):
        return

    # Do not run the model on an empty review.
    if not user_input.strip():
        st.warning("Please enter some review text to analyze.")
        return

    st.markdown("### π Analysis Results")

    for i, segment in enumerate(_split_segments(user_input, lang_code)):
        score = predict_sentiment(segment, model, tokenizer, device)
        details = get_aspects_detailed(segment, lang_code)
        # One st.markdown call per card: every call is rendered as its own
        # element, so a <div> opened in one call cannot be closed in another.
        st.markdown(
            _build_segment_card(i, segment, score, details),
            unsafe_allow_html=True,
        )


if __name__ == "__main__":
    main()