import logging
import re
import time

import numpy as np
import streamlit as st
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

logger = logging.getLogger(__name__)

# ==========================================
# ⚙️ 1. PAGE SETUP
# ==========================================
st.set_page_config(page_title="Spotify ABSA Analyzer", page_icon="🎵", layout="wide")

# Custom CSS for clear visualization
# NOTE(review): the stylesheet string below is blank in this copy of the file;
# presumably the <style> rules were lost -- confirm and restore them.
st.markdown(
    """ """,
    unsafe_allow_html=True,
)

# ==========================================
# 🧠 2. ASPECT DICTIONARY (Standard Structure)
# ==========================================
# Maps language code -> aspect category -> lowercase trigger keywords.
ASPECT_KEYWORDS = {
    "en": {
        "Audio Quality": [
            "audio",
            "sound",
            "bass",
            "treble",
            "voice",
            "music quality",
            "volume",
            "noise",
            "hifi",
            "dolby",
        ],
        "Price & Premium": [
            "price",
            "cost",
            "premium",
            "subscription",
            "expensive",
            "cheap",
            "worth",
            "pay",
            "money",
            "billing",
        ],
        "Ads (Iklan)": [
            "ads",
            "advertisement",
            "commercial",
            "interrupt",
            "sponsor",
            "unskippable",
        ],
        "App Stability": [
            "crash",
            "bug",
            "error",
            "slow",
            "loading",
            "lag",
            "force close",
            "glitch",
            "stuck",
            "freeze",
        ],
        "Content/Library": [
            "song",
            "playlist",
            "library",
            "genre",
            "podcast",
            "lyrics",
            "collection",
            "track",
            "album",
        ],
    },
    "id": {
        "Audio Quality": [
            "suara",
            "audio",
            "bass",
            "bunyi",
            "kualitas",
            "jernih",
            "cempreng",
            "kresek",
            "volume",
            "vokal",
            "dolby",
        ],
        "Price & Premium": [
            "harga",
            "bayar",
            "mahal",
            "murah",
            "premium",
            "langganan",
            "boros",
            "tagihan",
            "uang",
            "beli",
            "berbayar",
        ],
        "Ads (Iklan)": ["iklan", "ads", "promosi", "tonton", "komersial", "ganggu"],
        "App Stability": [
            "crash",
            "bug",
            "error",
            "lemot",
            "keluar sendiri",
            "macet",
            "lag",
            "lelet",
            "berat",
            "rusak",
            "gagal",
            "force close",
        ],
        "Content/Library": [
            "lagu",
            "musik",
            "playlist",
            "koleksi",
            "podcast",
            "lirik",
            "genre",
            "album",
            "artis",
            "katalog",
        ],
    },
}


def get_aspects_detailed(text, lang="en"):
    """Scan *text* for aspect keywords and report every hit.

    Args:
        text: Review text to scan. ``None`` or an empty string yields ``[]``.
        lang: Key into ``ASPECT_KEYWORDS``; unknown codes fall back to ``"en"``.

    Returns:
        A list of dicts such as ``{"aspect": "Audio Quality", "trigger": "bass"}``,
        one per matching keyword, in dictionary order.
    """
    if not text:
        return []

    found_details = []
    text_lower = text.lower()
    keywords = ASPECT_KEYWORDS.get(lang, ASPECT_KEYWORDS["en"])

    for aspect_category, keyword_list in keywords.items():
        for key in keyword_list:
            # Word boundary check (\b) to avoid partial matches; this also means
            # inflected forms (e.g. a keyword followed by a suffix) do not match.
            if re.search(r"\b" + re.escape(key) + r"\b", text_lower):
                found_details.append({"aspect": aspect_category, "trigger": key})

    return found_details


# ==========================================
# 🛠️ 3. MODEL LOADER
# ==========================================
def _load_tokenizer_and_model(repo, device):
    """Load the tokenizer and classification model for *repo*, moved to *device*."""
    tokenizer = AutoTokenizer.from_pretrained(repo)
    model = AutoModelForSequenceClassification.from_pretrained(repo).to(device)
    return tokenizer, model


@st.cache_resource
def load_model_safe(lang_code):
    """Load the sentiment model for a language, falling back to a generic one.

    Args:
        lang_code: ``"en"`` selects the English model pair; any other value
            selects the Indonesian pair.

    Returns:
        Tuple ``(model, tokenizer, device, msg, is_custom)`` where ``msg`` is a
        status string for the UI and ``is_custom`` is True when the fine-tuned
        model was loaded rather than the fallback.

    Raises:
        Exception: whatever the fallback load raises, if it fails as well.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): the status text calls the first choice "local", but these
    # identifiers look like Hub repo ids -- confirm which is intended.
    if lang_code == "en":
        primary_repo = "Hamusssss12/spotify-absa-english"
        fallback_repo = "nlptown/bert-base-multilingual-uncased-sentiment"
    else:
        primary_repo = "Hamusssss12/spotify-absa-indonesian"
        fallback_repo = "indobenchmark/indobert-base-p1"

    try:
        # Try loading the fine-tuned model first.
        tokenizer, model = _load_tokenizer_and_model(primary_repo, device)
    except Exception:
        # Deliberately broad (any load failure triggers the fallback), but not
        # a bare ``except``: KeyboardInterrupt/SystemExit must still propagate.
        logger.warning(
            "Could not load %s; falling back to %s",
            primary_repo,
            fallback_repo,
            exc_info=True,
        )
        tokenizer, model = _load_tokenizer_and_model(fallback_repo, device)
        msg = f"Status: Using Generic Base Model ({fallback_repo})"
        is_custom = False
    else:
        msg = "Status: Using Local Fine-Tuned Model"
        is_custom = True

    return model, tokenizer, device, msg, is_custom


def predict_sentiment(text, model, tokenizer, device):
    """Score *text* on a 0..1 scale where higher means more positive.

    Args:
        text: Segment to classify (truncated to 128 tokens).
        model: Sequence-classification model exposing ``.logits`` on its output.
        tokenizer: Tokenizer matching *model*.
        device: Torch device the model lives on.

    Returns:
        A Python float in ``[0, 1]``. For a 2-class head this is the probability
        of class 1; for an N-class head it is the expected position on an
        evenly spaced 0..1 scale (for 5 classes: 0, 0.25, 0.5, 0.75, 1).
    """
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits

    num_classes = logits.shape[1]
    if num_classes == 1:
        # Softmax over one logit is always 1.0, which carries no information.
        # NOTE(review): assumes a single-logit head is a sigmoid-style binary
        # output -- confirm if such a model is ever used here.
        return float(torch.sigmoid(logits)[0, 0])

    probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    # Evenly spaced class weights generalise the former hard-coded 5-star
    # vector, so 3-, 4- or 6-class heads no longer raise a broadcasting error.
    weights = np.linspace(0.0, 1.0, num_classes)
    return float(np.dot(probs, weights))
# ==========================================
# 🖥️ 4. APP UI
# ==========================================
# Scores strictly above this value are labelled POSITIVE.
_POSITIVE_THRESHOLD = 0.55


def _split_segments(text, lang_code):
    """Split a review into clause-level segments.

    Splits on sentence punctuation and on contrast words (but, however, tapi,
    namun, ...). Matching is case-insensitive so "But"/"Tapi" split as well.
    Falls back to the whole text when nothing survives the split.
    """
    if lang_code == "id":
        split_regex = r"[.!?;]|\btapi\b|\bnamun\b|\bsedangkan\b"
    else:
        split_regex = r"[.!?;]|\bbut\b|\bhowever\b|\bwhile\b"

    pieces = re.split(split_regex, text, flags=re.IGNORECASE)
    segments = [piece.strip() for piece in pieces if piece.strip()]
    return segments or [text]


def _format_trigger_badges(details):
    """Build the badge text for the aspect hits of one segment."""
    return "".join(
        f"\n🏷️ {det['aspect']} (trigger: {det['trigger']})\n" for det in details
    )


def main():
    """Render the page: sidebar configuration, review input, per-segment results.

    Each segment of the entered review is scored with ``predict_sentiment`` and
    scanned with ``get_aspects_detailed``; one card is rendered per segment.
    """
    # Local import keeps this block self-contained; used to escape user text.
    import html

    st.title("🎵 Spotify Review Inspector")
    st.markdown("Analyze reviews to identify sentiment and aspect triggers.")

    # --- SIDEBAR ---
    with st.sidebar:
        st.header("⚙️ Configuration")
        lang = st.selectbox("Select Language", ["Indonesian", "English"], index=0)
        lang_code = "id" if lang == "Indonesian" else "en"
        st.divider()

        # Load Model
        with st.spinner("Initializing AI Engine..."):
            model, tokenizer, device, msg, is_custom = load_model_safe(lang_code)

        if is_custom:
            st.success(msg)
        else:
            st.warning(msg)
            st.caption(
                "Tip: Ensure your `models` folder contains the extracted zip files for best results."
            )
        st.divider()

        # DEBUG SECTION
        with st.expander("📖 View Dictionary (Debug)"):
            st.write(f"**Current Dictionary ({lang_code.upper()}):**")
            st.json(ASPECT_KEYWORDS[lang_code])

    # --- MAIN INPUT ---
    default_text = (
        "Suaranya jernih banget enak didenger, tapi sayang harga premiumnya kemahalan buat pelajar."
        if lang_code == "id"
        else "The audio is crystal clear, but the premium price is too expensive."
    )
    user_input = st.text_area("Enter Review Text:", value=default_text, height=100)

    if not st.button("Analyze Sentiment", type="primary"):
        return

    if not user_input or not user_input.strip():
        # Nothing to score; avoid running the model on an empty string.
        st.warning("Please enter some review text to analyze.")
        return

    st.markdown("### 📊 Analysis Results")

    # 1. Segmentation
    segments = _split_segments(user_input, lang_code)

    # 2. Process & Render
    for i, segment in enumerate(segments):
        # Predict
        score = predict_sentiment(segment, model, tokenizer, device)
        is_positive = score > _POSITIVE_THRESHOLD

        # Formatting
        sentiment_label = "POSITIVE" if is_positive else "NEGATIVE"
        # NOTE(review): border_class and badge_class are not referenced by any
        # markup visible in this function -- presumably the card HTML that used
        # them was lost; confirm and restore the markup, or drop these.
        border_class = "border-pos" if is_positive else "border-neg"
        badge_class = "badge-pos" if is_positive else "badge-neg"

        # Find Aspect Triggers
        details = get_aspects_detailed(segment, lang_code)

        # --- RENDER CARD ---
        # The segment is user-supplied and rendered with unsafe_allow_html, so
        # it is escaped to stop stray "<"/"&" being interpreted as markup.
        st.markdown(
            f"\nSegment {i + 1} {sentiment_label} ({score:.1%})\n"
            f'"{html.escape(segment)}"\n',
            unsafe_allow_html=True,
        )

        # --- RENDER TRIGGERS ---
        if details:
            badges_html = _format_trigger_badges(details)
            st.markdown(f"\n{badges_html}\n", unsafe_allow_html=True)
        else:
            st.markdown(
                "No specific aspect keywords detected (General Sentiment)",
                unsafe_allow_html=True,
            )

        st.markdown("\n", unsafe_allow_html=True)


if __name__ == "__main__":
    main()