Spaces:

Hamusssss12
/

ABSA

Sleeping

App Files Files Community

Hamusssss12 committed on Dec 13, 2025

Commit

54ccd58

verified ·

1 Parent(s): 5b14cd6

Upload 2 files

Browse files

Files changed (2) hide show

app.py +394 -0
requirements.txt +7 -3

app.py ADDED Viewed

	@@ -0,0 +1,394 @@

+import streamlit as st
+import time
+import re
+import numpy as np
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# ==========================================
+# ⚙️ 1. PAGE SETUP
+# ==========================================
+st.set_page_config(page_title="Spotify ABSA Analyzer", page_icon="🎵", layout="wide")
+# Custom CSS for clear visualization
+st.markdown(
+    """
+    <style>
+    .main { background-color: #f8f9fa; }
+    /* Segment Box Styling */
+    .segment-box {
+        background-color: white;
+        padding: 20px;
+        border-radius: 10px;
+        box-shadow: 0 2px 5px rgba(0,0,0,0.05);
+        margin-bottom: 15px;
+        border-left: 6px solid #ccc;
+    }
+    /* Text inside the box */
+    .segment-text {
+        font-size: 1.15em;
+        font-family: sans-serif;
+        color: #212529;
+        margin-bottom: 12px;
+    }
+    /* Sentiment Badges */
+    .badge-pos {
+        background-color: #d4edda;
+        color: #155724;
+        padding: 4px 8px;
+        border-radius: 4px;
+        font-weight: bold;
+        font-size: 0.85em;
+        border: 1px solid #c3e6cb;
+    }
+    .badge-neg {
+        background-color: #f8d7da;
+        color: #721c24;
+        padding: 4px 8px;
+        border-radius: 4px;
+        font-weight: bold;
+        font-size: 0.85em;
+        border: 1px solid #f5c6cb;
+    }
+    /* Aspect Trigger Badges */
+    .trigger-badge {
+        display: inline-block;
+        background-color: #e2e6ea;
+        color: #495057;
+        padding: 4px 10px;
+        border-radius: 15px;
+        font-size: 0.85em;
+        margin-right: 8px;
+        margin-bottom: 5px;
+        border: 1px solid #ced4da;
+    }
+    .trigger-word {
+        background-color: #fff3cd;
+        padding: 0 3px;
+        border-radius: 3px;
+        font-weight: bold;
+        border-bottom: 1px solid #ffeeba;
+    }
+    /* Border Colors */
+    .border-pos { border-left-color: #28a745 !important; }
+    .border-neg { border-left-color: #dc3545 !important; }
+    </style>
+""",
+    unsafe_allow_html=True,
+)
+# ==========================================
+# 🧠 2. ASPECT DICTIONARY (Standard Structure)
+# ==========================================
+# Keyword lexicon used for aspect detection.
+#
+# Layout: language code ("en" / "id") -> aspect category -> list of trigger
+# keywords. Both languages expose the same five aspect categories so results
+# are comparable across languages.
+#
+# Conventions the entries follow:
+#   * every keyword is written in lower case;
+#   * an entry may be a multi-word phrase (e.g. "force close");
+#   * list order is meaningful to consumers that iterate it in order, so new
+#     keywords should be appended rather than inserted.
+ASPECT_KEYWORDS = {
+    # English keywords.
+    "en": {
+        "Audio Quality": [
+            "audio",
+            "sound",
+            "bass",
+            "treble",
+            "voice",
+            "music quality",
+            "volume",
+            "noise",
+            "hifi",
+            "dolby",
+        ],
+        "Price & Premium": [
+            "price",
+            "cost",
+            "premium",
+            "subscription",
+            "expensive",
+            "cheap",
+            "worth",
+            "pay",
+            "money",
+            "billing",
+        ],
+        "Ads (Iklan)": [
+            "ads",
+            "advertisement",
+            "commercial",
+            "interrupt",
+            "sponsor",
+            "unskippable",
+        ],
+        "App Stability": [
+            "crash",
+            "bug",
+            "error",
+            "slow",
+            "loading",
+            "lag",
+            "force close",
+            "glitch",
+            "stuck",
+            "freeze",
+        ],
+        "Content/Library": [
+            "song",
+            "playlist",
+            "library",
+            "genre",
+            "podcast",
+            "lyrics",
+            "collection",
+            "track",
+            "album",
+        ],
+    },
+    # Indonesian keywords (some English loanwords such as "audio", "bug" and
+    # "premium" are listed here as well).
+    # NOTE(review): only base word forms are listed; whether affixed forms
+    # (e.g. "suaranya") should also count depends on how callers match these
+    # entries -- confirm against the matching code.
+    "id": {
+        "Audio Quality": [
+            "suara",
+            "audio",
+            "bass",
+            "bunyi",
+            "kualitas",
+            "jernih",
+            "cempreng",
+            "kresek",
+            "volume",
+            "vokal",
+            "dolby",
+        ],
+        "Price & Premium": [
+            "harga",
+            "bayar",
+            "mahal",
+            "murah",
+            "premium",
+            "langganan",
+            "boros",
+            "tagihan",
+            "uang",
+            "beli",
+            "berbayar",
+        ],
+        "Ads (Iklan)": ["iklan", "ads", "promosi", "tonton", "komersial", "ganggu"],
+        "App Stability": [
+            "crash",
+            "bug",
+            "error",
+            "lemot",
+            "keluar sendiri",
+            "macet",
+            "lag",
+            "lelet",
+            "berat",
+            "rusak",
+            "gagal",
+            "force close",
+        ],
+        "Content/Library": [
+            "lagu",
+            "musik",
+            "playlist",
+            "koleksi",
+            "podcast",
+            "lirik",
+            "genre",
+            "album",
+            "artis",
+            "katalog",
+        ],
+    },
+}
+def get_aspects_detailed(text, lang="en"):
+    """
+    Scans text for keywords.
+    Returns list: [{'aspect': 'Audio', 'trigger': 'bass'}, ...]
+    """
+    found_details = []
+    text_lower = text.lower()
+    keywords = ASPECT_KEYWORDS.get(lang, ASPECT_KEYWORDS["en"])
+    for aspect_category, keyword_list in keywords.items():
+        for key in keyword_list:
+            # Word boundary check (\b) to avoid partial matches
+            if re.search(r"\b" + re.escape(key) + r"\b", text_lower):
+                found_details.append({"aspect": aspect_category, "trigger": key})
+    return found_details
+# ==========================================
+# 🛠️ 3. MODEL LOADER
+# ==========================================
+@st.cache_resource
+def load_model_safe(lang_code):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if lang_code == "en":
+        local_path = "./models/transformer/english"
+        fallback_repo = "nlptown/bert-base-multilingual-uncased-sentiment"
+    else:
+        local_path = "./models/transformer/indonesian"
+        fallback_repo = "indobenchmark/indobert-base-p1"
+    try:
+        # Try loading local fine-tuned model
+        tokenizer = AutoTokenizer.from_pretrained(local_path)
+        model = AutoModelForSequenceClassification.from_pretrained(local_path).to(
+            device
+        )
+        msg = "Status: Using Local Fine-Tuned Model"
+        is_custom = True
+    except:
+        # Fallback to download from HuggingFace
+        tokenizer = AutoTokenizer.from_pretrained(fallback_repo)
+        model = AutoModelForSequenceClassification.from_pretrained(fallback_repo).to(
+            device
+        )
+        msg = f"Status: Using Generic Base Model ({fallback_repo})"
+        is_custom = False
+    return model, tokenizer, device, msg, is_custom
+def predict_sentiment(text, model, tokenizer, device):
+    inputs = tokenizer(
+        text, return_tensors="pt", truncation=True, padding=True, max_length=128
+    ).to(device)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+        # Binary Classification
+        if logits.shape[1] == 2:
+            probs = F.softmax(logits, dim=1).cpu().numpy()[0]
+            score = probs[1]  # Probability of Positive
+        # Multiclass Fallback (1-5 stars)
+        else:
+            probs = F.softmax(logits, dim=1).cpu().numpy()[0]
+            score = np.sum(probs * np.array([0, 0.25, 0.5, 0.75, 1.0]))
+    return score
+# ==========================================
+# 🖥️ 4. APP UI
+# ==========================================
+def main():
+    st.title("🎵 Spotify Review Inspector")
+    st.markdown("Analyze reviews to identify sentiment and aspect triggers.")
+    # --- SIDEBAR ---
+    with st.sidebar:
+        st.header("⚙️ Configuration")
+        lang = st.selectbox("Select Language", ["Indonesian", "English"], index=0)
+        lang_code = "id" if lang == "Indonesian" else "en"
+        st.divider()
+        # Load Model
+        with st.spinner("Initializing AI Engine..."):
+            model, tokenizer, device, msg, is_custom = load_model_safe(lang_code)
+        if is_custom:
+            st.success(msg)
+        else:
+            st.warning(msg)
+            st.caption(
+                "Tip: Ensure your `models` folder contains the extracted zip files for best results."
+            )
+        st.divider()
+        # DEBUG SECTION
+        with st.expander("📖 View Dictionary (Debug)"):
+            st.write(f"**Current Dictionary ({lang_code.upper()}):**")
+            st.json(ASPECT_KEYWORDS[lang_code])
+    # --- MAIN INPUT ---
+    default_text = (
+        "Suaranya jernih banget enak didenger, tapi sayang harga premiumnya kemahalan buat pelajar."
+        if lang_code == "id"
+        else "The audio is crystal clear, but the premium price is too expensive."
+    )
+    user_input = st.text_area("Enter Review Text:", value=default_text, height=100)
+    if st.button("Analyze Sentiment", type="primary"):
+        st.markdown("### 📊 Analysis Results")
+        # 1. Segmentation
+        # Split logic: punctuation or contrast words (but, however, tapi, namun)
+        if lang_code == "id":
+            split_regex = r"[.!?;]|\btapi\b|\bnamun\b|\bsedangkan\b"
+        else:
+            split_regex = r"[.!?;]|\bbut\b|\bhowever\b|\bwhile\b"
+        raw_segments = re.split(split_regex, user_input)
+        segments = [s.strip() for s in raw_segments if s.strip()]
+        if not segments:
+            segments = [user_input]
+        # 2. Process & Render
+        for i, segment in enumerate(segments):
+            # Predict
+            score = predict_sentiment(segment, model, tokenizer, device)
+            is_positive = score > 0.55
+            # Formatting
+            sentiment_label = "POSITIVE" if is_positive else "NEGATIVE"
+            border_class = "border-pos" if is_positive else "border-neg"
+            badge_class = "badge-pos" if is_positive else "badge-neg"
+            # Find Aspect Triggers
+            details = get_aspects_detailed(segment, lang_code)
+            # --- RENDER CARD ---
+            st.markdown(
+                f"""
+            <div class="segment-box {border_class}">
+                <div style="display:flex; align-items:center; margin-bottom:8px;">
+                    <strong style="color:#888; margin-right:10px;">Segment {i+1}</strong>
+                    <span class="{badge_class}">
+                        {sentiment_label} ({score:.1%})
+                    </span>
+                </div>
+                <div class="segment-text">"{segment}"</div>
+            """,
+                unsafe_allow_html=True,
+            )
+            # --- RENDER TRIGGERS ---
+            if details:
+                cols = (
+                    st.columns(len(details)) if len(details) > 0 else [st.container()]
+                )
+                badges_html = ""
+                for det in details:
+                    badges_html += f"""
+                    <div class="trigger-badge">
+                        <span>🏷️ {det['aspect']}</span>
+                        <span style="font-size:0.8em; color:#666; margin-left:5px;">
+                            (trigger: <span class="trigger-word">{det['trigger']}</span>)
+                        </span>
+                    </div>
+                    """
+                st.markdown(f"<div>{badges_html}</div>", unsafe_allow_html=True)
+            else:
+                st.markdown(
+                    "<small style='color:#999; font-style:italic;'>No specific aspect keywords detected (General Sentiment)</small>",
+                    unsafe_allow_html=True,
+                )
+            st.markdown("</div>", unsafe_allow_html=True)
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

@@ -1,3 +1,7 @@
-altair
-pandas
-streamlit

+streamlit
+torch
+transformers
+numpy
+pandas
+sastrawi
+nltk