# Source: Hugging Face Space "Hamusssss12/ABSA" (Space status: Sleeping)
# File size: 11,451 Bytes

import html
import re
import time

import numpy as np
import streamlit as st
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ==========================================
# ⚙️ 1. PAGE SETUP
# ==========================================
# Configure the browser tab title, icon and a wide page layout.
st.set_page_config(
    page_title="Spotify ABSA Analyzer",
    page_icon="🎵",
    layout="wide",
)

# Stylesheet for the result cards: card container, sentiment badges,
# aspect-trigger badges and the coloured left border per sentiment.
_CUSTOM_CSS = """
    <style>
    .main { background-color: #f8f9fa; }
    
    /* Segment Box Styling */
    .segment-box {
        background-color: white;
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 2px 5px rgba(0,0,0,0.05);
        margin-bottom: 15px;
        border-left: 6px solid #ccc;
    }
    
    /* Text inside the box */
    .segment-text {
        font-size: 1.15em;
        font-family: sans-serif;
        color: #212529;
        margin-bottom: 12px;
    }

    /* Sentiment Badges */
    .badge-pos {
        background-color: #d4edda;
        color: #155724;
        padding: 4px 8px;
        border-radius: 4px;
        font-weight: bold;
        font-size: 0.85em;
        border: 1px solid #c3e6cb;
    }
    .badge-neg {
        background-color: #f8d7da;
        color: #721c24;
        padding: 4px 8px;
        border-radius: 4px;
        font-weight: bold;
        font-size: 0.85em;
        border: 1px solid #f5c6cb;
    }

    /* Aspect Trigger Badges */
    .trigger-badge {
        display: inline-block;
        background-color: #e2e6ea;
        color: #495057;
        padding: 4px 10px;
        border-radius: 15px;
        font-size: 0.85em;
        margin-right: 8px;
        margin-bottom: 5px;
        border: 1px solid #ced4da;
    }
    .trigger-word {
        background-color: #fff3cd;
        padding: 0 3px;
        border-radius: 3px;
        font-weight: bold;
        border-bottom: 1px solid #ffeeba;
    }

    /* Border Colors */
    .border-pos { border-left-color: #28a745 !important; }
    .border-neg { border-left-color: #dc3545 !important; }
    </style>
"""

# Inject the stylesheet into the page as raw HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# ==========================================
# 🧠 2. ASPECT DICTIONARY (Standard Structure)
# ==========================================

# English trigger words, grouped by the aspect category they indicate.
_EN_ASPECT_KEYWORDS = {
    "Audio Quality": [
        "audio", "sound", "bass", "treble", "voice",
        "music quality", "volume", "noise", "hifi", "dolby",
    ],
    "Price & Premium": [
        "price", "cost", "premium", "subscription", "expensive",
        "cheap", "worth", "pay", "money", "billing",
    ],
    "Ads (Iklan)": [
        "ads", "advertisement", "commercial", "interrupt", "sponsor", "unskippable",
    ],
    "App Stability": [
        "crash", "bug", "error", "slow", "loading",
        "lag", "force close", "glitch", "stuck", "freeze",
    ],
    "Content/Library": [
        "song", "playlist", "library", "genre", "podcast",
        "lyrics", "collection", "track", "album",
    ],
}

# Indonesian trigger words, using the same aspect categories as English.
_ID_ASPECT_KEYWORDS = {
    "Audio Quality": [
        "suara", "audio", "bass", "bunyi", "kualitas", "jernih",
        "cempreng", "kresek", "volume", "vokal", "dolby",
    ],
    "Price & Premium": [
        "harga", "bayar", "mahal", "murah", "premium", "langganan",
        "boros", "tagihan", "uang", "beli", "berbayar",
    ],
    "Ads (Iklan)": ["iklan", "ads", "promosi", "tonton", "komersial", "ganggu"],
    "App Stability": [
        "crash", "bug", "error", "lemot", "keluar sendiri", "macet",
        "lag", "lelet", "berat", "rusak", "gagal", "force close",
    ],
    "Content/Library": [
        "lagu", "musik", "playlist", "koleksi", "podcast",
        "lirik", "genre", "album", "artis", "katalog",
    ],
}

# Language code -> {aspect category -> list of lowercase trigger keywords}.
ASPECT_KEYWORDS = {
    "en": _EN_ASPECT_KEYWORDS,
    "id": _ID_ASPECT_KEYWORDS,
}


def get_aspects_detailed(text, lang="en"):
    """
    Find every aspect keyword that occurs in ``text`` as a whole word.

    The text is lower-cased before matching. Keywords come from
    ``ASPECT_KEYWORDS[lang]``; an unknown ``lang`` uses the English table.

    Returns one ``{"aspect": <category>, "trigger": <keyword>}`` dict per
    matching keyword, in dictionary order (category first, then keyword).
    """
    haystack = text.lower()
    lexicon = ASPECT_KEYWORDS.get(lang, ASPECT_KEYWORDS["en"])

    # \b on both sides so a keyword never matches inside a longer word.
    return [
        {"aspect": category, "trigger": term}
        for category, terms in lexicon.items()
        for term in terms
        if re.search(r"\b" + re.escape(term) + r"\b", haystack)
    ]


# ==========================================
# 🛠️ 3. MODEL LOADER
# ==========================================


def _load_tokenizer_and_model(repo_id, device):
    """Load the tokenizer and classification model for ``repo_id`` and move the model to ``device``."""
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSequenceClassification.from_pretrained(repo_id).to(device)
    return tokenizer, model


@st.cache_resource
def load_model_safe(lang_code):
    """
    Load the sentiment model for a language, falling back to a generic model.

    Args:
        lang_code: ``"en"`` selects the English models; any other value
            selects the Indonesian ones.

    Returns:
        A tuple ``(model, tokenizer, device, msg, is_custom)`` where ``msg``
        is a human-readable status line and ``is_custom`` is True when the
        fine-tuned model was loaded, False when the fallback was used.

    Raises:
        Whatever ``from_pretrained`` raises if the fallback model also fails
        to load.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if lang_code == "en":
        primary_repo = "Hamusssss12/spotify-absa-english"
        fallback_repo = "nlptown/bert-base-multilingual-uncased-sentiment"
    else:
        primary_repo = "Hamusssss12/spotify-absa-indonesian"
        # NOTE(review): this looks like a base checkpoint rather than a
        # sentiment-tuned one; predictions from it may not be meaningful
        # -- confirm.
        fallback_repo = "indobenchmark/indobert-base-p1"

    try:
        tokenizer, model = _load_tokenizer_and_model(primary_repo, device)
    except Exception:
        # Deliberate best-effort: any load failure switches to the generic
        # model. `Exception` (not a bare `except`) so that KeyboardInterrupt
        # and SystemExit still propagate.
        tokenizer, model = _load_tokenizer_and_model(fallback_repo, device)
        msg = f"Status: Using Generic Base Model ({fallback_repo})"
        is_custom = False
    else:
        msg = "Status: Using Local Fine-Tuned Model"
        is_custom = True

    return model, tokenizer, device, msg, is_custom


def predict_sentiment(text, model, tokenizer, device):
    """
    Score ``text`` on a 0..1 scale where higher means more positive.

    Args:
        text: The text to classify.
        model: Callable returning an object with a ``logits`` tensor of
            shape ``(1, num_classes)`` when called with the tokenizer output.
        tokenizer: Callable producing the model inputs; its result must
            support ``.to(device)`` and ``**`` unpacking.
        device: Torch device the inputs are moved to.

    Returns:
        A Python float in [0, 1]:
        * 2 classes: the softmax probability of class index 1.
        * N > 2 classes: the expected value of the class index rescaled to
          [0, 1] (for 5 classes the weights are 0, 0.25, 0.5, 0.75, 1.0).
        * 1 logit: the sigmoid of that logit (assumes a single-output
          binary head -- TODO confirm if such a model is ever used).
    """
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        num_classes = logits.shape[1]

        if num_classes == 1:
            # Softmax over a single logit is always 1.0, so use sigmoid.
            score = torch.sigmoid(logits)[0, 0].item()
        else:
            probs = F.softmax(logits, dim=1).cpu().numpy()[0]
            if num_classes == 2:
                score = probs[1]  # Probability of the positive class
            else:
                # Evenly spaced weights from 0 (lowest class) to 1 (highest),
                # sized to the model's actual number of classes instead of
                # assuming exactly five.
                weights = np.linspace(0.0, 1.0, num_classes)
                score = np.sum(probs * weights)

    return float(score)


# ==========================================
# 🖥️ 4. APP UI
# ==========================================


# Scores strictly above this value are labelled POSITIVE.
_POSITIVE_THRESHOLD = 0.55

# Per-language split points: sentence punctuation or a contrast conjunction.
_SEGMENT_SPLIT_PATTERNS = {
    "id": r"[.!?;]|\btapi\b|\bnamun\b|\bsedangkan\b",
    "en": r"[.!?;]|\bbut\b|\bhowever\b|\bwhile\b",
}


def _split_segments(text, lang_code):
    """Split a review into stripped, non-empty clauses; returns [] for blank input."""
    pattern = _SEGMENT_SPLIT_PATTERNS["id" if lang_code == "id" else "en"]
    # Case-insensitive so sentence-initial "However" / "Tapi" also split.
    pieces = re.split(pattern, text, flags=re.IGNORECASE)
    segments = [piece.strip() for piece in pieces if piece.strip()]
    if not segments and text.strip():
        # Nothing but separators (e.g. "..."): analyse the input as a whole.
        segments = [text.strip()]
    return segments


def _build_card_html(index, segment, score, details):
    """Return the complete HTML for one result card as a single-line string."""
    is_positive = score > _POSITIVE_THRESHOLD
    sentiment_label = "POSITIVE" if is_positive else "NEGATIVE"
    border_class = "border-pos" if is_positive else "border-neg"
    badge_class = "badge-pos" if is_positive else "badge-neg"

    # The segment is user input: escape it before embedding it in raw HTML.
    # Whitespace is collapsed so an embedded blank line cannot terminate the
    # Markdown HTML block part-way through the card.
    safe_text = html.escape(" ".join(segment.split()))

    if details:
        badges = "".join(
            '<div class="trigger-badge">'
            f"<span>🏷️ {html.escape(det['aspect'])}</span>"
            '<span style="font-size:0.8em; color:#666; margin-left:5px;">'
            f'(trigger: <span class="trigger-word">{html.escape(det["trigger"])}</span>)'
            "</span>"
            "</div>"
            for det in details
        )
        footer = f"<div>{badges}</div>"
    else:
        footer = (
            "<small style='color:#999; font-style:italic;'>"
            "No specific aspect keywords detected (General Sentiment)"
            "</small>"
        )

    return (
        f'<div class="segment-box {border_class}">'
        '<div style="display:flex; align-items:center; margin-bottom:8px;">'
        f'<strong style="color:#888; margin-right:10px;">Segment {index}</strong>'
        f'<span class="{badge_class}">{sentiment_label} ({score:.1%})</span>'
        "</div>"
        f'<div class="segment-text">"{safe_text}"</div>'
        f"{footer}"
        "</div>"
    )


def main():
    """
    Render the Streamlit page: sidebar configuration, review input, results.

    The sidebar selects the language, loads the matching model and shows the
    keyword dictionary. When "Analyze Sentiment" is pressed, the review is
    split into segments; each segment is scored with ``predict_sentiment``,
    scanned with ``get_aspects_detailed`` and rendered as one card.
    """
    st.title("🎵 Spotify Review Inspector")
    st.markdown("Analyze reviews to identify sentiment and aspect triggers.")

    # --- SIDEBAR ---
    with st.sidebar:
        st.header("⚙️ Configuration")
        lang = st.selectbox("Select Language", ["Indonesian", "English"], index=0)
        lang_code = "id" if lang == "Indonesian" else "en"

        st.divider()

        # Load Model
        with st.spinner("Initializing AI Engine..."):
            model, tokenizer, device, msg, is_custom = load_model_safe(lang_code)

        if is_custom:
            st.success(msg)
        else:
            st.warning(msg)
            st.caption(
                "Tip: Ensure your `models` folder contains the extracted zip files for best results."
            )

        st.divider()

        # DEBUG SECTION
        with st.expander("📖 View Dictionary (Debug)"):
            st.write(f"**Current Dictionary ({lang_code.upper()}):**")
            st.json(ASPECT_KEYWORDS[lang_code])

    # --- MAIN INPUT ---
    default_text = (
        "Suaranya jernih banget enak didenger, tapi sayang harga premiumnya kemahalan buat pelajar."
        if lang_code == "id"
        else "The audio is crystal clear, but the premium price is too expensive."
    )

    user_input = st.text_area("Enter Review Text:", value=default_text, height=100)

    if not st.button("Analyze Sentiment", type="primary"):
        return

    # 1. Segmentation
    segments = _split_segments(user_input, lang_code)
    if not segments:
        # Do not run the model on blank input.
        st.warning("Please enter some review text to analyze.")
        return

    st.markdown("### 📊 Analysis Results")

    # 2. Process & Render
    for position, segment in enumerate(segments, start=1):
        score = predict_sentiment(segment, model, tokenizer, device)
        details = get_aspects_detailed(segment, lang_code)

        # One st.markdown call per card: each call is rendered as its own
        # element, so a <div> opened in one call cannot be closed by a later
        # one and the badges would end up outside the card.
        st.markdown(
            _build_card_html(position, segment, score, details),
            unsafe_allow_html=True,
        )


# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()