ABSA / src /streamlit_app.py
Hamusssss12's picture
Update src/streamlit_app.py
bd46b3c verified
import html
import re
import time

import numpy as np
import streamlit as st
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ==========================================
# ⚙️ 1. PAGE SETUP
# ==========================================
# Configure the browser tab (title + icon) and use the wide page layout.
# page_icon is a single emoji: the musical-note glyph (U+1F3B5).
st.set_page_config(
    page_title="Spotify ABSA Analyzer",
    page_icon="🎵",
    layout="wide",
)
# Stylesheet for the result cards: the card container (.segment-box), the
# sentiment badges (.badge-pos / .badge-neg), the aspect chips
# (.trigger-badge / .trigger-word) and the coloured left border.
_PAGE_CSS = """
<style>
.main { background-color: #f8f9fa; }
/* Segment Box Styling */
.segment-box {
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
margin-bottom: 15px;
border-left: 6px solid #ccc;
}
/* Text inside the box */
.segment-text {
font-size: 1.15em;
font-family: sans-serif;
color: #212529;
margin-bottom: 12px;
}
/* Sentiment Badges */
.badge-pos {
background-color: #d4edda;
color: #155724;
padding: 4px 8px;
border-radius: 4px;
font-weight: bold;
font-size: 0.85em;
border: 1px solid #c3e6cb;
}
.badge-neg {
background-color: #f8d7da;
color: #721c24;
padding: 4px 8px;
border-radius: 4px;
font-weight: bold;
font-size: 0.85em;
border: 1px solid #f5c6cb;
}
/* Aspect Trigger Badges */
.trigger-badge {
display: inline-block;
background-color: #e2e6ea;
color: #495057;
padding: 4px 10px;
border-radius: 15px;
font-size: 0.85em;
margin-right: 8px;
margin-bottom: 5px;
border: 1px solid #ced4da;
}
.trigger-word {
background-color: #fff3cd;
padding: 0 3px;
border-radius: 3px;
font-weight: bold;
border-bottom: 1px solid #ffeeba;
}
/* Border Colors */
.border-pos { border-left-color: #28a745 !important; }
.border-neg { border-left-color: #dc3545 !important; }
</style>
"""

# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# ==========================================
# 🧠 2. ASPECT DICTIONARY (Standard Structure)
# ==========================================
# Keyword vocabulary per aspect category. Both languages use the same five
# category names; insertion order is the order in which matches are reported.
_ENGLISH_ASPECTS = {
    "Audio Quality": [
        "audio", "sound", "bass", "treble", "voice",
        "music quality", "volume", "noise", "hifi", "dolby",
    ],
    "Price & Premium": [
        "price", "cost", "premium", "subscription", "expensive",
        "cheap", "worth", "pay", "money", "billing",
    ],
    "Ads (Iklan)": [
        "ads", "advertisement", "commercial",
        "interrupt", "sponsor", "unskippable",
    ],
    "App Stability": [
        "crash", "bug", "error", "slow", "loading",
        "lag", "force close", "glitch", "stuck", "freeze",
    ],
    "Content/Library": [
        "song", "playlist", "library", "genre", "podcast",
        "lyrics", "collection", "track", "album",
    ],
}

_INDONESIAN_ASPECTS = {
    "Audio Quality": [
        "suara", "audio", "bass", "bunyi", "kualitas", "jernih",
        "cempreng", "kresek", "volume", "vokal", "dolby",
    ],
    "Price & Premium": [
        "harga", "bayar", "mahal", "murah", "premium", "langganan",
        "boros", "tagihan", "uang", "beli", "berbayar",
    ],
    "Ads (Iklan)": [
        "iklan", "ads", "promosi",
        "tonton", "komersial", "ganggu",
    ],
    "App Stability": [
        "crash", "bug", "error", "lemot", "keluar sendiri", "macet",
        "lag", "lelet", "berat", "rusak", "gagal", "force close",
    ],
    "Content/Library": [
        "lagu", "musik", "playlist", "koleksi", "podcast",
        "lirik", "genre", "album", "artis", "katalog",
    ],
}

# Language code -> {aspect category -> list of lowercase trigger keywords}.
ASPECT_KEYWORDS = {
    "en": _ENGLISH_ASPECTS,
    "id": _INDONESIAN_ASPECTS,
}
def get_aspects_detailed(text, lang="en"):
    """
    Find every aspect keyword that occurs in ``text`` as a whole word.

    The text is lower-cased before matching. ``lang`` selects the vocabulary
    in ASPECT_KEYWORDS; an unknown code falls back to the English table.

    Returns a list of dicts in vocabulary order, e.g.
    [{'aspect': 'Audio Quality', 'trigger': 'bass'}, ...]
    """
    haystack = text.lower()
    vocabulary = ASPECT_KEYWORDS.get(lang, ASPECT_KEYWORDS["en"])

    matches = []
    for category, terms in vocabulary.items():
        # \b on both sides so a keyword embedded in a longer word is ignored.
        matches.extend(
            {"aspect": category, "trigger": term}
            for term in terms
            if re.search(rf"\b{re.escape(term)}\b", haystack)
        )
    return matches
# ==========================================
# 🛠️ 3. MODEL LOADER
# ==========================================
@st.cache_resource
def load_model_safe(lang_code):
    """
    Load the sentiment classifier and tokenizer for one language.

    The fine-tuned checkpoint is tried first; if loading it fails for any
    reason, the generic fallback checkpoint is loaded instead.

    Args:
        lang_code: "en" selects the English checkpoints; any other value
            selects the Indonesian ones.

    Returns:
        (model, tokenizer, device, msg, is_custom) where ``msg`` is a status
        line for the UI and ``is_custom`` is True only when the fine-tuned
        checkpoint was loaded.

    Raises:
        Whatever ``from_pretrained`` raises if the fallback also fails.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if lang_code == "en":
        finetuned_id = "Hamusssss12/spotify-absa-english"
        fallback_repo = "nlptown/bert-base-multilingual-uncased-sentiment"
    else:
        finetuned_id = "Hamusssss12/spotify-absa-indonesian"
        fallback_repo = "indobenchmark/indobert-base-p1"

    def _load(checkpoint):
        # Tokenizer first, then the model moved onto the selected device.
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        return model.to(device), tokenizer

    try:
        model, tokenizer = _load(finetuned_id)
    except Exception:
        # Deliberately broad: any load failure should degrade to the fallback.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and SystemExit
        # propagate.
        model, tokenizer = _load(fallback_repo)
        msg = f"Status: Using Generic Base Model ({fallback_repo})"
        is_custom = False
    else:
        msg = "Status: Using Local Fine-Tuned Model"
        is_custom = True

    return model, tokenizer, device, msg, is_custom
def predict_sentiment(text, model, tokenizer, device):
    """
    Score ``text`` on a 0-1 positivity scale.

    The text is tokenized (truncated to 128 tokens), run through ``model``
    without gradient tracking, and the logits are converted to a score:

    * N >= 2 classes: softmax probabilities weighted by N evenly spaced
      values from 0 to 1. For 2 classes this is the probability of class 1
      (positive); for 5 classes (1-5 stars) the weights are
      0, 0.25, 0.5, 0.75, 1.0.
    * 1 logit: sigmoid of that logit.

    Args:
        text: The sentence or clause to score.
        model: Callable returning an object with a ``logits`` tensor of
            shape (batch, num_classes).
        tokenizer: Callable producing the model inputs; its result must
            support ``.to(device)`` and ``**`` unpacking.
        device: Device the inputs are moved to.

    Returns:
        float in [0, 1]; higher means more positive.
    """
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits

    num_classes = logits.shape[1]
    if num_classes == 1:
        # NOTE(review): assumes a single-logit head is a binary "positive"
        # logit; softmax over one class would always be 1.0.
        return float(torch.sigmoid(logits[0, 0]).item())

    probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    # Evenly spaced weights let the same formula work for any class count
    # instead of assuming exactly five classes.
    weights = np.linspace(0.0, 1.0, num_classes)
    return float(np.dot(probs, weights))
# ==========================================
# 🖥️ 4. APP UI
# ==========================================
# Scores strictly above this value are labelled POSITIVE.
_POSITIVE_THRESHOLD = 0.55


def _split_into_segments(text, lang_code):
    """Split a review into clauses at sentence punctuation and contrast words."""
    if lang_code == "id":
        pattern = r"[.!?;]|\btapi\b|\bnamun\b|\bsedangkan\b"
    else:
        pattern = r"[.!?;]|\bbut\b|\bhowever\b|\bwhile\b"
    # IGNORECASE so capitalised connectors ("But", "Tapi") split as well.
    pieces = re.split(pattern, text, flags=re.IGNORECASE)
    segments = [piece.strip() for piece in pieces if piece.strip()]
    # Nothing left after splitting (e.g. punctuation only): analyse as-is.
    return segments or [text]


def _build_segment_card(index, segment, score, details):
    """Return the complete HTML card for one analysed segment."""
    is_positive = score > _POSITIVE_THRESHOLD
    sentiment_label = "POSITIVE" if is_positive else "NEGATIVE"
    border_class = "border-pos" if is_positive else "border-neg"
    badge_class = "badge-pos" if is_positive else "badge-neg"

    if details:
        badges = "".join(
            '<div class="trigger-badge">'
            f"<span>🏷️ {html.escape(det['aspect'])}</span>"
            '<span style="font-size:0.8em; color:#666; margin-left:5px;">'
            f'(trigger: <span class="trigger-word">{html.escape(det["trigger"])}</span>)'
            "</span>"
            "</div>"
            for det in details
        )
        footer = f"<div>{badges}</div>"
    else:
        footer = (
            "<small style='color:#999; font-style:italic;'>"
            "No specific aspect keywords detected (General Sentiment)"
            "</small>"
        )

    # One unbroken string with no blank or indented lines, so the Markdown
    # renderer treats the whole card as a single HTML block.
    return (
        f'<div class="segment-box {border_class}">'
        '<div style="display:flex; align-items:center; margin-bottom:8px;">'
        f'<strong style="color:#888; margin-right:10px;">Segment {index}</strong>'
        f'<span class="{badge_class}">{sentiment_label} ({score:.1%})</span>'
        "</div>"
        # The review text is user input rendered as raw HTML: escape it.
        f'<div class="segment-text">"{html.escape(segment)}"</div>'
        f"{footer}"
        "</div>"
    )


def main():
    """
    Render the page: sidebar configuration, review input, and per-segment
    sentiment cards with the aspect keywords that triggered in each segment.
    """
    st.title("🎵 Spotify Review Inspector")
    st.markdown("Analyze reviews to identify sentiment and aspect triggers.")

    # --- SIDEBAR ---
    with st.sidebar:
        st.header("⚙️ Configuration")
        lang = st.selectbox("Select Language", ["Indonesian", "English"], index=0)
        lang_code = "id" if lang == "Indonesian" else "en"
        st.divider()

        # Load Model
        with st.spinner("Initializing AI Engine..."):
            model, tokenizer, device, msg, is_custom = load_model_safe(lang_code)
        if is_custom:
            st.success(msg)
        else:
            st.warning(msg)
            st.caption(
                "Tip: Ensure your `models` folder contains the extracted zip files for best results."
            )
        st.divider()

        # DEBUG SECTION
        with st.expander("📖 View Dictionary (Debug)"):
            st.write(f"**Current Dictionary ({lang_code.upper()}):**")
            st.json(ASPECT_KEYWORDS[lang_code])

    # --- MAIN INPUT ---
    default_text = (
        "Suaranya jernih banget enak didenger, tapi sayang harga premiumnya kemahalan buat pelajar."
        if lang_code == "id"
        else "The audio is crystal clear, but the premium price is too expensive."
    )
    user_input = st.text_area("Enter Review Text:", value=default_text, height=100)

    if not st.button("Analyze Sentiment", type="primary"):
        return

    # Do not send blank input to the model.
    if not user_input.strip():
        st.warning("Please enter some review text to analyze.")
        return

    st.markdown("### 📊 Analysis Results")

    # 1. Segmentation: punctuation or contrast words (but, however, tapi, namun)
    segments = _split_into_segments(user_input, lang_code)

    # 2. Process & Render
    for index, segment in enumerate(segments, start=1):
        score = predict_sentiment(segment, model, tokenizer, device)
        details = get_aspects_detailed(segment, lang_code)
        # Each st.markdown call is rendered as its own element, so the whole
        # card (header, text, badges) must be emitted in a single call for the
        # wrapper <div> to actually enclose its content.
        st.markdown(
            _build_segment_card(index, segment, score, details),
            unsafe_allow_html=True,
        )
# Entry point: build the page only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()