Spaces:

Hamusssss12
/

ABSA

Sleeping

App Files Files Community

ABSA / src /streamlit_app.py

Hamusssss12

Update src/streamlit_app.py

bd46b3c verified 5 months ago

raw

history blame contribute delete

11.5 kB

	import streamlit as st
	import time
	import re
	import numpy as np
	import torch
	import torch.nn.functional as F
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# ==========================================
	# ⚙️ 1. PAGE SETUP
	# ==========================================
	# Stylesheet for the result cards, sentiment badges and aspect-trigger chips.
	_PAGE_STYLES = """
	<style>
	.main { background-color: #f8f9fa; }

	/* Segment Box Styling */
	.segment-box {
	background-color: white;
	padding: 20px;
	border-radius: 10px;
	box-shadow: 0 2px 5px rgba(0,0,0,0.05);
	margin-bottom: 15px;
	border-left: 6px solid #ccc;
	}

	/* Text inside the box */
	.segment-text {
	font-size: 1.15em;
	font-family: sans-serif;
	color: #212529;
	margin-bottom: 12px;
	}

	/* Sentiment Badges */
	.badge-pos {
	background-color: #d4edda;
	color: #155724;
	padding: 4px 8px;
	border-radius: 4px;
	font-weight: bold;
	font-size: 0.85em;
	border: 1px solid #c3e6cb;
	}
	.badge-neg {
	background-color: #f8d7da;
	color: #721c24;
	padding: 4px 8px;
	border-radius: 4px;
	font-weight: bold;
	font-size: 0.85em;
	border: 1px solid #f5c6cb;
	}

	/* Aspect Trigger Badges */
	.trigger-badge {
	display: inline-block;
	background-color: #e2e6ea;
	color: #495057;
	padding: 4px 10px;
	border-radius: 15px;
	font-size: 0.85em;
	margin-right: 8px;
	margin-bottom: 5px;
	border: 1px solid #ced4da;
	}
	.trigger-word {
	background-color: #fff3cd;
	padding: 0 3px;
	border-radius: 3px;
	font-weight: bold;
	border-bottom: 1px solid #ffeeba;
	}

	/* Border Colors */
	.border-pos { border-left-color: #28a745 !important; }
	.border-neg { border-left-color: #dc3545 !important; }
	</style>
	"""

	# Page metadata first, then inject the stylesheet as raw HTML.
	st.set_page_config(
	    layout="wide",
	    page_icon="🎵",
	    page_title="Spotify ABSA Analyzer",
	)
	st.markdown(_PAGE_STYLES, unsafe_allow_html=True)

	# ==========================================
	# 🧠 2. ASPECT DICTIONARY (Standard Structure)
	# ==========================================

	# Keyword lexicon: language code -> aspect label -> ordered trigger keywords.
	# Both languages expose the same five aspect labels in the same order.
	ASPECT_KEYWORDS = {
	    "en": {
	        "Audio Quality": [
	            "audio", "sound", "bass", "treble", "voice",
	            "music quality", "volume", "noise", "hifi", "dolby",
	        ],
	        "Price & Premium": [
	            "price", "cost", "premium", "subscription", "expensive",
	            "cheap", "worth", "pay", "money", "billing",
	        ],
	        "Ads (Iklan)": [
	            "ads", "advertisement", "commercial",
	            "interrupt", "sponsor", "unskippable",
	        ],
	        "App Stability": [
	            "crash", "bug", "error", "slow", "loading",
	            "lag", "force close", "glitch", "stuck", "freeze",
	        ],
	        "Content/Library": [
	            "song", "playlist", "library", "genre", "podcast",
	            "lyrics", "collection", "track", "album",
	        ],
	    },
	    "id": {
	        "Audio Quality": [
	            "suara", "audio", "bass", "bunyi", "kualitas", "jernih",
	            "cempreng", "kresek", "volume", "vokal", "dolby",
	        ],
	        "Price & Premium": [
	            "harga", "bayar", "mahal", "murah", "premium", "langganan",
	            "boros", "tagihan", "uang", "beli", "berbayar",
	        ],
	        "Ads (Iklan)": [
	            "iklan", "ads", "promosi",
	            "tonton", "komersial", "ganggu",
	        ],
	        "App Stability": [
	            "crash", "bug", "error", "lemot", "keluar sendiri", "macet",
	            "lag", "lelet", "berat", "rusak", "gagal", "force close",
	        ],
	        "Content/Library": [
	            "lagu", "musik", "playlist", "koleksi", "podcast",
	            "lirik", "genre", "album", "artis", "katalog",
	        ],
	    },
	}


	def get_aspects_detailed(text, lang="en"):
	    """Find aspect keywords mentioned in a piece of review text.

	    The text is lower-cased and every keyword of the selected language is
	    searched as a whole word or phrase. A language code that has no entry
	    in ``ASPECT_KEYWORDS`` falls back to the English keyword set.

	    Returns a list of ``{"aspect": <label>, "trigger": <keyword>}`` dicts,
	    one per matching keyword, in lexicon order.
	    """
	    haystack = text.lower()
	    lexicon = ASPECT_KEYWORDS.get(lang, ASPECT_KEYWORDS["en"])
	    return [
	        {"aspect": label, "trigger": term}
	        for label, terms in lexicon.items()
	        for term in terms
	        # \b anchors stop e.g. "ads" from matching inside "loads".
	        if re.search(rf"\b{re.escape(term)}\b", haystack)
	    ]


	# ==========================================
	# 🛠️ 3. MODEL LOADER
	# ==========================================


	@st.cache_resource
	def load_model_safe(lang_code):
	    """Load the sentiment classifier and tokenizer for a language.

	    The fine-tuned checkpoint for the language is tried first; if loading it
	    fails for any ordinary reason, a generic public checkpoint is loaded
	    instead. ``"en"`` selects the English pair, any other code selects the
	    Indonesian pair.

	    Returns:
	        ``(model, tokenizer, device, msg, is_custom)`` where ``msg`` is a
	        human-readable status line and ``is_custom`` is True when the
	        fine-tuned checkpoint was loaded.

	    Raises:
	        Whatever ``from_pretrained`` raises if the fallback checkpoint
	        cannot be loaded either.
	    """
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	    # NOTE(review): despite the name, ``local_path`` is handed straight to
	    # ``from_pretrained``; the values look like Hub repo ids - confirm.
	    if lang_code == "en":
	        local_path = "Hamusssss12/spotify-absa-english"
	        fallback_repo = "nlptown/bert-base-multilingual-uncased-sentiment"
	    else:
	        local_path = "Hamusssss12/spotify-absa-indonesian"
	        # NOTE(review): this looks like a base checkpoint without a
	        # fine-tuned classification head - confirm its scores are meaningful.
	        fallback_repo = "indobenchmark/indobert-base-p1"

	    def _load(source):
	        """Load tokenizer and classifier from ``source`` onto ``device``."""
	        tok = AutoTokenizer.from_pretrained(source)
	        mdl = AutoModelForSequenceClassification.from_pretrained(source).to(device)
	        return mdl, tok

	    try:
	        model, tokenizer = _load(local_path)
	    except Exception:
	        # Catch Exception rather than using a bare ``except`` so that
	        # KeyboardInterrupt / SystemExit still propagate.
	        model, tokenizer = _load(fallback_repo)
	        msg = f"Status: Using Generic Base Model ({fallback_repo})"
	        is_custom = False
	    else:
	        msg = "Status: Using Local Fine-Tuned Model"
	        is_custom = True

	    return model, tokenizer, device, msg, is_custom


	def predict_sentiment(text, model, tokenizer, device):
	    """Score how positive ``text`` is, as a float between 0 and 1.

	    Args:
	        text: The text to classify.
	        model: Callable returning an object with a ``logits`` tensor whose
	            second dimension is the number of labels.
	        tokenizer: Callable producing the model inputs; its result must
	            support ``.to(device)`` and ``**`` unpacking.
	        device: Torch device the inputs are moved to.

	    Returns:
	        A Python float. For two or more labels the classes are treated as an
	        ordered scale from most negative (0.0) to most positive (1.0) and the
	        probability-weighted mean is returned; for two labels this equals the
	        probability of the second class, and for five labels it matches a
	        1-5 star scale. A single-logit head is squashed with a sigmoid.
	    """
	    inputs = tokenizer(
	        text, return_tensors="pt", truncation=True, padding=True, max_length=128
	    ).to(device)
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    num_labels = logits.shape[1]
	    if num_labels == 1:
	        # Softmax over one logit is always 1.0, so use a sigmoid instead.
	        # NOTE(review): assumes a single output is a binary logit - confirm.
	        return float(torch.sigmoid(logits)[0, 0].item())

	    probs = F.softmax(logits, dim=1).cpu().numpy()[0]
	    # Evenly spaced class weights: [0, 1] for binary, [0, .25, .5, .75, 1]
	    # for five classes, and a valid scale for any other label count.
	    weights = np.linspace(0.0, 1.0, num_labels)
	    return float(np.dot(probs, weights))


	# ==========================================
	# 🖥️ 4. APP UI
	# ==========================================


	def _split_segments(text, lang_code):
	    """Split a review into clauses on sentence punctuation and contrast words."""
	    if lang_code == "id":
	        contrast_words = ("tapi", "namun", "sedangkan")
	    else:
	        contrast_words = ("but", "however", "while")
	    # The alternation bars must be unescaped: r"\|" matches a literal pipe
	    # character and would turn the whole pattern into one long sequence.
	    pattern = r"[.!?;]|" + "|".join(rf"\b{word}\b" for word in contrast_words)
	    pieces = re.split(pattern, text, flags=re.IGNORECASE)
	    # Drop whitespace and the dangling commas left next to a contrast word.
	    cleaned = (piece.strip(" \t\r\n,") for piece in pieces)
	    return [piece for piece in cleaned if piece]


	def _build_segment_card(number, segment, score, is_positive, details):
	    """Return the complete HTML for one analysed segment as a single string."""
	    import html  # local import: the file header does not import ``html``

	    tone = "pos" if is_positive else "neg"
	    sentiment_label = "POSITIVE" if is_positive else "NEGATIVE"

	    if details:
	        footer = "".join(
	            '<div class="trigger-badge">'
	            f"<span>🏷️ {html.escape(det['aspect'])}</span>"
	            '<span style="font-size:0.8em; color:#666; margin-left:5px;">'
	            f'(trigger: <span class="trigger-word">{html.escape(det["trigger"])}</span>)'
	            "</span>"
	            "</div>"
	            for det in details
	        )
	    else:
	        footer = (
	            "<small style='color:#999; font-style:italic;'>"
	            "No specific aspect keywords detected (General Sentiment)</small>"
	        )

	    # The markup is emitted without leading indentation so Markdown does not
	    # treat it as an indented code block, and the user's text is escaped
	    # because it is rendered with unsafe_allow_html.
	    return (
	        f'<div class="segment-box border-{tone}">'
	        '<div style="display:flex; align-items:center; margin-bottom:8px;">'
	        f'<strong style="color:#888; margin-right:10px;">Segment {number}</strong>'
	        f'<span class="badge-{tone}">{sentiment_label} ({score:.1%})</span>'
	        "</div>"
	        f'<div class="segment-text">"{html.escape(segment)}"</div>'
	        f"<div>{footer}</div>"
	        "</div>"
	    )


	def main():
	    """Render the Streamlit page: sidebar configuration, input and results.

	    The sidebar selects the language and loads the matching model. The main
	    area collects a review and, when the button is pressed, shows one card
	    per segment with its sentiment score and any aspect keywords found.
	    """
	    positive_threshold = 0.55  # scores above this are labelled POSITIVE

	    st.title("🎵 Spotify Review Inspector")
	    st.markdown("Analyze reviews to identify sentiment and aspect triggers.")

	    # --- SIDEBAR ---
	    with st.sidebar:
	        st.header("⚙️ Configuration")
	        lang = st.selectbox("Select Language", ["Indonesian", "English"], index=0)
	        lang_code = "id" if lang == "Indonesian" else "en"

	        st.divider()

	        # Load Model
	        with st.spinner("Initializing AI Engine..."):
	            model, tokenizer, device, msg, is_custom = load_model_safe(lang_code)

	        if is_custom:
	            st.success(msg)
	        else:
	            st.warning(msg)
	            st.caption(
	                "Tip: Ensure your `models` folder contains the extracted zip files for best results."
	            )

	        st.divider()

	        # DEBUG SECTION
	        with st.expander("📖 View Dictionary (Debug)"):
	            st.write(f"Current Dictionary ({lang_code.upper()}):")
	            st.json(ASPECT_KEYWORDS[lang_code])

	    # --- MAIN INPUT ---
	    default_text = (
	        "Suaranya jernih banget enak didenger, tapi sayang harga premiumnya kemahalan buat pelajar."
	        if lang_code == "id"
	        else "The audio is crystal clear, but the premium price is too expensive."
	    )

	    user_input = st.text_area("Enter Review Text:", value=default_text, height=100)

	    if not st.button("Analyze Sentiment", type="primary"):
	        return

	    st.markdown("### 📊 Analysis Results")

	    # 1. Segmentation
	    segments = _split_segments(user_input, lang_code)
	    if not segments:
	        # Nothing but whitespace/punctuation: there is no text to score.
	        st.warning("Please enter some review text to analyze.")
	        return

	    # 2. Process & Render
	    for number, segment in enumerate(segments, start=1):
	        score = predict_sentiment(segment, model, tokenizer, device)
	        details = get_aspects_detailed(segment, lang_code)
	        # Streamlit renders every st.markdown call as its own element, so a
	        # <div> opened in one call cannot wrap content from later calls; the
	        # whole card is therefore sent in a single call.
	        st.markdown(
	            _build_segment_card(
	                number, segment, score, score > positive_threshold, details
	            ),
	            unsafe_allow_html=True,
	        )


	# Run the app only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	main()