# noranisa's picture
# Create services/absa.py
# 23ac9a8 verified
"""
services/absa.py
Aspect-Based Sentiment Analysis (ABSA) untuk Bahasa Indonesia.
Pendekatan:
1. Ekstrak aspek dari teks menggunakan lexicon (pencocokan kata kunci per token)
2. Tentukan sentimen per aspek menggunakan window context
3. Agregasi hasil per kategori aspek
Kategori aspek yang didukung (domain-agnostic):
- harga/biaya : harga, mahal, murah, biaya, tarif, ongkos
- kualitas/produk : kualitas, bagus, jelek, buruk, rusak, produk
- pelayanan/service : pelayanan, layanan, respon, lambat, cepat, ramah
- lokasi/tempat : lokasi, tempat, jarak, strategis, jauh, dekat
- kebijakan : kebijakan, aturan, regulasi, keputusan, program
- pemimpin/tokoh : pemimpin, presiden, gubernur, menteri, pejabat
- ekonomi : ekonomi, inflasi, pendapatan, gaji, subsidi, pajak
- pendidikan : pendidikan, sekolah, kampus, belajar, kurikulum
- kesehatan : kesehatan, rumah sakit, dokter, obat, vaksin
- infrastruktur : jalan, infrastruktur, gedung, fasilitas, listrik
"""
import re
from collections import defaultdict
from typing import Optional
# ─────────────────────────────────────────────
# ASPECT LEXICON
# ─────────────────────────────────────────────
# Maps each aspect category to the keywords that signal it.
# Insertion order matters: _get_aspect() walks the categories in order and
# returns the first one whose keyword list matches the token.
# NOTE(review): multi-word entries (e.g. "tahan lama", "rumah sakit") cannot
# match the single whitespace-split tokens that extract_aspects() feeds to
# _get_aspect() -- confirm whether phrase matching was intended.
ASPECT_LEXICON = {
    # Price / cost
    "harga": [
        "harga", "mahal", "murah", "biaya", "tarif",
        "ongkos", "harganya", "cost", "price", "bayar",
        "bayaran", "budget", "anggaran", "tagihan", "cicilan",
        "kredit", "diskon", "promo", "gratis", "terjangkau",
    ],
    # Product quality
    "kualitas": [
        "kualitas", "bagus", "jelek", "buruk", "rusak",
        "cacat", "produk", "barang", "mutu", "kualiti",
        "quality", "performa", "fitur", "spesifikasi", "durable",
        "tahan lama", "awet", "rapuh", "boros",
    ],
    # Service
    "pelayanan": [
        "pelayanan", "layanan", "servis", "service", "respon",
        "respons", "lambat", "cepat", "ramah", "kasar",
        "profesional", "sopan", "membantu", "helpful", "cs",
        "customer service", "admin", "operator", "staff", "petugas",
    ],
    # Location / place
    "lokasi": [
        "lokasi", "tempat", "jarak", "strategis", "jauh",
        "dekat", "akses", "parkir", "alamat", "wilayah",
        "daerah", "kawasan", "lingkungan",
    ],
    # Policy / regulation
    "kebijakan": [
        "kebijakan", "aturan", "regulasi", "keputusan", "program",
        "peraturan", "undang", "hukum", "sanksi", "denda",
        "izin", "prosedur", "birokrasi", "pemerintah", "pemerintahan",
        "politik", "implementasi",
    ],
    # Leaders / public figures
    "pemimpin": [
        "pemimpin", "presiden", "gubernur", "menteri", "pejabat",
        "bupati", "walikota", "anggota", "dewan", "partai",
        "calon", "kandidat", "tokoh", "figur", "kepala",
        "direktur", "ceo", "pimpinan",
    ],
    # Economy
    "ekonomi": [
        "ekonomi", "inflasi", "deflasi", "pendapatan", "gaji",
        "upah", "subsidi", "pajak", "ekspor", "impor",
        "investasi", "pertumbuhan", "resesi", "utang", "pinjaman",
        "modal", "bisnis", "usaha", "umkm",
    ],
    # Education
    "pendidikan": [
        "pendidikan", "sekolah", "kampus", "belajar", "kurikulum",
        "guru", "dosen", "mahasiswa", "siswa", "nilai",
        "ujian", "beasiswa", "biaya sekolah", "spp", "kuliah",
        "universitas", "sd", "smp", "sma",
    ],
    # Health
    "kesehatan": [
        "kesehatan", "rumah sakit", "dokter", "obat", "vaksin",
        "rs", "puskesmas", "bpjs", "asuransi", "rawat",
        "operasi", "penyakit", "covid", "virus", "faskes",
        "apotek", "tenaga medis", "perawat",
    ],
    # Infrastructure
    "infrastruktur": [
        "jalan", "infrastruktur", "gedung", "fasilitas", "listrik",
        "air", "banjir", "macet", "transportasi", "tol",
        "jembatan", "bandar udara", "pelabuhan", "internet", "sinyal",
        "jaringan", "konstruksi",
    ],
}
# ─────────────────────────────────────────────
# SENTIMENT LEXICON (shared across all aspects)
# ─────────────────────────────────────────────
# Words that contribute a positive polarity when found near an aspect mention.
SENTIMENT_POS = {
    # Indonesian: general approval
    "bagus", "baik", "mantap", "keren", "hebat", "suka", "senang", "puas",
    # Indonesian: improvement / success
    "meningkat", "naik", "maju", "berkembang", "berhasil", "sukses",
    # Indonesian: price and service qualities
    "terjangkau", "murah", "gratis", "ramah", "cepat", "tepat", "profesional",
    # Indonesian: location and comfort
    "strategis", "dekat", "mudah", "lancar", "aman", "nyaman", "bersih",
    # English
    "good", "great", "nice", "excellent", "best", "amazing", "happy", "love",
    "wonderful", "perfect", "outstanding", "satisfied", "recommended",
    # Stance / support
    "mendukung", "setuju", "approve", "pro", "positif", "memuji", "bangga",
}

# Words that contribute a negative polarity when found near an aspect mention.
# NOTE(review): the two-word entries ("tidak puas", "tidak setuju") cannot equal
# a single whitespace-split token, so they look unreachable from
# _sentiment_score_window() -- confirm.
SENTIMENT_NEG = {
    # Indonesian: poor quality / disappointment
    "buruk", "jelek", "rusak", "parah", "kecewa", "mahal", "lambat", "lama",
    "susah", "sulit", "ribet", "boros", "kasar", "curang", "korup", "gagal",
    # Indonesian: decline / trouble
    "turun", "menurun", "anjlok", "jatuh", "krisis", "masalah", "bermasalah",
    "berbahaya", "bahaya", "mengecewakan", "tidak puas", "kapok",
    # English
    "bad", "worst", "terrible", "awful", "poor", "horrible", "hate", "dislike",
    "expensive", "slow", "failed", "disappointed", "useless", "waste",
    # Stance / opposition
    "menolak", "menentang", "against", "kontra", "negatif", "mencela", "kritik",
    "bohong", "tipu", "menipu", "korupsi", "tidak setuju",
}

# Words that invert the polarity of the next sentiment word.
NEGATION_WORDS = {
    "tidak", "bukan", "belum", "tak", "gak", "ga", "nggak", "ngga", "jangan",
    "no", "not", "never", "dont", "don't", "without", "tanpa",
}

# Amplifiers (weight 1.5 in _sentiment_score_window).
INTENSIFIER_POS = {
    "sangat", "banget", "sekali", "amat", "luar biasa", "super", "paling", "bgt",
}

# Dampeners (weight 0.6 in _sentiment_score_window).
INTENSIFIER_NEG = {"kurang", "agak", "sedikit", "hampir", "nyaris"}
def _get_aspect(token: str) -> Optional[str]:
    """Return the aspect category that a single token belongs to, if any.

    Categories in ``ASPECT_LEXICON`` are checked in insertion order and the
    first match wins. A category matches when the lower-cased token equals
    one of its keywords, or when any keyword longer than four characters
    occurs as a substring of the token (so inflected forms such as a keyword
    plus a suffix are caught).

    Args:
        token: The word to classify; case is ignored.

    Returns:
        The aspect category name, or ``None`` when no category matches.
    """
    word = token.lower()
    for category, terms in ASPECT_LEXICON.items():
        # Exact keyword hit.
        if word in terms:
            return category
        # Substring hit, restricted to longer keywords to limit false matches.
        for term in terms:
            if len(term) > 4 and term in word:
                return category
    return None
def _sentiment_score_window(tokens: list, center_idx: int, window: int = 4) -> float:
"""
Hitung skor sentimen dalam window Β±N kata dari posisi aspek.
Pertimbangkan negasi dan intensifier.
Return: float positif = positif, negatif = negatif, 0 = netral
"""
start = max(0, center_idx - window)
end = min(len(tokens), center_idx + window + 1)
window_tokens = tokens[start:end]
score = 0.0
negated = False
intensify = 1.0
for i, tok in enumerate(window_tokens):
tl = tok.lower()
if tl in NEGATION_WORDS:
negated = True
continue
if tl in INTENSIFIER_POS:
intensify = 1.5
continue
if tl in INTENSIFIER_NEG:
intensify = 0.6
continue
if tl in SENTIMENT_POS:
s = 1.0 * intensify
score += -s if negated else s
negated = False
intensify = 1.0
elif tl in SENTIMENT_NEG:
s = -1.0 * intensify
score += -s if negated else s
negated = False
intensify = 1.0
return score
def _score_to_label(score: float) -> str:
if score > 0.3: return "Positive"
if score < -0.3: return "Negative"
return "Neutral"
def extract_aspects(text: str) -> list[dict]:
    """Extract aspect mentions and their sentiment from one text.

    The text is lower-cased, punctuation is replaced by spaces, and the
    result is split on whitespace. Every token is looked up with
    ``_get_aspect``; only the first mention of each aspect category is
    reported, and its sentiment is scored from the surrounding tokens.

    Args:
        text: Raw input text. Falsy values and texts shorter than five
            characters after stripping yield an empty result.

    Returns:
        A list of dicts with the keys ``aspect`` (category name),
        ``sentiment`` (label), ``score`` (rounded to three decimals),
        ``mention`` (the matching token) and ``context`` (up to three
        tokens on each side of the mention, joined by spaces).
    """
    if not text or len(text.strip()) < 5:
        return []

    # Simple tokenisation: drop punctuation, split on whitespace.
    tokens = re.sub(r'[^\w\s]', ' ', text.lower()).split()

    found = []
    reported = set()
    for pos, word in enumerate(tokens):
        category = _get_aspect(word)
        # Skip non-aspect words and categories already reported for this text.
        if category is None or category in reported:
            continue
        reported.add(category)

        raw_score = _sentiment_score_window(tokens, pos)

        # Short snippet around the mention, for display purposes.
        left = max(0, pos - 3)
        right = min(len(tokens), pos + 4)

        found.append({
            'aspect': category,
            'sentiment': _score_to_label(raw_score),
            'score': round(raw_score, 3),
            'mention': word,
            'context': ' '.join(tokens[left:right]),
        })
    return found
def analyze_absa(texts: list[str]) -> dict:
    """Run aspect-based sentiment analysis over a list of texts.

    Only the first 80 texts are analysed, to bound the work done per call.

    Args:
        texts: Input texts. Entries that are empty or ``None`` contribute
            no aspects.

    Returns:
        A dict with the keys:

        * ``per_text``: for the first 20 analysed texts, a dict holding the
          first 100 characters of the text and its extracted aspects;
        * ``aggregate``: ``{aspect: {Positive, Negative, Neutral, total,
          pos_pct, neg_pct, neu_pct, dominant}}``;
        * ``top_aspects``: the eight most-mentioned aspects, most frequent
          first, each as a flat dict including the ``aspect`` name;
        * ``aspect_sentiment_map``: ``{aspect: dominant sentiment}`` for
          every aspect found, ordered by mention count;
        * ``total_texts_analyzed``: number of texts actually processed;
        * ``aspects_found``: number of distinct aspects found.
    """
    # Work on the capped batch so the reported count matches what was
    # processed (previously the full input length was reported).
    batch = texts[:80]

    per_text = []
    aggregate = defaultdict(lambda: {'Positive': 0, 'Negative': 0, 'Neutral': 0, 'total': 0})
    for text in batch:
        aspects = extract_aspects(text)
        # extract_aspects() accepts None/empty input, so the preview slice
        # must not fail on it either.
        per_text.append({'text': (text or '')[:100], 'aspects': aspects})
        for a in aspects:
            aggregate[a['aspect']][a['sentiment']] += 1
            aggregate[a['aspect']]['total'] += 1

    # Per-aspect counts, percentages and dominant sentiment.
    agg_result = {}
    for aspect, counts in aggregate.items():
        t = counts['total'] or 1  # guard against division by zero
        # On ties, max() keeps the first label in this list.
        dominant = max(
            ['Positive', 'Negative', 'Neutral'],
            key=lambda s: counts[s]
        )
        agg_result[aspect] = {
            'Positive': counts['Positive'],
            'Negative': counts['Negative'],
            'Neutral': counts['Neutral'],
            'total': counts['total'],
            'pos_pct': round(counts['Positive'] / t * 100, 1),
            'neg_pct': round(counts['Negative'] / t * 100, 1),
            'neu_pct': round(counts['Neutral'] / t * 100, 1),
            'dominant': dominant,
        }

    # Most-mentioned aspects first.
    top_aspects = sorted(
        agg_result.items(),
        key=lambda x: x[1]['total'],
        reverse=True
    )
    aspect_sentiment_map = {
        asp: data['dominant']
        for asp, data in top_aspects
    }

    return {
        'per_text': per_text[:20],  # sample sent to the frontend
        'aggregate': agg_result,
        'top_aspects': [{'aspect': a, **d} for a, d in top_aspects[:8]],
        'aspect_sentiment_map': aspect_sentiment_map,
        'total_texts_analyzed': len(batch),
        'aspects_found': len(agg_result),
    }