""" services/absa.py Aspect-Based Sentiment Analysis (ABSA) untuk Bahasa Indonesia. Pendekatan: 1. Ekstrak aspek dari teks menggunakan lexicon + dependency pattern 2. Tentukan sentimen per aspek menggunakan window context 3. Agregasi hasil per kategori aspek Kategori aspek yang didukung (domain-agnostic): - harga/biaya : harga, mahal, murah, biaya, tarif, ongkos - kualitas/produk : kualitas, bagus, jelek, rusak, bagus, produk - pelayanan/service : pelayanan, layanan, respon, lambat, cepat, ramah - lokasi/tempat : lokasi, tempat, jarak, strategis, jauh, dekat - kebijakan : kebijakan, aturan, regulasi, keputusan, program - pemimpin/tokoh : pemimpin, presiden, gubernur, menteri, pejabat - ekonomi : ekonomi, inflasi, harga, pendapatan, gaji, subsidi - pendidikan : pendidikan, sekolah, kampus, belajar, kurikulum - kesehatan : kesehatan, rumah sakit, dokter, obat, vaksin - infrastruktur : jalan, infrastruktur, gedung, fasilitas, listrik """ import re from collections import defaultdict from typing import Optional # ───────────────────────────────────────────── # ASPECT LEXICON # ───────────────────────────────────────────── ASPECT_LEXICON = { 'harga': [ 'harga','mahal','murah','biaya','tarif','ongkos','harganya', 'cost','price','bayar','bayaran','budget','anggaran','tagihan', 'cicilan','kredit','diskon','promo','gratis','terjangkau' ], 'kualitas': [ 'kualitas','bagus','jelek','buruk','rusak','cacat','produk', 'barang','mutu','kualiti','quality','performa','fitur','spesifikasi', 'durable','tahan lama','awet','rapuh','boros' ], 'pelayanan': [ 'pelayanan','layanan','servis','service','respon','respons','lambat', 'cepat','ramah','kasar','profesional','sopan','membantu','helpful', 'cs','customer service','admin','operator','staff','petugas' ], 'lokasi': [ 'lokasi','tempat','jarak','strategis','jauh','dekat','akses', 'parkir','alamat','wilayah','daerah','kawasan','lingkungan' ], 'kebijakan': [ 'kebijakan','aturan','regulasi','keputusan','program','peraturan', 'undang','hukum','sanksi','denda','izin','prosedur','birokrasi', 'pemerintah','pemerintahan','politik','implementasi' ], 'pemimpin': [ 'pemimpin','presiden','gubernur','menteri','pejabat','bupati', 'walikota','anggota','dewan','partai','calon','kandidat','tokoh', 'figur','kepala','direktur','ceo','pimpinan' ], 'ekonomi': [ 'ekonomi','inflasi','deflasi','pendapatan','gaji','upah','subsidi', 'pajak','ekspor','impor','investasi','pertumbuhan','resesi','utang', 'pinjaman','modal','bisnis','usaha','umkm' ], 'pendidikan': [ 'pendidikan','sekolah','kampus','belajar','kurikulum','guru','dosen', 'mahasiswa','siswa','nilai','ujian','beasiswa','biaya sekolah', 'spp','kuliah','universitas','sd','smp','sma' ], 'kesehatan': [ 'kesehatan','rumah sakit','dokter','obat','vaksin','rs','puskesmas', 'bpjs','asuransi','rawat','operasi','penyakit','covid','virus', 'faskes','apotek','tenaga medis','perawat' ], 'infrastruktur': [ 'jalan','infrastruktur','gedung','fasilitas','listrik','air','banjir', 'macet','transportasi','tol','jembatan','bandar udara','pelabuhan', 'internet','sinyal','jaringan','konstruksi' ], } # ───────────────────────────────────────────── # SENTIMENT LEXICON PER ASPECT # ───────────────────────────────────────────── SENTIMENT_POS = { 'bagus','baik','bagus','mantap','keren','hebat','suka','senang','puas', 'meningkat','naik','maju','berkembang','berhasil','sukses','bagus', 'terjangkau','murah','gratis','ramah','cepat','tepat','profesional', 'strategis','dekat','mudah','lancar','aman','nyaman','bersih', 'good','great','nice','excellent','best','amazing','happy','love', 'wonderful','perfect','outstanding','satisfied','recommended', 'mendukung','setuju','approve','pro','positif','memuji','bangga', } SENTIMENT_NEG = { 'buruk','jelek','rusak','parah','kecewa','mahal','lambat','lama', 'susah','sulit','ribet','boros','kasar','curang','korup','gagal', 'turun','menurun','anjlok','jatuh','krisis','masalah','bermasalah', 'berbahaya','bahaya','mengecewakan','tidak puas','kapok', 'bad','worst','terrible','awful','poor','horrible','hate','dislike', 'expensive','slow','failed','disappointed','useless','waste', 'menolak','menentang','against','kontra','negatif','mencela','kritik', 'bohong','tipu','menipu','korupsi','tidak setuju', } NEGATION_WORDS = { 'tidak','bukan','belum','tak','gak','ga','nggak','ngga','jangan', 'no','not','never','dont',"don't",'without','tanpa', } INTENSIFIER_POS = {'sangat','banget','sekali','amat','luar biasa','super','paling','bgt'} INTENSIFIER_NEG = {'kurang','agak','sedikit','hampir','nyaris'} def _get_aspect(token: str) -> Optional[str]: """Cari aspek untuk satu token.""" token = token.lower() for aspect, keywords in ASPECT_LEXICON.items(): if token in keywords or any(kw in token for kw in keywords if len(kw) > 4): return aspect return None def _sentiment_score_window(tokens: list, center_idx: int, window: int = 4) -> float: """ Hitung skor sentimen dalam window ±N kata dari posisi aspek. Pertimbangkan negasi dan intensifier. Return: float positif = positif, negatif = negatif, 0 = netral """ start = max(0, center_idx - window) end = min(len(tokens), center_idx + window + 1) window_tokens = tokens[start:end] score = 0.0 negated = False intensify = 1.0 for i, tok in enumerate(window_tokens): tl = tok.lower() if tl in NEGATION_WORDS: negated = True continue if tl in INTENSIFIER_POS: intensify = 1.5 continue if tl in INTENSIFIER_NEG: intensify = 0.6 continue if tl in SENTIMENT_POS: s = 1.0 * intensify score += -s if negated else s negated = False intensify = 1.0 elif tl in SENTIMENT_NEG: s = -1.0 * intensify score += -s if negated else s negated = False intensify = 1.0 return score def _score_to_label(score: float) -> str: if score > 0.3: return "Positive" if score < -0.3: return "Negative" return "Neutral" def extract_aspects(text: str) -> list[dict]: """ Ekstrak aspek dan sentimen dari satu teks. Return: list of {aspect, sentiment, score, mention, context} """ if not text or len(text.strip()) < 5: return [] # Tokenisasi sederhana clean = re.sub(r'[^\w\s]', ' ', text.lower()) tokens = clean.split() results = [] seen_aspects = set() for i, token in enumerate(tokens): aspect = _get_aspect(token) if aspect is None: continue # Hindari duplikat aspek dalam satu kalimat if aspect in seen_aspects: continue seen_aspects.add(aspect) score = _sentiment_score_window(tokens, i) label = _score_to_label(score) # Context window untuk display start = max(0, i - 3) end = min(len(tokens), i + 4) context = ' '.join(tokens[start:end]) results.append({ 'aspect': aspect, 'sentiment': label, 'score': round(score, 3), 'mention': token, 'context': context, }) return results def analyze_absa(texts: list[str]) -> dict: """ Jalankan ABSA pada list teks. Return: { 'per_text': list of per-text results, 'aggregate': {aspect: {Positive: N, Negative: N, Neutral: N, dominant: str}}, 'top_aspects': sorted list of most-mentioned aspects, 'aspect_sentiment_map': {aspect: dominant_sentiment} } """ per_text = [] aggregate = defaultdict(lambda: {'Positive': 0, 'Negative': 0, 'Neutral': 0, 'total': 0}) for text in texts[:80]: # batasi untuk performa aspects = extract_aspects(text) per_text.append({'text': text[:100], 'aspects': aspects}) for a in aspects: aggregate[a['aspect']][a['sentiment']] += 1 aggregate[a['aspect']]['total'] += 1 # Kalkulasi dominan per aspek agg_result = {} for aspect, counts in aggregate.items(): t = counts['total'] or 1 dominant = max( ['Positive', 'Negative', 'Neutral'], key=lambda s: counts[s] ) agg_result[aspect] = { 'Positive': counts['Positive'], 'Negative': counts['Negative'], 'Neutral': counts['Neutral'], 'total': counts['total'], 'pos_pct': round(counts['Positive'] / t * 100, 1), 'neg_pct': round(counts['Negative'] / t * 100, 1), 'neu_pct': round(counts['Neutral'] / t * 100, 1), 'dominant': dominant, } # Sort by total mentions top_aspects = sorted( agg_result.items(), key=lambda x: x[1]['total'], reverse=True ) aspect_sentiment_map = { asp: data['dominant'] for asp, data in top_aspects } return { 'per_text': per_text[:20], # kirim sample ke frontend 'aggregate': agg_result, 'top_aspects': [{'aspect': a, **d} for a, d in top_aspects[:8]], 'aspect_sentiment_map': aspect_sentiment_map, 'total_texts_analyzed': len(texts), 'aspects_found': len(agg_result), }