Spaces:
Sleeping
Sleeping
| """ | |
| services/absa.py | |
| Aspect-Based Sentiment Analysis (ABSA) untuk Bahasa Indonesia. | |
| Pendekatan: | |
| 1. Ekstrak aspek dari teks menggunakan lexicon + dependency pattern | |
| 2. Tentukan sentimen per aspek menggunakan window context | |
| 3. Agregasi hasil per kategori aspek | |
| Kategori aspek yang didukung (domain-agnostic): | |
| - harga/biaya : harga, mahal, murah, biaya, tarif, ongkos | |
| - kualitas/produk : kualitas, bagus, jelek, rusak, bagus, produk | |
| - pelayanan/service : pelayanan, layanan, respon, lambat, cepat, ramah | |
| - lokasi/tempat : lokasi, tempat, jarak, strategis, jauh, dekat | |
| - kebijakan : kebijakan, aturan, regulasi, keputusan, program | |
| - pemimpin/tokoh : pemimpin, presiden, gubernur, menteri, pejabat | |
| - ekonomi : ekonomi, inflasi, harga, pendapatan, gaji, subsidi | |
| - pendidikan : pendidikan, sekolah, kampus, belajar, kurikulum | |
| - kesehatan : kesehatan, rumah sakit, dokter, obat, vaksin | |
| - infrastruktur : jalan, infrastruktur, gedung, fasilitas, listrik | |
| """ | |
| import re | |
| from collections import defaultdict | |
| from typing import Optional | |
# ─────────────────────────────────────────────
# ASPECT LEXICON
# ─────────────────────────────────────────────
# Maps each aspect category to the keywords that signal it. Matching happens
# in _get_aspect: an exact token match against the list, or a substring match
# for keywords longer than 4 characters.
# NOTE(review): multi-word entries such as 'tahan lama', 'customer service',
# 'rumah sakit', 'biaya sekolah', 'bandar udara' and 'tenaga medis' can never
# match a single whitespace-split token — confirm whether a phrase-matching
# pass was intended.
ASPECT_LEXICON = {
    'harga': [
        'harga','mahal','murah','biaya','tarif','ongkos','harganya',
        'cost','price','bayar','bayaran','budget','anggaran','tagihan',
        'cicilan','kredit','diskon','promo','gratis','terjangkau'
    ],
    'kualitas': [
        'kualitas','bagus','jelek','buruk','rusak','cacat','produk',
        'barang','mutu','kualiti','quality','performa','fitur','spesifikasi',
        'durable','tahan lama','awet','rapuh','boros'
    ],
    'pelayanan': [
        'pelayanan','layanan','servis','service','respon','respons','lambat',
        'cepat','ramah','kasar','profesional','sopan','membantu','helpful',
        'cs','customer service','admin','operator','staff','petugas'
    ],
    'lokasi': [
        'lokasi','tempat','jarak','strategis','jauh','dekat','akses',
        'parkir','alamat','wilayah','daerah','kawasan','lingkungan'
    ],
    'kebijakan': [
        'kebijakan','aturan','regulasi','keputusan','program','peraturan',
        'undang','hukum','sanksi','denda','izin','prosedur','birokrasi',
        'pemerintah','pemerintahan','politik','implementasi'
    ],
    'pemimpin': [
        'pemimpin','presiden','gubernur','menteri','pejabat','bupati',
        'walikota','anggota','dewan','partai','calon','kandidat','tokoh',
        'figur','kepala','direktur','ceo','pimpinan'
    ],
    'ekonomi': [
        'ekonomi','inflasi','deflasi','pendapatan','gaji','upah','subsidi',
        'pajak','ekspor','impor','investasi','pertumbuhan','resesi','utang',
        'pinjaman','modal','bisnis','usaha','umkm'
    ],
    'pendidikan': [
        'pendidikan','sekolah','kampus','belajar','kurikulum','guru','dosen',
        'mahasiswa','siswa','nilai','ujian','beasiswa','biaya sekolah',
        'spp','kuliah','universitas','sd','smp','sma'
    ],
    'kesehatan': [
        'kesehatan','rumah sakit','dokter','obat','vaksin','rs','puskesmas',
        'bpjs','asuransi','rawat','operasi','penyakit','covid','virus',
        'faskes','apotek','tenaga medis','perawat'
    ],
    'infrastruktur': [
        'jalan','infrastruktur','gedung','fasilitas','listrik','air','banjir',
        'macet','transportasi','tol','jembatan','bandar udara','pelabuhan',
        'internet','sinyal','jaringan','konstruksi'
    ],
}
# ─────────────────────────────────────────────
# SENTIMENT LEXICON PER ASPECT
# ─────────────────────────────────────────────
# Polarity vocabularies used by _sentiment_score_window. All entries are
# lowercase; matching is done against single lowercased tokens.
# Fix: removed the duplicated 'bagus' literals from SENTIMENT_POS (the set
# deduplicated them anyway, but the repetition was noise).
# NOTE(review): multi-word entries ('tidak puas', 'tidak setuju',
# 'luar biasa') never match a single token in the window scorer — confirm
# whether phrase handling was intended.
SENTIMENT_POS = {
    'bagus','baik','mantap','keren','hebat','suka','senang','puas',
    'meningkat','naik','maju','berkembang','berhasil','sukses',
    'terjangkau','murah','gratis','ramah','cepat','tepat','profesional',
    'strategis','dekat','mudah','lancar','aman','nyaman','bersih',
    'good','great','nice','excellent','best','amazing','happy','love',
    'wonderful','perfect','outstanding','satisfied','recommended',
    'mendukung','setuju','approve','pro','positif','memuji','bangga',
}
SENTIMENT_NEG = {
    'buruk','jelek','rusak','parah','kecewa','mahal','lambat','lama',
    'susah','sulit','ribet','boros','kasar','curang','korup','gagal',
    'turun','menurun','anjlok','jatuh','krisis','masalah','bermasalah',
    'berbahaya','bahaya','mengecewakan','tidak puas','kapok',
    'bad','worst','terrible','awful','poor','horrible','hate','dislike',
    'expensive','slow','failed','disappointed','useless','waste',
    'menolak','menentang','against','kontra','negatif','mencela','kritik',
    'bohong','tipu','menipu','korupsi','tidak setuju',
}
# Words that flip the polarity of the next sentiment word in the window.
NEGATION_WORDS = {
    'tidak','bukan','belum','tak','gak','ga','nggak','ngga','jangan',
    'no','not','never','dont',"don't",'without','tanpa',
}
# Amplifiers scale the next sentiment word by 1.5x, diminishers by 0.6x.
INTENSIFIER_POS = {'sangat','banget','sekali','amat','luar biasa','super','paling','bgt'}
INTENSIFIER_NEG = {'kurang','agak','sedikit','hampir','nyaris'}
def _get_aspect(token: str) -> Optional[str]:
    """Return the aspect category a single token belongs to, or None.

    A token matches a category either exactly, or by containing a
    keyword longer than 4 characters as a substring (cheap stemming,
    e.g. 'harganya' contains 'harga'). The first matching category in
    ASPECT_LEXICON's insertion order wins.
    """
    needle = token.lower()
    for category, vocab in ASPECT_LEXICON.items():
        if needle in vocab:
            return category
        if any(len(word) > 4 and word in needle for word in vocab):
            return category
    return None
def _sentiment_score_window(tokens: list, center_idx: int, window: int = 4) -> float:
    """
    Score the sentiment in a +/-`window`-token context around an aspect token.

    A pending negation flips the sign of the next sentiment word; a pending
    intensifier scales it (1.5x amplifier, 0.6x diminisher). Both reset once
    a sentiment word consumes them.

    Fix: dropped the unused `enumerate` index from the scan loop.

    Args:
        tokens: full token list of the text (tokens are lowercased again
            here defensively, even though the caller already lowercases).
        center_idx: index of the aspect token inside `tokens`.
        window: tokens inspected on each side of the aspect position.

    Returns:
        Summed score; > 0 positive, < 0 negative, 0.0 neutral.
    """
    start = max(0, center_idx - window)
    end = min(len(tokens), center_idx + window + 1)

    score = 0.0
    negated = False    # pending negation waiting for the next sentiment word
    intensify = 1.0    # pending intensity multiplier
    # NOTE(review): a negation stays pending across the rest of the window
    # until a sentiment word appears — confirm this long reach is intended.
    for tok in tokens[start:end]:
        tl = tok.lower()
        if tl in NEGATION_WORDS:
            negated = True
            continue
        if tl in INTENSIFIER_POS:
            intensify = 1.5
            continue
        if tl in INTENSIFIER_NEG:
            intensify = 0.6
            continue
        if tl in SENTIMENT_POS:
            s = 1.0 * intensify
            score += -s if negated else s
            negated = False
            intensify = 1.0
        elif tl in SENTIMENT_NEG:
            s = -1.0 * intensify
            score += -s if negated else s
            negated = False
            intensify = 1.0
    return score
| def _score_to_label(score: float) -> str: | |
| if score > 0.3: return "Positive" | |
| if score < -0.3: return "Negative" | |
| return "Neutral" | |
def extract_aspects(text: str) -> list[dict]:
    """
    Extract aspects and their local sentiment from one text.

    Only the first mention of each aspect category is reported.

    Returns:
        A list of dicts with keys: aspect, sentiment, score, mention, context.
        Empty list for missing or very short (< 5 chars stripped) input.
    """
    if not text or len(text.strip()) < 5:
        return []

    # Naive tokenisation: lowercase, replace punctuation with spaces, split.
    tokens = re.sub(r'[^\w\s]', ' ', text.lower()).split()

    found = []
    reported = set()
    for idx, tok in enumerate(tokens):
        category = _get_aspect(tok)
        if category is None or category in reported:
            # No aspect here, or this category was already reported once.
            continue
        reported.add(category)

        raw_score = _sentiment_score_window(tokens, idx)
        # Short context snippet (3 tokens each side) for display.
        lo = max(0, idx - 3)
        hi = min(len(tokens), idx + 4)
        found.append({
            'aspect': category,
            'sentiment': _score_to_label(raw_score),
            'score': round(raw_score, 3),
            'mention': tok,
            'context': ' '.join(tokens[lo:hi]),
        })
    return found
def analyze_absa(texts: list[str]) -> dict:
    """
    Run ABSA over a list of texts and aggregate results per aspect.

    Only the first 80 texts are processed (performance cap) and at most
    20 per-text breakdowns are returned as a sample for the frontend.

    Fixes:
    - `total_texts_analyzed` previously reported `len(texts)` even though
      only the first 80 texts were actually analyzed; it now reports the
      true analyzed count.
    - per-text sample entries are only stored up to the sample size instead
      of building 80 entries and discarding 60 afterwards.

    Returns:
        {
            'per_text': sample of per-text results (text snippet + aspects),
            'aggregate': {aspect: counts, percentages, dominant sentiment},
            'top_aspects': up to 8 aspects sorted by mention count,
            'aspect_sentiment_map': {aspect: dominant sentiment},
            'total_texts_analyzed': number of texts actually analyzed,
            'aspects_found': number of distinct aspect categories seen,
        }
    """
    max_texts = 80     # performance cap on analyzed texts
    sample_size = 20   # per-text samples sent to the frontend

    analyzed = texts[:max_texts]
    per_text = []
    aggregate = defaultdict(lambda: {'Positive': 0, 'Negative': 0, 'Neutral': 0, 'total': 0})
    for text in analyzed:
        aspects = extract_aspects(text)
        if len(per_text) < sample_size:
            per_text.append({'text': text[:100], 'aspects': aspects})
        for a in aspects:
            aggregate[a['aspect']][a['sentiment']] += 1
            aggregate[a['aspect']]['total'] += 1

    # Per-aspect percentages and dominant label.
    agg_result = {}
    for aspect, counts in aggregate.items():
        t = counts['total'] or 1  # guard against division by zero
        # Ties resolve in the order Positive > Negative > Neutral.
        dominant = max(('Positive', 'Negative', 'Neutral'), key=lambda s: counts[s])
        agg_result[aspect] = {
            'Positive': counts['Positive'],
            'Negative': counts['Negative'],
            'Neutral': counts['Neutral'],
            'total': counts['total'],
            'pos_pct': round(counts['Positive'] / t * 100, 1),
            'neg_pct': round(counts['Negative'] / t * 100, 1),
            'neu_pct': round(counts['Neutral'] / t * 100, 1),
            'dominant': dominant,
        }

    # Most-mentioned aspects first.
    top_aspects = sorted(agg_result.items(), key=lambda kv: kv[1]['total'], reverse=True)

    return {
        'per_text': per_text,
        'aggregate': agg_result,
        'top_aspects': [{'aspect': a, **d} for a, d in top_aspects[:8]],
        'aspect_sentiment_map': {a: d['dominant'] for a, d in top_aspects},
        'total_texts_analyzed': len(analyzed),
        'aspects_found': len(agg_result),
    }