"""
LaporIn AI Inference Service
Model: IndoBERT-p2 Multi-Task (Kategori + Sentimen)
Deploy: HuggingFace Spaces (Docker SDK)
"""
import os
import pickle

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from torch import nn
from transformers import AutoModel, AutoTokenizer
|
|
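# ---------------------------------------------------------------------------
# Model definition
# ---------------------------------------------------------------------------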
class IndoBERTMultiTask(nn.Module):
    """IndoBERT encoder with two task heads fed by the first-token state.

    ``fc_kat`` produces ``num_kat`` category logits and ``fc_sen`` produces a
    single sentiment logit. The attribute names (``indobert``, ``fc_kat``,
    ``fc_sen``) and the layer order inside each ``nn.Sequential`` determine
    the ``state_dict`` keys, so they must not change if saved weights are to
    keep loading.

    Args:
        num_kat: Number of category classes (output width of ``fc_kat``).
        model_name: Hugging Face model id or local path of the encoder.
    """

    def __init__(self, num_kat, model_name="indobenchmark/indobert-base-p2"):
        super().__init__()
        self.indobert = AutoModel.from_pretrained(model_name)
        hidden = self.indobert.config.hidden_size
        # Dropout applied to the pooled vector before both heads.
        self.dropout = nn.Dropout(0.3)
        # Category head: hidden -> 256 -> num_kat logits.
        self.fc_kat = nn.Sequential(
            nn.Linear(hidden, 256),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(256, num_kat),
        )
        # Sentiment head: hidden -> 64 -> one logit.
        self.fc_sen = nn.Sequential(
            nn.Linear(hidden, 64),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(64, 1),
        )

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Run the encoder and both heads.

        Args:
            input_ids: Token id tensor passed to the encoder.
            attention_mask: Attention mask tensor passed to the encoder.
            token_type_ids: Optional segment id tensor. When omitted, ``None``
                is forwarded and the encoder applies its own default.

        Returns:
            A ``(category_logits, sentiment_logits)`` tuple.
        """
        outputs = self.indobert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Hidden state at sequence position 0 is used as the sentence vector.
        cls = outputs.last_hidden_state[:, 0, :]
        x = self.dropout(cls)
        return self.fc_kat(x), self.fc_sen(x)
|
|
|
|
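# ---------------------------------------------------------------------------
# Load tokenizer, label encoders and model weights
# ---------------------------------------------------------------------------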
# Load the tokenizer, label encoders and model weights once, at import time,
# so the request handlers can reuse them.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("./")

print("Loading label encoders...")
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only deploy label_*.pickle files that come from a trusted training run.
with open("label_kat.pickle", "rb") as f:
    label_kat_enc = pickle.load(f)
with open("label_sen.pickle", "rb") as f:
    label_sen_enc = pickle.load(f)

num_kat = len(label_kat_enc.classes_)
num_sen = len(label_sen_enc.classes_)

print(f" Kategori ({num_kat}): {list(label_kat_enc.classes_)}")
print(f" Sentimen ({num_sen}): {list(label_sen_enc.classes_)}")

print("Loading IndoBERT model...")
model = IndoBERTMultiTask(num_kat=num_kat)
# NOTE(security): torch.load unpickles the checkpoint; if the installed torch
# version supports it, consider passing weights_only=True - TODO confirm.
model.load_state_dict(
    torch.load("model_indobert_p2.pt", map_location="cpu")
)
# Inference only: eval() switches the dropout layers off.
model.eval()
print("Model ready!")
|
|
|
|
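# ---------------------------------------------------------------------------
# FastAPI application
# ---------------------------------------------------------------------------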
# ASGI application object; the route decorators below register on it.
app = FastAPI(
    title="LaporIn AI Service",
    description="IndoBERT-p2 Multi-Task: Klasifikasi Kategori + Sentimen Laporan Warga",
    version="1.0.0",
)

# Token length every request is truncated/padded to before inference.
MAX_LEN = 128
|
|
|
|
class PredictRequest(BaseModel):
    """Request body for ``POST /predict``."""

    # Raw report text to classify.
    teks: str
|
|
|
|
class PredictResponse(BaseModel):
    """Response body of ``POST /predict``."""

    # Predicted category label.
    kategori: str
    # Predicted sentiment label.
    sentimen: str
    # Probability attached to the prediction, rounded by the endpoint.
    confidence: float
|
|
|
|
@app.get("/")
def health():
    """Health check; used by the backend as a warm-up ping.

    Returns:
        A dict with a fixed ``"status": "ok"``, the model name, and the
        category and sentiment label lists read from the loaded encoders.
    """
    return {
        "status": "ok",
        "model": "IndoBERT-p2 Multi-Task",
        "kategori_labels": list(label_kat_enc.classes_),
        "sentimen_labels": list(label_sen_enc.classes_),
    }
|
|
|
|
@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """Predict the category and sentiment of a report text.

    Example:
        Input:  {"teks": "Jalan rusak di depan sekolah sudah lama tidak diperbaiki"}
        Output: {"kategori": "infrastruktur", "sentimen": "negatif", "confidence": 0.9423}

    ``confidence`` is the softmax probability of the predicted category only;
    the sentiment probability is not part of the response.
    """
    # Tokenize into a batch of one, truncated/padded to MAX_LEN tokens.
    enc = tokenizer(
        req.teks,
        truncation=True,
        padding="max_length",
        max_length=MAX_LEN,
        return_tensors="pt",
    )

    input_ids = enc["input_ids"]
    attention_mask = enc["attention_mask"]
    # Fall back to all-zero segment ids when the tokenizer returns none.
    token_type_ids = enc.get("token_type_ids")
    if token_type_ids is None:
        token_type_ids = torch.zeros_like(input_ids)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        kat_logits, sen_logits = model(input_ids, attention_mask, token_type_ids)

    # Category: softmax over the class logits, take the most probable class.
    kat_probs = torch.softmax(kat_logits, dim=1)
    kat_confidence, kat_idx = kat_probs.max(dim=1)
    kategori = label_kat_enc.inverse_transform([kat_idx.item()])[0]

    # Sentiment: a single logit through a sigmoid, thresholded at 0.5 to pick
    # encoder index 1 (above) or 0 (otherwise).
    sen_prob = torch.sigmoid(sen_logits).item()
    sen_idx = 1 if sen_prob > 0.5 else 0
    sentimen = label_sen_enc.inverse_transform([sen_idx])[0]

    # Coerce to built-in types so response validation does not depend on the
    # scalar types the label encoders hand back.
    return PredictResponse(
        kategori=str(kategori),
        sentimen=str(sentimen),
        confidence=round(float(kat_confidence.item()), 4),
    )
|
|