# LaporIn / app.py
# Dino-bit00
# Deploy LaporIn AI model
# f409d00
"""
LaporIn AI Inference Service
Model: IndoBERT-p2 Multi-Task (Kategori + Sentimen)
Deploy: HuggingFace Spaces (Docker SDK)
"""
import os
import pickle
import torch
from torch import nn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
# =========================================================
# MODEL ARCHITECTURE (must match the training code exactly)
# =========================================================
class IndoBERTMultiTask(nn.Module):
    """Shared encoder with two classification heads (category and sentiment).

    The encoder is loaded through ``AutoModel.from_pretrained``; both heads
    read the same dropout-regularised hidden state taken at sequence
    position 0 of the encoder's last hidden layer.

    The attribute names (``indobert``, ``dropout``, ``fc_kat``, ``fc_sen``)
    and the layer layout determine the ``state_dict`` keys, so they must not
    be changed without retraining/re-exporting the checkpoint.
    """

    def __init__(self, num_kat, model_name="indobenchmark/indobert-base-p2"):
        """Build the encoder and the two heads.

        Args:
            num_kat: Number of category classes; width of the category
                head's output layer.
            model_name: Identifier or path passed to
                ``AutoModel.from_pretrained``.
        """
        super().__init__()
        self.indobert = AutoModel.from_pretrained(model_name)
        # 768 for the default checkpoint according to the original author.
        hidden = self.indobert.config.hidden_size
        self.dropout = nn.Dropout(0.3)
        # Category head: hidden -> 256 -> num_kat logits.
        self.fc_kat = nn.Sequential(
            nn.Linear(hidden, 256),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(256, num_kat),
        )
        # Sentiment head: hidden -> 64 -> a single logit.
        self.fc_sen = nn.Sequential(
            nn.Linear(hidden, 64),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(64, 1),
        )

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Run the encoder and both heads.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            token_type_ids: Segment ids forwarded to the encoder.

        Returns:
            Tuple ``(kat_logits, sen_logits)``: the category head output
            (last dimension ``num_kat``) and the sentiment head output
            (last dimension 1). Both are raw logits; no softmax/sigmoid is
            applied here.
        """
        outputs = self.indobert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Hidden state of the first token of every sequence in the batch.
        cls = outputs.last_hidden_state[:, 0, :]
        x = self.dropout(cls)
        return self.fc_kat(x), self.fc_sen(x)
# =========================================================
# LOAD MODEL & TOKENIZER (once at startup)
# =========================================================
# Everything below runs once at import time so that requests only pay for
# inference, not for loading artifacts.
print("🔄 Loading tokenizer...")
# Tokenizer files are read from the current working directory.
tokenizer = AutoTokenizer.from_pretrained("./")

print("🔄 Loading label encoders...")
# NOTE(security): pickle.load executes arbitrary code embedded in the file.
# Only ship label_*.pickle files produced by the project's own training run.
with open("label_kat.pickle", "rb") as f:
    label_kat_enc = pickle.load(f)
with open("label_sen.pickle", "rb") as f:
    label_sen_enc = pickle.load(f)

num_kat = len(label_kat_enc.classes_)
num_sen = len(label_sen_enc.classes_)
print(f" Kategori ({num_kat}): {list(label_kat_enc.classes_)}")
print(f" Sentimen ({num_sen}): {list(label_sen_enc.classes_)}")

print("🔄 Loading IndoBERT model...")
model = IndoBERTMultiTask(num_kat=num_kat)
# NOTE(review): torch.load also unpickles; the checkpoint must come from a
# trusted source. Consider weights_only=True if the pinned torch supports it.
model.load_state_dict(
    torch.load("model_indobert_p2.pt", map_location="cpu")
)
# Switch dropout layers to inference behaviour.
model.eval()
print("✅ Model ready!")
# =========================================================
# FASTAPI APP
# =========================================================
# Token budget per request: the /predict handler truncates and pads every
# input to exactly this many tokens.
MAX_LEN = 128

# ASGI application object; the metadata below is what FastAPI receives as
# title, description and version.
app = FastAPI(
    version="1.0.0",
    title="LaporIn AI Service",
    description="IndoBERT-p2 Multi-Task: Klasifikasi Kategori + Sentimen Laporan Warga",
)
class PredictRequest(BaseModel):
    """Request body for ``POST /predict``."""

    # Raw report text to classify.
    teks: str
class PredictResponse(BaseModel):
    """Response body for ``POST /predict``."""

    # Decoded category label.
    kategori: str
    # Decoded sentiment label.
    sentimen: str
    # Softmax probability of the predicted category (not of the sentiment).
    confidence: float
@app.get("/")
def health():
    """Health check - used as a warm-up ping from the backend.

    Returns:
        A dict with a static status and model name, plus the category and
        sentiment label lists taken from the loaded label encoders.
    """
    return {
        "status": "ok",
        "model": "IndoBERT-p2 Multi-Task",
        "kategori_labels": list(label_kat_enc.classes_),
        "sentimen_labels": list(label_sen_enc.classes_),
    }
@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """Predict the category and sentiment of a report text.

    Input: {"teks": "Jalan rusak di depan sekolah sudah lama tidak diperbaiki"}
    Output: {"kategori": "infrastruktur", "sentimen": "negatif", "confidence": 0.9423}

    Args:
        req: Request body carrying the raw report text in ``teks``.

    Returns:
        A ``PredictResponse`` with the decoded category label, the decoded
        sentiment label, and the softmax probability of the chosen category
        rounded to four decimals. ``confidence`` describes the category
        prediction only.
    """
    # 1. Tokenize (the original author notes this mirrors the training setup).
    enc = tokenizer(
        req.teks,
        truncation=True,
        padding="max_length",
        max_length=MAX_LEN,
        return_tensors="pt",
    )
    input_ids = enc["input_ids"]
    attention_mask = enc["attention_mask"]
    # Fall back to all-zero segment ids only when the tokenizer omits them.
    token_type_ids = enc.get("token_type_ids")
    if token_type_ids is None:
        token_type_ids = torch.zeros_like(input_ids)

    # 2. Inference and post-processing, all without autograd tracking.
    with torch.no_grad():
        kat_logits, sen_logits = model(input_ids, attention_mask, token_type_ids)
        # 3. Category (multi-class): softmax, then argmax with its probability.
        kat_probs = torch.softmax(kat_logits, dim=1)
        kat_confidence, kat_idx = kat_probs.max(dim=1)
        # 4. Sentiment (binary): sigmoid of the single logit.
        sen_prob = torch.sigmoid(sen_logits).item()

    # inverse_transform yields numpy scalars; coerce to plain str for the
    # response model.
    kategori = str(label_kat_enc.inverse_transform([kat_idx.item()])[0])

    # Per the original author: the encoder's classes_ are sorted
    # alphabetically -> [negatif=0, positif=1], so sigmoid > 0.5 selects
    # index 1 (positif) and anything else index 0 (negatif).
    # NOTE(review): this assumes exactly two sentiment classes - confirm.
    sen_idx = 1 if sen_prob > 0.5 else 0
    sentimen = str(label_sen_enc.inverse_transform([sen_idx])[0])

    return PredictResponse(
        kategori=kategori,
        sentimen=sentimen,
        confidence=round(kat_confidence.item(), 4),
    )