"""
LaporIn AI Inference Service
Model: IndoBERT-p2 Multi-Task (Kategori + Sentimen)
Deploy: HuggingFace Spaces (Docker SDK)
"""
import os
import pickle
import torch
from torch import nn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel

# =========================================================
# ARSITEKTUR MODEL (harus sama persis dengan training code)
# =========================================================
class IndoBERTMultiTask(nn.Module):
    """Shared encoder with a category head and a sentiment head.

    Both heads read the same vector: the encoder's last hidden state at
    sequence position 0, passed through dropout.

    The attribute names (``indobert``, ``dropout``, ``fc_kat``, ``fc_sen``)
    and the layer order inside each head define the ``state_dict`` keys, so
    they have to stay aligned with the checkpoint this module is loaded from.

    Args:
        num_kat: Output width of the category head (number of categories).
        model_name: Identifier or path given to ``AutoModel.from_pretrained``.
    """

    def __init__(self, num_kat, model_name="indobenchmark/indobert-base-p2"):
        super().__init__()
        self.indobert = AutoModel.from_pretrained(model_name)
        width = self.indobert.config.hidden_size
        self.dropout = nn.Dropout(0.3)
        # Category head: one logit per category.
        self.fc_kat = self._head(width, 256, num_kat)
        # Sentiment head: a single logit.
        self.fc_sen = self._head(width, 64, 1)

    @staticmethod
    def _head(in_features, mid_features, out_features):
        """Build a Linear -> GELU -> Dropout(0.1) -> Linear stack."""
        return nn.Sequential(
            nn.Linear(in_features, mid_features),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(mid_features, out_features),
        )

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Run the encoder and both heads.

        Args:
            input_ids: Forwarded to the encoder as ``input_ids``.
            attention_mask: Forwarded to the encoder as ``attention_mask``.
            token_type_ids: Forwarded to the encoder as ``token_type_ids``.

        Returns:
            A ``(category_logits, sentiment_logits)`` tuple.
        """
        encoded = self.indobert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Position 0 along the sequence axis is used as the sentence summary.
        pooled = self.dropout(encoded.last_hidden_state[:, 0, :])
        kat_logits = self.fc_kat(pooled)
        sen_logits = self.fc_sen(pooled)
        return kat_logits, sen_logits


# =========================================================
# LOAD MODEL & TOKENIZER (1x saat startup)
# =========================================================
def _load_pickle(path):
    """Return the object unpickled from *path*."""
    # NOTE: unpickling can execute arbitrary code; only ship trusted artifacts.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _build_model(num_classes, weights_path):
    """Instantiate the network, load its weights on CPU, and set eval mode."""
    net = IndoBERTMultiTask(num_kat=num_classes)
    net.load_state_dict(torch.load(weights_path, map_location="cpu"))
    net.eval()
    return net


# Everything below runs once, when the module is imported.
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("./")

print("🔄 Loading label encoders...")
label_kat_enc = _load_pickle("label_kat.pickle")
label_sen_enc = _load_pickle("label_sen.pickle")

num_kat = len(label_kat_enc.classes_)
num_sen = len(label_sen_enc.classes_)

print(f"   Kategori ({num_kat}): {list(label_kat_enc.classes_)}")
print(f"   Sentimen ({num_sen}): {list(label_sen_enc.classes_)}")

print("🔄 Loading IndoBERT model...")
model = _build_model(num_kat, "model_indobert_p2.pt")
print("✅ Model ready!")


# =========================================================
# FASTAPI APP
# =========================================================
# Sequence length passed to the tokenizer as ``max_length`` in /predict.
MAX_LEN = 128

# Application object; the route decorators in this file register onto it.
app = FastAPI(
    title="LaporIn AI Service",
    description=(
        "IndoBERT-p2 Multi-Task: "
        "Klasifikasi Kategori + Sentimen Laporan Warga"
    ),
    version="1.0.0",
)


class PredictRequest(BaseModel):
    """Request body for ``POST /predict``."""

    # Report text to classify; it is handed to the tokenizer as-is.
    teks: str


class PredictResponse(BaseModel):
    """Response body for ``POST /predict``."""

    # Category label decoded from the category head's highest-probability class.
    kategori: str
    # Sentiment label decoded from the thresholded sentiment head output.
    sentimen: str
    # Softmax probability of the chosen category, rounded to 4 decimals.
    # It does not reflect the sentiment prediction.
    confidence: float


@app.get("/")
def health():
    """Report service status along with the label sets loaded at startup.

    Serves as the health check and as a warm-up ping target for the backend.
    """
    kategori_labels = list(label_kat_enc.classes_)
    sentimen_labels = list(label_sen_enc.classes_)
    return dict(
        status="ok",
        model="IndoBERT-p2 Multi-Task",
        kategori_labels=kategori_labels,
        sentimen_labels=sentimen_labels,
    )


@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """
    Predict the category and sentiment of a report text.

    Input:  {"teks": "Jalan rusak di depan sekolah sudah lama tidak diperbaiki"}
    Output: {"kategori": "infrastruktur", "sentimen": "negatif", "confidence": 0.9423}

    ``confidence`` is the softmax probability of the chosen category only.
    """
    # Tokenize into a fixed-length batch of one. The original author notes
    # these settings mirror the training pipeline.
    batch = tokenizer(
        req.teks,
        max_length=MAX_LEN,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    ids = batch["input_ids"]
    mask = batch["attention_mask"]
    # Fall back to all-zero segment ids when the tokenizer does not emit them.
    segments = batch.get("token_type_ids")
    if segments is None:
        segments = torch.zeros_like(ids)

    # Forward pass without autograd bookkeeping.
    with torch.no_grad():
        kat_logits, sen_logits = model(ids, mask, segments)

    # Category: most probable class and its softmax probability.
    top_prob, top_idx = torch.softmax(kat_logits, dim=1).max(dim=1)
    kategori = label_kat_enc.inverse_transform([top_idx.item()])[0]

    # Sentiment: threshold the sigmoid of the single logit at 0.5.
    # Assumes encoder index 1 is the positive class (the original note says
    # classes are sorted alphabetically: negatif=0, positif=1) - verify
    # against the training labels.
    is_positive = torch.sigmoid(sen_logits).item() > 0.5
    sentimen = label_sen_enc.inverse_transform([int(is_positive)])[0]

    return PredictResponse(
        kategori=kategori,
        sentimen=sentimen,
        confidence=round(top_prob.item(), 4),
    )