import streamlit as st
import os
import gdown
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
from safetensors.torch import load_file
import torch

# ================================
# 1. Model weights: local destination and Google Drive source
# ================================
# Local folder that holds the model files; the tokenizer and config are read
# from here as well.
MODEL_DIR = "model_folder"
# Full local path of the weights file inside MODEL_DIR.
MODEL_FILE = os.path.join(MODEL_DIR, "model.safetensors")
# Google Drive file id of model.safetensors.
FILE_ID = "1eMR7jxkj5XLLIV6t9IIllpfegxHWCi_A"

# ================================
# 2. Download the model file (only if it is not already present)
# ================================
# exist_ok=True already makes this a no-op when the folder exists, so no
# separate existence check is needed.
os.makedirs(MODEL_DIR, exist_ok=True)

if not os.path.exists(MODEL_FILE):
    st.write("Mengunduh model.safetensors dari Google Drive...")
    url = f"https://drive.google.com/uc?id={FILE_ID}"

    # A failed download must not be reported as a success: gdown may signal
    # failure either by raising or by returning None, so both are checked,
    # along with the file actually being on disk afterwards.
    failure = None
    try:
        result = gdown.download(url, MODEL_FILE, quiet=False)
    except Exception as exc:  # UI boundary: report any download error to the user
        failure = str(exc) or type(exc).__name__
    else:
        if result is None or not os.path.exists(MODEL_FILE):
            failure = "berkas tidak ditemukan setelah unduhan"

    if failure is not None:
        st.error(f"Gagal mengunduh model.safetensors: {failure}")
        # Halt this run here; continuing would only fail later while loading
        # the missing weights.
        st.stop()

    st.success("model.safetensors berhasil di-download!")

# ================================
# 3. Load model & tokenizer WITHOUT META MODE
# ================================
@st.cache_resource
def _load_model_and_tokenizer(model_dir, weights_path):
    """Build the tokenizer, config and classifier once and reuse them.

    Streamlit re-runs this script on every widget interaction; caching the
    result avoids re-reading the tokenizer, config and weights from disk each
    time the user presses a button.

    Args:
        model_dir: Directory containing the tokenizer and config files.
        weights_path: Path to the safetensors file with the model weights.

    Returns:
        A ``(tokenizer, config, model)`` tuple. The model is on the CPU and in
        eval mode.

    Raises:
        RuntimeError: Raised by ``load_state_dict`` (``strict=True``) when the
            weight names do not match the architecture built from the config.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_dir)

    # 3A - Load the config
    config = AutoConfig.from_pretrained(model_dir)

    # 3B - Instantiate the architecture from the config alone (no weights yet)
    model = AutoModelForSequenceClassification.from_config(config)

    # 3C - Copy the safetensors weights in; the temporary state dict is local
    # to this function, so it is released once the function returns.
    model.load_state_dict(load_file(weights_path), strict=True)

    model.to("cpu")
    model.eval()
    return tokenizer, config, model


st.write("Memuat model...")

tokenizer, config, model = _load_model_and_tokenizer(MODEL_DIR, MODEL_FILE)

st.success("Model siap digunakan!")

# ================================
# 4. Label mapping
# ================================
# Maps a predicted class index to its display label; the position in the
# tuple is the class index.
# NOTE(review): assumes index 0 is the negative class and 1 the positive one -
# confirm against the labels used in training.
label_map = dict(enumerate(("Negatif", "Positif")))

# ================================
# 5. Streamlit UI
# ================================
st.title("🚀 Klasifikasi Kalimat dengan Model dari Google Drive")

text = st.text_area("Masukkan kalimat:")
classify_clicked = st.button("Klasifikasi")

if classify_clicked and not text.strip():
    # Button pressed with an empty / whitespace-only input: ask for text.
    st.warning("Tolong masukkan kalimat terlebih dahulu.")
elif classify_clicked:
    # Tokenize the sentence into PyTorch tensors.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1)

    # Index of the most probable class for the first (only) input row.
    pred = int(torch.argmax(probs, dim=1)[0])

    # Indices missing from label_map are shown as "Unknown".
    label = label_map.get(pred, "Unknown")

    st.subheader("Hasil Prediksi:")
    st.write("Kelas:", f"**{label}**")
    st.write("Probabilitas:", probs.tolist()[0])