import torch
from AI_Model_architecture import BertLSTM_CNN_Classifier
from transformers import BertTokenizer
import re
import os
import requests

# Inference device: CUDA when a GPU is visible to torch, otherwise the CPU
# (e.g. a CPU-only Hugging Face deployment).
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Remote location of the trained weights, and the local path they are cached
# at. The original author's note says /tmp is the path recommended on
# Hugging Face to avoid cache errors.
model_url = "https://huggingface.co/jerrynnms/scam-model/resolve/main/model.pth"
model_path = "/tmp/model.pth"

# Cache the model weights on disk; the download only runs when model_path is
# missing (i.e. on the first start).
if not os.path.exists(model_path):
    print("📦 下載 model.pth 中...")
    # Write to a sibling ".part" file and rename it into place only after the
    # whole body has arrived, so an interrupted download can never leave a
    # truncated file that the existence check above would accept next time.
    _partial_path = model_path + ".part"
    # requests applies no timeout by default, so bound the connect and read
    # waits explicitly. stream=True avoids holding the entire checkpoint in
    # memory before it is written out.
    with requests.get(model_url, stream=True, timeout=(10, 60)) as response:
        if response.status_code != 200:
            raise FileNotFoundError("❌ 無法下載 model.pth，請檢查網址")
        try:
            with open(_partial_path, "wb") as f:
                for _chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(_chunk)
            # Atomic rename: model_path either does not exist or is complete.
            os.replace(_partial_path, model_path)
        finally:
            # No-op after a successful rename; otherwise removes the leftover
            # partial file so a failed attempt leaves nothing behind.
            if os.path.exists(_partial_path):
                os.remove(_partial_path)
    print("✅ 模型下載完成")

# Module-level model and tokenizer: created once when this module is imported
# and reused by predict_single_sentence for every request.
model = BertLSTM_CNN_Classifier()
# NOTE(review): torch.load unpickles the downloaded file; depending on the
# installed torch version this can execute arbitrary pickled code. If the
# checkpoint is a plain state_dict, consider passing weights_only=True —
# TODO confirm against the torch version in use.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Tokenizer is loaded from the "ckiplab/bert-base-chinese" pretrained files.
tokenizer = BertTokenizer.from_pretrained("ckiplab/bert-base-chinese")

# ✅ 預測單句文字
def predict_single_sentence(text: str, max_len=256):
    """Score a single sentence with the module-level model.

    Args:
        text: Raw input sentence.
        max_len: Token length the encoded input is truncated or padded to.

    Returns:
        A ``(label, prob)`` tuple. ``prob`` is the model's single output value
        as a Python float (presumably a scam probability in [0, 1] — confirm
        against the model's output layer); ``label`` is 1 when ``prob > 0.5``
        and 0 otherwise.
    """
    # Normalise the input: drop all whitespace first, then every character
    # outside the allow-list (CJK ideographs U+4E00-U+9FFF, ASCII letters and
    # digits, and the punctuation 。，！？:/.-).
    without_spaces = re.sub(r"\s+", "", text)
    cleaned = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9。，！？:/.\-]", "", without_spaces)

    batch = tokenizer(
        cleaned,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_len,
    )
    # Move the three tensors the model consumes onto the inference device,
    # in the positional order the model is called with.
    model_inputs = [
        batch[field].to(device)
        for field in ("input_ids", "attention_mask", "token_type_ids")
    ]

    # Inference only: no gradients are needed.
    with torch.no_grad():
        score = model(*model_inputs).item()

    return int(score > 0.5), score

# ✅ 封裝為 API 可用格式
def analyze_text(text: str):
    """Run the classifier on *text* and package the result for the API.

    Args:
        text: Raw input sentence, forwarded to ``predict_single_sentence``.

    Returns:
        A dict with three keys:
        ``"status"`` — "詐騙" when the predicted label is 1, else "正常";
        ``"confidence"`` — the model score as a percentage rounded to two
        decimals;
        ``"suspicious_keywords"`` — a one-element list holding the risk-tier
        description.
    """
    label, prob = predict_single_sentence(text)

    # Risk tiers from highest to lowest; the first tier whose threshold the
    # score strictly exceeds wins, and anything else is low risk.
    risk_tiers = (
        (0.9, "🔴 高風險（極可能是詐騙）"),
        (0.5, "🟡 中風險（可疑）"),
    )
    risk = next(
        (description for threshold, description in risk_tiers if prob > threshold),
        "🟢 低風險（正常）",
    )

    if label == 1:
        status = "詐騙"
    else:
        status = "正常"

    return {
        "status": status,
        "confidence": round(prob * 100, 2),
        # Placeholder: currently carries only the risk tier; per-keyword
        # annotation may be added here later.
        "suspicious_keywords": [risk],
    }