import gradio as gr
import json
import torch
from PIL import Image
from sentence_transformers import SentenceTransformer, util
from transformers import (
    CLIPProcessor, CLIPModel,
    AutoTokenizer, AutoModelForCausalLM
)

# =======================================
# 1. Load recycle data
# =======================================
recycle_data = json.load(open("recycle_data.json", "r", encoding="utf-8"))
label_texts, items = [], []

for item in recycle_data:
    zh = item.get("name", "")
    en = item.get("english_name") or ""
    label_texts.append(f"{en}, {zh}" if en else zh)
    items.append(item)


# =======================================
# 2. Load Q&A (RAG)
# =======================================
qas = json.load(open("qas.json", "r", encoding="utf-8"))
qa_questions = [q["question"] for q in qas]

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
qa_embeddings = embedder.encode(qa_questions, convert_to_tensor=True)


# =======================================
# 3. CLIP 用於圖片分類
# =======================================
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

with torch.no_grad():
    t_inputs = clip_processor(text=label_texts, return_tensors="pt", padding=True)
    text_embeds = clip_model.get_text_features(
        input_ids=t_inputs["input_ids"],
        attention_mask=t_inputs["attention_mask"]
    )
    text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)


# =======================================
# 4. LLM（Qwen 0.5B）＋回答模板
# =======================================
LLM = "Qwen/Qwen2.5-0.5B-Instruct"
tok = AutoTokenizer.from_pretrained(LLM)
llm = AutoModelForCausalLM.from_pretrained(LLM, torch_dtype=torch.float32).to("cpu")

def llm_reply(prompt):
    inputs = tok(prompt, return_tensors="pt")
    outputs = llm.generate(**inputs, max_new_tokens=200)
    return tok.decode(outputs[0], skip_special_tokens=True)


# =======================================
# 5. 回答品質：加入「專業垃圾分類助理模板」
# =======================================

def expert_llm_reply(text):
    prompt = f"""
你是一位「台灣垃圾分類專家助理」。  
請用 **自然、生活化、清楚、條列式、友善語氣** 回答問題。  
遵守規則：

- 使用台灣常見分類（紙類、塑膠類、鐵鋁罐、玻璃、其他可回收、一般垃圾、廚餘…）
- 如可能需要清洗 → 提醒「保持乾淨、不要油膩」
- 如可能需要壓扁、拆蓋 → 主動提醒
- 如不同縣市規則不同 → 說「各縣市略有差異」
- 最後提供 1 個附加小提醒

使用者問題：{text}

請直接回答：
"""
    return llm_reply(prompt)


# =======================================
# 6. 額外知識庫（讓回答更像真人）
# =======================================

extra_rules = {
    "寶特瓶": [
        "瓶身要簡單沖洗乾淨",
        "可壓扁節省空間",
        "瓶蓋需旋開分開丟（塑膠類）",
        "標籤可保留或拆除都可以"
    ],
    "鋁箔包": [
        "要沖洗乾淨避免發臭",
        "記得壓扁更好回收",
        "屬於飲料紙容器類，可回收"
    ],
    "外帶杯": [
        "杯身要沖乾淨",
        "若是紙杯 → 紙類回收",
        "若是塑膠杯 → 塑膠類回收",
        "吸管為一般垃圾"
    ],
    "餐盒": [
        "若為乾淨塑膠 → 可回收",
        "若油膩、難清洗 → 一般垃圾",
        "盒蓋通常可回收（塑膠）"
    ],
}


def add_extra_tips(item_name):
    if item_name not in extra_rules:
        return ""
    tips = "\n".join(f"- {t}" for t in extra_rules[item_name])
    return f"\n🔧 **小提醒：**\n{tips}"


# =======================================
# 7. 圖片分類 + 回答模板
# =======================================

def classify_image(pil):
    inputs = clip_processor(images=pil, return_tensors="pt")
    with torch.no_grad():
        img_emb = clip_model.get_image_features(**inputs)
        img_emb = img_emb / img_emb.norm(p=2, dim=-1, keepdim=True)
        logits = img_emb @ text_embeds.T
        probs = logits.softmax(dim=-1)[0]
    idx = torch.argmax(probs).item()
    score = float(probs[idx])
    return idx, score


def smart_answer(item, score):
    name = item["name"]
    rec = item.get("recyclable", "")
    notes = item.get("notes", "")

    return f"""
🟢 **辨識結果**  
我推測這張照片中的物品是 **{name}**  
（相似度：**{score:.2f}**）

♻ **是否可回收**  
{rec}

📌 **補充說明**  
{notes}
{add_extra_tips(name)}

有需要我可以繼續告訴你：
- 要不要清洗？
- 要不要壓扁？
- 某些配件要不要拆？
都可以問我喔！
"""


# =======================================
# 8. 搜尋 recycle_data 名稱
# =======================================
def search_recycle_name(text):
    for item in items:
        if item["name"] in text:
            return item
    return None


# =======================================
# 9. RAG 搜尋官方 Q&A
# =======================================
def rag_search(text):
    q_emb = embedder.encode(text, convert_to_tensor=True)
    scores = util.cos_sim(q_emb, qa_embeddings)[0]
    best_idx = torch.argmax(scores).item()

    if float(scores[best_idx]) > 0.70:
        return qas[best_idx]["answer"]
    return None


# =======================================
# 10. Chatbot 主邏輯
# =======================================

global_image = None

def bot(message, history):
    global global_image

    # 如果含圖片
    if isinstance(message, dict):
        img = message.get("image", None)
        text = message.get("text", "").strip()

        # 上傳圖片 → 更新 context
        if img is not None:
            global_image = Image.fromarray(img)

            idx, score = classify_image(global_image)
            item = items[idx]
            return smart_answer(item, score)

        # 無圖片但有文字 → 當一般文字處理
        message = text

    # 純文字
    if isinstance(message, str):
        text = message.strip()

        # 若有上一張圖片 → 可以追問
        if global_image is not None:
            idx, _ = classify_image(global_image)
            current_item = items[idx]
            if current_item["name"] in text:
                return smart_answer(current_item, 0.99)

        # recycle_data 查詢
        item = search_recycle_name(text)
        if item:
            return smart_answer(item, 0.99)

        # RAG 查官方資料
        ans = rag_search(text)
        if ans:
            return f"📘 **官方資料：**\n{ans}"

        # fallback → LLM 專業回答
        return expert_llm_reply(text)

    return "我好像不太理解你的訊息，可以再說一次嗎？"


# =======================================
# 11. Gradio Chat UI
# =======================================
ui = gr.ChatInterface(
    fn=bot,
    title="台南垃圾分類智慧助理（圖片 + 多輪聊天）",
    description="你可以傳圖片或提問，我會查看 270+ 類回收資料 + 官方 Q&A + 多輪對話記憶。",
    multimodal=True,
)

ui.launch()