import gradio as gr import json import torch from PIL import Image from sentence_transformers import SentenceTransformer, util from transformers import ( CLIPProcessor, CLIPModel, AutoTokenizer, AutoModelForCausalLM ) # ======================================= # 1. Load recycle data # ======================================= recycle_data = json.load(open("recycle_data.json", "r", encoding="utf-8")) label_texts, items = [], [] for item in recycle_data: zh = item.get("name", "") en = item.get("english_name") or "" label_texts.append(f"{en}, {zh}" if en else zh) items.append(item) # ======================================= # 2. Load Q&A (RAG) # ======================================= qas = json.load(open("qas.json", "r", encoding="utf-8")) qa_questions = [q["question"] for q in qas] embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") qa_embeddings = embedder.encode(qa_questions, convert_to_tensor=True) # ======================================= # 3. CLIP 用於圖片分類 # ======================================= clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") with torch.no_grad(): t_inputs = clip_processor(text=label_texts, return_tensors="pt", padding=True) text_embeds = clip_model.get_text_features( input_ids=t_inputs["input_ids"], attention_mask=t_inputs["attention_mask"] ) text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True) # ======================================= # 4. LLM(Qwen 0.5B)+回答模板 # ======================================= LLM = "Qwen/Qwen2.5-0.5B-Instruct" tok = AutoTokenizer.from_pretrained(LLM) llm = AutoModelForCausalLM.from_pretrained(LLM, torch_dtype=torch.float32).to("cpu") def llm_reply(prompt): inputs = tok(prompt, return_tensors="pt") outputs = llm.generate(**inputs, max_new_tokens=200) return tok.decode(outputs[0], skip_special_tokens=True) # ======================================= # 5. 回答品質:加入「專業垃圾分類助理模板」 # ======================================= def expert_llm_reply(text): prompt = f""" 你是一位「台灣垃圾分類專家助理」。 請用 **自然、生活化、清楚、條列式、友善語氣** 回答問題。 遵守規則: - 使用台灣常見分類(紙類、塑膠類、鐵鋁罐、玻璃、其他可回收、一般垃圾、廚餘…) - 如可能需要清洗 → 提醒「保持乾淨、不要油膩」 - 如可能需要壓扁、拆蓋 → 主動提醒 - 如不同縣市規則不同 → 說「各縣市略有差異」 - 最後提供 1 個附加小提醒 使用者問題:{text} 請直接回答: """ return llm_reply(prompt) # ======================================= # 6. 額外知識庫(讓回答更像真人) # ======================================= extra_rules = { "寶特瓶": [ "瓶身要簡單沖洗乾淨", "可壓扁節省空間", "瓶蓋需旋開分開丟(塑膠類)", "標籤可保留或拆除都可以" ], "鋁箔包": [ "要沖洗乾淨避免發臭", "記得壓扁更好回收", "屬於飲料紙容器類,可回收" ], "外帶杯": [ "杯身要沖乾淨", "若是紙杯 → 紙類回收", "若是塑膠杯 → 塑膠類回收", "吸管為一般垃圾" ], "餐盒": [ "若為乾淨塑膠 → 可回收", "若油膩、難清洗 → 一般垃圾", "盒蓋通常可回收(塑膠)" ], } def add_extra_tips(item_name): if item_name not in extra_rules: return "" tips = "\n".join(f"- {t}" for t in extra_rules[item_name]) return f"\n🔧 **小提醒:**\n{tips}" # ======================================= # 7. 圖片分類 + 回答模板 # ======================================= def classify_image(pil): inputs = clip_processor(images=pil, return_tensors="pt") with torch.no_grad(): img_emb = clip_model.get_image_features(**inputs) img_emb = img_emb / img_emb.norm(p=2, dim=-1, keepdim=True) logits = img_emb @ text_embeds.T probs = logits.softmax(dim=-1)[0] idx = torch.argmax(probs).item() score = float(probs[idx]) return idx, score def smart_answer(item, score): name = item["name"] rec = item.get("recyclable", "") notes = item.get("notes", "") return f""" 🟢 **辨識結果** 我推測這張照片中的物品是 **{name}** (相似度:**{score:.2f}**) ♻ **是否可回收** {rec} 📌 **補充說明** {notes} {add_extra_tips(name)} 有需要我可以繼續告訴你: - 要不要清洗? - 要不要壓扁? - 某些配件要不要拆? 都可以問我喔! """ # ======================================= # 8. 搜尋 recycle_data 名稱 # ======================================= def search_recycle_name(text): for item in items: if item["name"] in text: return item return None # ======================================= # 9. RAG 搜尋官方 Q&A # ======================================= def rag_search(text): q_emb = embedder.encode(text, convert_to_tensor=True) scores = util.cos_sim(q_emb, qa_embeddings)[0] best_idx = torch.argmax(scores).item() if float(scores[best_idx]) > 0.70: return qas[best_idx]["answer"] return None # ======================================= # 10. Chatbot 主邏輯 # ======================================= global_image = None def bot(message, history): global global_image # 如果含圖片 if isinstance(message, dict): img = message.get("image", None) text = message.get("text", "").strip() # 上傳圖片 → 更新 context if img is not None: global_image = Image.fromarray(img) idx, score = classify_image(global_image) item = items[idx] return smart_answer(item, score) # 無圖片但有文字 → 當一般文字處理 message = text # 純文字 if isinstance(message, str): text = message.strip() # 若有上一張圖片 → 可以追問 if global_image is not None: idx, _ = classify_image(global_image) current_item = items[idx] if current_item["name"] in text: return smart_answer(current_item, 0.99) # recycle_data 查詢 item = search_recycle_name(text) if item: return smart_answer(item, 0.99) # RAG 查官方資料 ans = rag_search(text) if ans: return f"📘 **官方資料:**\n{ans}" # fallback → LLM 專業回答 return expert_llm_reply(text) return "我好像不太理解你的訊息,可以再說一次嗎?" # ======================================= # 11. Gradio Chat UI # ======================================= ui = gr.ChatInterface( fn=bot, title="台南垃圾分類智慧助理(圖片 + 多輪聊天)", description="你可以傳圖片或提問,我會查看 270+ 類回收資料 + 官方 Q&A + 多輪對話記憶。", multimodal=True, ) ui.launch()