Spaces:

idkWhatToUse
/

trashAI

Sleeping

App Files Files Community

idkWhatToUse committed on Nov 16, 2025

Commit

44fdde6

verified ·

1 Parent(s): 411a6ae

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -160

app.py CHANGED Viewed

@@ -3,188 +3,147 @@ import json
 import torch
 from PIL import Image
 from sentence_transformers import SentenceTransformer, util
-from transformers import CLIPProcessor, CLIPModel, AutoTokenizer, AutoModelForCausalLM
-# ========== 1. 載入回收物品資料 ==========
-with open("recycle_data.json", "r", encoding="utf-8") as f:
-    recycle_data = json.load(f)
-# 271 個 label 文本（給 CLIP 用）
 label_texts = []
-id_to_item = []
 for item in recycle_data:
-    zh = item.get("name", "").strip()
-    en = (item.get("english_name") or "").strip()
-    if en:
-        text = f"{en}, {zh}"
-    else:
-        text = zh
-    label_texts.append(text)
-    id_to_item.append(item)
-num_labels = len(label_texts)
-print(f"Loaded {num_labels} recycle labels")
-# ========== 2. 載入 Q&A 資料 ==========
-with open("qas.json", "r", encoding="utf-8") as f:
-    qas = json.load(f)
 qa_questions = [q["question"] for q in qas]
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 qa_embeddings = embedder.encode(qa_questions, convert_to_tensor=True)
-# ========== 3. 載入 CLIP 模型 (Zero-shot 圖片 → 文本) ==========
-clip_model_name = "openai/clip-vit-base-patch32"
-clip_model = CLIPModel.from_pretrained(clip_model_name)
-clip_processor = CLIPProcessor.from_pretrained(clip_model_name)
-# 預先把 label 文本 embed（可加速）
 with torch.no_grad():
-    text_inputs = clip_processor(
-        text=label_texts,
-        images=None,
-        return_tensors="pt",
-        padding=True
     )
-    text_embeds = clip_model.get_text_features(**{k: v for k, v in text_inputs.items() if k.startswith("input_ids") or k.startswith("attention_mask")})
     text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)
-# ========== 4. （可選）小型 LLM 作 fallback ==========
-# 若 Space 免費 CPU 撐不住，可以先註解掉這段，或回傳簡單文字
-llm_name = "microsoft/phi-2"
-tokenizer = AutoTokenizer.from_pretrained(llm_name)
-llm_model = AutoModelForCausalLM.from_pretrained(llm_name)
-def llm_fallback(query: str) -> str:
-    prompt = f"你是一位垃圾分類助理，請用簡單中文回答以下問題，並遵守常見垃圾分類規則：{query}"
-    inputs = tokenizer(prompt, return_tensors="pt")
-    outputs = llm_model.generate(**inputs, max_new_tokens=120)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-# ========== 5. 工具函式 ==========
-def classify_image_with_clip(pil_image: Image.Image):
-    # images + text → CLIP 相似度
-    inputs = clip_processor(
-        text=None,
-        images=pil_image,
-        return_tensors="pt"
     )
     with torch.no_grad():
-        image_embeds = clip_model.get_image_features(**inputs)
-        image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
-        # cosine similarity
-        logits = image_embeds @ text_embeds.T  # (1, num_labels)
         probs = logits.softmax(dim=-1)[0]
     score, idx = torch.max(probs, dim=-1)
-    score = float(score.item())
-    idx = int(idx.item())
-    return idx, score
-def build_recycle_answer(item, score):
-    name = item.get("name", "")
-    en = item.get("english_name", "")
-    notes = item.get("notes", "")
-    rec = item.get("recyclable", "")
-    header = f"🔍 我推測此物品最接近：**{name}**"
-    if en:
-        header += f"（{en}）"
-    header += f"\n相似度：約 {score:.2f}\n\n"
-    body = ""
-    if rec:
-        body += f"♻️ 是否可回收 / 類型：{rec}\n\n"
-    if notes:
-        body += f"📦 建議回收方式：\n{notes}\n"
-    else:
-        body += "目前沒有更詳細的回收說明，可依一般回收原則處理。"
-    return header + body
-def generic_recycle_hint():
-    return (
-        "❓ 我無法自信地判斷這是資料庫中的哪一項物品。\n\n"
-        "可以參考以下一般原則：\n"
-        "1. 乾淨、可分離的紙類、塑膠、金屬、玻璃 → 多半可回收。\n"
-        "2. 沾滿油污、混合多種材質又不易拆解 → 通常當一般垃圾。\n"
-        "3. 電器、電池、燈管、農藥容器等 → 應交由清潔隊或指定回收點。\n"
-        "4. 若不確定，建議詢問當地環保局或 1999 專線。"
-    )
-def search_qa(query: str):
     q_emb = embedder.encode(query, convert_to_tensor=True)
     scores = util.cos_sim(q_emb, qa_embeddings)[0]
-    best_idx = torch.argmax(scores).item()
-    best_score = float(scores[best_idx].item())
-    if best_score > 0.7:
-        return qas[best_idx]["answer"]
-    else:
-        return None
-# ========== 6. 主助理邏輯 ==========
-def waste_assistant(user_text, image):
-    # 有圖片的情況（可以同時搭配文字）
-    if image is not None:
-        pil_image = Image.fromarray(image)
-        idx, score = classify_image_with_clip(pil_image)
-        # threshold：判斷「是否在 271 類的合理範圍內」
-        THRESH = 0.25
-        if score >= THRESH:
-            item = id_to_item[idx]
-            ans = build_recycle_answer(item, score)
-            # 如果還有文字問題，就順便試著回答
-            if user_text:
-                qa_ans = search_qa(user_text)
-                if qa_ans:
-                    ans += "\n\n---\n\n📚 相關延伸說明：\n" + qa_ans
-                else:
-                    # 補上一個簡單 LLM 回覆（可註解）
-                    extra = llm_fallback(user_text)
-                    ans += "\n\n---\n\n🤖 額外說明（模型推論）：\n" + extra
-            return ans
-        else:
-            # score 太低：可能不在 271 類中
-            base = generic_recycle_hint()
-            if user_text:
-                # 若有問題，就用 LLM 回答問題內容
-                extra = llm_fallback(user_text)
-                base += "\n\n---\n\n🤖 根據你輸入的文字，這是模型的推論：\n" + extra
-            return base
-    # 純文字問答模式
-    if user_text:
-        qa_ans = search_qa(user_text)
-        if qa_ans:
-            return qa_ans
-        # 找不到就交給 LLM 硬推
-        return llm_fallback(user_text)
-    return "請上傳圖片或輸入問題。"
-# ========== 7. Gradio 介面 ==========
-demo = gr.Interface(
-    fn=waste_assistant,
-    inputs=[
-        gr.Textbox(label="輸入你的問題（可留空，只傳圖片）"),
-        gr.Image(type="numpy", label="上傳垃圾 / 物品的照片")
-    ],
-    outputs=gr.Markdown(),
-    title="台南垃圾分類智慧助理（CLIP + 271 類回收資料）",
-    description=(
-        "● 上傳圖片，我會幫你猜這是什麼，並從回收資料中找最接近的物品，提供回收方式。\n"
-        "● 可以同時輸入文字，例如「這個要怎麼回收？」或「這個是可回收嗎？」\n"
-        "● 也可以只輸入文字，查詢常見的垃圾分類 / 回收問答。\n"
     )
 )
-if __name__ == "__main__":
-    demo.launch()

 import torch
 from PIL import Image
 from sentence_transformers import SentenceTransformer, util
+from transformers import (
+    CLIPProcessor, CLIPModel,
+    AutoTokenizer, AutoModelForCausalLM
+)
+# =======================================
+# 1. Load recycle data
+# =======================================
+recycle_data = json.load(open("recycle_data.json", "r", encoding="utf-8"))
 label_texts = []
+items = []
 for item in recycle_data:
+    zh = item.get("name", "")
+    en = item.get("english_name") or ""
+    label_texts.append(f"{en}, {zh}" if en else zh)
+    items.append(item)
+# =======================================
+# 2. Load Q&A data
+# =======================================
+qas = json.load(open("qas.json", "r", encoding="utf-8"))
 qa_questions = [q["question"] for q in qas]
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 qa_embeddings = embedder.encode(qa_questions, convert_to_tensor=True)
+# =======================================
+# 3. Load CLIP for image → text similarity
+# =======================================
+clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 with torch.no_grad():
+    t_inputs = clip_processor(
+        text=label_texts, images=None, return_tensors="pt", padding=True
+    )
+    text_embeds = clip_model.get_text_features(
+        input_ids=t_inputs["input_ids"],
+        attention_mask=t_inputs["attention_mask"]
     )
     text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)
+# =======================================
+# 4. SUPER-FAST Chat LLM (0.5B)
+# =======================================
+LLM_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
+tok = AutoTokenizer.from_pretrained(LLM_NAME)
+llm = AutoModelForCausalLM.from_pretrained(
+    LLM_NAME,
+    torch_dtype=torch.float32,
+    device_map="cpu"
+)
+def llm_chat(prompt):
+    inputs = tok(prompt, return_tensors="pt")
+    outputs = llm.generate(
+        **inputs,
+        max_new_tokens=120,
+        temperature=0.4
     )
+    return tok.decode(outputs[0], skip_special_tokens=True)
+# =======================================
+# Helper functions
+# =======================================
+def classify_image(image):
+    inputs = clip_processor(images=image, return_tensors="pt")
     with torch.no_grad():
+        img_emb = clip_model.get_image_features(**inputs)
+        img_emb = img_emb / img_emb.norm(p=2, dim=-1, keepdim=True)
+        logits = img_emb @ text_embeds.T
         probs = logits.softmax(dim=-1)[0]
     score, idx = torch.max(probs, dim=-1)
+    return idx.item(), float(score.item())
+def search_qa(query):
     q_emb = embedder.encode(query, convert_to_tensor=True)
     scores = util.cos_sim(q_emb, qa_embeddings)[0]
+    idx = torch.argmax(scores).item()
+    if scores[idx] > 0.70:
+        return qas[idx]["answer"]
+    return None
+def general_rules():
+    return (
+        "以下是一般垃圾分類原則：\n"
+        "1. 乾淨可分離材質 → 可回收。\n"
+        "2. 污損/混合材質不易拆 → 一般垃圾。\n"
+        "3. 電器、電池、有害物 → 指定回收。\n"
+        "4. 不確定時 → 打 1999 或問清潔隊。\n"
     )
+# =======================================
+# 5. Main Chatbot Logic
+# =======================================
+def chatbot(message, history):
+    image = None
+    if isinstance(message, dict) and "image" in message:
+        image = message["image"]
+        message = ""
+    final_answer = ""
+    # --- Image mode ---
+    if image:
+        pil = Image.fromarray(image)
+        idx, sim = classify_image(pil)
+        if sim >= 0.25:
+            item = items[idx]
+            final_answer += (
+                f"🔍 推測最接近：**{item['name']}**（相似度 {sim:.2f}）\n\n"
+                f"♻️ {item.get('recyclable', '')}\n\n"
+                f"{item.get('notes', '')}\n\n"
+            )
+        else:
+            final_answer += (
+                "❓ 無法確定圖片屬於資料庫中的哪一項物品。\n\n" +
+                general_rules()
+            )
+    # --- Text mode ---
+    if message:
+        q_ans = search_qa(message)
+        if q_ans:
+            final_answer += f"📘 查到官方資料：\n{q_ans}\n"
+        else:
+            llm_ans = llm_chat(f"請以台灣垃圾分類規則回答問題：{message}")
+            final_answer += f"🤖 推論回答：\n{llm_ans}\n"
+    return final_answer or "請輸入問題或上傳圖片。"
+# =======================================
+# 6. Chat UI
+# =======================================
+chat_ui = gr.ChatInterface(
+    fn=chatbot,
+    title="垃圾分類聊天助理（CLIP × Qwen × 271 類）",
+    description="可上傳圖片，也可直接聊天。"
 )
+chat_ui.launch()