Spaces:

idkWhatToUse
/

trashAI

Sleeping

App Files Files Community

trashAI / app.py

idkWhatToUse

Update app.py

557db1b verified 7 months ago

Raw

History Blame Contribute Delete

7.26 kB

	import gradio as gr
	import json
	import torch
	from PIL import Image
	from sentence_transformers import SentenceTransformer, util
	from transformers import (
	CLIPProcessor, CLIPModel,
	AutoTokenizer, AutoModelForCausalLM
	)

	# =======================================
	# 1. Load recycle data
	# =======================================
	recycle_data = json.load(open("recycle_data.json", "r", encoding="utf-8"))
	label_texts, items = [], []

	for item in recycle_data:
	zh = item.get("name", "")
	en = item.get("english_name") or ""
	label_texts.append(f"{en}, {zh}" if en else zh)
	items.append(item)


	# =======================================
	# 2. Load Q&A (RAG)
	# =======================================
	qas = json.load(open("qas.json", "r", encoding="utf-8"))
	qa_questions = [q["question"] for q in qas]

	embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
	qa_embeddings = embedder.encode(qa_questions, convert_to_tensor=True)


	# =======================================
	# 3. CLIP 用於圖片分類
	# =======================================
	clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
	clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

	with torch.no_grad():
	t_inputs = clip_processor(text=label_texts, return_tensors="pt", padding=True)
	text_embeds = clip_model.get_text_features(
	input_ids=t_inputs["input_ids"],
	attention_mask=t_inputs["attention_mask"]
	)
	text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)


	# =======================================
	# 4. LLM（Qwen 0.5B）＋回答模板
	# =======================================
	LLM = "Qwen/Qwen2.5-0.5B-Instruct"
	tok = AutoTokenizer.from_pretrained(LLM)
	llm = AutoModelForCausalLM.from_pretrained(LLM, torch_dtype=torch.float32).to("cpu")

	def llm_reply(prompt):
	inputs = tok(prompt, return_tensors="pt")
	outputs = llm.generate(**inputs, max_new_tokens=200)
	return tok.decode(outputs[0], skip_special_tokens=True)


	# =======================================
	# 5. 回答品質：加入「專業垃圾分類助理模板」
	# =======================================

	def expert_llm_reply(text):
	prompt = f"""
	你是一位「台灣垃圾分類專家助理」。
	請用自然、生活化、清楚、條列式、友善語氣回答問題。
	遵守規則：

	- 使用台灣常見分類（紙類、塑膠類、鐵鋁罐、玻璃、其他可回收、一般垃圾、廚餘…）
	- 如可能需要清洗 → 提醒「保持乾淨、不要油膩」
	- 如可能需要壓扁、拆蓋 → 主動提醒
	- 如不同縣市規則不同 → 說「各縣市略有差異」
	- 最後提供 1 個附加小提醒

	使用者問題：{text}

	請直接回答：
	"""
	return llm_reply(prompt)


	# =======================================
	# 6. 額外知識庫（讓回答更像真人）
	# =======================================

	extra_rules = {
	"寶特瓶": [
	"瓶身要簡單沖洗乾淨",
	"可壓扁節省空間",
	"瓶蓋需旋開分開丟（塑膠類）",
	"標籤可保留或拆除都可以"
	],
	"鋁箔包": [
	"要沖洗乾淨避免發臭",
	"記得壓扁更好回收",
	"屬於飲料紙容器類，可回收"
	],
	"外帶杯": [
	"杯身要沖乾淨",
	"若是紙杯 → 紙類回收",
	"若是塑膠杯 → 塑膠類回收",
	"吸管為一般垃圾"
	],
	"餐盒": [
	"若為乾淨塑膠 → 可回收",
	"若油膩、難清洗 → 一般垃圾",
	"盒蓋通常可回收（塑膠）"
	],
	}


	def add_extra_tips(item_name):
	if item_name not in extra_rules:
	return ""
	tips = "\n".join(f"- {t}" for t in extra_rules[item_name])
	return f"\n🔧 小提醒：\n{tips}"


	# =======================================
	# 7. 圖片分類 + 回答模板
	# =======================================

	def classify_image(pil):
	inputs = clip_processor(images=pil, return_tensors="pt")
	with torch.no_grad():
	img_emb = clip_model.get_image_features(**inputs)
	img_emb = img_emb / img_emb.norm(p=2, dim=-1, keepdim=True)
	logits = img_emb @ text_embeds.T
	probs = logits.softmax(dim=-1)[0]
	idx = torch.argmax(probs).item()
	score = float(probs[idx])
	return idx, score


	def smart_answer(item, score):
	name = item["name"]
	rec = item.get("recyclable", "")
	notes = item.get("notes", "")

	return f"""
	🟢 辨識結果
	我推測這張照片中的物品是 {name}
	（相似度：{score:.2f}）

	♻ 是否可回收
	{rec}

	📌 補充說明
	{notes}
	{add_extra_tips(name)}

	有需要我可以繼續告訴你：
	- 要不要清洗？
	- 要不要壓扁？
	- 某些配件要不要拆？
	都可以問我喔！
	"""


	# =======================================
	# 8. 搜尋 recycle_data 名稱
	# =======================================
	def search_recycle_name(text):
	for item in items:
	if item["name"] in text:
	return item
	return None


	# =======================================
	# 9. RAG 搜尋官方 Q&A
	# =======================================
	def rag_search(text):
	q_emb = embedder.encode(text, convert_to_tensor=True)
	scores = util.cos_sim(q_emb, qa_embeddings)[0]
	best_idx = torch.argmax(scores).item()

	if float(scores[best_idx]) > 0.70:
	return qas[best_idx]["answer"]
	return None


	# =======================================
	# 10. Chatbot 主邏輯
	# =======================================

	global_image = None

	def bot(message, history):
	global global_image

	# 如果含圖片
	if isinstance(message, dict):
	img = message.get("image", None)
	text = message.get("text", "").strip()

	# 上傳圖片 → 更新 context
	if img is not None:
	global_image = Image.fromarray(img)

	idx, score = classify_image(global_image)
	item = items[idx]
	return smart_answer(item, score)

	# 無圖片但有文字 → 當一般文字處理
	message = text

	# 純文字
	if isinstance(message, str):
	text = message.strip()

	# 若有上一張圖片 → 可以追問
	if global_image is not None:
	idx, _ = classify_image(global_image)
	current_item = items[idx]
	if current_item["name"] in text:
	return smart_answer(current_item, 0.99)

	# recycle_data 查詢
	item = search_recycle_name(text)
	if item:
	return smart_answer(item, 0.99)

	# RAG 查官方資料
	ans = rag_search(text)
	if ans:
	return f"📘 官方資料：\n{ans}"

	# fallback → LLM 專業回答
	return expert_llm_reply(text)

	return "我好像不太理解你的訊息，可以再說一次嗎？"


	# =======================================
	# 11. Gradio Chat UI
	# =======================================
	ui = gr.ChatInterface(
	fn=bot,
	title="台南垃圾分類智慧助理（圖片 + 多輪聊天）",
	description="你可以傳圖片或提問，我會查看 270+ 類回收資料 + 官方 Q&A + 多輪對話記憶。",
	multimodal=True,
	)

	ui.launch()