Spaces:

DeepLearning101
/

Multimodal-Playground

Sleeping

App Files Files Community

Multimodal-Playground / app.py

DeepLearning101

Update app.py

8fe1c58 verified 6 days ago

raw

history blame

11.8 kB

import asyncio
import json
import mimetypes
import os
import subprocess
import sys
from urllib.parse import quote

import aiohttp
import gradio as gr
import requests

	# pip 升級
	def upgrade_pip():
	try:
	subprocess.check_call([os.sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
	print("pip 升級成功")
	except subprocess.CalledProcessError:
	print("pip 升級失敗")

# Runs at import time, before the configuration is read and the UI is built.
upgrade_pip()

	LLM_API = os.environ.get("LLM_API", "").strip()
	LLM_URL = os.environ.get("LLM_URL")
	USER_ID = "HuggingFace Space"

	async def send_chat_message(LLM_URL, LLM_API, category, file_url):
	print(f"--- 開始請求 ---")
	print(f"URL: {LLM_URL}/chat-messages")
	print(f"File URL: {file_url}") # 檢查這裡產生的 URL 是否真的公開可讀取

	payload = {
	"inputs": {},
	"query": category,
	"conversation_id": "",
	"user": USER_ID,
	"response_mode": "streaming",
	"files": [
	{
	"type": "image",
	"transfer_method": "remote_url",
	"url": file_url
	}
	]
	}

	answer = ""
	try:
	async with aiohttp.ClientSession() as session:
	async with session.post(
	f"{LLM_URL}/chat-messages",
	headers={
	"Authorization": f"Bearer {LLM_API}",
	"Content-Type": "application/json"
	},
	json=payload
	) as response:

	# [DEBUG 1] 檢查狀態碼
	print(f"HTTP Status: {response.status}")

	if response.status != 200:
	# 如果出錯，讀取錯誤訊息並回傳
	error_text = await response.text()
	print(f"API Error Response: {error_text}")
	return f"API Error {response.status}: {error_text}"

	# [DEBUG 2] 逐行監看回傳內容
	async for line_bytes in response.content:
	line = line_bytes.decode("utf-8").strip()

	# 印出原始資料 (Debug 用，確認有東西回來)
	if line:
	print(f"Raw Line: {line}")

	if line.startswith("data: "):
	try:
	data = json.loads(line[6:])
	if "answer" in data:
	answer += data["answer"]
	# print(f"Current Answer chunk: {data['answer']}") # 選用：即時看片段
	if "error" in data:
	print(f"Data Error: {data}") # Dify 有時會在 data 裡回傳 error
	except Exception as e:
	print(f"JSON Parse Error: {e} \| Content: {line}")
	continue

	except Exception as e:
	print(f"Request Exception: {e}")
	return f"System Error: {str(e)}"

	if not answer:
	print("警告: 請求結束但 answer 為空")

	return answer or "No answer returned (Empty Response)"

	async def upload_file(LLM_URL, LLM_API, file_path, user_id):
	if not os.path.exists(file_path):
	return f"Error: File {file_path} not found"

	# ✅ 先定義 filename 和 mime_type
	mime_type, _ = mimetypes.guess_type(file_path)
	filename = os.path.basename(file_path)

	# ✅ print 放在這裡
	print("Uploading file:", filename, "mime_type:", mime_type)

	with open(file_path, 'rb') as f:
	async with aiohttp.ClientSession() as session:
	form_data = aiohttp.FormData()
	form_data.add_field('file', f, filename=filename, content_type=mime_type)
	form_data.add_field('user', user_id)
	async with session.post(
	f"{LLM_URL}/files/upload",
	headers={"Authorization": f"Bearer {LLM_API}"},
	data=form_data
	) as response:
	if response.status == 404:
	return "Error: Endpoint not found (404)"
	response_text = await response.text()
	print("Upload response:", response_text)
	try:
	return json.loads(response_text)
	except json.JSONDecodeError:
	return "Error: Invalid JSON response"

	async def handle_input(file_path, category):
	# 取得檔名
	filename = os.path.basename(file_path)

	# 直接用 HuggingFace repo URL
	file_url = f"https://huggingface.co/spaces/DeepLearning101/Multimodal-Playground/blob/main/DEMO/{filename}?raw=true"

	return await send_chat_message(LLM_URL, LLM_API, category, file_url)
# Earlier upload-based variant of handle_input, kept commented out for reference.
# It uploads the image with upload_file and passes the returned file id on.
# async def handle_input(file_path, category):
#     # If the tmp path does not exist, fall back to the DEMO path inside the repo
#     if not os.path.exists(file_path):
#         file_path = os.path.join("DEMO", os.path.basename(file_path))
#
#     upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
#     if isinstance(upload_response, str) and upload_response.startswith("Error"):
#         return upload_response
#     file_id = upload_response.get("id")
#     if not file_id:
#         return "Error: No file ID returned from upload"
#     return await send_chat_message(LLM_URL, LLM_API, category, file_id)

	# UI 元件 & 資料
	examples = [
	['DEMO/Medical1.jpg', '診斷證明書'],
	['DEMO/Medical2.jpg', '診斷證明書'],
	['DEMO/passport.png', '護照'],
	['DEMO/residence.png', '居留證'],
	['DEMO/boarding-pass.png', '機票'],
	['DEMO/taxi.jpg', '計程車乘車證明'],
	['DEMO/etag.jpg', '通行明細 (etag)'],
	["DEMO/qrcode.jpg", 'QRCODE發票'],
	['DEMO/mthsr.JPG', '超商高鐵車票'],
	['DEMO/thsr.jpg', '高鐵車票'],
	['DEMO/mtra.jpg', '超商台鐵車票'],
	['DEMO/tra.JPG', '台鐵車票'],
	['DEMO/ID-back.png', '身份證背面'],
	['DEMO/ID.png', '身份證正面'],
	['DEMO/health.png', '健保卡'],
	]

	TITLE = """<h1>Multimodal Playground 💬 輸入各種單據並選擇種類，解析得到各種關鍵資訊 </h1>"""
	SUBTITLE = """<h2><a href='https://deep-learning-101.github.io' target='_blank'>deep-learning-101.github.io</a> \| <a href='https://www.twman.org/AI' target='_blank'> AI </a> \| <a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D.</a> \| <a href='https://blog.twman.org/p/deeplearning101.html' target='_blank'>手把手帶你一起踩AI坑</a><br></h2>"""
	LINKS = """
	<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> \| <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> \| <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> \| <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
	<a href='https://blog.twman.org/2025/04/AI-Robot.html' target='_blank'>AI 陪伴機器人：2025 趨勢分析技術突破、市場潛力與未來展望</a> \| <a href='https://blog.twman.org/2025/04/FinanceGenAI.html' target='_blank'>金融科技新浪潮：生成式 AI (GenAI) 應用場景、效益與導入挑戰</a><br>
	<a href='https://blog.twman.org/2025/03/AIAgent.html' target='_blank'>避開 AI Agent 開發陷阱：常見問題、挑戰與解決方案 (實戰經驗)</a>：<a href="https://deep-learning-101.github.io/agent" target="_blank">探討多種 AI 代理人工具的應用經驗與挑戰，分享實用經驗與工具推薦。</a><br>
	<a href="https://blog.twman.org/2024/08/LLM.html" target="_blank">白話文手把手帶你科普 GenAI</a></b>：<a href="https://deep-learning-101.github.io/GenAI" target="_blank">淺顯介紹生成式人工智慧核心概念，強調硬體資源和數據的重要性。</a><br>
	<a href="https://blog.twman.org/2024/09/LLM.html" target="_blank">大型語言模型直接就打完收工？</a></b>：<a href="https://deep-learning-101.github.io/1010LLM" target="_blank">回顧 LLM 領域探索歷程，討論硬體升級對 AI 開發的重要性。</a><br>
	<a href="https://blog.twman.org/2024/07/RAG.html" target="_blank">檢索增強生成(RAG)不是萬靈丹之優化挑戰技巧</a></b>：<a href="https://deep-learning-101.github.io/RAG" target="_blank">探討 RAG 技術應用與挑戰，提供實用經驗分享和工具建議。</a><br>
	<a href="https://blog.twman.org/2024/02/LLM.html" target="_blank">大型語言模型 (LLM) 入門完整指南：原理、應用與未來</a></b>：<a href="https://deep-learning-101.github.io/0204LLM" target="_blank">探討多種 LLM 工具的應用與挑戰，強調硬體資源的重要性。</a><br>
	<a href="https://blog.twman.org/2023/04/GPT.html" target="_blank">解析探索大型語言模型：模型發展歷史、訓練及微調技術的 VRAM 估算</a></b>：<a href="https://deep-learning-101.github.io/GPU" target="_blank">探討 LLM 的發展與應用，強調硬體資源在開發中的關鍵作用。</a><br>
	<a href="https://blog.twman.org/2024/11/diffusion.html" target="_blank">Diffusion Model 完全解析：從原理、應用到實作 (AI 圖像生成)</a></b>；<a href="https://deep-learning-101.github.io/diffusion" target="_blank">深入探討影像生成與分割技術的應用，強調硬體資源的重要性。</a><br>
	<a href="https://blog.twman.org/2024/02/asr-tts.html" target="_blank">ASR/TTS 開發避坑指南：語音辨識與合成的常見挑戰與對策</a></b>：<a href="https://deep-learning-101.github.io/asr-tts" target="_blank">探討 ASR 和 TTS 技術應用中的問題，強調數據質量的重要性。</a><br>
	<a href="https://blog.twman.org/2021/04/NLP.html" target="_blank">那些 NLP 踩的坑</a></b>：<a href="https://deep-learning-101.github.io/nlp" target="_blank">分享 NLP 領域的實踐經驗，強調數據質量對模型效果的影響。</a><br>
	<a href="https://blog.twman.org/2021/04/ASR.html" target="_blank">那些語音處理踩的坑</a></b>：<a href="https://deep-learning-101.github.io/speech" target="_blank">分享語音處理領域的實務經驗，強調資料品質對模型效果的影響。</a><br>
	<a href="https://blog.twman.org/2020/05/DeepLearning.html" target="_blank">手把手學深度學習安裝環境</a></b>：<a href="https://deep-learning-101.github.io/101" target="_blank">詳細介紹在 Ubuntu 上安裝深度學習環境的步驟，分享實際操作經驗。</a><br>
	<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a><br>
	<a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
	"""

	# Gradio Blocks 寫法（全新修正）
	with gr.Blocks() as iface:
	gr.HTML(TITLE)
	gr.HTML(SUBTITLE)
	gr.HTML(LINKS)

	with gr.Row():
	file_input = gr.Image(label='圖片上傳', type='filepath')
	category = gr.Radio(label="Message Category", choices=[
	"機票", "計程車乘車證明", "通行明細 (etag)", "QRCODE發票",
	"超商高鐵車票", "高鐵車票", "超商台鐵車票", "台鐵車票", "旅行業代收轉付收據", "電子發票證明", "收據", "診斷證明書",
	"身份證正面", "身份證反面", "健保卡", "護照", "居留證", "行照", "勞保個人加保"
	])

	submit_btn = gr.Button("解析")
	output_text = gr.Textbox(label="解析結果", lines=10)

	submit_btn.click(fn=handle_input, inputs=[file_input, category], outputs=output_text)

	gr.Examples(
	examples=examples,
	inputs=[file_input, category],
	label="範例圖片與類型"
	)

	iface.launch()