"""Gradio front end that sends a document image and its category to an LLM API.

The user picks an image and a document category; the app posts the category
as the query, together with a public URL for the image, to
``{LLM_URL}/chat-messages`` and shows the streamed answer.

Configuration is read from the ``LLM_URL`` and ``LLM_API`` environment
variables.
"""

import asyncio
import json
import mimetypes
import os
import subprocess
import sys
from urllib.parse import quote

import aiohttp
import gradio as gr
import requests


def upgrade_pip():
    """Upgrade pip for the running interpreter; failures are only reported."""
    try:
        # sys.executable ensures the pip of *this* interpreter is upgraded.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--upgrade", "pip"]
        )
        print("pip 升級成功")
    except (subprocess.CalledProcessError, OSError):
        # Best effort: a failed upgrade must not stop the app from starting.
        print("pip 升級失敗")


upgrade_pip()

LLM_API = os.environ.get("LLM_API", "").strip()
# Normalised so that f"{LLM_URL}/chat-messages" never yields "None/..." or "//".
LLM_URL = os.environ.get("LLM_URL", "").strip().rstrip("/")
USER_ID = "HuggingFace Space"

# Public location of the bundled demo images.
# NOTE(review): Hugging Face normally serves raw file bytes from
# ".../resolve/main/..."; confirm that this "/blob/...?raw=true" form returns
# the image itself rather than an HTML page.
DEMO_BASE_URL = "https://huggingface.co/spaces/DeepLearning101/Multimodal-Playground/blob/main/DEMO"

_SSE_DATA_PREFIX = "data: "


def _extract_answer_chunk(line_bytes):
    """Return the ``answer`` text carried by one streamed line, or ``""``.

    Lines that are not ``data: <json>`` events, that hold invalid JSON, or
    whose payload has no string ``answer`` field contribute nothing.
    """
    line = line_bytes.decode("utf-8", errors="replace").strip()
    if not line.startswith(_SSE_DATA_PREFIX):
        return ""
    try:
        data = json.loads(line[len(_SSE_DATA_PREFIX):])
    except json.JSONDecodeError:
        return ""
    if not isinstance(data, dict):
        return ""
    chunk = data.get("answer")
    return chunk if isinstance(chunk, str) else ""


async def send_chat_message(LLM_URL, LLM_API, category, file_url):
    """Post a chat message with one remote image and collect the streamed answer.

    Args:
        LLM_URL: Base URL of the API; ``/chat-messages`` is appended.
        LLM_API: API key sent as a bearer token.
        category: Document category, sent as the ``query``.
        file_url: Public URL of the image, sent with ``remote_url`` transfer.

    Returns:
        The concatenated ``answer`` fragments, ``"No answer returned"`` when
        the stream carried none, or an ``"Error: ..."`` string when the server
        answers with an HTTP error status.
    """
    payload = {
        "inputs": {},
        "query": category,
        "conversation_id": "",
        "user": USER_ID,
        "response_mode": "streaming",
        "files": [
            {
                "type": "image",
                "transfer_method": "remote_url",
                "url": file_url,
            }
        ],
    }
    headers = {
        "Authorization": f"Bearer {LLM_API}",
        "Content-Type": "application/json",
    }

    chunks = []
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{LLM_URL}/chat-messages", headers=headers, json=payload
        ) as response:
            # Surface server-side failures instead of hiding them behind
            # "No answer returned".
            if response.status >= 400:
                error_text = await response.text()
                return f"Error: Server returned status {response.status} - {error_text}"
            async for line_bytes in response.content:
                chunk = _extract_answer_chunk(line_bytes)
                if chunk:
                    chunks.append(chunk)
    return "".join(chunks) or "No answer returned"


# NOTE: an earlier revision of send_chat_message() took an uploaded file id
# (sent as "upload_file_id") and read the "thought" field of a non-streaming
# JSON response; it was replaced by the remote-URL streaming variant above.


async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to ``{LLM_URL}/files/upload`` as multipart form data.

    Currently not called by handle_input(); kept for the upload-based flow.

    Args:
        LLM_URL: Base URL of the API.
        LLM_API: API key sent as a bearer token.
        file_path: Path of the file to upload.
        user_id: Value of the ``user`` form field.

    Returns:
        The decoded JSON response on success, otherwise an ``"Error: ..."``
        string (missing file, 404, or a body that is not valid JSON).
    """
    if not os.path.exists(file_path):
        return f"Error: File {file_path} not found"

    # The MIME type is guessed from the file name; it may be None.
    mime_type, _ = mimetypes.guess_type(file_path)
    filename = os.path.basename(file_path)
    print("Uploading file:", filename, "mime_type:", mime_type)

    with open(file_path, 'rb') as f:
        async with aiohttp.ClientSession() as session:
            form_data = aiohttp.FormData()
            form_data.add_field('file', f, filename=filename, content_type=mime_type)
            form_data.add_field('user', user_id)
            async with session.post(
                f"{LLM_URL}/files/upload",
                headers={"Authorization": f"Bearer {LLM_API}"},
                data=form_data,
            ) as response:
                if response.status == 404:
                    return "Error: Endpoint not found (404)"
                response_text = await response.text()
                print("Upload response:", response_text)
                try:
                    return json.loads(response_text)
                except json.JSONDecodeError:
                    return "Error: Invalid JSON response"


async def handle_input(file_path, category):
    """Gradio click handler: analyse the selected image as ``category``.

    Only the base name of ``file_path`` is used; it is mapped to the matching
    file under the Space repository's ``DEMO`` folder and that URL is sent to
    the LLM.

    Args:
        file_path: Local path supplied by the image component (None when no
            image is selected).
        category: Selected document category (None when nothing is selected).

    Returns:
        The LLM answer, or an ``"Error: ..."`` string for missing input or
        missing configuration.
    """
    if not file_path:
        return "Error: No image provided"
    if not category:
        return "Error: No category selected"
    if not LLM_URL:
        return "Error: LLM_URL environment variable is not set"

    filename = os.path.basename(file_path)
    # NOTE(review): the URL always points into the repo's DEMO folder, so an
    # image uploaded by the user (not one of the bundled examples) presumably
    # has no file behind this URL — confirm whether uploads should go through
    # upload_file() instead, as an earlier revision did.
    file_url = f"{DEMO_BASE_URL}/{quote(filename)}?raw=true"
    return await send_chat_message(LLM_URL, LLM_API, category, file_url)


# UI components and data

# Every example category must be one of CATEGORY_CHOICES, otherwise the Radio
# component cannot represent it.
examples = [
    ['DEMO/Medical1.jpg', '診斷證明書'],
    ['DEMO/Medical2.jpg', '診斷證明書'],
    ['DEMO/passport.png', '護照'],
    ['DEMO/residence.png', '居留證'],
    ['DEMO/boarding-pass.png', '機票'],
    ['DEMO/taxi.jpg', '計程車乘車證明'],
    ['DEMO/etag.jpg', '通行明細 (etag)'],
    ["DEMO/qrcode.jpg", 'QRCODE發票'],
    ['DEMO/mthsr.JPG', '超商高鐵車票'],
    ['DEMO/thsr.jpg', '高鐵車票'],
    ['DEMO/mtra.jpg', '超商台鐵車票'],
    ['DEMO/tra.JPG', '台鐵車票'],
    # Was '身份證背面', which is not among the Radio choices below.
    ['DEMO/ID-back.png', '身份證反面'],
    ['DEMO/ID.png', '身份證正面'],
    ['DEMO/health.png', '健保卡'],
]

CATEGORY_CHOICES = [
    "機票",
    "計程車乘車證明",
    "通行明細 (etag)",
    "QRCODE發票",
    "超商高鐵車票",
    "高鐵車票",
    "超商台鐵車票",
    "台鐵車票",
    "旅行業代收轉付收據",
    "電子發票證明",
    "收據",
    "診斷證明書",
    "身份證正面",
    "身份證反面",
    "健保卡",
    "護照",
    "居留證",
    "行照",
    "勞保個人加保",
]

TITLE = """

Multimodal Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊

"""

SUBTITLE = """

deep-learning-101.github.io | AI | TonTon Huang Ph.D. | 手把手帶你一起踩AI坑

"""

LINKS = """ Deep Learning 101 Github | Deep Learning 101 | 台灣人工智慧社團 FB | YouTube
AI 陪伴機器人:2025 趨勢分析技術突破、市場潛力與未來展望 | 金融科技新浪潮:生成式 AI (GenAI) 應用場景、效益與導入挑戰
避開 AI Agent 開發陷阱:常見問題、挑戰與解決方案 (實戰經驗)探討多種 AI 代理人工具的應用經驗與挑戰,分享實用經驗與工具推薦。
白話文手把手帶你科普 GenAI淺顯介紹生成式人工智慧核心概念,強調硬體資源和數據的重要性。
大型語言模型直接就打完收工?回顧 LLM 領域探索歷程,討論硬體升級對 AI 開發的重要性。
檢索增強生成(RAG)不是萬靈丹之優化挑戰技巧探討 RAG 技術應用與挑戰,提供實用經驗分享和工具建議。
大型語言模型 (LLM) 入門完整指南:原理、應用與未來探討多種 LLM 工具的應用與挑戰,強調硬體資源的重要性。
解析探索大型語言模型:模型發展歷史、訓練及微調技術的 VRAM 估算探討 LLM 的發展與應用,強調硬體資源在開發中的關鍵作用。
Diffusion Model 完全解析:從原理、應用到實作 (AI 圖像生成)深入探討影像生成與分割技術的應用,強調硬體資源的重要性。
ASR/TTS 開發避坑指南:語音辨識與合成的常見挑戰與對策探討 ASR 和 TTS 技術應用中的問題,強調數據質量的重要性。
那些 NLP 踩的坑分享 NLP 領域的實踐經驗,強調數據質量對模型效果的影響。
那些語音處理踩的坑分享語音處理領域的實務經驗,強調資料品質對模型效果的影響。
手把手學深度學習安裝環境詳細介紹在 Ubuntu 上安裝深度學習環境的步驟,分享實際操作經驗。
用PPOCRLabel來幫PaddleOCR做OCR的微調和標註
基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析
"""

# Gradio Blocks layout.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost — confirm which components belong inside the Row.
with gr.Blocks() as iface:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)

    with gr.Row():
        file_input = gr.Image(label='圖片上傳', type='filepath')
        category = gr.Radio(label="Message Category", choices=CATEGORY_CHOICES)

    submit_btn = gr.Button("解析")
    output_text = gr.Textbox(label="解析結果", lines=10)

    submit_btn.click(
        fn=handle_input,
        inputs=[file_input, category],
        outputs=output_text,
    )

    gr.Examples(
        examples=examples,
        inputs=[file_input, category],
        label="範例圖片與類型",
    )

iface.launch()