File size: 11,825 Bytes
60549f8 0233ec6 0f11297 0e2dc36 0f11297 fc7298c 60549f8 0e2dc36 60549f8 3f7cb6a 8fe1c58 60549f8 0231211 60549f8 3f7cb6a 60549f8 10cc800 0231211 8fe1c58 5a9d2f3 8fe1c58 10cc800 8fe1c58 3f7cb6a 8fe1c58 60549f8 0233ec6 60549f8 4644c0b 60549f8 4644c0b c94de42 4644c0b 60549f8 0233ec6 f3c77fe 4644c0b f3c77fe 0233ec6 4644c0b 0233ec6 f3c77fe 0233ec6 4644c0b 0233ec6 60549f8 0233ec6 3f7cb6a ebeb106 3f7cb6a 60549f8 0e2dc36 60549f8 0fa1ae0 f6dc388 60549f8 ebdd83b 7adc92e cc6eaea 8cc52e7 a0bd2f4 c7c6990 4a6ec9d cc6eaea 0e2dc36 60549f8 0e2dc36 f8d2efa 0e2dc36 60549f8 0e2dc36 60549f8 0e2dc36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
import gradio as gr
import requests
import mimetypes
import json, os
import asyncio
import aiohttp
import subprocess
# Upgrade pip
def upgrade_pip():
    """Upgrade pip for the running interpreter (best effort).

    Runs ``<python> -m pip install --upgrade pip`` and prints whether it
    succeeded. Failures are reported on stdout and never raised, so a failed
    upgrade cannot stop the app from starting.
    """
    # Local import: use the documented ``sys.executable`` instead of reaching
    # for ``os.sys``, which only works because ``os`` happens to import ``sys``.
    import sys

    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--upgrade", "pip"]
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: pip exited with a non-zero status.
        # OSError: the interpreter could not be spawned at all.
        print("pip 升級失敗")
    else:
        print("pip 升級成功")
# Best-effort pip upgrade, run once when this module is loaded.
upgrade_pip()

# Backend settings come from the environment: the API key (whitespace
# stripped, empty string when unset) and the base URL (None when unset).
LLM_API = os.getenv("LLM_API", "").strip()
LLM_URL = os.getenv("LLM_URL")
# Sent as the "user" field of every chat request.
USER_ID = "HuggingFace Space"
async def send_chat_message(LLM_URL, LLM_API, category, file_url):
    """Send one streaming chat request with a remote image and return the answer.

    Posts to ``{LLM_URL}/chat-messages`` with ``response_mode`` set to
    ``"streaming"`` and concatenates the ``answer`` field of every
    ``data: `` line of the response.

    Args:
        LLM_URL: Base URL of the chat API (without ``/chat-messages``).
        LLM_API: API key, sent as a ``Bearer`` token.
        category: Text sent as the ``query`` field.
        file_url: Image URL, sent with ``transfer_method`` ``remote_url``.

    Returns:
        The concatenated answer text. On failure a human-readable string is
        returned instead: ``"API Error ..."`` for a non-200 status or an error
        reported inside the stream, ``"System Error: ..."`` for a missing base
        URL or a request exception, or the empty-response message. Request
        failures are reported in the returned string rather than raised.
    """
    if not LLM_URL:
        # Without a base URL the request would be sent to "None/chat-messages".
        print("Request Exception: LLM_URL is not configured")
        return "System Error: LLM_URL is not configured"

    # Tolerate a trailing slash on the configured base URL.
    endpoint = f"{str(LLM_URL).rstrip('/')}/chat-messages"

    print("--- 開始請求 ---")
    print(f"URL: {endpoint}")
    print(f"File URL: {file_url}")  # check that this URL is really publicly readable

    payload = {
        "inputs": {},
        "query": category,
        "conversation_id": "",
        "user": USER_ID,
        "response_mode": "streaming",
        "files": [
            {
                "type": "image",
                "transfer_method": "remote_url",
                "url": file_url,
            }
        ],
    }
    headers = {
        "Authorization": f"Bearer {LLM_API}",
        "Content-Type": "application/json",
    }

    answer_parts = []
    stream_error = None  # last error detail reported inside the stream, if any

    # Broad catch on purpose: this is the boundary to the UI, which expects a
    # string result rather than an exception.
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(endpoint, headers=headers, json=payload) as response:
                # [DEBUG 1] check the status code
                print(f"HTTP Status: {response.status}")
                if response.status != 200:
                    # On failure, read the error body and hand it back.
                    error_text = await response.text()
                    print(f"API Error Response: {error_text}")
                    return f"API Error {response.status}: {error_text}"

                # [DEBUG 2] watch the streamed response line by line
                async for line_bytes in response.content:
                    # errors="replace": one bad byte must not abort the whole
                    # stream and discard the answer collected so far.
                    line = line_bytes.decode("utf-8", errors="replace").strip()
                    if not line:
                        continue
                    # Raw data, printed for debugging (confirms something came back).
                    print(f"Raw Line: {line}")
                    if not line.startswith("data: "):
                        continue

                    try:
                        data = json.loads(line[6:])
                    except json.JSONDecodeError as e:
                        print(f"JSON Parse Error: {e} | Content: {line}")
                        continue
                    if not isinstance(data, dict):
                        # Valid JSON but not an event object; nothing to read.
                        continue

                    chunk = data.get("answer")
                    if isinstance(chunk, str):
                        answer_parts.append(chunk)

                    # Dify sometimes reports an error inside the data payload.
                    # NOTE(review): Dify's streaming docs describe an
                    # `event: "error"` record carrying `message`; confirm
                    # against the deployed version.
                    if "error" in data or data.get("event") == "error":
                        print(f"Data Error: {data}")
                        detail = data.get("message") or data.get("error")
                        if detail:
                            stream_error = detail
    except Exception as e:
        print(f"Request Exception: {e}")
        return f"System Error: {str(e)}"

    answer = "".join(answer_parts)
    if answer:
        return answer

    print("警告: 請求結束但 answer 為空")
    if stream_error is not None:
        # Surface the backend's own error instead of a generic empty message.
        return f"API Error: {stream_error}"
    return "No answer returned (Empty Response)"
async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to ``{LLM_URL}/files/upload`` as multipart form data.

    Args:
        LLM_URL: Base URL of the API.
        LLM_API: API key, sent as a ``Bearer`` token.
        file_path: Path of the file to upload.
        user_id: Value sent in the ``user`` form field.

    Returns:
        The decoded JSON body of the response, or an ``"Error: ..."`` string
        when the file does not exist, the endpoint answers 404, or the body
        is not valid JSON.
    """
    # Guard clause: nothing to upload.
    if not os.path.exists(file_path):
        return f"Error: File {file_path} not found"

    # Work out the file name and MIME type before touching the network.
    filename = os.path.basename(file_path)
    mime_type = mimetypes.guess_type(file_path)[0]
    print("Uploading file:", filename, "mime_type:", mime_type)

    upload_endpoint = f"{LLM_URL}/files/upload"
    auth_headers = {"Authorization": f"Bearer {LLM_API}"}

    with open(file_path, 'rb') as file_handle:
        async with aiohttp.ClientSession() as session:
            form = aiohttp.FormData()
            form.add_field('file', file_handle, filename=filename, content_type=mime_type)
            form.add_field('user', user_id)

            async with session.post(
                upload_endpoint, headers=auth_headers, data=form
            ) as response:
                if response.status == 404:
                    return "Error: Endpoint not found (404)"

                body = await response.text()
                print("Upload response:", body)
                try:
                    parsed = json.loads(body)
                except json.JSONDecodeError:
                    return "Error: Invalid JSON response"
                return parsed
async def handle_input(file_path, category):
    """Build the public URL for the selected image and request its analysis.

    Args:
        file_path: Local path of the image supplied by the UI; only its
            basename is used.
        category: Document category, forwarded as the chat query.

    Returns:
        The text returned by ``send_chat_message``, or an ``"Error: ..."``
        string when no image or no category was supplied.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # With nothing supplied the path is None and basename() would raise.
    if not file_path:
        return "Error: No image provided"
    if not category:
        return "Error: No category selected"

    # Take the file name only.
    filename = os.path.basename(file_path)
    # Use the HuggingFace repo URL directly; the name is percent-encoded so
    # spaces or non-ASCII characters cannot break the URL.
    # NOTE(review): the URL always points at the Space repo's DEMO folder, so
    # presumably only images that exist there can be fetched by the backend —
    # confirm how ad-hoc uploads are meant to work.
    file_url = (
        "https://huggingface.co/spaces/DeepLearning101/Multimodal-Playground"
        f"/blob/main/DEMO/{quote(filename)}?raw=true"
    )
    return await send_chat_message(LLM_URL, LLM_API, category, file_url)
# async def handle_input(file_path, category):
# # 如果 tmp 路徑不存在,改成 repo 內的 DEMO 路徑
# if not os.path.exists(file_path):
# file_path = os.path.join("DEMO", os.path.basename(file_path))
# upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
# if isinstance(upload_response, str) and upload_response.startswith("Error"):
# return upload_response
# file_id = upload_response.get("id")
# if not file_id:
# return "Error: No file ID returned from upload"
# return await send_chat_message(LLM_URL, LLM_API, category, file_id)
# UI components & data
# Each entry is [image path, category]. The category has to be one of the
# choices offered by the category Radio in the UI; the ID-back entry uses
# '身份證反面' because that is the label the Radio offers.
examples = [
    ['DEMO/Medical1.jpg', '診斷證明書'],
    ['DEMO/Medical2.jpg', '診斷證明書'],
    ['DEMO/passport.png', '護照'],
    ['DEMO/residence.png', '居留證'],
    ['DEMO/boarding-pass.png', '機票'],
    ['DEMO/taxi.jpg', '計程車乘車證明'],
    ['DEMO/etag.jpg', '通行明細 (etag)'],
    ['DEMO/qrcode.jpg', 'QRCODE發票'],
    ['DEMO/mthsr.JPG', '超商高鐵車票'],
    ['DEMO/thsr.jpg', '高鐵車票'],
    ['DEMO/mtra.jpg', '超商台鐵車票'],
    ['DEMO/tra.JPG', '台鐵車票'],
    ['DEMO/ID-back.png', '身份證反面'],
    ['DEMO/ID.png', '身份證正面'],
    ['DEMO/health.png', '健保卡'],
]
# Static HTML shown at the top of the page: heading, sub-heading with site
# links, and a list of related articles. The LINKS rows previously carried a
# closing </b> with no matching opening tag; those stray tags are removed.
TITLE = """<h1>Multimodal Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊 </h1>"""
SUBTITLE = """<h2><a href='https://deep-learning-101.github.io' target='_blank'>deep-learning-101.github.io</a> | <a href='https://www.twman.org/AI' target='_blank'> AI </a> | <a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D.</a> | <a href='https://blog.twman.org/p/deeplearning101.html' target='_blank'>手把手帶你一起踩AI坑</a><br></h2>"""
LINKS = """
<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
<a href='https://blog.twman.org/2025/04/AI-Robot.html' target='_blank'>AI 陪伴機器人:2025 趨勢分析技術突破、市場潛力與未來展望</a> | <a href='https://blog.twman.org/2025/04/FinanceGenAI.html' target='_blank'>金融科技新浪潮:生成式 AI (GenAI) 應用場景、效益與導入挑戰</a><br>
<a href='https://blog.twman.org/2025/03/AIAgent.html' target='_blank'>避開 AI Agent 開發陷阱:常見問題、挑戰與解決方案 (實戰經驗)</a>:<a href="https://deep-learning-101.github.io/agent" target="_blank">探討多種 AI 代理人工具的應用經驗與挑戰,分享實用經驗與工具推薦。</a><br>
<a href="https://blog.twman.org/2024/08/LLM.html" target="_blank">白話文手把手帶你科普 GenAI</a>:<a href="https://deep-learning-101.github.io/GenAI" target="_blank">淺顯介紹生成式人工智慧核心概念,強調硬體資源和數據的重要性。</a><br>
<a href="https://blog.twman.org/2024/09/LLM.html" target="_blank">大型語言模型直接就打完收工?</a>:<a href="https://deep-learning-101.github.io/1010LLM" target="_blank">回顧 LLM 領域探索歷程,討論硬體升級對 AI 開發的重要性。</a><br>
<a href="https://blog.twman.org/2024/07/RAG.html" target="_blank">檢索增強生成(RAG)不是萬靈丹之優化挑戰技巧</a>:<a href="https://deep-learning-101.github.io/RAG" target="_blank">探討 RAG 技術應用與挑戰,提供實用經驗分享和工具建議。</a><br>
<a href="https://blog.twman.org/2024/02/LLM.html" target="_blank">大型語言模型 (LLM) 入門完整指南:原理、應用與未來</a>:<a href="https://deep-learning-101.github.io/0204LLM" target="_blank">探討多種 LLM 工具的應用與挑戰,強調硬體資源的重要性。</a><br>
<a href="https://blog.twman.org/2023/04/GPT.html" target="_blank">解析探索大型語言模型:模型發展歷史、訓練及微調技術的 VRAM 估算</a>:<a href="https://deep-learning-101.github.io/GPU" target="_blank">探討 LLM 的發展與應用,強調硬體資源在開發中的關鍵作用。</a><br>
<a href="https://blog.twman.org/2024/11/diffusion.html" target="_blank">Diffusion Model 完全解析:從原理、應用到實作 (AI 圖像生成)</a>;<a href="https://deep-learning-101.github.io/diffusion" target="_blank">深入探討影像生成與分割技術的應用,強調硬體資源的重要性。</a><br>
<a href="https://blog.twman.org/2024/02/asr-tts.html" target="_blank">ASR/TTS 開發避坑指南:語音辨識與合成的常見挑戰與對策</a>:<a href="https://deep-learning-101.github.io/asr-tts" target="_blank">探討 ASR 和 TTS 技術應用中的問題,強調數據質量的重要性。</a><br>
<a href="https://blog.twman.org/2021/04/NLP.html" target="_blank">那些 NLP 踩的坑</a>:<a href="https://deep-learning-101.github.io/nlp" target="_blank">分享 NLP 領域的實踐經驗,強調數據質量對模型效果的影響。</a><br>
<a href="https://blog.twman.org/2021/04/ASR.html" target="_blank">那些語音處理踩的坑</a>:<a href="https://deep-learning-101.github.io/speech" target="_blank">分享語音處理領域的實務經驗,強調資料品質對模型效果的影響。</a><br>
<a href="https://blog.twman.org/2020/05/DeepLearning.html" target="_blank">手把手學深度學習安裝環境</a>:<a href="https://deep-learning-101.github.io/101" target="_blank">詳細介紹在 Ubuntu 上安裝深度學習環境的步驟,分享實際操作經驗。</a><br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a><br>
<a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
"""
# Gradio Blocks layout
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed (image + category side by side in the Row, everything else
# stacked underneath) — confirm against the intended layout.
with gr.Blocks() as iface:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)

    with gr.Row():
        # type='filepath' hands the handler a path string instead of pixel data.
        file_input = gr.Image(label='圖片上傳', type='filepath')
        category = gr.Radio(
            label="Message Category",
            choices=[
                "機票", "計程車乘車證明", "通行明細 (etag)", "QRCODE發票",
                "超商高鐵車票", "高鐵車票", "超商台鐵車票", "台鐵車票", "旅行業代收轉付收據", "電子發票證明", "收據", "診斷證明書",
                "身份證正面", "身份證反面", "健保卡", "護照", "居留證", "行照", "勞保個人加保",
            ],
        )

    submit_btn = gr.Button("解析")
    output_text = gr.Textbox(label="解析結果", lines=10)
    # The handler receives (image path, category) and its result fills the textbox.
    submit_btn.click(fn=handle_input, inputs=[file_input, category], outputs=output_text)

    gr.Examples(
        examples=examples,
        inputs=[file_input, category],
        label="範例圖片與類型",
    )

# Start the server only when run as a script, so importing this module does
# not block on launch().
if __name__ == "__main__":
    iface.launch()