File size: 10,994 Bytes
60549f8
 
 
 
0233ec6
 
0f11297
 
a334638
 
0f11297
 
 
 
 
 
 
 
 
fc7298c
a334638
0e2dc36
60549f8
a334638
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fe1c58
a334638
 
 
 
 
 
 
 
 
60549f8
 
 
 
 
0231211
60549f8
 
 
a334638
 
60549f8
 
 
10cc800
a334638
0231211
a334638
8fe1c58
 
 
 
 
 
 
 
 
 
 
 
 
a334638
5a9d2f3
8fe1c58
 
 
 
 
 
 
 
a334638
 
8fe1c58
 
 
a334638
4644c0b
a334638
60549f8
a334638
0233ec6
a334638
 
3f7cb6a
a334638
 
ebeb106
a334638
 
 
 
 
 
 
 
 
 
60549f8
0e2dc36
60549f8
0fa1ae0
 
f6dc388
 
 
 
 
 
 
 
 
 
 
 
 
60549f8
 
ebdd83b
7adc92e
cc6eaea
8cc52e7
a0bd2f4
c7c6990
 
 
 
 
 
 
 
 
 
 
4a6ec9d
 
cc6eaea
0e2dc36
60549f8
 
cd4f02a
0e2dc36
 
a334638
 
0e2dc36
a334638
 
0e2dc36
 
a334638
0e2dc36
 
 
 
 
60549f8
0e2dc36
a334638
60549f8
 
0e2dc36
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import gradio as gr
import requests
import mimetypes
import json, os
import asyncio
import aiohttp
import subprocess

# --- 1. 環境設定 ---
# pip 升級 (通常 Space 啟動時跑一次即可)
def upgrade_pip():
    """Upgrade pip for the running interpreter (best effort).

    Runs ``<python> -m pip install --upgrade pip`` in a subprocess and prints
    a success or failure message. Failures are reported but never raised, so
    the caller keeps going even when the upgrade cannot be performed.
    """
    # Local import: `os.sys` only works because the os module happens to
    # import sys internally; it is not a documented attribute.
    import sys

    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
        print("pip 升級成功")
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: pip exited with a non-zero status.
        # OSError: the interpreter could not be started at all.
        print("pip 升級失敗")

# Best-effort pip upgrade, executed once when this module is loaded.
upgrade_pip()

# Connection settings are read from environment variables; surrounding
# whitespace is stripped and a missing variable becomes an empty string.
LLM_API = os.getenv("LLM_API", "").strip()
LLM_URL = os.getenv("LLM_URL", "").strip()
# Fixed user identifier sent along with the upload and chat requests.
USER_ID = "HuggingFace Space"

# --- 2. 上傳檔案函式 (修正版) ---
async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to the LLM server and return the decoded reply.

    Args:
        LLM_URL: Base URL of the server; ``/files/upload`` is appended to it.
        LLM_API: API key, sent as a ``Bearer`` token in the Authorization header.
        file_path: Path of the local file to upload.
        user_id: Value sent in the ``user`` form field.

    Returns:
        The JSON-decoded response body when the server answers 200 or 201.
        On any failure (missing file, other HTTP status, or an exception
        while sending/decoding) a dict of the form ``{"error": <message>}``.
        This function does not raise for request failures.
    """
    if not os.path.exists(file_path):
        return {"error": f"File {file_path} not found"}

    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        # Unknown extension: fall back to a generic binary content type.
        mime_type = 'application/octet-stream'

    filename = os.path.basename(file_path)
    print(f"正在上傳檔案: {filename} ({mime_type})")

    try:
        # Keep the file open for the whole request and close it afterwards,
        # including when the session or the request fails part-way through.
        with open(file_path, 'rb') as file_handle:
            data = aiohttp.FormData()
            data.add_field('file', file_handle, filename=filename, content_type=mime_type)
            data.add_field('user', user_id)

            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{LLM_URL}/files/upload",
                    headers={"Authorization": f"Bearer {LLM_API}"},
                    data=data
                ) as response:
                    response_text = await response.text()
                    print(f"上傳回應狀態: {response.status}")

                    if response.status not in (200, 201):
                        print(f"上傳失敗回應: {response_text}")
                        return {"error": f"Upload failed: {response.status} - {response_text}"}

                    return json.loads(response_text)

    except Exception as e:
        # Boundary handler: network errors, unreadable files and invalid JSON
        # are all reported to the caller as an error dict.
        print(f"上傳過程發生例外: {e}")
        return {"error": str(e)}

# --- 3. 對話請求函式 (改用 file_id) ---
async def send_chat_message(LLM_URL, LLM_API, category, file_id):
    """Send a chat request that references an uploaded file and collect the answer.

    Posts a JSON payload to ``{LLM_URL}/chat-messages`` with ``category`` as
    the query and ``file_id`` as the ``upload_file_id`` of a single image
    entry, requesting ``streaming`` response mode. The response body is read
    line by line; every line starting with ``data: `` is decoded as JSON and
    its ``answer`` fragments are concatenated.

    Args:
        LLM_URL: Base URL of the server.
        LLM_API: API key, sent as a ``Bearer`` token.
        category: Text sent as the ``query`` field.
        file_id: Identifier of a previously uploaded file.

    Returns:
        The concatenated answer text, or ``"No answer returned."`` when no
        answer fragment was received. Failures are returned as strings too:
        ``"Chat Error <status>: <body>"`` for a non-200 response,
        ``"Stream Error: <event>"`` when an event contains an ``error`` key,
        and ``"Request Exception: <message>"`` for any exception raised while
        sending or reading.
    """
    payload = {
        "inputs": {},
        "query": category,
        "conversation_id": "",
        "user": USER_ID,
        "response_mode": "streaming",
        "files": [
            {
                "type": "image",
                # The file is referenced by its upload id rather than by URL.
                "transfer_method": "local_file",
                "upload_file_id": file_id
            }
        ]
    }

    print(f"發送請求中... (File ID: {file_id})")
    answer_parts = []

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{LLM_URL}/chat-messages",
                headers={
                    "Authorization": f"Bearer {LLM_API}",
                    "Content-Type": "application/json"
                },
                json=payload
            ) as response:

                if response.status != 200:
                    error_text = await response.text()
                    return f"Chat Error {response.status}: {error_text}"

                async for line_bytes in response.content:
                    line = line_bytes.decode("utf-8").strip()
                    if not line.startswith("data: "):
                        continue

                    try:
                        data = json.loads(line[6:])
                    except ValueError:
                        # Not valid JSON (json.JSONDecodeError is a ValueError): skip the line.
                        continue

                    if not isinstance(data, dict):
                        # Only JSON objects can carry "answer"/"error" keys.
                        continue

                    fragment = data.get("answer")
                    if isinstance(fragment, str):
                        answer_parts.append(fragment)
                    if "error" in data:
                        return f"Stream Error: {data}"

    except Exception as e:
        return f"Request Exception: {str(e)}"

    return "".join(answer_parts) or "No answer returned."

# --- 4. 主處理邏輯 ---
async def handle_input(file_path, category):
    """Upload the chosen image and ask the LLM to parse it as ``category``.

    Args:
        file_path: Path of the image on disk; falsy when nothing was uploaded.
        category: Selected document type, sent as the chat query; falsy when
            nothing was selected.

    Returns:
        The text returned by ``send_chat_message`` on success, otherwise a
        user-facing message describing what is missing or what went wrong
        during the upload.
    """
    if not file_path:
        return "請先上傳圖片"

    if not category:
        # Without a document type the chat query would be empty, so stop
        # before uploading anything.
        return "請先選擇文件類型"

    # Step 1: upload the file to obtain its id.
    upload_result = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)

    # upload_file reports failures as a dict containing an "error" key.
    if "error" in upload_result:
        return f"上傳錯誤: {upload_result['error']}"

    file_id = upload_result.get("id")
    if not file_id:
        return f"錯誤: 上傳成功但未回傳 ID。回應: {upload_result}"

    # Step 2: send the chat request that references the uploaded file.
    return await send_chat_message(LLM_URL, LLM_API, category, file_id)

# UI components & data
# Example rows for the gallery: [image path, document type]. Each document
# type must be one of the choices offered by the "文件類型" radio group,
# otherwise selecting the example sets a value the radio does not accept.
examples = [
    ['DEMO/Medical1.jpg', '診斷證明書'],
    ['DEMO/Medical2.jpg', '診斷證明書'],
    ['DEMO/passport.png', '護照'],
    ['DEMO/residence.png', '居留證'],
    ['DEMO/boarding-pass.png', '機票'],
    ['DEMO/taxi.jpg', '計程車乘車證明'],
    ['DEMO/etag.jpg', '通行明細 (etag)'],
    ["DEMO/qrcode.jpg", 'QRCODE發票'],
    ['DEMO/mthsr.JPG', '超商高鐵車票'],
    ['DEMO/thsr.jpg', '高鐵車票'],
    ['DEMO/mtra.jpg', '超商台鐵車票'],
    ['DEMO/tra.JPG', '台鐵車票'],
    ['DEMO/ID-back.png', '身份證反面'],  # label aligned with the radio choice
    ['DEMO/ID.png', '身份證正面'],
    ['DEMO/health.png', '健保卡'],
]

# HTML fragments for the page header.
# TITLE and LINKS are rendered with gr.HTML in the UI block; SUBTITLE is not
# referenced in the visible code (NOTE(review): confirm whether it is still needed).
TITLE = """<h1>Multimodal Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊 </h1>"""
SUBTITLE = """<h2><a href='https://deep-learning-101.github.io' target='_blank'>deep-learning-101.github.io</a> | <a href='https://www.twman.org/AI' target='_blank'> AI </a> | <a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D.</a> | <a href='https://blog.twman.org/p/deeplearning101.html' target='_blank'>手把手帶你一起踩AI坑</a><br></h2>"""
# Stray closing </b> tags (no matching <b>) were removed from the link list.
LINKS = """
<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
<a href='https://blog.twman.org/2025/04/AI-Robot.html' target='_blank'>AI 陪伴機器人:2025 趨勢分析技術突破、市場潛力與未來展望</a> | <a href='https://blog.twman.org/2025/04/FinanceGenAI.html' target='_blank'>金融科技新浪潮:生成式 AI (GenAI) 應用場景、效益與導入挑戰</a><br>
<a href='https://blog.twman.org/2025/03/AIAgent.html' target='_blank'>避開 AI Agent 開發陷阱:常見問題、挑戰與解決方案 (實戰經驗)</a>:<a href="https://deep-learning-101.github.io/agent" target="_blank">探討多種 AI 代理人工具的應用經驗與挑戰,分享實用經驗與工具推薦。</a><br>
<a href="https://blog.twman.org/2024/08/LLM.html" target="_blank">白話文手把手帶你科普 GenAI</a>:<a href="https://deep-learning-101.github.io/GenAI" target="_blank">淺顯介紹生成式人工智慧核心概念,強調硬體資源和數據的重要性。</a><br>
<a href="https://blog.twman.org/2024/09/LLM.html" target="_blank">大型語言模型直接就打完收工?</a>:<a href="https://deep-learning-101.github.io/1010LLM" target="_blank">回顧 LLM 領域探索歷程,討論硬體升級對 AI 開發的重要性。</a><br>
<a href="https://blog.twman.org/2024/07/RAG.html" target="_blank">檢索增強生成(RAG)不是萬靈丹之優化挑戰技巧</a>:<a href="https://deep-learning-101.github.io/RAG" target="_blank">探討 RAG 技術應用與挑戰,提供實用經驗分享和工具建議。</a><br>
<a href="https://blog.twman.org/2024/02/LLM.html" target="_blank">大型語言模型 (LLM) 入門完整指南:原理、應用與未來</a>:<a href="https://deep-learning-101.github.io/0204LLM" target="_blank">探討多種 LLM 工具的應用與挑戰,強調硬體資源的重要性。</a><br>
<a href="https://blog.twman.org/2023/04/GPT.html" target="_blank">解析探索大型語言模型:模型發展歷史、訓練及微調技術的 VRAM 估算</a>:<a href="https://deep-learning-101.github.io/GPU" target="_blank">探討 LLM 的發展與應用,強調硬體資源在開發中的關鍵作用。</a><br>
<a href="https://blog.twman.org/2024/11/diffusion.html" target="_blank">Diffusion Model 完全解析:從原理、應用到實作 (AI 圖像生成)</a>;<a href="https://deep-learning-101.github.io/diffusion" target="_blank">深入探討影像生成與分割技術的應用,強調硬體資源的重要性。</a><br>
<a href="https://blog.twman.org/2024/02/asr-tts.html" target="_blank">ASR/TTS 開發避坑指南:語音辨識與合成的常見挑戰與對策</a>:<a href="https://deep-learning-101.github.io/asr-tts" target="_blank">探討 ASR 和 TTS 技術應用中的問題,強調數據質量的重要性。</a><br>
<a href="https://blog.twman.org/2021/04/NLP.html" target="_blank">那些 NLP 踩的坑</a>:<a href="https://deep-learning-101.github.io/nlp" target="_blank">分享 NLP 領域的實踐經驗,強調數據質量對模型效果的影響。</a><br>
<a href="https://blog.twman.org/2021/04/ASR.html" target="_blank">那些語音處理踩的坑</a>:<a href="https://deep-learning-101.github.io/speech" target="_blank">分享語音處理領域的實務經驗,強調資料品質對模型效果的影響。</a><br>
<a href="https://blog.twman.org/2020/05/DeepLearning.html" target="_blank">手把手學深度學習安裝環境</a>:<a href="https://deep-learning-101.github.io/101" target="_blank">詳細介紹在 Ubuntu 上安裝深度學習環境的步驟,分享實際操作經驗。</a><br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a><br>
<a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
"""

# Build the Gradio UI: header HTML, an image/document-type input row, a submit
# button, a result box and an examples gallery, then start the app.
with gr.Blocks() as iface:
    gr.HTML(TITLE)
    gr.HTML(LINKS) # Reference links; currently enabled (comment this line out to hide them)

    with gr.Row():
        file_input = gr.Image(label='圖片上傳', type='filepath') # type='filepath': handle_input receives a path it can pass to upload_file
        category = gr.Radio(label="文件類型", choices=[
            "機票", "計程車乘車證明", "通行明細 (etag)", "QRCODE發票",
            "超商高鐵車票", "高鐵車票", "超商台鐵車票", "台鐵車票", 
            "診斷證明書", "身份證正面", "身份證反面", "健保卡", "護照", "居留證"
        ])
    
    submit_btn = gr.Button("解析", variant="primary")
    output_text = gr.Textbox(label="解析結果", lines=10)

    # Clicking the button runs handle_input(file_path, category) and shows its return value.
    submit_btn.click(fn=handle_input, inputs=[file_input, category], outputs=output_text)

    # Example rows fill the same two inputs used by the submit handler.
    gr.Examples(
        examples=examples,
        inputs=[file_input, category],
        label="點擊範例直接測試"
    )

# Start the app as soon as the module is executed.
iface.launch()