"""Gradio app that extracts product data from retail price-tag photos.

Each uploaded image is sent to an OpenAI chat model together with a strict
JSON schema; the returned products are shown as JSON and exported to Excel.
"""

import base64
import io
import json
import mimetypes
import os
import pathlib
import tempfile

import pandas as pd
import gradio as gr
from openai import OpenAI

# ---------- 1. Basic configuration ----------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError("Please set the OPENAI_API_KEY environment variable.")

client = OpenAI(api_key=OPENAI_API_KEY)
# NOTE(review): MODEL and MAX_TOKENS are not referenced anywhere in this file;
# the model is chosen through the UI radio button instead.
MODEL = "o3"
MAX_TOKENS = 1024

# ---------- 2. JSON Schema ----------
# Schema handed to the API as `response_format.json_schema`. Every field is a
# required string; the descriptions instruct the model to emit a fixed
# "unrecognizable" marker when a value is absent or unreadable.
product_schema = {
    "name": "RetailPriceTagSchema",
    "description": "Products extracted from retail price-tag photos.",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "products": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "完整商品名稱;若無或無法辨識請輸出『無法辨識』",
                        },
                        "list_price": {
                            "type": "string",
                            "description": "原價 (非促銷);若有標促銷價但沒標原價或無法辨識請輸出『無法辨識』",
                        },
                        "promo_price": {
                            "type": "string",
                            "description": "促銷/特價;若無或無法辨識請輸出『無法辨識』",
                        },
                        "weight": {
                            "type": "string",
                            "description": "總重量 (g / kg);若無或無法辨識請輸出『無法辨識』",
                        },
                        "volume": {
                            "type": "string",
                            "description": "總量(件數/入數/顆,或類似數量單位);若無或無法辨識請輸出『無法辨識』",
                        },
                        "barcode": {
                            "type": "string",
                            "description": "條碼號 EAN/UPC;;若無或無法辨識請輸出『無法辨識』",
                        },
                        "item_code": {
                            "type": "string",
                            "description": "通路自用貨號(多為英數混排,類似F0500)/PLU;若無或無法辨識請輸出『無法辨識』",
                        },
                    },
                    "required": [
                        "name",
                        "list_price",
                        "promo_price",
                        "weight",
                        "volume",
                        "barcode",
                        "item_code",
                    ],
                    "additionalProperties": False,
                },
            }
        },
        "required": ["products"],
        "additionalProperties": False,
    },
}

# System prompt sent with every request. The text is runtime data and is kept
# character-for-character; it is split into adjacent literals only to keep
# source lines readable.
# NOTE(review): the prompt's internal line breaks look like they were
# flattened to spaces at some point — confirm against the intended prompt.
system_prompt = (
    "你是一個零售標價解析助手,請嚴格根據圖片分析商品標示資訊,商品名稱顯示標示上的原文(若為中文則務必顯示中文):  規則: # 判斷規則 (務必遵守) "
    "1. 價牌上如果以『促銷價』『special price』『sale』等字樣作為抬頭,或者整張牌為紅/黃底高亮,則此價格一律填入 'promo_price','list_price' 改寫『無法辨識』。 "
    "2. 出現『×2』『x3』『3包』等倍數或件數,寫入 'volume';重量只留單包重量 (例如「50g ±3」→ weight=50g,volume=3包)。 "
    "3. 僅當價牌明示「原價/建議售價/定價/刪除線價格」才填 list_price,否則填『無法辨識』… "
    "4. \n"
    "若 weight 含 (10顆),把括號內容移到 volume,weight 只留 g/kg… "
)


# ---------- 3. Helpers ----------
def encode_image_to_data_url(img_path: str) -> str:
    """Return the file at *img_path* as a base64 ``data:`` URL.

    The MIME type is looked up with ``mimetypes`` so that common extensions
    map to their registered type (``.jpg`` -> ``image/jpeg``). If the lookup
    does not yield an image type, the lower-cased file suffix is used as the
    subtype instead.
    """
    mime, _ = mimetypes.guess_type(img_path)
    if mime is None or not mime.startswith("image/"):
        # Extension unknown to the local MIME table: derive the subtype from
        # the suffix so the URL still names the extension.
        mime = "image/" + pathlib.Path(img_path).suffix.lstrip(".").lower()
    with open(img_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return f"data:{mime};base64,{b64}"


def call_gpt_model(model_name: str, image_path: str) -> dict:
    """Send one image to *model_name* and return the parsed JSON reply.

    The request consists of the system prompt plus the image as a data URL,
    with ``product_schema`` as the JSON-schema response format.

    Raises:
        RuntimeError: if the reply carries no text content to parse.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": encode_image_to_data_url(image_path)},
                }
            ],
        },
    ]
    params = {
        "model": model_name,
        "messages": messages,
        "response_format": {
            "type": "json_schema",
            "json_schema": product_schema,
        },
    }
    # Temperature is pinned only for gpt-4o; presumably the other selectable
    # model does not accept a custom temperature — TODO confirm.
    if model_name == "gpt-4o":
        params["temperature"] = 0.0

    resp = client.chat.completions.create(**params)
    message = resp.choices[0].message
    if message.content is None:
        # Without this guard json.loads(None) fails with an opaque TypeError.
        refusal = getattr(message, "refusal", None)
        raise RuntimeError(
            f"Model returned no JSON content (refusal: {refusal!r})."
        )
    return json.loads(message.content)


def process(images, model_name):
    """Parse every uploaded image and return ``(json_str, xlsx_path)``.

    Args:
        images: Uploaded files from the Gradio ``Files`` component; ``None``
            or an empty list yields an empty result. Entries may be path
            strings or objects exposing the path as ``.name``.
            NOTE(review): which of the two Gradio passes depends on the
            installed version — verify.
        model_name: Model identifier forwarded to ``call_gpt_model``.

    Returns:
        A tuple of the pretty-printed JSON array of all extracted products
        and the path of a temporary ``.xlsx`` file with the same rows.
    """
    all_items = []
    for img in images or []:
        if isinstance(img, (str, os.PathLike)):
            img_path = os.fspath(img)
        else:
            img_path = img.name
        payload = call_gpt_model(model_name, img_path)
        all_items.extend(payload.get("products", []))

    json_str = json.dumps(all_items, ensure_ascii=False, indent=2)

    # Reserve a temp path and close the handle right away so nothing leaks,
    # then let pandas write the workbook to that path. delete=False keeps the
    # file on disk so it can be served as a download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
        xlsx_path = tmp.name
    pd.DataFrame(all_items).to_excel(xlsx_path, index=False, engine="openpyxl")

    return json_str, xlsx_path


# ---------- 4. Gradio interface ----------
with gr.Blocks(title="Price-Tag Parser") as demo:
    gr.Markdown("## 🏷️ 零售標價解析\n上傳一張或多張標價照片 → 取得 JSON 與 Excel")
    inp = gr.Files(label="上傳圖片 (可多選)", file_types=["image"])
    model_selector = gr.Radio(
        choices=["gpt-4o", "o3"],
        value="gpt-4o",
        label="選擇使用的模型",
    )
    btn = gr.Button("開始解析 🪄")
    out_json = gr.JSON(label="辨識結果 (JSON)")
    out_file = gr.File(label="下載 Excel", file_types=[".xlsx"])
    btn.click(process, inputs=[inp, model_selector], outputs=[out_json, out_file])

if __name__ == "__main__":
    demo.launch()