import os, io, base64, json, tempfile, pathlib
import pandas as pd
import gradio as gr
from openai import OpenAI

# ---------- ❶ 基本設定 ----------
# The API key comes from the environment; refuse to start without it so the
# failure is reported at import time rather than on the first request.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError("Please set the OPENAI_API_KEY environment variable.")

# Single module-wide client reused by every request.
client = OpenAI(api_key=OPENAI_API_KEY)

# NOTE(review): neither constant is referenced in the visible code -- the model
# is chosen through the UI radio button and no token limit is passed to the API.
MODEL, MAX_TOKENS = "o3", 1024

# ---------- ❷ JSON Schema ----------
# Description of every per-product field, in the order the fields appear in
# the schema. Each field is a required string.
_PRODUCT_FIELD_DESCRIPTIONS = {
    "name": "完整商品名稱；若無或無法辨識請輸出『無法辨識』",
    "list_price": "原價 (非促銷)；若有標促銷價但沒標原價或無法辨識請輸出『無法辨識』",
    "promo_price": "促銷／特價；若無或無法辨識請輸出『無法辨識』",
    "weight": "總重量 (g / kg)；若無或無法辨識請輸出『無法辨識』",
    "volume": "總量（件數／入數／顆，或類似數量單位）；若無或無法辨識請輸出『無法辨識』",
    "barcode": "條碼號 EAN／UPC；；若無或無法辨識請輸出『無法辨識』",
    "item_code": "通路自用貨號(多為英數混排，類似F0500）／PLU；若無或無法辨識請輸出『無法辨識』",
}

# Structured-output schema: one object holding a "products" array whose items
# carry exactly the string fields listed above (no extra properties allowed).
product_schema = {
    "name": "RetailPriceTagSchema",
    "description": "Products extracted from retail price-tag photos.",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "products": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        field: {"type": "string", "description": text}
                        for field, text in _PRODUCT_FIELD_DESCRIPTIONS.items()
                    },
                    "required": list(_PRODUCT_FIELD_DESCRIPTIONS),
                    "additionalProperties": False,
                },
            },
        },
        "required": ["products"],
        "additionalProperties": False,
    },
}

# System message sent with every request (used as the "system" role content in
# call_gpt_model). Written in Traditional Chinese, it tells the model to act as
# a retail price-tag parser, to keep product names in the tag's original
# language, and lists four rules: when a price counts as promo vs. list price,
# how multipliers/pack counts go into `volume`, when `list_price` may be
# filled, and how a count in parentheses is moved from `weight` to `volume`.
# The text is runtime data for the model -- edit it only deliberately.
system_prompt = (
    """你是一個零售標價解析助手，請嚴格根據圖片分析商品標示資訊，商品名稱顯示標示上的原文(若為中文則務必顯示中文)：　

規則：
# 判斷規則 (務必遵守)
1. 價牌上如果以『促銷價』『special price』『sale』等字樣作為抬頭，或者整張牌為紅/黃底高亮，則此價格一律填入 'promo_price'，'list_price' 改寫『無法辨識』。
2. 出現『×2』『x3』『3包』等倍數或件數，寫入 'volume'；重量只留單包重量 (例如「50g ±3」→ weight=50g，volume=3包)。
3. 僅當價牌明示「原價／建議售價／定價/刪除線價格」才填 list_price，否則填『無法辨識』…
4. 若 weight 含 (10顆)，把括號內容移到 volume，weight 只留 g/kg…
"""
)

# ---------- ❸ 小工具 ----------
def encode_image_to_data_url(img_path: str) -> str:
    """Return the file at *img_path* encoded as a base64 ``data:`` URL.

    The MIME type is looked up with :mod:`mimetypes`, so ``.jpg``/``.jpeg``
    both yield the registered ``image/jpeg`` rather than ``image/jpg``. When
    the lookup does not produce an ``image/*`` type, the lower-cased file
    suffix is used (``image/<suffix>``); a file without any suffix is labelled
    ``application/octet-stream`` instead of the malformed ``image/``.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    mime, _ = mimetypes.guess_type(img_path)
    if mime is None or not mime.startswith("image/"):
        suffix = pathlib.Path(img_path).suffix.lstrip(".").lower()
        mime = f"image/{suffix}" if suffix else "application/octet-stream"

    with open(img_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("ascii")
    return f"data:{mime};base64,{b64}"

def call_gpt_model(model_name: str, image_path: str) -> dict:
    """Send one image to the chat-completions API and return the parsed JSON.

    The request carries the module-level ``system_prompt`` as the system
    message, the image (as a base64 data URL) as the only user content, and
    asks for a ``json_schema`` response constrained by ``product_schema``.

    Args:
        model_name: Model identifier passed straight to the API.
        image_path: Path of the image file to analyse.

    Returns:
        The response body decoded from JSON.

    Raises:
        ValueError: If the model returned no text content (for example a
            refusal), or if the content is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": encode_image_to_data_url(image_path)}}
            ]
        }
    ]

    params = {
        "model": model_name,
        "messages": messages,
        "response_format": {
            "type": "json_schema",
            "json_schema": product_schema
        }
    }

    # Temperature is pinned only for gpt-4o.
    # NOTE(review): presumably because o3 rejects a custom temperature -- confirm.
    if model_name == "gpt-4o":
        params["temperature"] = 0.0

    resp = client.chat.completions.create(**params)
    message = resp.choices[0].message
    content = message.content
    if not content:
        # Without this guard json.loads(None) fails with an opaque TypeError.
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"Model {model_name!r} returned no JSON content for {image_path!r}"
            + (f": {refusal}" if refusal else ".")
        )
    return json.loads(content)

def process(images, model_name):
    """Run every uploaded image through the model and export the results.

    Args:
        images: Uploaded files from the UI. Each entry may be an object with a
            ``.name`` attribute holding the file path, or a plain path string.
            ``None`` (nothing uploaded) is treated as an empty list.
        model_name: Model identifier forwarded to ``call_gpt_model``.

    Returns:
        A ``(json_str, xlsx_path)`` tuple: the combined product list as
        pretty-printed JSON text, and the path of a temporary ``.xlsx`` file
        with the same rows. The file is created with ``delete=False``, so it
        is not removed by this function.
    """
    all_items = []
    # `images or []` keeps a click with no upload from crashing on None.
    for img in images or []:
        # NOTE(review): the upload type is assumed to depend on the Gradio
        # version (file wrapper with .name vs. plain path) -- confirm.
        img_path = getattr(img, "name", img)
        payload = call_gpt_model(model_name, img_path)
        all_items.extend(payload.get("products") or [])

    json_str = json.dumps(all_items, ensure_ascii=False, indent=2)

    bio = io.BytesIO()
    pd.DataFrame(all_items).to_excel(bio, index=False, engine="openpyxl")

    # The context manager closes the handle, so the data is flushed and no
    # file descriptor is leaked; delete=False keeps the file on disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
        tmp.write(bio.getvalue())

    return json_str, tmp.name

# ---------- ❹ Gradio 介面 ----------
# Build the Gradio UI. Components are created in display order inside the
# Blocks context, so the statement order below is the page layout.
with gr.Blocks(title="Price-Tag Parser") as demo:
    gr.Markdown("## 🏷️ 零售標價解析\n上傳一張或多張標價照片 → 取得 JSON 與 Excel")
    # Multi-file upload restricted to image files.
    inp = gr.Files(label="上傳圖片 (可多選)", file_types=["image"])
    # Model picker; the selected value is passed to process() as model_name.
    model_selector = gr.Radio(
        choices=["gpt-4o", "o3"],
        value="gpt-4o",
        label="選擇使用的模型"
    )
    
    btn = gr.Button("開始解析 🪄")
    # Outputs: the JSON text and the Excel file path returned by process().
    out_json = gr.JSON(label="辨識結果 (JSON)")
    out_file = gr.File(label="下載 Excel", file_types=[".xlsx"])

    # Clicking the button runs process(images, model_name) and fills both outputs.
    btn.click(process, inputs=[inp, model_selector], outputs=[out_json, out_file])

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()