import gradio as gr
import base64
import json
import requests
import os

# Hugging Face Router inference endpoint, token, and model used for image analysis.
HF_ROUTER_API = "https://router.huggingface.co/hf-inference"
HF_TOKEN = os.getenv("HF_CV_ROBOT_TOKEN")
MODEL_NAME = "Qwen/Qwen3-VL-32B-Instruct"


def call_vlm_api(payload: dict) -> str:
    """Call the Hugging Face Router Inference API with a Base64-encoded image."""
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    data = {
        "model": MODEL_NAME,
        "inputs": [
            {
                "image": {"b64": payload["image_b64"]},
                "text": "Describe the image in detail.",
            }
        ],
    }
    try:
        resp = requests.post(HF_ROUTER_API, headers=headers, json=data, timeout=60)
        if resp.status_code == 200:
            # Take the first generated_text from the response.
            return resp.json()[0].get("generated_text", "")
        return f"VLM API error: {resp.status_code}, {resp.text}"
    except Exception as e:
        return f"Exception: {str(e)}"


def process(payload: dict) -> dict:
    """
    Process the JSON payload from the Jetson (Base64 image + robot_id)
    and return a JSON reply containing the VLM analysis.
    """
    try:
        vlm_text = call_vlm_api(payload)
        return {
            "received": True,
            "robot_id": payload.get("robot_id", "unknown"),
            "vlm_analysis": vlm_text,
        }
    except Exception as e:
        return {"error": str(e)}


# Gradio MCP server: exposes process() as the /predict endpoint.
demo = gr.Interface(
    fn=process,
    inputs=gr.JSON(label="Input Payload from Jetson"),
    outputs=gr.JSON(label="Reply to Jetson"),
    api_name="predict",
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)
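
# --- Client sketch (illustrative, not part of the server) ---
# A minimal example of how a Jetson-side script might build the expected
# payload and call the /predict endpoint above via the gradio_client package.
# The server URL, image path, and robot_id are assumptions for illustration;
# the payload keys (image_b64, robot_id) follow process() above.
#
# from gradio_client import Client
# import base64
#
# client = Client("http://<server-host>:7860")  # replace with the actual server URL
#
# with open("frame.jpg", "rb") as f:            # hypothetical captured frame
#     image_b64 = base64.b64encode(f.read()).decode("utf-8")
#
# reply = client.predict(
#     {"robot_id": "jetson-01", "image_b64": image_b64},
#     api_name="/predict",
# )
# print(reply)  # {"received": true, "robot_id": "jetson-01", "vlm_analysis": "..."}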