"""Gradio MCP server that relays a robot camera frame to a hosted VLM.

The app exposes a single endpoint, ``process``, which accepts a JSON payload
containing a base64-encoded image, forwards the image to a Hugging Face
vision-language model and returns the model's description as JSON.
"""

import base64
import json
import os
from io import BytesIO

import gradio as gr
import requests

# Hugging Face token and model.
HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"  # A model from the HF supported list.

# NOTE(review): the upload endpoint and the ``href`` field read from its
# response are kept exactly as in the original code -- confirm both against
# the current Hugging Face API before relying on them.
_UPLOAD_URL = "https://huggingface.co/api/uploads"
_CHAT_URL = "https://router.huggingface.co/v1/chat/completions"
_REQUEST_TIMEOUT = 60  # Seconds, applied to every outbound HTTP request.

if not HF_TOKEN:
    print("ERROR: HF_CV_ROBOT_TOKEN environment variable not set.")


def _extract_text(content):
    """Return the text of a chat message whose content is a str or a part list."""
    if isinstance(content, str):
        return content
    return content[0]["text"]


# -------------------------------
# Main processing function
# -------------------------------
def process(payload: dict):
    """Describe the image carried by *payload* using the configured VLM.

    Args:
        payload: Mapping with a required ``"image_b64"`` entry (base64-encoded
            image bytes) and an optional ``"robot_id"`` entry, which defaults
            to ``"unknown"``.

    Returns:
        On success, a dict with the keys ``"received"`` (``True``),
        ``"robot_id"`` and ``"vllm_analysis"`` (the model's text reply).
        On any failure, a dict with a single ``"error"`` key holding a
        human-readable message. The function never raises.
    """
    try:
        if not HF_TOKEN:
            return {"error": "Hugging Face token is missing. Please check Space secrets."}

        if not isinstance(payload, dict):
            return {"error": "Payload must be a JSON object."}
        if "image_b64" not in payload:
            return {"error": "Payload is missing required field 'image_b64'."}

        robot_id = payload.get("robot_id", "unknown")
        image_bytes = base64.b64decode(payload["image_b64"])
        auth_headers = {"Authorization": f"Bearer {HF_TOKEN}"}

        # Upload straight from memory: nothing is written to disk, so there is
        # no file handle to leak and no temp file shared between concurrent
        # requests. The multipart filename stays "tmp.jpg" as before.
        upload_resp = requests.post(
            _UPLOAD_URL,
            headers=auth_headers,
            files={"file": ("tmp.jpg", BytesIO(image_bytes))},
            timeout=_REQUEST_TIMEOUT,
        )
        if upload_resp.status_code != 200:
            return {"error": f"HF upload failed: {upload_resp.status_code}, {upload_resp.text}"}

        # URL of the file as hosted by HF.
        file_url = upload_resp.json().get("href")
        if not file_url:
            return {"error": f"HF upload response has no file URL: {upload_resp.text}"}

        # The image is passed as a dedicated image_url part; a markdown link
        # inside the text part is only ever seen by the model as plain text.
        data = {
            "model": MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {"type": "image_url", "image_url": {"url": file_url}},
                    ],
                }
            ],
        }

        # The chat-completions endpoint expects a JSON body, not form fields.
        resp = requests.post(
            _CHAT_URL,
            headers=auth_headers,
            json=data,
            timeout=_REQUEST_TIMEOUT,
        )
        if resp.status_code != 200:
            return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}

        try:
            vlm_text = _extract_text(resp.json()["choices"][0]["message"]["content"])
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # ValueError also covers json.JSONDecodeError from a non-JSON body.
            return {"error": f"Failed to parse VLM response: {e}, Response text: {resp.text}"}

        return {
            "received": True,
            "robot_id": robot_id,
            "vllm_analysis": vlm_text,
        }

    except Exception as e:
        # API boundary: report every failure to the caller as JSON instead of
        # letting the exception escape into Gradio.
        return {"error": str(e)}


# -------------------------------
# Gradio MCP Server
# -------------------------------
demo = gr.Interface(
    fn=process,
    inputs=gr.JSON(label="Input Payload (Dict format)"),
    outputs=gr.JSON(label="Reply to Jetson"),
    api_name="predict",
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)