File size: 3,297 Bytes
0ef482f
 
ec3d9e7
c071669
6c10eb2
0ef482f
6c10eb2
c071669
 
5e39a23
444e2a5
0ef482f
c071669
ec3d9e7
c071669
5e39a23
c071669
0ef482f
 
c071669
 
 
0ef482f
 
 
5e39a23
 
 
c071669
5e39a23
 
 
24f75ec
5e39a23
 
 
0ef482f
 
ec3d9e7
 
 
 
c071669
ec3d9e7
 
 
 
 
5e39a23
 
 
0ef482f
 
c071669
 
5e39a23
0ef482f
 
 
5e39a23
 
 
0ef482f
c071669
0ef482f
 
5e39a23
0ef482f
5e39a23
 
 
 
 
 
 
0ef482f
 
 
 
 
5e39a23
eb6d527
ec3d9e7
0ef482f
5e39a23
0ef482f
d081bf3
c071669
24f75ec
c071669
0ef482f
 
 
 
 
 
444e2a5
0ef482f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import gradio as gr
import json
import base64
from io import BytesIO
import requests
import os

# HF token & model configuration.
# The token is read from the Space's secret store; it is never hard-coded.
HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"  # confirm this model supports VLM (it currently does)

# Warn at startup if the secret is missing, but keep the app running so the
# failure also surfaces in every API response (see process()).
if not HF_TOKEN:
    print("ERROR: HF_CV_ROBOT_TOKEN environment variable not set.")

# -------------------------------
# 主處理函數
# -------------------------------
def process(payload: dict) -> dict:
    """Analyze a robot-submitted image with a hosted vision-language model.

    Parameters
    ----------
    payload : dict
        - ``"image_b64"`` (required): base64-encoded JPEG image bytes.
        - ``"robot_id"`` (optional): identifier echoed back to the caller;
          defaults to ``"unknown"``.

    Returns
    -------
    dict
        On success: ``{"received": True, "robot_id": ..., "vllm_analysis": <text>}``.
        On any failure: ``{"error": <message>}`` — this function never raises.
    """
    try:
        if not HF_TOKEN:
            return {"error": "Hugging Face token is missing. Please check Space secrets."}

        robot_id = payload.get("robot_id", "unknown")
        image_b64 = payload["image_b64"]

        # Validate the base64 payload up front so a malformed string yields a
        # clear local error instead of an opaque upstream API failure.
        base64.b64decode(image_b64, validate=True)

        # The HF router's /v1/chat/completions endpoint is OpenAI-compatible:
        # it accepts a single JSON body and does NOT support
        # multipart/form-data uploads.  The image must be embedded directly in
        # the message content as a base64 data-URI "image_url" part — this
        # also removes the temp-file round trip and the leaked file handle of
        # the previous multipart approach.
        data = {
            "model": MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
                        },
                    ],
                }
            ],
        }

        resp = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            json=data,
            timeout=60,
        )

        if resp.status_code != 200:
            print(f"VLM API error: {resp.status_code}, {resp.text}")
            return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}

        try:
            content = resp.json()["choices"][0]["message"]["content"]
            # Chat-completions normally returns "content" as a plain string;
            # some providers return a list of typed parts.  Handle both —
            # iterating a string as if it were parts would raise on str.get.
            if isinstance(content, str):
                vlm_text = content
            else:
                vlm_text = "".join(
                    part["text"] for part in content if part.get("type") == "text"
                )
        except Exception as e:
            return {"error": f"Failed to parse VLM response: {e}, Response text: {resp.text}"}

        return {
            "received": True,
            "robot_id": robot_id,
            "vllm_analysis": vlm_text.strip(),
        }

    except Exception as e:
        # Top-level boundary: convert any unexpected failure into an error
        # payload so the Gradio endpoint always returns JSON.
        print(f"Unexpected error: {e}")
        return {"error": str(e)}

# -------------------------------
# Gradio MCP Server
# -------------------------------
# Single JSON-in / JSON-out endpoint wrapping process(); exposed under the
# "predict" api_name so remote clients (the Jetson robot) can call it.
demo = gr.Interface(
    fn=process,
    inputs=gr.JSON(label="Input Payload (Dict format)"),
    outputs=gr.JSON(label="Reply to Jetson"),
    api_name="predict"
)

if __name__ == "__main__":
    # mcp_server=True additionally serves the app as an MCP server
    # alongside the regular Gradio HTTP interface.
    demo.launch(mcp_server=True)