File size: 2,854 Bytes
0ef482f
 
ec3d9e7
0ef482f
48607b7
53af268
48607b7
c8fa46a
53af268
48607b7
 
53af268
 
 
48607b7
0ef482f
48607b7
17438da
53af268
17438da
53af268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd3451f
53af268
 
48607b7
53af268
48607b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd3451f
53af268
 
 
 
 
48607b7
53af268
dd3451f
ec3d9e7
0ef482f
53af268
d081bf3
17438da
53af268
 
48607b7
17438da
 
444e2a5
0ef482f
17438da
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import base64
import hashlib
import json
import os
import tempfile

import gradio as gr
import requests
from huggingface_hub import upload_file

HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
HF_DATASET_REPO = "OppaAI/Robot_MCP"  # Replace with your dataset repo
MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"

def process_and_describe(payload: dict) -> dict:
    """Archive a base64-encoded image to a HF dataset repo and describe it with a VLM.

    Args:
        payload: dict with required key ``"image_b64"`` (base64-encoded image
            bytes) and optional ``"robot_id"``.

    Returns:
        On success, a dict with the Hub upload metadata and the model's
        ``"vlm_description"``; on failure, ``{"error": <message>}``.
        Never raises — all exceptions are folded into the error dict.
    """
    if not HF_TOKEN:
        # Name the actual secret so the operator knows what to configure.
        return {"error": "HF_CV_ROBOT_TOKEN secret not found in Space settings."}

    try:
        robot_id = payload.get("robot_id", "unknown")
        image_b64 = payload["image_b64"]
        image_bytes = base64.b64decode(image_b64)

        # 1️⃣ Save to a unique temp file (a fixed /tmp name would race
        # between concurrent requests).
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp.write(image_bytes)
            local_tmp_path = tmp.name

        # 2️⃣ Upload to HF dataset repo. Name the file by content hash so
        # distinct images never collide (a length-based name overwrites any
        # earlier image that happens to have the same byte count). The temp
        # file is removed even if the upload fails.
        digest = hashlib.sha256(image_bytes).hexdigest()[:16]
        path_in_repo = f"images/uploaded_image_{digest}.jpg"
        try:
            upload_file(
                path_or_fileobj=local_tmp_path,
                path_in_repo=path_in_repo,
                repo_id=HF_DATASET_REPO,
                token=HF_TOKEN,
                repo_type="dataset"
            )
        finally:
            os.remove(local_tmp_path)

        # 3️⃣ Construct public URL
        image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"

        # 4️⃣ Call VLM via the HF router (OpenAI-compatible chat API).
        # The spec requires image_url to be an object {"url": ...},
        # not a bare string.
        data = {
            "model": MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {"type": "image_url", "image_url": {"url": image_url}}
                    ]
                }
            ]
        }

        resp = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            json=data,
            timeout=60
        )

        if resp.status_code != 200:
            vlm_text = f"HF VLM error: {resp.status_code}, {resp.text}"
        else:
            try:
                content = resp.json()["choices"][0]["message"]["content"]
                # OpenAI-compatible servers return content as a plain
                # string; some return a list of typed parts. Accept both.
                if isinstance(content, str):
                    vlm_text = content
                else:
                    vlm_text = content[0]["text"]
            except Exception as e:
                vlm_text = f"Failed to parse VLM response: {e}, Response={resp.text}"

        return {
            "saved_to_hf_hub": True,
            "repo_id": HF_DATASET_REPO,
            "path_in_repo": path_in_repo,
            "image_url": image_url,
            "file_size_bytes": len(image_bytes),
            "robot_id": robot_id,
            "vlm_description": vlm_text
        }

    except Exception as e:
        return {"error": f"Failed to upload/describe image: {str(e)}"}

# Gradio UI: the robot POSTs a JSON payload and receives the
# upload/description result back as JSON.
_payload_input = gr.JSON(label="Input Payload (Dict format with 'image_b64')")
_reply_output = gr.JSON(label="Reply to Jetson")

demo = gr.Interface(
    fn=process_and_describe,
    inputs=_payload_input,
    outputs=_reply_output,
    api_name="predict",
)

if __name__ == "__main__":
    # mcp_server=True also exposes the endpoint over MCP.
    demo.launch(mcp_server=True)