Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Files Community

OppaAI committed on Nov 16, 2025

Commit

71865dd

verified ·

1 Parent(s): 23f2922

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -40

app.py CHANGED Viewed

@@ -8,74 +8,80 @@ import os
 HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
 MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
-if not HF_TOKEN:
-    print("ERROR: HF_CV_ROBOT_TOKEN environment variable not set.")
 def process(payload: dict):
     try:
         if not HF_TOKEN:
-            return {"error": "Missing HF token"}
         robot_id = payload.get("robot_id", "unknown")
-        image_b64 = payload["image_b64"]
-        # Save Base64 → temp file
         img_bytes = base64.b64decode(image_b64)
-        temp_path = "temp.jpg"
-        with open(temp_path, "wb") as f:
-            f.write(img_bytes)
-        # HF Router expects:
-        # data = model + messages (string)
-        # files = file
-        messages_json = json.dumps([
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Describe this image in detail."}
-                ]
-            }
-        ])
         resp = requests.post(
             "https://router.huggingface.co/v1/chat/completions",
-            headers={"Authorization": f"Bearer {HF_TOKEN}"},
-            data={
-                "model": MODEL,               # <- 放在這裡才對！
-                "messages": messages_json     # <- 必須是字串
-            },
-            files={
-                "file": ("image.jpg", open(temp_path, "rb"), "image/jpeg")
-            },
             timeout=60
         )
         if resp.status_code != 200:
-            print(f"VLM API error: {resp.status_code}, {resp.text}")
             return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}
-        # Parse
-        out = resp.json()
-        parts = out["choices"][0]["message"]["content"]
-        text = ""
-        for p in parts:
-            if p["type"] == "text":
-                text += p["text"]
         return {
             "received": True,
             "robot_id": robot_id,
-            "vllm_analysis": text
         }
     except Exception as e:
         return {"error": str(e)}
 demo = gr.Interface(
     fn=process,
-    inputs=gr.JSON(label="Input Payload"),
-    outputs=gr.JSON(label="Reply"),
     api_name="predict"
 )

 HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
 MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
+HF_UPLOAD_URL = "https://huggingface.co/api/uploads"
+def upload_to_hf(bytes_data):
+    """Upload image bytes to HF and return image_url."""
+    resp = requests.post(
+        HF_UPLOAD_URL,
+        headers={"Authorization": f"Bearer {HF_TOKEN}"},
+        files={"file": ("temp.jpg", bytes_data, "image/jpeg")}
+    )
+    if resp.status_code != 200:
+        raise RuntimeError(f"HF upload failed: {resp.text}")
+    url = resp.json()["url"]
+    return url
 def process(payload: dict):
     try:
         if not HF_TOKEN:
+            return {"error": "Missing HF token."}
         robot_id = payload.get("robot_id", "unknown")
+        # --- get image bytes
+        image_b64 = payload["image_b64"]
         img_bytes = base64.b64decode(image_b64)
+        # --- upload to HF (get public URL)
+        image_url = upload_to_hf(img_bytes)
+        # --- VLM request (image_url only)
+        data = {
+            "model": MODEL,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Describe this image in detail."},
+                        {"type": "image_url", "image_url": {"url": image_url}}
+                    ]
+                }
+            ]
+        }
         resp = requests.post(
             "https://router.huggingface.co/v1/chat/completions",
+            headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
+            data=json.dumps(data),
             timeout=60
         )
         if resp.status_code != 200:
             return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}
+        try:
+            vlm_text = resp.json()["choices"][0]["message"]["content"][0]["text"]
+        except:
+            return {"error": f"Bad VLM response: {resp.text}"}
         return {
             "received": True,
             "robot_id": robot_id,
+            "vllm_analysis": vlm_text
         }
     except Exception as e:
         return {"error": str(e)}
 demo = gr.Interface(
     fn=process,
+    inputs=gr.JSON(label="Input Payload (Dict format)"),
+    outputs=gr.JSON(label="Reply to Jetson"),
     api_name="predict"
 )