Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Files Community

OppaAI committed on Nov 16, 2025

Commit

3de8d94

verified ·

1 Parent(s): e37ca9c

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -73

app.py CHANGED Viewed

@@ -1,93 +1,65 @@
-import gradio as gr
-import json
 import base64
-from PIL import Image
-import io
 import requests
-import os
-# Read the Hugging Face API token from the HF_CV_ROBOT_TOKEN environment variable
-HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
-MODEL = "Qwen/Qwen2-VL-7B-Instruct"
-# Warn once at import time if the token is missing; the script keeps running
-if not HF_TOKEN:
-    print("ERROR: HF_CV_ROBOT_TOKEN environment variable not set.")
-    # Execution is deliberately not stopped here: process() re-checks HF_TOKEN
-    # on every call and answers with an error dict while the token is missing.
-# -------------------------------
-# Main processing function: forward the payload's image to the VLM and wrap the reply
-# -------------------------------
-def process(payload: dict):
-    try:
-        if not HF_TOKEN:
-            return {"error": "Hugging Face token is missing. Please check Space secrets."}
-        robot_id = payload.get("robot_id", "unknown")
-        image_b64 = payload["image_b64"]
-        # Decode the base64 string into raw image bytes
-        img_bytes = base64.b64decode(image_b64)
-        # img_bytes is never read after this point: the request below forwards
-        # image_b64 unchanged, and the PIL conversion is left disabled.
-        # img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-        # Headers and body for the Hugging Face router chat-completions request
-        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-        data = {
-            "model": MODEL,
             "messages": [
                 {
                     "role": "user",
                     "content": [
-                        {"type": "text", "text": "Describe this image in detail."},
-                        {"type": "image_data", "image_data": {"b64": image_b64}}
                     ]
                 }
             ]
         }
-        resp = requests.post(
-            "https://router.huggingface.co/v1/chat/completions",
-            headers=headers,
-            json=data,
-            timeout=60
-        )
-        if resp.status_code != 200:
-            # Log the status and body, then hand the same detail back to the caller
-            print(f"VLM API error: {resp.status_code}, {resp.text}")
-            return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}
-        # Pull out the reply text. NOTE(review): a TypeError (e.g. string content) is not caught here, only by the outer handler
-        try:
-            vlm_text = resp.json()["choices"][0]["message"]["content"][0]["text"]
-        except (KeyError, IndexError, json.JSONDecodeError) as e:
-            return {"error": f"Failed to parse VLM response: {e}, Response text: {resp.text}"}
-        return {
-            "received": True,
-            "robot_id": robot_id,
-            "vllm_analysis": vlm_text
         }
-    except Exception as e:
-        # Log any other failure and report it to the caller as an error dict
-        print(f"An unexpected error occurred: {e}")
-        return {"error": str(e)}
-# -------------------------------
-# Gradio JSON-in/JSON-out interface around process(), launched with mcp_server=True
-# -------------------------------
-demo = gr.Interface(
-    fn=process,
-    inputs=gr.JSON(label="Input Payload (Dict format)"),
-    outputs=gr.JSON(label="Reply to Jetson"),
-    api_name="predict"
-)
 if __name__ == "__main__":
-    demo.launch(mcp_server=True)

 import base64
+import io
+import json
+import os
+import tempfile
+import time
 import requests
+from PIL import Image
+# ----------------------------
+# OpenRouter API settings
+# ----------------------------
+# The key is read from the OPENROUTER_API_KEY environment variable so the real
+# secret never has to be written into this file. The placeholder remains the
+# fallback, so the value is unchanged whenever the variable is not set.
+OPENROUTER_KEY = os.environ.get("OPENROUTER_API_KEY", "YOUR_OPENROUTER_API_KEY")
+MODEL_NAME = "qwen/qwen3-vl-32b-instruct"
+API_URL = "https://openrouter.ai/api/v1/chat/completions"
+def send_image_to_vlm(b64_image: str) -> str:
+    """Ask the OpenRouter vision model to describe a base64-encoded image.
+
+    The image is decoded, normalised to an RGB JPEG, and embedded in the
+    request as a base64 data URL. The API runs remotely, so a path to a
+    local temporary file would be meaningless to it.
+
+    Args:
+        b64_image: Base64 text of an image in any format PIL can open.
+
+    Returns:
+        The model's reply text on success. On a non-200 status, a
+        "VLM API error ..." string. If the reply cannot be located in the
+        response, the whole response rendered with ``str``.
+
+    Raises:
+        Nothing is caught around the base64 decode, the PIL open/convert or
+        the HTTP call, so their exceptions propagate to the caller.
+    """
+    # 1. base64 -> PIL image, forced to RGB so it can be saved as JPEG
+    img_bytes = base64.b64decode(b64_image)
+    img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+    # 2. Re-encode in memory; no temporary file is needed
+    with io.BytesIO() as jpeg_buffer:
+        img.save(jpeg_buffer, format="JPEG")
+        jpeg_b64 = base64.b64encode(jpeg_buffer.getvalue()).decode("ascii")
+        # 3. Build the payload with the image inlined as a data URL
+        payload = {
+            "model": MODEL_NAME,
             "messages": [
                 {
                     "role": "user",
                     "content": [
+                        {"type": "text", "text": "Describe the content of this image in detail."},
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{jpeg_b64}"}}
                     ]
                 }
             ]
         }
+        headers = {
+            "Authorization": f"Bearer {OPENROUTER_KEY}",
+            "Content-Type": "application/json"
         }
+        # 4. Call the API; requests serialises the payload itself via json=
+        resp = requests.post(API_URL, headers=headers, json=payload, timeout=120)
+    if resp.status_code != 200:
+        return f"VLM API error {resp.status_code}: {resp.text}"
+    try:
+        result = resp.json()
+    except ValueError:
+        # A 200 with a non-JSON body: report it instead of raising
+        return f"VLM API returned a non-JSON response: {resp.text}"
+    try:
+        content = result["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError):
+        return str(result)
+    # Chat-completion replies normally carry the text as a plain string
+    if isinstance(content, str):
+        return content
+    # Also accept the list-of-parts form the previous code expected
+    if isinstance(content, list):
+        texts = [
+            part["text"]
+            for part in content
+            if isinstance(part, dict) and isinstance(part.get("text"), str)
+        ]
+        if texts:
+            return "".join(texts)
+    return str(result)
+# ----------------------------
+# Manual smoke test
+# ----------------------------
 if __name__ == "__main__":
+    # Read a sample image from disk and base64-encode it, as a caller would
+    with open("test.jpg", "rb") as image_file:
+        raw_bytes = image_file.read()
+    encoded_image = base64.b64encode(raw_bytes).decode("utf-8")
+    # Send it to the model and show the reply
+    print("VLM 分析結果：", send_image_to_vlm(encoded_image))