OppaAI committed on
Commit bbcef43 · verified · 1 Parent(s): a10dd0b

Update app.py

Files changed (1)
  1. app.py +40 -31
app.py CHANGED
@@ -1,10 +1,8 @@
 import os
 import base64
-import requests
-import tempfile
 import gradio as gr
 from huggingface_hub import upload_file, InferenceClient
-from PIL import Image
+import json

 # --- Config ---
 HF_DATASET_REPO = "OppaAI/Robot_MCP"
@@ -14,11 +12,11 @@ HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
 def save_and_upload_image(image_b64, hf_token):
     """Save image to /tmp and upload to HF dataset."""
     image_bytes = base64.b64decode(image_b64)
-    local_tmp_path = f"/tmp/tmp.jpg"
+    local_tmp_path = "/tmp/tmp.jpg"
     with open(local_tmp_path, "wb") as f:
         f.write(image_bytes)

-    path_in_repo = f"images/tmp.jpg"
+    path_in_repo = "images/tmp.jpg"
     upload_file(
         path_or_fileobj=local_tmp_path,
         path_in_repo=path_in_repo,
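The hunk ends mid-call: the remaining upload_file(...) arguments and the function's return fall outside the diff context. Judging from the call site, which unpacks local_tmp_path, hf_url, path_in_repo, size_bytes, the elided tail plausibly looks like the sketch below; the repo_id/repo_type/token kwargs and the URL scheme are assumptions, not part of this commit:

    # Sketch of the elided tail of save_and_upload_image (assumed, not shown in the diff):
    upload_file(
        path_or_fileobj=local_tmp_path,
        path_in_repo=path_in_repo,
        repo_id=HF_DATASET_REPO,   # assumed: the dataset repo from Config
        repo_type="dataset",       # assumed: uploads target a dataset repo
        token=hf_token,
    )
    size_bytes = os.path.getsize(local_tmp_path)
    # Assumed URL scheme for files in a HF dataset repo:
    hf_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
    return local_tmp_path, hf_url, path_in_repo, size_bytes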
@@ -34,54 +32,63 @@ def save_and_upload_image(image_b64, hf_token):
 # --- Main MCP function ---
 def process_and_describe(payload: dict):
     try:
-        # 1️⃣ Use robot-sent token
         hf_token = payload.get("hf_token")
         if not hf_token:
             return {"error": "HF token not provided in payload."}

         robot_id = payload.get("robot_id", "unknown")
-        image_b64 = payload["image_b64"]
+        image_b64 = payload.get("image_b64")
+        if not image_b64:
+            return {"error": "No image provided."}

-        # 2️⃣ Save image temporarily
+        # Save & upload
         local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)

-        # 3️⃣ Initialize HF client per request
+        # Init HF client
         hf_client = InferenceClient(token=hf_token)

-        # 4️⃣ System prompt for robot action
+        # System prompt: describe + suggest action
         system_prompt = """
-        You are a helpful robot assistant.
-        When receiving an image, you must:
+        You are a helpful robot assistant.
         1. Describe the image in detail.
-        2. Suggest what the robot should do next based on what it sees.
-        - If you see a human figure, suggest the robot to say 'Hi'.
-        - If you see a ball, suggest the robot to go towards it.
-        - If you see obstacles, suggest 'stop' or 'avoid'.
-        - If you see a red button, suggest 'press the button'.
-        Always provide the suggested actions in JSON format:
-        {"move": "forward/stop/left/right", "interact": "press/say/do nothing"}
+        2. Suggest what the robot should do next based on what it sees:
+        - Human figure → say 'Hi'.
+        - Ball → move towards it.
+        - Obstacles → stop or avoid.
+        - Red button → press it.
+        Always respond in JSON:
+        {"description": "...", "action": {"move": "...", "interact": "..."}}
         """

-        # 5️⃣ Prepare multimodal message payload
         messages_payload = [
             {"role": "system", "content": system_prompt},
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Here is an image."},
-                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
-                ]
-            }
+            {"role": "user", "content": [
+                {"type": "text", "text": "Here is an image."},
+                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
+            ]}
         ]

-        # 6️⃣ Call VLM
+        # Call VLM
         chat_completion = hf_client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages_payload,
-            max_tokens=200,
+            max_tokens=300
         )

-        vlm_text = chat_completion.choices[0].message.content.strip()
+        # Robustly extract text
+        try:
+            vlm_text = chat_completion.choices[0].message.content.strip()
+        except Exception:
+            # fallback if structure is different
+            vlm_text = str(chat_completion)
+
+        # Attempt to parse JSON from VLM
+        action_data = {}
+        try:
+            action_data = json.loads(vlm_text)
+        except Exception:
+            # If VLM didn't return valid JSON, wrap text as description
+            action_data = {"description": vlm_text, "action": {"move": "unknown", "interact": "unknown"}}

         return {
             "saved_to_hf_hub": True,
@@ -90,7 +97,9 @@ def process_and_describe(payload: dict):
             "image_url": hf_url,
             "file_size_bytes": size_bytes,
             "robot_id": robot_id,
-            "vlm_response": vlm_text
+            "vlm_response": vlm_text,
+            "vlm_action": action_data.get("action", {}),
+            "vlm_description": action_data.get("description", "")
         }

     except Exception as e:
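Net effect on the function's contract: image_b64 is now read with .get() and validated instead of raising KeyError, max_tokens rises from 200 to 300, and the response gains structured vlm_action and vlm_description fields alongside the raw vlm_response. A minimal caller sketch; the file name, robot id, and token below are placeholders, and calling process_and_describe directly assumes it is exposed as a plain Python function:

import base64

def build_payload(image_path: str, hf_token: str, robot_id: str = "robot-01") -> dict:
    # Field names match what process_and_describe reads from its payload.
    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")
    return {"hf_token": hf_token, "robot_id": robot_id, "image_b64": image_b64}

result = process_and_describe(build_payload("frame.jpg", hf_token="hf_..."))
if "error" not in result:
    print(result["vlm_description"])
    print(result["vlm_action"])  # e.g. {"move": "...", "interact": "..."}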
 