Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Community

OppaAI committed on Nov 23, 2025

Commit

80c4ab2

verified ·

1 Parent(s): 99974e9

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -10

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import base64
 import json
 import gradio as gr
-from huggingface_hub import upload_file, InferenceClient
 from datetime import datetime
 import traceback
 from typing import Optional, Dict, Any
@@ -31,7 +31,6 @@ def save_and_upload_image(image_b64: str, hf_token: str):
         filename = f"robot_{timestamp}.jpg"
-        from huggingface_hub import HfApi
         api = HfApi()
         api.upload_file(
             path_or_fileobj=local_path,
@@ -110,19 +109,28 @@ def process_and_describe(payload: Dict[str, Any]) -> Dict[str, Any]:
     if not hf_url:
         return {"error": "Image upload failed"}
-    # VLM system prompt
     system_prompt = f"""
-Respond in STRICT JSON ONLY:
 {{
- "description": "detail visual description",
- "tool_name": "speak | navigate | scan_hazard | analyze_human",
- "arguments": {{ ... }}
 }}
 """
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": [
-            {"type": "text", "text": "Analyze the image and choose ONE tool."},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
         ]}
     ]
@@ -143,9 +151,12 @@ Respond in STRICT JSON ONLY:
     if parsed is None:
         return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
     tool_name = parsed.get("tool_name")
     tool_args = parsed.get("arguments") or {}
-    tool_result = validate_and_call_tool(tool_name, tool_args)
     return {
         "status": "success",
@@ -177,4 +188,4 @@ if __name__ == "__main__":
     print(f"[Config] HF_VLM_MODEL: {HF_VLM_MODEL}")
     print(f"[Config] REMOTE_MCP_URL: {REMOTE_MCP_URL}")
     print("[Gradio] Launching interface...")
-    app.launch(server_name="0.0.0.0", server_port=7860)

 import base64
 import json
 import gradio as gr
+from huggingface_hub import HfApi, InferenceClient
 from datetime import datetime
 import traceback
 from typing import Optional, Dict, Any
         filename = f"robot_{timestamp}.jpg"
         api = HfApi()
         api.upload_file(
             path_or_fileobj=local_path,
     if not hf_url:
         return {"error": "Image upload failed"}
+    # VLM system prompt: decide MCP tool automatically
     system_prompt = f"""
+Respond in STRICT JSON ONLY.
+Rules:
+1. Provide a short description of what you see.
+2. Decide ONE MCP tool to call from:
+   - chat_with_human
+3. If a human is looking directly at the robot and waving, set "tool_name": "chat_with_human".
+4. Otherwise, set "tool_name": null and leave "arguments": {{}}
+Output format:
 {{
+ "description": "...",
+ "tool_name": "chat_with_human | null",
+ "arguments": {{}}
 }}
 """
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": [
+            {"type": "text", "text": "Analyze the image and call the appropriate MCP tool."},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
         ]}
     ]
     if parsed is None:
         return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
+    # Call the MCP tool directly if VLM chooses one
     tool_name = parsed.get("tool_name")
     tool_args = parsed.get("arguments") or {}
+    tool_result = None
+    if tool_name:
+        tool_result = validate_and_call_tool(tool_name, tool_args)
     return {
         "status": "success",
     print(f"[Config] HF_VLM_MODEL: {HF_VLM_MODEL}")
     print(f"[Config] REMOTE_MCP_URL: {REMOTE_MCP_URL}")
     print("[Gradio] Launching interface...")
+    app.launch()