OppaAI commited on
Commit
9c6065d
·
verified ·
1 Parent(s): 5410665

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -43
app.py CHANGED
@@ -3,23 +3,23 @@ import base64
3
  import gradio as gr
4
  from huggingface_hub import upload_file, InferenceClient
5
  import json
6
- from fastmcp import MCP
7
 
8
  # --- Config ---
9
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
10
- HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
11
 
12
  # --- MCP server instance ---
13
- mcp = MCP() # used to define tools
 
 
 
14
 
15
  # --- MCP Tool ---
16
  @mcp.tools()
17
- def say_hi(greeting_text="Hi!"):
18
  """Return a greeting command in JSON."""
19
- return {
20
- "command": "say_hi",
21
- "text": greeting_text
22
- }
23
 
24
  # --- Helper Functions ---
25
  def save_and_upload_image(image_b64, hf_token):
@@ -52,22 +52,23 @@ def process_and_describe(payload: dict):
52
  if not image_b64:
53
  return {"error": "No image provided."}
54
 
55
- # Save image and upload to HF
56
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
 
 
57
  hf_client = InferenceClient(token=hf_token)
58
 
59
- # System prompt for VLM
60
- system_prompt = """
61
- You are a helpful robot assistant.
62
- 1. Describe the image in detail.
63
- 2. Suggest what the robot should do next:
64
- - If you see a human figure, suggest saying 'Hi' in a friendly and varied way.
65
- Always respond in JSON format:
66
- {
67
- "description": "...",
68
- "action": "say_hi",
69
- "greeting_text": "a friendly greeting that can be different each time"
70
- }
71
  """
72
 
73
  messages_payload = [
@@ -78,31 +79,17 @@ def process_and_describe(payload: dict):
78
  ]}
79
  ]
80
 
81
- # Call VLM
82
  chat_completion = hf_client.chat.completions.create(
83
  model=HF_VLM_MODEL,
84
  messages=messages_payload,
85
  max_tokens=300
86
  )
87
 
88
- # Extract VLM text
89
  vlm_text = chat_completion.choices[0].message.content.strip()
90
- action_data = {}
91
- try:
92
- action_data = json.loads(vlm_text)
93
- except Exception:
94
- action_data = {
95
- "description": vlm_text,
96
- "action": "unknown",
97
- "greeting_text": "Hi!"
98
- }
99
-
100
- # --- Call MCP tool if needed ---
101
- vlm_action = action_data.get("action")
102
- tool_result = None
103
- if vlm_action == "say_hi":
104
- greeting_text = action_data.get("greeting_text", "Hi!")
105
- tool_result = say_hi(greeting_text=greeting_text)
106
 
107
  return {
108
  "saved_to_hf_hub": True,
@@ -112,9 +99,7 @@ def process_and_describe(payload: dict):
112
  "file_size_bytes": size_bytes,
113
  "robot_id": robot_id,
114
  "vlm_response": vlm_text,
115
- "vlm_action": vlm_action,
116
- "vlm_description": action_data.get("description", ""),
117
- "tool_result": tool_result
118
  }
119
 
120
  except Exception as e:
@@ -123,7 +108,7 @@ def process_and_describe(payload: dict):
123
  # --- Gradio MCP Interface ---
124
  demo = gr.Interface(
125
  fn=process_and_describe,
126
- inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),
127
  outputs=gr.JSON(label="Reply to Jetson"),
128
  api_name="predict"
129
  )
 
3
  import gradio as gr
4
  from huggingface_hub import upload_file, InferenceClient
5
  import json
6
+ from fastmcp import MCP, STIO
7
 
8
  # --- Config ---
9
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
10
+ HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
11
 
12
  # --- MCP server instance ---
13
+ mcp = MCP()
14
+
15
+ # --- STIO for the LLM ---
16
+ stio = STIO(mcp) # Bind STIO to MCP tools
17
 
18
  # --- MCP Tool ---
19
  @mcp.tools()
20
+ def say_hi(greeting_text: str = "Hi there!"):
21
  """Return a greeting command in JSON."""
22
+ return {"command": "say_hi", "text": greeting_text}
 
 
 
23
 
24
  # --- Helper Functions ---
25
  def save_and_upload_image(image_b64, hf_token):
 
52
  if not image_b64:
53
  return {"error": "No image provided."}
54
 
55
+ # Save image & upload
56
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
57
+
58
+ # Initialize HF client
59
  hf_client = InferenceClient(token=hf_token)
60
 
61
+ # --- System prompt with STIO instructions ---
62
+ system_prompt = f"""
63
+ You are a helpful robot assistant. You have access to MCP tools via STIO.
64
+ When you receive an image, you must:
65
+ 1️⃣ Describe the image in detail.
66
+ 2️⃣ Decide actions for the robot. Example:
67
+ - Human figure → call `say_hi` tool with a friendly greeting (vary every time)
68
+ 3️⃣ Use STIO to call the tools. Always respond in JSON if calling tools.
69
+
70
+ Available tools:
71
+ {stio.describe_tools()}
 
72
  """
73
 
74
  messages_payload = [
 
79
  ]}
80
  ]
81
 
82
+ # --- Call VLM with STIO ---
83
  chat_completion = hf_client.chat.completions.create(
84
  model=HF_VLM_MODEL,
85
  messages=messages_payload,
86
  max_tokens=300
87
  )
88
 
 
89
  vlm_text = chat_completion.choices[0].message.content.strip()
90
+
91
+ # --- Use STIO to execute tool calls if present ---
92
+ tool_results = stio.run(vlm_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  return {
95
  "saved_to_hf_hub": True,
 
99
  "file_size_bytes": size_bytes,
100
  "robot_id": robot_id,
101
  "vlm_response": vlm_text,
102
+ "tool_results": tool_results
 
 
103
  }
104
 
105
  except Exception as e:
 
108
  # --- Gradio MCP Interface ---
109
  demo = gr.Interface(
110
  fn=process_and_describe,
111
+ inputs=gr.JSON(label="Input Payload"),
112
  outputs=gr.JSON(label="Reply to Jetson"),
113
  api_name="predict"
114
  )