Update app.py
app.py CHANGED
@@ -10,19 +10,16 @@ HF_DATASET_REPO = "OppaAI/Robot_MCP"
 HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
 
 # --- MCP server instance ---
-mcp = FastMCP("Robot MCP")
-
-# --- STIO for the LLM ---
-#stio = STIO(mcp) # Bind STIO to MCP tools
+mcp = FastMCP(name="Robot MCP")
 
 # --- MCP Tool ---
 @mcp.tool()
-def say_hi(greeting_text: str = "Hi there!"):
+def say_hi(greeting_text: str = "Hi there!") -> dict:
     """Return a greeting command in JSON."""
     return {"command": "say_hi", "text": greeting_text}
 
 # --- Helper Functions ---
-def save_and_upload_image(image_b64, hf_token):
+def save_and_upload_image(image_b64: str, hf_token: str):
     image_bytes = base64.b64decode(image_b64)
     local_tmp_path = "/tmp/tmp.jpg"
     with open(local_tmp_path, "wb") as f:
@@ -58,17 +55,19 @@ def process_and_describe(payload: dict):
         # Initialize HF client
         hf_client = InferenceClient(token=hf_token)
 
-        #
-        system_prompt =
-        You are a helpful robot assistant.
+        # System prompt (without stio.describe_tools because not using STIO here)
+        system_prompt = """
+        You are a helpful robot assistant.
         When you receive an image, you must:
-        1
-        2
-        - Human figure → call `say_hi` tool with a friendly greeting (vary every time)
-
-
-
-
+        1. Describe the image in detail.
+        2. Decide actions for the robot. Example:
+        - Human figure → call the `say_hi` tool with a friendly greeting (vary every time)
+        Always respond in JSON with:
+        {
+          "description": "...",
+          "action": "say_hi",
+          "greeting_text": "a friendly greeting"
+        }
         """
 
         messages_payload = [
@@ -79,7 +78,7 @@ def process_and_describe(payload: dict):
             ]}
         ]
 
-        #
+        # Call VLM
         chat_completion = hf_client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages_payload,
@@ -88,8 +87,17 @@ def process_and_describe(payload: dict):
 
         vlm_text = chat_completion.choices[0].message.content.strip()
 
-        #
-
+        # Parse JSON from VLM
+        try:
+            action_data = json.loads(vlm_text)
+        except json.JSONDecodeError:
+            action_data = {"description": vlm_text, "action": None, "greeting_text": None}
+
+        # Call the tool if action == say_hi
+        tool_result = None
+        if action_data.get("action") == "say_hi":
+            greeting = action_data.get("greeting_text") or "Hi!"
+            tool_result = say_hi(greeting_text=greeting)
 
         return {
             "saved_to_hf_hub": True,
@@ -99,7 +107,9 @@ def process_and_describe(payload: dict):
             "file_size_bytes": size_bytes,
             "robot_id": robot_id,
             "vlm_response": vlm_text,
-            "
+            "vlm_action": action_data.get("action"),
+            "vlm_description": action_data.get("description"),
+            "tool_result": tool_result
         }
 
     except Exception as e:
@@ -113,6 +123,14 @@ demo = gr.Interface(
     api_name="predict"
 )
 
-#
 if __name__ == "__main__":
+    # Run FastMCP server *in the same process* (blocking)
+    import threading
+
+    def run_mcp():
+        mcp.run(transport="stdio")
+
+    t = threading.Thread(target=run_mcp, daemon=True)
+    t.start()
+
     demo.launch(mcp_server=True)
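
For reference, here is a minimal, self-contained sketch of how the parse-and-dispatch path added in this commit behaves once the VLM replies. It is illustrative only: the local say_hi stand-in mirrors the MCP tool's return shape, and the sample reply string is invented, not real model output.

import json

def say_hi(greeting_text: str = "Hi there!") -> dict:
    # Stand-in that mirrors the MCP tool's return shape
    return {"command": "say_hi", "text": greeting_text}

def dispatch_vlm_reply(vlm_text: str):
    # Parse the VLM reply; fall back to a plain description on invalid JSON
    try:
        action_data = json.loads(vlm_text)
    except json.JSONDecodeError:
        action_data = {"description": vlm_text, "action": None, "greeting_text": None}

    # Call say_hi only when the model asked for it
    tool_result = None
    if action_data.get("action") == "say_hi":
        tool_result = say_hi(greeting_text=action_data.get("greeting_text") or "Hi!")
    return action_data, tool_result

# Invented reply for illustration
sample = '{"description": "a person waving", "action": "say_hi", "greeting_text": "Hello there!"}'
print(dispatch_vlm_reply(sample))
# ({'description': 'a person waving', 'action': 'say_hi', 'greeting_text': 'Hello there!'},
#  {'command': 'say_hi', 'text': 'Hello there!'})

The JSONDecodeError fallback keeps process_and_describe from raising when the model answers in prose instead of JSON; the raw text is still returned in vlm_response.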