Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Community

OppaAI committed on Nov 23, 2025

Commit

ea7663a

verified ·

1 Parent(s): 9f6e9fd

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -5

app.py CHANGED Viewed

@@ -84,8 +84,25 @@ def safe_parse_json_from_text(text: str):
 # ---------------------------------------------------
 # TRUE MCP TOOL
 # ---------------------------------------------------
-@mcp.tool("robot_watch", description="Analyze a base64 image using Qwen VLM and return structured JSON.")
-def robot_watch_tool(payload: RobotWatchPayload):
     hf_token = payload.hf_token
     image_b64 = payload.image_b64
     robot_id = payload.robot_id
@@ -139,16 +156,17 @@ Respond in STRICT JSON ONLY:
 # ---------------------------------------------------
 # Gradio UI Placeholder
 # ---------------------------------------------------
-def robot_watch_ui(payload):
     return {"message": "Use an MCP Client to call the robot_watch tool."}
 app = gr.Interface(
-    fn=robot_watch_ui,
     inputs=gr.JSON(),
     outputs=gr.JSON(),
     title="Robot MCP Server",
-    description="A MCP Server to describe image obtained from the CV of a robot/webcam."
 )
 if __name__ == "__main__":

 # ---------------------------------------------------
 # TRUE MCP TOOL
 # ---------------------------------------------------
+def robot_watch(payload: RobotWatchPayload):
+    """
+    Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM) and return structured JSON.
+    Args:
+        payload (RobotWatchPayload): A Pydantic model containing:
+            - hf_token (str): Your Hugging Face API token.
+            - robot_id (str): The unique identifier for the robot.
+            - image_b64 (str): Base64 encoded image data.
+    Returns:
+        dict: A dictionary containing:
+            - status (str): "success" or "error".
+            - robot_id (str): The ID of the robot.
+            - file_size_bytes (int): Size of the uploaded image in bytes.
+            - image_url (str): URL of the uploaded image on Hugging Face dataset.
+            - result (dict): Parsed JSON response from the VLM containing "description", "human", "environment", "objects".
+            - vlm_raw (str): Raw string response from the VLM model.
+    """
     hf_token = payload.hf_token
     image_b64 = payload.image_b64
     robot_id = payload.robot_id
 # ---------------------------------------------------
 # Gradio UI Placeholder
 # ---------------------------------------------------
+def robot_watch(payload):
     return {"message": "Use an MCP Client to call the robot_watch tool."}
 app = gr.Interface(
+    fn=robot_watch,
     inputs=gr.JSON(),
     outputs=gr.JSON(),
     title="Robot MCP Server",
+    description="A MCP Server to describe image obtained from the CV of a robot/webcam.",
+    api_name="predict"
 )
 if __name__ == "__main__":