Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Files Community

OppaAI committed on Nov 23, 2025

Commit

91b3954

verified ·

1 Parent(s): ea7663a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -30

app.py CHANGED Viewed

@@ -3,18 +3,19 @@ import base64
 import json
 from datetime import datetime
 import traceback
-from typing import Dict, Any
 import gradio as gr
 from huggingface_hub import HfApi, InferenceClient
-from fastmcp import FastMCP
 from pydantic import BaseModel, Field
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
-mcp = FastMCP("Robot_MCP_Server")
 # ---------------------------------------------------
 #  Payload Schema
@@ -26,7 +27,7 @@ class RobotWatchPayload(BaseModel):
 # ---------------------------------------------------
-# Upload Helper
 # ---------------------------------------------------
 def upload_image(image_b64: str, hf_token: str):
     try:
@@ -59,7 +60,7 @@ def upload_image(image_b64: str, hf_token: str):
 # ---------------------------------------------------
-# JSON Cleaning Helper
 # ---------------------------------------------------
 def safe_parse_json_from_text(text: str):
     if not text:
@@ -82,27 +83,13 @@ def safe_parse_json_from_text(text: str):
 # ---------------------------------------------------
-# TRUE MCP TOOL
 # ---------------------------------------------------
-def robot_watch(payload: RobotWatchPayload):
     """
-    Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM) and return structured JSON.
-    Args:
-        payload (RobotWatchPayload): A Pydantic model containing:
-            - hf_token (str): Your Hugging Face API token.
-            - robot_id (str): The unique identifier for the robot.
-            - image_b64 (str): Base64 encoded image data.
-    Returns:
-        dict: A dictionary containing:
-            - status (str): "success" or "error".
-            - robot_id (str): The ID of the robot.
-            - file_size_bytes (int): Size of the uploaded image in bytes.
-            - image_url (str): URL of the uploaded image on Hugging Face dataset.
-            - result (dict): Parsed JSON response from the VLM containing "description", "human", "environment", "objects".
-            - vlm_raw (str): Raw string response from the VLM model.
     """
     hf_token = payload.hf_token
     image_b64 = payload.image_b64
     robot_id = payload.robot_id
@@ -154,20 +141,28 @@ Respond in STRICT JSON ONLY:
 # ---------------------------------------------------
-# Gradio UI Placeholder
 # ---------------------------------------------------
-def robot_watch(payload):
-    return {"message": "Use an MCP Client to call the robot_watch tool."}
 app = gr.Interface(
-    fn=robot_watch,
-    inputs=gr.JSON(),
-    outputs=gr.JSON(),
     title="Robot MCP Server",
     description="A MCP Server to describe image obtained from the CV of a robot/webcam.",
     api_name="predict"
 )
 if __name__ == "__main__":
     app.launch(mcp_server=True)

 import json
 from datetime import datetime
 import traceback
+# Removed unused typing import: from typing import Dict, Any
 import gradio as gr
 from huggingface_hub import HfApi, InferenceClient
+# The FastMCP object is automatically initialized when you call app.launch(mcp_server=True)
+# You don't need to manually instantiate FastMCP if only using Gradio's integration.
+# from fastmcp import FastMCP # Removed manual import/instantiation
 from pydantic import BaseModel, Field
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
+# mcp = FastMCP("Robot_MCP_Server") # Removed this line
 # ---------------------------------------------------
 #  Payload Schema
 # ---------------------------------------------------
+# Upload Helper (Remains the same)
 # ---------------------------------------------------
 def upload_image(image_b64: str, hf_token: str):
     try:
 # ---------------------------------------------------
+# JSON Cleaning Helper (Remains the same)
 # ---------------------------------------------------
 def safe_parse_json_from_text(text: str):
     if not text:
 # ---------------------------------------------------
+# Core VLM Analysis Logic (Renamed to avoid conflict)
 # ---------------------------------------------------
+def run_vlm_analysis(payload: RobotWatchPayload):
     """
+    Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM).
     """
+    # The payload is automatically validated by the time it reaches here if called via MCP
     hf_token = payload.hf_token
     image_b64 = payload.image_b64
     robot_id = payload.robot_id
 # ---------------------------------------------------
+# Gradio UI Function
 # ---------------------------------------------------
+def gradio_interface_fn(payload: RobotWatchPayload):
+    """
+    This function acts as the entry point for both the Gradio UI and the MCP Server endpoint.
+    Using the Pydantic model ensures a valid JSON schema is exposed.
+    """
+    # When called via MCP, the input is already a RobotWatchPayload instance.
+    return run_vlm_analysis(payload)
 app = gr.Interface(
+    fn=gradio_interface_fn, # Use the single entry point function
+    # Corrected input component from gr.JSON() to gr.Json() as per Gradio documentation
+    inputs=gr.Json(label="Input Payload (Pydantic Schema Applied)"),
+    outputs=gr.Json(label="Tool Output"),
     title="Robot MCP Server",
     description="A MCP Server to describe image obtained from the CV of a robot/webcam.",
     api_name="predict"
 )
 if __name__ == "__main__":
+    # Gradio will use the function signature of `gradio_interface_fn`
+    # (which uses RobotWatchPayload) to generate a valid MCP tool schema.
     app.launch(mcp_server=True)