Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Community

OppaAI committed on Nov 23, 2025

Commit

4456cc8

verified ·

1 Parent(s): 6916c39

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -25

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
 # mcp = FastMCP("Robot_MCP_Server") # Removed this line
 # ---------------------------------------------------
-#  Payload Schema
 # ---------------------------------------------------
 class RobotWatchPayload(BaseModel):
     hf_token: str = Field(description="Your Hugging Face API token.")
@@ -83,13 +83,10 @@ def safe_parse_json_from_text(text: str):
 # ---------------------------------------------------
-# Core VLM Analysis Logic (Renamed to avoid conflict)
 # ---------------------------------------------------
 def run_vlm_analysis(payload: RobotWatchPayload):
-    """
-    Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM).
-    """
-    # The payload is automatically validated by the time it reaches here if called via MCP
     hf_token = payload.hf_token
     image_b64 = payload.image_b64
     robot_id = payload.robot_id
@@ -141,24 +138,20 @@ Respond in STRICT JSON ONLY:
 # ---------------------------------------------------
-# Gradio UI Function (Uses individual fields)
 # ---------------------------------------------------
-def gradio_ui_with_fields(
     hf_token_input: str,
     robot_id_input: str,
-    image_file: gr.File # Gradio component for file upload
 ):
     """
-    Handles input from individual Gradio components, converts to Pydantic model,
-    and calls the core logic.
     """
-    if not image_file or not image_file.path:
-        return {"error": "Image file not uploaded."}
-    # Read the file from the path Gradio provides and convert to base64
-    with open(image_file.path, "rb") as f:
-        image_b64_input = base64.b64encode(f.read()).decode()
     # Create the Pydantic model instance manually
     payload_instance = RobotWatchPayload(
         hf_token=hf_token_input,
@@ -172,20 +165,17 @@ def gradio_ui_with_fields(
 app = gr.Interface(
-    fn=gradio_ui_with_fields, # Use the multi-input function for the UI
     inputs=[
         gr.Textbox(label="Hugging Face Token", lines=1),
         gr.Textbox(label="Robot ID", lines=1, value="unknown"),
-        gr.File(label="Upload Image (test.jpg)")
     ],
     outputs=gr.Json(label="Tool Output"),
-    title="Robot MCP Server (Field Inputs)",
-    description="Interface for the robot VLM analysis using individual fields.",
     api_name="predict"
 )
 if __name__ == "__main__":
-    # Note: When using this method, the automatic MCP schema might become invalid
-    # again because the *function signature* has changed dramatically.
-    # You might *still* need the `mcp==1.8.1` pin in requirements.txt to work.
-    app.launch(mcp_server=True)

 # mcp = FastMCP("Robot_MCP_Server") # Removed this line
 # ---------------------------------------------------
+#  Payload Schema (Remains the same as it already expects image_b64)
 # ---------------------------------------------------
 class RobotWatchPayload(BaseModel):
     hf_token: str = Field(description="Your Hugging Face API token.")
 # ---------------------------------------------------
+# Core VLM Analysis Logic (Remains the same)
 # ---------------------------------------------------
 def run_vlm_analysis(payload: RobotWatchPayload):
+    # ... (function body remains identical to previous version) ...
     hf_token = payload.hf_token
     image_b64 = payload.image_b64
     robot_id = payload.robot_id
 # ---------------------------------------------------
+# Gradio UI Function (NOW USES BASE64 STRING INPUT)
 # ---------------------------------------------------
+def gradio_ui_with_base64_fields(
     hf_token_input: str,
     robot_id_input: str,
+    image_b64_input: str # Changed input type to a string (base64)
 ):
     """
+    Handles input from individual Gradio components (including base64 string),
+    converts to Pydantic model, and calls the core logic.
     """
+    if not image_b64_input:
+        return {"error": "Base64 image string is empty."}
     # Create the Pydantic model instance manually
     payload_instance = RobotWatchPayload(
         hf_token=hf_token_input,
 app = gr.Interface(
+    fn=gradio_ui_with_base64_fields, # Use the new multi-input function for the UI
     inputs=[
         gr.Textbox(label="Hugging Face Token", lines=1),
         gr.Textbox(label="Robot ID", lines=1, value="unknown"),
+        gr.Textbox(label="Image Base64 String", lines=5) # Changed input component to Textbox
     ],
     outputs=gr.Json(label="Tool Output"),
+    title="Robot MCP Server (Base64 Inputs)",
+    description="Interface for the robot VLM analysis using individual fields, including base64 image string.",
     api_name="predict"
 )
 if __name__ == "__main__":
+    app.launch(mcp_server=True)