OppaAI committed on
Commit
ea7663a
·
verified ·
1 Parent(s): 9f6e9fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -5
app.py CHANGED
@@ -84,8 +84,25 @@ def safe_parse_json_from_text(text: str):
84
  # ---------------------------------------------------
85
  # TRUE MCP TOOL
86
  # ---------------------------------------------------
87
- @mcp.tool("robot_watch", description="Analyze a base64 image using Qwen VLM and return structured JSON.")
88
- def robot_watch_tool(payload: RobotWatchPayload):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  hf_token = payload.hf_token
90
  image_b64 = payload.image_b64
91
  robot_id = payload.robot_id
@@ -139,16 +156,17 @@ Respond in STRICT JSON ONLY:
139
  # ---------------------------------------------------
140
  # Gradio UI Placeholder
141
  # ---------------------------------------------------
142
- def robot_watch_ui(payload):
143
  return {"message": "Use an MCP Client to call the robot_watch tool."}
144
 
145
 
146
  app = gr.Interface(
147
- fn=robot_watch_ui,
148
  inputs=gr.JSON(),
149
  outputs=gr.JSON(),
150
  title="Robot MCP Server",
151
- description="A MCP Server to describe image obtained from the CV of a robot/webcam."
 
152
  )
153
 
154
  if __name__ == "__main__":
 
84
  # ---------------------------------------------------
85
  # TRUE MCP TOOL
86
  # ---------------------------------------------------
87
+ def robot_watch(payload: RobotWatchPayload):
88
+ """
89
+ Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM) and return structured JSON.
90
+
91
+ Args:
92
+ payload (RobotWatchPayload): A Pydantic model containing:
93
+ - hf_token (str): Your Hugging Face API token.
94
+ - robot_id (str): The unique identifier for the robot.
95
+ - image_b64 (str): Base64 encoded image data.
96
+
97
+ Returns:
98
+ dict: A dictionary containing:
99
+ - status (str): "success" or "error".
100
+ - robot_id (str): The ID of the robot.
101
+ - file_size_bytes (int): Size of the uploaded image in bytes.
102
+ - image_url (str): URL of the uploaded image on Hugging Face dataset.
103
+ - result (dict): Parsed JSON response from the VLM containing "description", "human", "environment", "objects".
104
+ - vlm_raw (str): Raw string response from the VLM model.
105
+ """
106
  hf_token = payload.hf_token
107
  image_b64 = payload.image_b64
108
  robot_id = payload.robot_id
 
156
  # ---------------------------------------------------
157
  # Gradio UI Placeholder
158
  # ---------------------------------------------------
159
+ def robot_watch(payload):
160
  return {"message": "Use an MCP Client to call the robot_watch tool."}
161
 
162
 
163
  app = gr.Interface(
164
+ fn=robot_watch,
165
  inputs=gr.JSON(),
166
  outputs=gr.JSON(),
167
  title="Robot MCP Server",
168
+ description="A MCP Server to describe image obtained from the CV of a robot/webcam.",
169
+ api_name="predict"
170
  )
171
 
172
  if __name__ == "__main__":