OppaAI committed on
Commit
91b3954
·
verified ·
1 Parent(s): ea7663a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -30
app.py CHANGED
@@ -3,18 +3,19 @@ import base64
3
  import json
4
  from datetime import datetime
5
  import traceback
6
- from typing import Dict, Any
7
 
8
  import gradio as gr
9
  from huggingface_hub import HfApi, InferenceClient
10
- from fastmcp import FastMCP
 
 
11
  from pydantic import BaseModel, Field
12
 
13
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
14
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
15
 
16
- mcp = FastMCP("Robot_MCP_Server")
17
-
18
 
19
  # ---------------------------------------------------
20
  # Payload Schema
@@ -26,7 +27,7 @@ class RobotWatchPayload(BaseModel):
26
 
27
 
28
  # ---------------------------------------------------
29
- # Upload Helper
30
  # ---------------------------------------------------
31
  def upload_image(image_b64: str, hf_token: str):
32
  try:
@@ -59,7 +60,7 @@ def upload_image(image_b64: str, hf_token: str):
59
 
60
 
61
  # ---------------------------------------------------
62
- # JSON Cleaning Helper
63
  # ---------------------------------------------------
64
  def safe_parse_json_from_text(text: str):
65
  if not text:
@@ -82,27 +83,13 @@ def safe_parse_json_from_text(text: str):
82
 
83
 
84
  # ---------------------------------------------------
85
- # TRUE MCP TOOL
86
  # ---------------------------------------------------
87
- def robot_watch(payload: RobotWatchPayload):
88
  """
89
- Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM) and return structured JSON.
90
-
91
- Args:
92
- payload (RobotWatchPayload): A Pydantic model containing:
93
- - hf_token (str): Your Hugging Face API token.
94
- - robot_id (str): The unique identifier for the robot.
95
- - image_b64 (str): Base64 encoded image data.
96
-
97
- Returns:
98
- dict: A dictionary containing:
99
- - status (str): "success" or "error".
100
- - robot_id (str): The ID of the robot.
101
- - file_size_bytes (int): Size of the uploaded image in bytes.
102
- - image_url (str): URL of the uploaded image on Hugging Face dataset.
103
- - result (dict): Parsed JSON response from the VLM containing "description", "human", "environment", "objects".
104
- - vlm_raw (str): Raw string response from the VLM model.
105
  """
 
106
  hf_token = payload.hf_token
107
  image_b64 = payload.image_b64
108
  robot_id = payload.robot_id
@@ -154,20 +141,28 @@ Respond in STRICT JSON ONLY:
154
 
155
 
156
  # ---------------------------------------------------
157
- # Gradio UI Placeholder
158
  # ---------------------------------------------------
159
- def robot_watch(payload):
160
- return {"message": "Use an MCP Client to call the robot_watch tool."}
 
 
 
 
 
161
 
162
 
163
  app = gr.Interface(
164
- fn=robot_watch,
165
- inputs=gr.JSON(),
166
- outputs=gr.JSON(),
 
167
  title="Robot MCP Server",
168
  description="A MCP Server to describe image obtained from the CV of a robot/webcam.",
169
  api_name="predict"
170
  )
171
 
172
  if __name__ == "__main__":
 
 
173
  app.launch(mcp_server=True)
 
3
  import json
4
  from datetime import datetime
5
  import traceback
6
+ # Removed unused typing import: from typing import Dict, Any
7
 
8
  import gradio as gr
9
  from huggingface_hub import HfApi, InferenceClient
10
+ # The FastMCP object is automatically initialized when you call app.launch(mcp_server=True)
11
+ # You don't need to manually instantiate FastMCP if only using Gradio's integration.
12
+ # from fastmcp import FastMCP # Removed manual import/instantiation
13
  from pydantic import BaseModel, Field
14
 
15
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
16
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
17
 
18
+ # mcp = FastMCP("Robot_MCP_Server") # Removed this line
 
19
 
20
  # ---------------------------------------------------
21
  # Payload Schema
 
27
 
28
 
29
  # ---------------------------------------------------
30
+ # Upload Helper (Remains the same)
31
  # ---------------------------------------------------
32
  def upload_image(image_b64: str, hf_token: str):
33
  try:
 
60
 
61
 
62
  # ---------------------------------------------------
63
+ # JSON Cleaning Helper (Remains the same)
64
  # ---------------------------------------------------
65
  def safe_parse_json_from_text(text: str):
66
  if not text:
 
83
 
84
 
85
  # ---------------------------------------------------
86
+ # Core VLM Analysis Logic (Renamed to avoid conflict)
87
  # ---------------------------------------------------
88
+ def run_vlm_analysis(payload: RobotWatchPayload):
89
  """
90
+ Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  """
92
+ # The payload is automatically validated by the time it reaches here if called via MCP
93
  hf_token = payload.hf_token
94
  image_b64 = payload.image_b64
95
  robot_id = payload.robot_id
 
141
 
142
 
143
  # ---------------------------------------------------
144
+ # Gradio UI Function
145
  # ---------------------------------------------------
146
+ def gradio_interface_fn(payload: RobotWatchPayload):
147
+ """
148
+ This function acts as the entry point for both the Gradio UI and the MCP Server endpoint.
149
+ Using the Pydantic model ensures a valid JSON schema is exposed.
150
+ """
151
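+ # NOTE(review): the gr.Json input component passes parsed JSON (a plain dict), not a RobotWatchPayload instance — confirm the payload is converted/validated (e.g. RobotWatchPayload(**payload)) before attribute access.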
152
+ return run_vlm_analysis(payload)
153
 
154
 
155
  app = gr.Interface(
156
+ fn=gradio_interface_fn, # Use the single entry point function
157
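+ # NOTE(review): Gradio documents this component as gr.JSON; gr.Json appears to be only an alias of it, so this rename is cosmetic rather than a correction — confirm against the installed Gradio version.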
158
+ inputs=gr.Json(label="Input Payload (Pydantic Schema Applied)"),
159
+ outputs=gr.Json(label="Tool Output"),
160
  title="Robot MCP Server",
161
  description="A MCP Server to describe image obtained from the CV of a robot/webcam.",
162
  api_name="predict"
163
  )
164
 
165
  if __name__ == "__main__":
166
+ # Gradio will use the function signature of `gradio_interface_fn`
167
+ # (which uses RobotWatchPayload) to generate a valid MCP tool schema.
168
  app.launch(mcp_server=True)