OppaAI committed on
Commit
4456cc8
·
verified ·
1 Parent(s): 6916c39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -25
app.py CHANGED
@@ -18,7 +18,7 @@ HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
18
  # mcp = FastMCP("Robot_MCP_Server") # Removed this line
19
 
20
  # ---------------------------------------------------
21
- # Payload Schema
22
  # ---------------------------------------------------
23
  class RobotWatchPayload(BaseModel):
24
  hf_token: str = Field(description="Your Hugging Face API token.")
@@ -83,13 +83,10 @@ def safe_parse_json_from_text(text: str):
83
 
84
 
85
  # ---------------------------------------------------
86
- # Core VLM Analysis Logic (Renamed to avoid conflict)
87
  # ---------------------------------------------------
88
  def run_vlm_analysis(payload: RobotWatchPayload):
89
- """
90
- Analyze a base64-encoded image using a Hugging Face Vision-Language Model (VLM).
91
- """
92
- # The payload is automatically validated by the time it reaches here if called via MCP
93
  hf_token = payload.hf_token
94
  image_b64 = payload.image_b64
95
  robot_id = payload.robot_id
@@ -141,24 +138,20 @@ Respond in STRICT JSON ONLY:
141
 
142
 
143
  # ---------------------------------------------------
144
- # Gradio UI Function (Uses individual fields)
145
  # ---------------------------------------------------
146
- def gradio_ui_with_fields(
147
  hf_token_input: str,
148
  robot_id_input: str,
149
- image_file: gr.File # Gradio component for file upload
150
  ):
151
  """
152
- Handles input from individual Gradio components, converts to Pydantic model,
153
- and calls the core logic.
154
  """
155
- if not image_file or not image_file.path:
156
- return {"error": "Image file not uploaded."}
157
 
158
- # Read the file from the path Gradio provides and convert to base64
159
- with open(image_file.path, "rb") as f:
160
- image_b64_input = base64.b64encode(f.read()).decode()
161
-
162
  # Create the Pydantic model instance manually
163
  payload_instance = RobotWatchPayload(
164
  hf_token=hf_token_input,
@@ -172,20 +165,17 @@ def gradio_ui_with_fields(
172
 
173
 
174
  app = gr.Interface(
175
- fn=gradio_ui_with_fields, # Use the multi-input function for the UI
176
  inputs=[
177
  gr.Textbox(label="Hugging Face Token", lines=1),
178
  gr.Textbox(label="Robot ID", lines=1, value="unknown"),
179
- gr.File(label="Upload Image (test.jpg)")
180
  ],
181
  outputs=gr.Json(label="Tool Output"),
182
- title="Robot MCP Server (Field Inputs)",
183
- description="Interface for the robot VLM analysis using individual fields.",
184
  api_name="predict"
185
  )
186
 
187
  if __name__ == "__main__":
188
- # Note: When using this method, the automatic MCP schema might become invalid
189
- # again because the *function signature* has changed dramatically.
190
- # You might *still* need the `mcp==1.8.1` pin in requirements.txt to work.
191
- app.launch(mcp_server=True)
 
18
  # mcp = FastMCP("Robot_MCP_Server") # Removed this line
19
 
20
  # ---------------------------------------------------
21
+ # Payload Schema (Remains the same as it already expects image_b64)
22
  # ---------------------------------------------------
23
  class RobotWatchPayload(BaseModel):
24
  hf_token: str = Field(description="Your Hugging Face API token.")
 
83
 
84
 
85
  # ---------------------------------------------------
86
+ # Core VLM Analysis Logic (Remains the same)
87
  # ---------------------------------------------------
88
  def run_vlm_analysis(payload: RobotWatchPayload):
89
+ # ... (function body remains identical to previous version) ...
 
 
 
90
  hf_token = payload.hf_token
91
  image_b64 = payload.image_b64
92
  robot_id = payload.robot_id
 
138
 
139
 
140
  # ---------------------------------------------------
141
+ # Gradio UI Function (NOW USES BASE64 STRING INPUT)
142
  # ---------------------------------------------------
143
+ def gradio_ui_with_base64_fields(
144
  hf_token_input: str,
145
  robot_id_input: str,
146
+ image_b64_input: str # Changed input type to a string (base64)
147
  ):
148
  """
149
+ Handles input from individual Gradio components (including base64 string),
150
+ converts to Pydantic model, and calls the core logic.
151
  """
152
+ if not image_b64_input:
153
+ return {"error": "Base64 image string is empty."}
154
 
 
 
 
 
155
  # Create the Pydantic model instance manually
156
  payload_instance = RobotWatchPayload(
157
  hf_token=hf_token_input,
 
165
 
166
 
167
  app = gr.Interface(
168
+ fn=gradio_ui_with_base64_fields, # Use the new multi-input function for the UI
169
  inputs=[
170
  gr.Textbox(label="Hugging Face Token", lines=1),
171
  gr.Textbox(label="Robot ID", lines=1, value="unknown"),
172
+ gr.Textbox(label="Image Base64 String", lines=5) # Changed input component to Textbox
173
  ],
174
  outputs=gr.Json(label="Tool Output"),
175
+ title="Robot MCP Server (Base64 Inputs)",
176
+ description="Interface for the robot VLM analysis using individual fields, including base64 image string.",
177
  api_name="predict"
178
  )
179
 
180
  if __name__ == "__main__":
181
+ app.launch(mcp_server=True)