OppaAI committed on
Commit
80c4ab2
·
verified ·
1 Parent(s): 99974e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import base64
3
  import json
4
  import gradio as gr
5
- from huggingface_hub import upload_file, InferenceClient
6
  from datetime import datetime
7
  import traceback
8
  from typing import Optional, Dict, Any
@@ -31,7 +31,6 @@ def save_and_upload_image(image_b64: str, hf_token: str):
31
 
32
  filename = f"robot_{timestamp}.jpg"
33
 
34
- from huggingface_hub import HfApi
35
  api = HfApi()
36
  api.upload_file(
37
  path_or_fileobj=local_path,
@@ -110,19 +109,28 @@ def process_and_describe(payload: Dict[str, Any]) -> Dict[str, Any]:
110
  if not hf_url:
111
  return {"error": "Image upload failed"}
112
 
113
- # VLM system prompt
114
  system_prompt = f"""
115
- Respond in STRICT JSON ONLY:
 
 
 
 
 
 
 
 
116
  {{
117
- "description": "detail visual description",
118
- "tool_name": "speak | navigate | scan_hazard | analyze_human",
119
- "arguments": {{ ... }}
120
  }}
121
  """
 
122
  messages = [
123
  {"role": "system", "content": system_prompt},
124
  {"role": "user", "content": [
125
- {"type": "text", "text": "Analyze the image and choose ONE tool."},
126
  {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
127
  ]}
128
  ]
@@ -143,9 +151,12 @@ Respond in STRICT JSON ONLY:
143
  if parsed is None:
144
  return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
145
 
 
146
  tool_name = parsed.get("tool_name")
147
  tool_args = parsed.get("arguments") or {}
148
- tool_result = validate_and_call_tool(tool_name, tool_args)
 
 
149
 
150
  return {
151
  "status": "success",
@@ -177,4 +188,4 @@ if __name__ == "__main__":
177
  print(f"[Config] HF_VLM_MODEL: {HF_VLM_MODEL}")
178
  print(f"[Config] REMOTE_MCP_URL: {REMOTE_MCP_URL}")
179
  print("[Gradio] Launching interface...")
180
- app.launch(server_name="0.0.0.0", server_port=7860)
 
2
  import base64
3
  import json
4
  import gradio as gr
5
+ from huggingface_hub import HfApi, InferenceClient
6
  from datetime import datetime
7
  import traceback
8
  from typing import Optional, Dict, Any
 
31
 
32
  filename = f"robot_{timestamp}.jpg"
33
 
 
34
  api = HfApi()
35
  api.upload_file(
36
  path_or_fileobj=local_path,
 
109
  if not hf_url:
110
  return {"error": "Image upload failed"}
111
 
112
+ # VLM system prompt: decide MCP tool automatically
113
  system_prompt = f"""
114
+ Respond in STRICT JSON ONLY.
115
+ Rules:
116
+ 1. Provide a short description of what you see.
117
+ 2. Decide ONE MCP tool to call from:
118
+ - chat_with_human
119
+ 3. If a human is looking directly at the robot and waving, set "tool_name": "chat_with_human".
120
+ 4. Otherwise, set "tool_name": null and leave "arguments": {{}}
121
+
122
+ Output format:
123
  {{
124
+ "description": "...",
125
+ "tool_name": "chat_with_human | null",
126
+ "arguments": {{}}
127
  }}
128
  """
129
+
130
  messages = [
131
  {"role": "system", "content": system_prompt},
132
  {"role": "user", "content": [
133
+ {"type": "text", "text": "Analyze the image and call the appropriate MCP tool."},
134
  {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
135
  ]}
136
  ]
 
151
  if parsed is None:
152
  return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
153
 
154
+ # Call the MCP tool directly if VLM chooses one
155
  tool_name = parsed.get("tool_name")
156
  tool_args = parsed.get("arguments") or {}
157
+ tool_result = None
158
+ if tool_name:
159
+ tool_result = validate_and_call_tool(tool_name, tool_args)
160
 
161
  return {
162
  "status": "success",
 
188
  print(f"[Config] HF_VLM_MODEL: {HF_VLM_MODEL}")
189
  print(f"[Config] REMOTE_MCP_URL: {REMOTE_MCP_URL}")
190
  print("[Gradio] Launching interface...")
191
+ app.launch()