OppaAI committed on
Commit
a837225
·
verified ·
1 Parent(s): c971202

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -103
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py (MCP + HF Space unified)
2
  import os
3
  import base64
4
  import json
@@ -6,75 +6,69 @@ import gradio as gr
6
  from huggingface_hub import upload_file, InferenceClient
7
  from datetime import datetime
8
  import traceback
9
- from typing import Tuple, Optional, Dict, Any
 
 
10
  from fastmcp import FastMCP
11
 
 
12
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
13
  HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
14
 
15
- # ================================================================
16
- # MCP SERVER + TOOLS (FASTMCP)
17
- # ================================================================
18
- mcp = FastMCP("Robot_MCP_Server")
19
 
20
- # -------------------------
21
- # MCP Tools
22
- # -------------------------
23
  @mcp.tool()
24
- def speak(text: str, emotion: str = "neutral") -> dict:
25
- """
26
- Speak something with a given emotion.
27
- """
28
  return {
29
  "status": "success",
30
  "action_executed": "speak",
31
- "payload": {"text": text, "emotion": emotion}
32
  }
33
 
 
34
  @mcp.tool()
35
- def navigate(direction: str, distance_meters: float) -> dict:
36
- """
37
- Navigate the robot safely. Max distance: 5m.
38
- """
39
  if distance_meters > 5.0:
40
  return {"status": "error", "message": "Safety limit exceeded"}
41
  return {
42
  "status": "success",
43
  "action_executed": "navigate",
44
- "payload": {"direction": direction, "distance": distance_meters}
45
  }
46
 
 
47
  @mcp.tool()
48
- def scan_hazard(hazard_type: str, severity: str) -> dict:
49
- """
50
- Log a hazard event.
51
- """
52
  timestamp = datetime.now().isoformat()
53
  return {
54
  "status": "warning_logged",
55
- "log": f"[{timestamp}] HAZARD: {hazard_type} (Severity: {severity})"
56
  }
57
 
 
58
  @mcp.tool()
59
- def analyze_human(clothing_color: str, estimated_action: str) -> dict:
60
- """
61
- Describe a detected human.
62
- """
63
  return {
64
  "status": "human_tracked",
65
- "details": f"Human wearing {clothing_color} is {estimated_action}"
66
  }
67
 
68
- # MCP tool definitions to embed into VLM system prompt
69
- TOOL_SPECS = mcp.get_tool_schemas()
70
 
71
- # ================================================================
72
- # HELPER: SAVE + UPLOAD IMAGE
73
- # ================================================================
74
- def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str], Optional[str], Optional[str], int]:
75
  try:
76
  image_bytes = base64.b64decode(image_b64)
77
  size_bytes = len(image_bytes)
 
78
 
79
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
80
  local_path = f"/tmp/robot_img_{timestamp}.jpg"
@@ -82,6 +76,8 @@ def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str],
82
  with open(local_path, "wb") as f:
83
  f.write(image_bytes)
84
 
 
 
85
  filename = f"robot_{timestamp}.jpg"
86
 
87
  upload_file(
@@ -89,24 +85,25 @@ def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str],
89
  path_in_repo=filename,
90
  repo_id=HF_DATASET_REPO,
91
  token=hf_token,
92
- repo_type="dataset"
93
  )
94
 
 
 
95
  url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{filename}"
96
  return local_path, url, filename, size_bytes
97
 
98
- except Exception as e:
99
  traceback.print_exc()
100
  return None, None, None, 0
101
 
102
 
103
- # ================================================================
104
- # VLM JSON PARSER
105
- # ================================================================
106
- def safe_parse_json_from_text(text: str) -> Optional[dict]:
107
  if not text:
108
  return None
109
-
110
  try:
111
  return json.loads(text)
112
  except:
@@ -117,40 +114,39 @@ def safe_parse_json_from_text(text: str) -> Optional[dict]:
117
  start = cleaned.find("{")
118
  end = cleaned.rfind("}")
119
  if start >= 0 and end > start:
120
- return json.loads(cleaned[start:end+1])
121
  except:
122
  pass
123
 
124
  return None
125
 
126
 
127
- # ================================================================
128
- # EXECUTE TOOL USING MCP INTERNAL DISPATCH
129
- # ================================================================
130
- def execute_tool(tool_name: str, tool_args: dict):
131
- tools = {t["name"]: t for t in TOOL_SPECS}
132
-
133
- if tool_name not in tools:
134
- return {"error": f"Unknown tool '{tool_name}'"}
135
-
136
  try:
137
- # Run actual MCP tool function
138
- fn = mcp.tools[tool_name]
139
- return fn(**tool_args)
140
  except Exception as e:
141
  traceback.print_exc()
142
- return {"error": f"Tool execution error: {str(e)}"}
143
 
144
 
145
- # ================================================================
146
- # MAIN API HANDLER (used by Gradio)
147
- # ================================================================
148
  def process_and_describe(payload):
 
149
  if isinstance(payload, str):
150
  try:
151
  payload = json.loads(payload)
152
  except:
153
- return {"error": "Invalid JSON string"}
 
 
 
154
 
155
  hf_token = payload.get("hf_token")
156
  if not hf_token:
@@ -158,95 +154,99 @@ def process_and_describe(payload):
158
 
159
  robot_id = payload.get("robot_id", "unknown")
160
  image_b64 = payload.get("image_b64")
161
-
162
  if not image_b64:
163
  return {"error": "image_b64 missing"}
164
 
165
- # ---- save & upload ----
166
- local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
 
 
 
167
  if not hf_url:
168
  return {"error": "Image upload failed"}
169
 
170
- # ---- Build VLM prompt ----
171
- tool_list_json = json.dumps(TOOL_SPECS, indent=2)
172
-
173
- system_prompt = f"""
174
- You are an AI that MUST respond in valid JSON only.
175
-
176
- You have the following robot tools available:
177
- {tool_list_json}
178
 
179
- Return ONLY this format:
180
-
181
- {{
 
182
  "description": "short visual description",
183
- "tool_name": "<one of the tool names>",
184
- "arguments": {{ ... }}
185
- }}
186
  """
187
 
188
  messages = [
189
  {"role": "system", "content": system_prompt},
190
- {"role": "user", "content": [
191
- {"type": "text", "text": "Analyze the image and pick EXACTLY ONE tool."},
192
- {"type": "image_url",
193
- "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
194
- ]}
 
 
 
 
 
195
  ]
196
 
 
 
197
  client = InferenceClient(token=hf_token)
198
 
199
  response = client.chat.completions.create(
200
  model=HF_VLM_MODEL,
201
  messages=messages,
 
202
  temperature=0.1,
203
- max_tokens=300
204
  )
205
 
206
- vlm_raw = response.choices[0].message.content.strip()
 
 
 
 
 
 
207
 
208
- parsed = safe_parse_json_from_text(vlm_raw)
209
- if not parsed:
210
  return {
211
  "status": "model_no_json",
212
  "robot_id": robot_id,
213
  "image_url": hf_url,
214
- "vlm_raw": vlm_raw,
215
- "error": "VLM did not provide valid JSON"
216
  }
217
 
218
  tool_name = parsed.get("tool_name")
219
  tool_args = parsed.get("arguments") or {}
220
 
221
- tool_exec = execute_tool(tool_name, tool_args)
222
 
223
- result = {
224
  "status": "success",
225
  "robot_id": robot_id,
226
  "image_url": hf_url,
227
- "image_bytes": size_bytes,
228
- "analysis": parsed.get("description"),
229
  "chosen_tool": tool_name,
230
  "tool_arguments": tool_args,
231
- "tool_execution_result": tool_exec,
232
- "vlm_raw": vlm_raw
233
  }
234
 
235
- return result
236
-
237
 
238
- # ================================================================
239
- # GRADIO API (for your client script)
240
- # ================================================================
241
  iface = gr.Interface(
242
  fn=process_and_describe,
243
  inputs=gr.JSON(label="Input JSON"),
244
  outputs=gr.JSON(label="Output JSON"),
245
  api_name="predict",
246
- allow_flagging="never"
247
  )
248
 
249
  if __name__ == "__main__":
250
- # Start MCP server (background)
251
- mcp.run_in_thread()
252
- iface.launch()
 
1
+ # app.py
2
  import os
3
  import base64
4
  import json
 
6
  from huggingface_hub import upload_file, InferenceClient
7
  from datetime import datetime
8
  import traceback
9
+ import threading
10
+ from typing import Optional, Dict, Any, Tuple
11
+
12
  from fastmcp import FastMCP
13
 
14
+
15
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
16
  HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
17
 
18
+ mcp = FastMCP("Robot_MCP")
 
 
 
19
 
20
+ # -----------------------------------------------------
21
+ # Register Robot Tools (MCP)
22
+ # -----------------------------------------------------
23
@mcp.tool()
def speak(text: str, emotion: str = "neutral"):
    """Robot speech output"""
    # Echo the request back so the caller can see exactly what was "spoken".
    spoken = {"text": text, "emotion": emotion}
    return {"status": "success", "action_executed": "speak", "payload": spoken}
31
 
32
+
33
  @mcp.tool()
34
def navigate(direction: str, distance_meters: float):
    """Move robot safely"""
    # Returns an error dict when the requested distance is not a finite
    # number or its magnitude exceeds the 5 m safety limit; otherwise returns
    # a success dict echoing the direction and distance.
    import math

    # A plain `distance_meters > 5.0` test lets NaN (every comparison is
    # False), -inf and large negative values through as "success", so check
    # finiteness and magnitude instead.
    if not math.isfinite(distance_meters) or abs(distance_meters) > 5.0:
        return {"status": "error", "message": "Safety limit exceeded"}
    return {
        "status": "success",
        "action_executed": "navigate",
        "payload": {"direction": direction, "distance": distance_meters},
    }
43
 
44
+
45
@mcp.tool()
def scan_hazard(hazard_type: str, severity: str):
    """Hazard scan + log"""
    # Stamp the entry with the current time in ISO-8601 form.
    logged_at = datetime.now().isoformat()
    entry = f"[{logged_at}] HAZARD: {hazard_type} (Severity: {severity})"
    return {"status": "warning_logged", "log": entry}
53
 
54
+
55
@mcp.tool()
def analyze_human(clothing_color: str, estimated_action: str):
    """Human detection description"""
    # Fold both observations into a single human-readable sentence.
    summary = f"Human wearing {clothing_color} is {estimated_action}"
    return {"status": "human_tracked", "details": summary}
62
 
 
 
63
 
64
+ # -----------------------------------------------------
65
+ # Save and Upload Image
66
+ # -----------------------------------------------------
67
+ def save_and_upload_image(image_b64: str, hf_token: str):
68
  try:
69
  image_bytes = base64.b64decode(image_b64)
70
  size_bytes = len(image_bytes)
71
+ print("[debug] decoded image bytes:", size_bytes)
72
 
73
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
74
  local_path = f"/tmp/robot_img_{timestamp}.jpg"
 
76
  with open(local_path, "wb") as f:
77
  f.write(image_bytes)
78
 
79
+ print("[debug] wrote local tmp file:", local_path)
80
+
81
  filename = f"robot_{timestamp}.jpg"
82
 
83
  upload_file(
 
85
  path_in_repo=filename,
86
  repo_id=HF_DATASET_REPO,
87
  token=hf_token,
88
+ repo_type="dataset",
89
  )
90
 
91
+ print("[debug] upload successful:", filename)
92
+
93
  url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{filename}"
94
  return local_path, url, filename, size_bytes
95
 
96
+ except Exception:
97
  traceback.print_exc()
98
  return None, None, None, 0
99
 
100
 
101
+ # -----------------------------------------------------
102
+ # JSON Parsing Helper
103
+ # -----------------------------------------------------
104
+ def safe_parse_json_from_text(text: str):
105
  if not text:
106
  return None
 
107
  try:
108
  return json.loads(text)
109
  except:
 
114
  start = cleaned.find("{")
115
  end = cleaned.rfind("}")
116
  if start >= 0 and end > start:
117
+ return json.loads(cleaned[start : end + 1])
118
  except:
119
  pass
120
 
121
  return None
122
 
123
 
124
+ # -----------------------------------------------------
125
+ # Only allow tools from MCP registry
126
+ # -----------------------------------------------------
127
def validate_and_call_tool(tool_name: str, tool_args: dict):
    """Run a registered MCP tool by name and return its result.

    Both arguments originate from parsed model output, so every failure is
    reported as an ``{"error": ...}`` dict instead of being raised.

    Args:
        tool_name: Name of the tool to run; must be a key of the MCP registry.
        tool_args: Keyword arguments forwarded to the tool.

    Returns:
        The tool's return value, or an error dict when the tool is unknown or
        the call fails.
    """
    # A non-string name (None, list, dict, ...) can never match a registered
    # tool, and an unhashable one would make the membership test raise
    # TypeError, so reject it up front.
    if not isinstance(tool_name, str):
        return {"error": f"Unknown or unauthorized tool '{tool_name}'"}

    try:
        # NOTE(review): assumes `mcp.tools` is a name -> callable mapping;
        # confirm against the installed FastMCP version. The lookup lives
        # inside the try so a missing attribute becomes an error dict rather
        # than an unhandled exception.
        registry = mcp.tools
        if tool_name not in registry:
            return {"error": f"Unknown or unauthorized tool '{tool_name}'"}
        return registry[tool_name](**tool_args)
    except Exception as e:
        traceback.print_exc()
        return {"error": f"Tool error: {str(e)}"}
135
 
136
 
137
+ # -----------------------------------------------------
138
+ # Main Pipeline
139
+ # -----------------------------------------------------
140
  def process_and_describe(payload):
141
+
142
  if isinstance(payload, str):
143
  try:
144
  payload = json.loads(payload)
145
  except:
146
+ return {"error": "Invalid JSON payload"}
147
+
148
+ print("\n========== NEW REQUEST ==========")
149
+ print("[debug] Incoming payload:", payload)
150
 
151
  hf_token = payload.get("hf_token")
152
  if not hf_token:
 
154
 
155
  robot_id = payload.get("robot_id", "unknown")
156
  image_b64 = payload.get("image_b64")
 
157
  if not image_b64:
158
  return {"error": "image_b64 missing"}
159
 
160
+ # Save + Upload
161
+ local_tmp_path, hf_url, filename, size_bytes = save_and_upload_image(
162
+ image_b64, hf_token
163
+ )
164
+
165
  if not hf_url:
166
  return {"error": "Image upload failed"}
167
 
168
+ print("[debug] HF image URL:", hf_url)
 
 
 
 
 
 
 
169
 
170
+ # VLM SYSTEM PROMPT
171
+ system_prompt = """
172
+ Respond in STRICT JSON ONLY. Format:
173
+ {
174
  "description": "short visual description",
175
+ "tool_name": "one of: speak, navigate, scan_hazard, analyze_human",
176
+ "arguments": { ... }
177
+ }
178
  """
179
 
180
  messages = [
181
  {"role": "system", "content": system_prompt},
182
+ {
183
+ "role": "user",
184
+ "content": [
185
+ {"type": "text", "text": "Analyze the image and choose ONE tool."},
186
+ {
187
+ "type": "image_url",
188
+ "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
189
+ },
190
+ ],
191
+ },
192
  ]
193
 
194
+ # VLM CALL
195
+ print("[debug] Calling VLM model...")
196
  client = InferenceClient(token=hf_token)
197
 
198
  response = client.chat.completions.create(
199
  model=HF_VLM_MODEL,
200
  messages=messages,
201
+ max_tokens=300,
202
  temperature=0.1,
 
203
  )
204
 
205
+ vlm_output = response.choices[0].message.content.strip()
206
+
207
+ print("\n------ VLM RAW OUTPUT ------")
208
+ print(vlm_output)
209
+ print("------ END VLM RAW ------\n")
210
+
211
+ parsed = safe_parse_json_from_text(vlm_output)
212
 
213
+ if parsed is None:
 
214
  return {
215
  "status": "model_no_json",
216
  "robot_id": robot_id,
217
  "image_url": hf_url,
218
+ "vlm_raw": vlm_output,
219
+ "message": "VLM returned invalid JSON",
220
  }
221
 
222
  tool_name = parsed.get("tool_name")
223
  tool_args = parsed.get("arguments") or {}
224
 
225
+ tool_result = validate_and_call_tool(tool_name, tool_args)
226
 
227
+ return {
228
  "status": "success",
229
  "robot_id": robot_id,
230
  "image_url": hf_url,
231
+ "file_size_bytes": size_bytes,
232
+ "vlm_description": parsed.get("description"),
233
  "chosen_tool": tool_name,
234
  "tool_arguments": tool_args,
235
+ "tool_execution_result": tool_result,
236
+ "vlm_raw": vlm_output,
237
  }
238
 
 
 
239
 
240
+ # -----------------------------------------------------
241
+ # Gradio Interface + MCP Serve
242
+ # -----------------------------------------------------
243
# JSON-in / JSON-out endpoint around process_and_describe, exposed under the
# "predict" API name with flagging disabled.
iface = gr.Interface(
    fn=process_and_describe,
    inputs=gr.JSON(label="Input JSON"),
    outputs=gr.JSON(label="Output JSON"),
    api_name="predict",
    allow_flagging="never",
)

if __name__ == "__main__":
    # NOTE(review): `run_gradio` is not a FastMCP method I can confirm;
    # verify it exists in the installed version before relying on it.
    mcp.run_gradio(iface)