OppaAI committed on
Commit
5df7db5
·
verified ·
1 Parent(s): d192cfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -143
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py
2
  import os
3
  import base64
4
  import json
@@ -6,56 +6,82 @@ import gradio as gr
6
  from huggingface_hub import upload_file, InferenceClient
7
  from datetime import datetime
8
  import traceback
9
- import threading
10
  from typing import Tuple, Optional, Dict, Any
 
11
 
12
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
13
  HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
14
 
15
- PROCESSED_REQUESTS: Dict[str, Dict[str, Any]] = {}
16
- PROCESSED_LOCK = threading.Lock()
17
-
18
- # --------------------
19
- # Robot Tools
20
- # --------------------
21
- def tool_speak(text: str, emotion: str = "neutral") -> dict:
22
- return {"status": "success", "action_executed": "speak", "payload": {"text": text, "emotion": emotion}}
23
-
24
- def tool_navigate(direction: str, distance_meters: float) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  if distance_meters > 5.0:
26
  return {"status": "error", "message": "Safety limit exceeded"}
27
- return {"status": "success", "action_executed": "navigate", "payload": {"direction": direction, "distance": distance_meters}}
28
-
29
- def tool_scan_hazard(hazard_type: str, severity: str) -> dict:
 
 
 
 
 
 
 
 
30
  timestamp = datetime.now().isoformat()
31
- return {"status": "warning_logged", "log": f"[{timestamp}] HAZARD: {hazard_type} (Severity: {severity})"}
32
-
33
- def tool_analyze_human(clothing_color: str, estimated_action: str) -> dict:
34
- return {"status": "human_tracked", "details": f"Human wearing {clothing_color} is {estimated_action}"}
35
-
36
- TOOL_REGISTRY = {
37
- "speak": tool_speak,
38
- "navigate": tool_navigate,
39
- "scan_hazard": tool_scan_hazard,
40
- "analyze_human": tool_analyze_human
41
- }
42
-
43
- # --------------------
44
- # Save + Upload
45
- # --------------------
 
 
 
 
 
 
46
  def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str], Optional[str], Optional[str], int]:
47
  try:
48
  image_bytes = base64.b64decode(image_b64)
49
  size_bytes = len(image_bytes)
50
- print("[debug] decoded image bytes:", size_bytes)
51
 
52
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
53
  local_path = f"/tmp/robot_img_{timestamp}.jpg"
 
54
  with open(local_path, "wb") as f:
55
  f.write(image_bytes)
56
 
57
- print("[debug] wrote local tmp file:", local_path)
58
-
59
  filename = f"robot_{timestamp}.jpg"
60
 
61
  upload_file(
@@ -65,7 +91,6 @@ def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str],
65
  token=hf_token,
66
  repo_type="dataset"
67
  )
68
- print("[debug] upload successful:", filename)
69
 
70
  url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{filename}"
71
  return local_path, url, filename, size_bytes
@@ -75,160 +100,144 @@ def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str],
75
  return None, None, None, 0
76
 
77
 
78
- # --------------------
79
- # JSON Parse Helper
80
- # --------------------
81
  def safe_parse_json_from_text(text: str) -> Optional[dict]:
82
  if not text:
83
  return None
 
84
  try:
85
  return json.loads(text)
86
  except:
87
  pass
88
 
89
- cleaned = text.strip()
90
- if cleaned.startswith("```"):
91
- cleaned = cleaned.strip("`")
92
-
93
  try:
94
  start = cleaned.find("{")
95
  end = cleaned.rfind("}")
96
  if start >= 0 and end > start:
97
  return json.loads(cleaned[start:end+1])
98
  except:
99
- return None
100
 
101
  return None
102
 
103
 
104
- # --------------------
105
- # Tool validation + exec
106
- # --------------------
107
- def validate_and_call_tool(tool_name: str, tool_args: dict):
108
- if not tool_name:
109
- return {"error": "Missing tool_name"}
110
- if tool_name not in TOOL_REGISTRY:
111
  return {"error": f"Unknown tool '{tool_name}'"}
 
112
  try:
113
- return TOOL_REGISTRY[tool_name](**tool_args)
 
 
114
  except Exception as e:
115
  traceback.print_exc()
116
- return {"error": f"Tool error: {str(e)}"}
117
 
118
 
119
- # --------------------
120
- # Main Function
121
- # --------------------
122
  def process_and_describe(payload):
123
-
124
- # If string → parse JSON
125
  if isinstance(payload, str):
126
  try:
127
  payload = json.loads(payload)
128
- except Exception as e:
129
- print("[error] invalid JSON from client:", payload)
130
- return {"error": f"Invalid JSON string: {str(e)}"}
131
 
132
- print("\n================ NEW REQUEST ================")
133
- print("[debug] Incoming payload:", payload)
134
-
135
- try:
136
- hf_token = payload.get("hf_token")
137
- if not hf_token:
138
- return {"error": "hf_token missing"}
139
-
140
- robot_id = payload.get("robot_id", "unknown")
141
- image_b64 = payload.get("image_b64")
142
- if not image_b64:
143
- return {"error": "image_b64 missing"}
144
-
145
- # Save & Upload
146
- local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
147
- if not hf_url:
148
- print("[error] Image upload failed.")
149
- return {"error": "Image upload failed"}
150
-
151
- print("[debug] HF image URL:", hf_url)
152
-
153
- # Build prompt
154
- system_prompt = """
155
- Respond in STRICT JSON:
156
- {
157
- "description":"short visual description",
158
- "tool_name":"name",
159
- "arguments": { ... }
160
- }
161
- """
162
 
163
- messages = [
164
- {"role": "system", "content": system_prompt},
165
- {"role": "user", "content": [
166
- {"type": "text", "text": "Analyze image and select one tool"},
167
- {"type": "image_url",
168
- "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
169
- ]}
170
- ]
171
-
172
- print("[debug] Calling VLM model...")
173
-
174
- client = InferenceClient(token=hf_token)
175
- response = client.chat.completions.create(
176
- model=HF_VLM_MODEL,
177
- messages=messages,
178
- max_tokens=300,
179
- temperature=0.1
180
- )
181
 
182
- vlm_output = response.choices[0].message.content.strip()
 
183
 
184
- # 🔥 PRINT VLM RAW OUTPUT (你要求的)
185
- print("\n------ VLM RAW OUTPUT ------")
186
- print(vlm_output)
187
- print("------ END VLM RAW ------\n")
188
 
189
- parsed = safe_parse_json_from_text(vlm_output)
 
190
 
191
- if parsed is None:
192
- print("[error] VLM did NOT return valid JSON")
193
- return {
194
- "status": "model_no_json",
195
- "robot_id": robot_id,
196
- "image_url": hf_url,
197
- "vlm_raw": vlm_output,
198
- "message": "VLM did not output valid JSON"
199
- }
200
 
201
- tool_name = parsed.get("tool_name")
202
- tool_args = parsed.get("arguments") or {}
203
 
204
- print("[debug] Parsed JSON:", parsed)
205
 
206
- tool_result = validate_and_call_tool(tool_name, tool_args)
 
 
 
 
 
207
 
208
- result = {
209
- "status": "success",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  "robot_id": robot_id,
211
  "image_url": hf_url,
212
- "image_bytes": size_bytes,
213
- "analysis": parsed.get("description"),
214
- "chosen_tool": tool_name,
215
- "tool_arguments": tool_args,
216
- "tool_execution_result": tool_result,
217
- "vlm_raw": vlm_output
218
  }
219
 
220
- print("[debug] Final result:", result)
221
- print("============================================\n")
222
- return result
223
 
224
- except Exception as e:
225
- traceback.print_exc()
226
- return {"error": f"Server exception: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
 
229
- # --------------------
230
- # Gradio
231
- # --------------------
232
  iface = gr.Interface(
233
  fn=process_and_describe,
234
  inputs=gr.JSON(label="Input JSON"),
@@ -238,4 +247,6 @@ iface = gr.Interface(
238
  )
239
 
240
  if __name__ == "__main__":
 
 
241
  iface.launch()
 
1
+ # app.py (MCP + HF Space unified)
2
  import os
3
  import base64
4
  import json
 
6
  from huggingface_hub import upload_file, InferenceClient
7
  from datetime import datetime
8
  import traceback
 
9
  from typing import Tuple, Optional, Dict, Any
10
+ from fastmcp import FastMCP, Tool
11
 
12
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
13
  HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
14
 
15
+ # ================================================================
16
+ # MCP SERVER + TOOLS (FASTMCP) -- one module-level server instance; the functions below are decorated with @mcp.tool()
17
+ # ================================================================
18
+ mcp = FastMCP("Robot_MCP_Server")
19
+
20
+ # -------------------------
21
+ # MCP Tools
22
+ # -------------------------
@mcp.tool()
def speak(text: str, emotion: str = "neutral") -> dict:
    """
    Speak something with a given emotion.
    """
    # NOTE(review): the docstring above is kept verbatim because tool
    # frameworks commonly publish it as the tool description -- presumably
    # FastMCP does too; confirm before rewording it.
    spoken = {"text": text, "emotion": emotion}
    # Keyword order matches the original literal: status, action, payload.
    return dict(status="success", action_executed="speak", payload=spoken)
33
+
@mcp.tool()
def navigate(direction: str, distance_meters: float) -> dict:
    """
    Navigate the robot safely. Max distance: 5m.
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably published as the tool description -- confirm before editing.
    #
    # Returns a dict with "status" == "success" and the requested move in
    # "payload", or "status" == "error" with a "message" when the request is
    # rejected. In-range numeric requests behave exactly as before.
    import math  # local import: keeps this block self-contained

    # The arguments come from model-generated JSON, so they may not be
    # numbers at all. bool is excluded explicitly because it is an int
    # subclass. NaN is rejected here because every comparison with NaN is
    # False, which previously let it slip past the safety limit.
    is_number = isinstance(distance_meters, (int, float)) and not isinstance(
        distance_meters, bool
    )
    if not is_number or math.isnan(distance_meters):
        return {"status": "error", "message": "distance_meters must be a number"}

    # Compare the magnitude so a large negative distance cannot bypass the
    # 5 m limit; +/-infinity is rejected by the same test.
    if abs(distance_meters) > 5.0:
        return {"status": "error", "message": "Safety limit exceeded"}

    return {
        "status": "success",
        "action_executed": "navigate",
        "payload": {"direction": direction, "distance": distance_meters},
    }
46
+
@mcp.tool()
def scan_hazard(hazard_type: str, severity: str) -> dict:
    """
    Log a hazard event.
    """
    # NOTE(review): docstring kept verbatim; it is presumably published as the
    # tool description -- confirm before rewording it.
    # The log line is stamped with the local (naive) wall-clock time in ISO
    # format; nothing is persisted here, the entry is only returned.
    logged_at = datetime.now().isoformat()
    entry = f"[{logged_at}] HAZARD: {hazard_type} (Severity: {severity})"
    return dict(status="warning_logged", log=entry)
57
+
@mcp.tool()
def analyze_human(clothing_color: str, estimated_action: str) -> dict:
    """
    Describe a detected human.
    """
    # NOTE(review): docstring kept verbatim; it is presumably published as the
    # tool description -- confirm before rewording it.
    # Both arguments are interpolated as-is into a one-sentence summary.
    summary = f"Human wearing {clothing_color} is {estimated_action}"
    return dict(status="human_tracked", details=summary)
67
+
68
+ # MCP tool definitions to embed into VLM system prompt. NOTE(review): `get_tool_schemas` does not look like a documented FastMCP method (tool listing there appears to be async, e.g. `get_tools()` / `list_tools()`) -- confirm against the installed fastmcp version. execute_tool reads a "name" key from each entry and process_and_describe passes this value to json.dumps.
69
+ TOOL_SPECS = mcp.get_tool_schemas()
70
+
71
+ # ================================================================
72
+ # HELPER: SAVE + UPLOAD IMAGE
73
+ # ================================================================
74
  def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str], Optional[str], Optional[str], int]:
75
  try:
76
  image_bytes = base64.b64decode(image_b64)
77
  size_bytes = len(image_bytes)
 
78
 
79
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
80
  local_path = f"/tmp/robot_img_{timestamp}.jpg"
81
+
82
  with open(local_path, "wb") as f:
83
  f.write(image_bytes)
84
 
 
 
85
  filename = f"robot_{timestamp}.jpg"
86
 
87
  upload_file(
 
91
  token=hf_token,
92
  repo_type="dataset"
93
  )
 
94
 
95
  url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{filename}"
96
  return local_path, url, filename, size_bytes
 
100
  return None, None, None, 0
101
 
102
 
103
+ # ================================================================
104
+ # VLM JSON PARSER
105
+ # ================================================================
def safe_parse_json_from_text(text: str) -> Optional[dict]:
    """Extract a JSON object from model output, tolerating fences and chatter.

    The text is first parsed as-is. If that does not yield a JSON object,
    surrounding whitespace and backticks are stripped and the span from the
    first "{" to the last "}" is parsed instead, which copes with Markdown
    code fences and with prose placed around the object.

    Args:
        text: Raw model output. ``bytes``/``bytearray`` are decoded as UTF-8;
            any other non-string value is treated as "no JSON".

    Returns:
        The decoded ``dict``, or ``None`` when no JSON object can be
        recovered. Valid JSON that is not an object (list, string, number,
        ...) is never returned, so callers can rely on ``.get``.
    """
    if isinstance(text, (bytes, bytearray)):
        text = bytes(text).decode("utf-8", errors="replace")
    if not isinstance(text, str) or not text:
        return None

    def _loads_object(candidate: str) -> Optional[dict]:
        # Only malformed input is swallowed (JSONDecodeError is a ValueError;
        # RecursionError covers pathologically nested input). Anything else
        # is a real bug and should surface.
        try:
            value = json.loads(candidate)
        except (ValueError, RecursionError):
            return None
        return value if isinstance(value, dict) else None

    parsed = _loads_object(text)
    if parsed is not None:
        return parsed

    # Drop whitespace and Markdown fence backticks. A leading "json" language
    # tag is skipped implicitly by slicing from the first brace.
    cleaned = text.strip().strip("`")
    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start >= 0 and end > start:
        return _loads_object(cleaned[start:end + 1])

    return None
125
 
126
 
127
+ # ================================================================
128
+ # EXECUTE TOOL USING MCP INTERNAL DISPATCH
129
+ # ================================================================
def execute_tool(tool_name: str, tool_args: dict):
    """Run the tool the VLM selected and return its result.

    Args:
        tool_name: Name chosen by the model; must match the "name" of an
            entry in TOOL_SPECS.
        tool_args: Keyword arguments for the tool, as decoded from the
            model's JSON.

    Returns:
        Whatever the tool returns on success, otherwise a dict of the form
        ``{"error": "<reason>"}``. This function reports failures through the
        return value: tool exceptions are logged and converted, not raised.
    """
    if not tool_name:
        return {"error": "Missing tool_name"}

    # The name comes from model-generated JSON and may be a list or dict.
    # Reject non-strings before the membership test, which would otherwise
    # raise TypeError (unhashable) outside the try block below.
    if not isinstance(tool_name, str):
        return {"error": f"Unknown tool '{tool_name}'"}

    # Only the names are needed for validation.
    known_names = {spec["name"] for spec in TOOL_SPECS}
    if tool_name not in known_names:
        return {"error": f"Unknown tool '{tool_name}'"}

    try:
        # NOTE(review): `mcp.tools` is assumed to map tool names to plain
        # callables -- confirm against the installed fastmcp version. Any
        # failure here (including a non-mapping `tool_args`) is reported as a
        # tool execution error rather than propagated.
        fn = mcp.tools[tool_name]
        return fn(**tool_args)
    except Exception as e:
        traceback.print_exc()
        return {"error": f"Tool execution error: {str(e)}"}
143
 
144
 
145
+ # ================================================================
146
+ # MAIN API HANDLER (used by Gradio)
147
+ # ================================================================
def _build_vlm_messages(image_b64: str) -> list:
    """Build the chat messages (system prompt + image) sent to the VLM."""
    tool_list_json = json.dumps(TOOL_SPECS, indent=2)

    system_prompt = f"""
You are an AI that MUST respond in valid JSON only.

You have the following robot tools available:
{tool_list_json}

Return ONLY this format:

{{
"description": "short visual description",
"tool_name": "<one of the tool names>",
"arguments": {{ ... }}
}}
"""

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [
            {"type": "text", "text": "Analyze the image and pick EXACTLY ONE tool."},
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
        ]},
    ]


def process_and_describe(payload):
    """Handle one robot request: upload the image, ask the VLM, run a tool.

    Args:
        payload: A dict, or a JSON string decoding to one, with keys
            "hf_token" (required), "image_b64" (required, base64 image data)
            and "robot_id" (optional, defaults to "unknown").

    Returns:
        A dict. On success it has "status" == "success" plus the image URL,
        the model's description, the chosen tool, its arguments, the tool's
        result and the raw model output. Validation, upload and model
        failures are returned as ``{"error": ...}``; unparseable model output
        is returned with "status" == "model_no_json". No exception is raised
        to the caller for these cases.
    """
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (ValueError, RecursionError):
            return {"error": "Invalid JSON string"}

    # A JSON list/number/null, or a None input, would crash on .get below.
    if not isinstance(payload, dict):
        return {"error": "Payload must be a JSON object"}

    hf_token = payload.get("hf_token")
    if not hf_token:
        return {"error": "hf_token missing"}

    robot_id = payload.get("robot_id", "unknown")
    image_b64 = payload.get("image_b64")
    if not image_b64:
        return {"error": "image_b64 missing"}

    # ---- save & upload ----
    local_tmp_path, hf_url, _path_in_repo, size_bytes = save_and_upload_image(
        image_b64, hf_token
    )
    # The local copy is not used after the upload; remove it so temp files do
    # not pile up across requests. Best effort: a failed delete must not fail
    # the request.
    if local_tmp_path:
        try:
            os.remove(local_tmp_path)
        except OSError:
            pass
    if not hf_url:
        return {"error": "Image upload failed"}

    # ---- query the VLM ----
    # Prompt building and the remote call are guarded so that network, auth
    # or serialization failures come back as an error dict like every other
    # failure path here.
    try:
        messages = _build_vlm_messages(image_b64)
        client = InferenceClient(token=hf_token)
        response = client.chat.completions.create(
            model=HF_VLM_MODEL,
            messages=messages,
            temperature=0.1,
            max_tokens=300,
        )
        # content may be None (e.g. an empty completion); treat it as "".
        vlm_raw = (response.choices[0].message.content or "").strip()
    except Exception as e:
        traceback.print_exc()
        return {"error": f"Server exception: {str(e)}"}

    parsed = safe_parse_json_from_text(vlm_raw)
    # Empty or non-object JSON is treated the same as no JSON at all.
    if not parsed or not isinstance(parsed, dict):
        return {
            "status": "model_no_json",
            "robot_id": robot_id,
            "image_url": hf_url,
            "vlm_raw": vlm_raw,
            "error": "VLM did not provide valid JSON",
        }

    tool_name = parsed.get("tool_name")
    tool_args = parsed.get("arguments") or {}

    tool_exec = execute_tool(tool_name, tool_args)

    # NOTE: "status" reports that the pipeline completed; a tool-level failure
    # is visible only inside "tool_execution_result".
    return {
        "status": "success",
        "robot_id": robot_id,
        "image_url": hf_url,
        "image_bytes": size_bytes,
        "analysis": parsed.get("description"),
        "chosen_tool": tool_name,
        "tool_arguments": tool_args,
        "tool_execution_result": tool_exec,
        "vlm_raw": vlm_raw,
    }
236
 
237
 
238
+ # ================================================================
239
+ # GRADIO API (for your client script)
240
+ # ================================================================
241
  iface = gr.Interface(
242
  fn=process_and_describe,
243
  inputs=gr.JSON(label="Input JSON"),
 
247
  )
248
 
249
  if __name__ == "__main__":
250
+ # Start MCP server (background), then launch the Gradio UI. NOTE(review): `run_in_thread` does not look like a documented FastMCP method (the documented entry points appear to be `run()` / `run_async()`) -- confirm against the installed fastmcp version before relying on this.
251
+ mcp.run_in_thread()
252
  iface.launch()