OppaAI committed on
Commit
a4f7543
·
verified ·
1 Parent(s): 024277f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -81
app.py CHANGED
@@ -18,38 +18,23 @@ PROCESSED_REQUESTS: Dict[str, Dict[str, Any]] = {}
18
  PROCESSED_LOCK = threading.Lock()
19
 
20
  # ==========================================
21
- # Robot Tools (unchanged semantics)
22
  # ==========================================
23
def tool_speak(text: str, emotion: str = "neutral") -> dict:
    """Produce a 'speak' action result carrying the text and emotion to vocalize."""
    payload = {"text": text, "emotion": emotion}
    return {"status": "success", "action_executed": "speak", "payload": payload}
29
 
30
def tool_navigate(direction: str, distance_meters: float) -> dict:
    """Produce a 'navigate' action result, enforcing the 5 m single-move safety cap."""
    if not distance_meters > 5.0:
        payload = {"direction": direction, "distance": distance_meters}
        return {"status": "success", "action_executed": "navigate", "payload": payload}
    return {"status": "error", "message": "Safety limit: Cannot move more than 5m at once."}
38
 
39
def tool_scan_hazard(hazard_type: str, severity: str) -> dict:
    """Record a detected hazard and return the formatted warning log entry."""
    now_iso = datetime.now().isoformat()
    entry = f"[{now_iso}] WARNING: {hazard_type} detected (Severity: {severity})"
    return {"status": "warning_logged", "log": entry}
47
 
48
def tool_analyze_human(clothing_color: str, estimated_action: str) -> dict:
    """Summarize a tracked human by clothing color and inferred activity."""
    summary = f"Human wearing {clothing_color} is likely {estimated_action}."
    return {"status": "human_tracked", "details": summary}
53
 
54
  TOOL_REGISTRY = {
55
  "speak": tool_speak,
@@ -59,33 +44,23 @@ TOOL_REGISTRY = {
59
  }
60
 
61
  # ==========================================
62
- # Helper: Save & Upload (robust)
63
  # ==========================================
64
  def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str], Optional[str], Optional[str], int]:
65
- """
66
- Save a base64 image to a uniquely named /tmp file and upload to HF dataset repo.
67
- Returns: local_tmp_path, hf_url, path_in_repo, size_bytes
68
- """
69
  try:
70
- # decode
71
  image_bytes = base64.b64decode(image_b64)
72
  size_bytes = len(image_bytes)
73
- print("[debug] decoded image bytes:", size_bytes)
74
  if size_bytes < 10:
75
  raise ValueError("Decoded image is too small or invalid base64")
76
 
77
- # unique tmp filename (avoid collision across workers)
78
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
79
  local_tmp_path = f"/tmp/robot_img_{timestamp}.jpg"
80
  with open(local_tmp_path, "wb") as f:
81
  f.write(image_bytes)
82
- print(f"[debug] wrote local tmp file: {local_tmp_path}")
83
 
84
- # Prepare filename in repo (put at repo root to avoid folder permission issues)
85
  filename = f"robot_{timestamp}.jpg"
86
  path_in_repo = filename
87
 
88
- # upload_file might raise. capture exception and show traceback
89
  upload_file(
90
  path_or_fileobj=local_tmp_path,
91
  path_in_repo=path_in_repo,
@@ -95,31 +70,21 @@ def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str],
95
  )
96
 
97
  hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
98
- print("[debug] upload successful:", hf_image_url)
99
  return local_tmp_path, hf_image_url, path_in_repo, size_bytes
100
 
101
  except Exception as e:
102
- print("[error] save_and_upload_image failed:", e)
103
  traceback.print_exc()
104
  return None, None, None, 0
105
 
106
  # ==========================================
107
- # Main logic
108
  # ==========================================
109
  def safe_parse_json_from_text(text: str) -> Optional[dict]:
110
- """
111
- Try to extract JSON object from model output.
112
- Accepts raw JSON, or a ```json\n{...}``` block, or text with JSON substring.
113
- Returns dict or None.
114
- """
115
  if not text:
116
  return None
117
- # remove markdown fences
118
  t = text.strip()
119
  if t.startswith("```") and "```" in t[3:]:
120
- # remove outer fences
121
  t = t.strip("`")
122
- # find first '{' and last '}' to try to extract JSON substring
123
  start = t.find("{")
124
  end = t.rfind("}")
125
  if start >= 0 and end > start:
@@ -127,7 +92,6 @@ def safe_parse_json_from_text(text: str) -> Optional[dict]:
127
  try:
128
  return json.loads(candidate)
129
  except Exception:
130
- # fallback: try the whole text
131
  try:
132
  return json.loads(t)
133
  except Exception:
@@ -138,44 +102,46 @@ def safe_parse_json_from_text(text: str) -> Optional[dict]:
138
  except Exception:
139
  return None
140
 
 
 
 
141
def validate_and_call_tool(tool_name: str, tool_args: dict):
    """Look up *tool_name* in TOOL_REGISTRY and invoke it with *tool_args*.

    Returns the tool's own result dict on success, or an {"error": ...} dict
    when the name is missing/unknown or the underlying call fails.
    """
    if not tool_name:
        return {"error": "No tool_name provided by VLM."}
    if tool_name not in TOOL_REGISTRY:
        return {"error": f"Tool '{tool_name}' not found in registry."}
    try:
        tool_fn = TOOL_REGISTRY[tool_name]
        return tool_fn(**tool_args)
    except TypeError as e:
        # VLM supplied arguments that do not match the tool's signature.
        return {"error": f"Tool call argument mismatch: {str(e)}"}
    except Exception as e:
        traceback.print_exc()
        return {"error": f"Tool execution failed: {str(e)}"}
155
 
156
- def process_and_describe(payload: dict):
157
- """
158
- payload expects keys:
159
- - hf_token (string)
160
- - image_b64 (base64 str)
161
- - robot_id (optional)
162
- - request_id (optional) # recommended to dedupe retries
163
- """
 
 
 
164
  vlm_text = ""
165
  tool_result = None
166
  action_data = {}
167
 
168
  try:
169
- # basic checks
170
  hf_token = payload.get("hf_token")
171
  if not hf_token:
172
- return {"error": "HF token not provided in payload. Token must have datasets write permission if uploading."}
173
 
174
  request_id = payload.get("request_id") or payload.get("robot_id") or None
175
  if request_id:
176
  with PROCESSED_LOCK:
177
  if request_id in PROCESSED_REQUESTS:
178
- print("[info] duplicate request_id detected; returning cached result")
179
  return PROCESSED_REQUESTS[request_id]
180
 
181
  robot_id = payload.get("robot_id", "unknown")
@@ -183,20 +149,11 @@ def process_and_describe(payload: dict):
183
  if not image_b64:
184
  return {"error": "No image provided in payload."}
185
 
186
- # Save & upload (only once per invocation)
187
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
188
  if not hf_url:
189
- # Upload failed: return error with helpful debug info
190
- return {
191
- "error": "Image upload failed on server.",
192
- "debug": {
193
- "local_tmp_path": local_tmp_path,
194
- "path_in_repo": path_in_repo,
195
- "size_bytes": size_bytes
196
- }
197
- }
198
 
199
- # Build system prompt (kept compact)
200
  tools_desc = json.dumps({
201
  "speak": {"text": "string", "emotion": "string"},
202
  "navigate": {"direction": "forward/left/right", "distance_meters": "float"},
@@ -222,7 +179,6 @@ RESPONSE FORMAT (Strict JSON):
222
  }}
223
  """
224
 
225
- # Build messages payload for VLM - include the uploaded HF URL (some VLMs can fetch it)
226
  messages_payload = [
227
  {"role": "system", "content": system_prompt},
228
  {"role": "user", "content": [
@@ -231,10 +187,7 @@ RESPONSE FORMAT (Strict JSON):
231
  ]}
232
  ]
233
 
234
- # Instantiate HF Inference client and call chat completion
235
  hf_client = InferenceClient(token=hf_token)
236
-
237
- # NOTE: huggingface InferenceClient usage may vary by version. We use the chat completions create call.
238
  chat_completion = hf_client.chat.completions.create(
239
  model=HF_VLM_MODEL,
240
  messages=messages_payload,
@@ -243,12 +196,8 @@ RESPONSE FORMAT (Strict JSON):
243
  )
244
 
245
  vlm_text = chat_completion.choices[0].message.content.strip()
246
- print("[debug] VLM raw output:", vlm_text[:1000])
247
-
248
- # attempt to parse JSON
249
  parsed = safe_parse_json_from_text(vlm_text)
250
  if parsed is None:
251
- # If the model didn't return JSON, return descriptive fallback but do not execute tools
252
  result = {
253
  "status": "model_no_json",
254
  "robot_id": robot_id,
@@ -264,13 +213,9 @@ RESPONSE FORMAT (Strict JSON):
264
  action_data = parsed
265
  tool_name = action_data.get("tool_name")
266
  tool_args = action_data.get("arguments", {}) or {}
267
-
268
- # Validate that arguments is a dict
269
  if not isinstance(tool_args, dict):
270
  tool_args = {}
271
 
272
- # Execute the tool once and capture result
273
- print(f"[info] Executing tool: {tool_name} with args {tool_args}")
274
  tool_result = validate_and_call_tool(tool_name, tool_args)
275
 
276
  result = {
@@ -306,5 +251,4 @@ iface = gr.Interface(
306
  )
307
 
308
  if __name__ == "__main__":
309
- # When deploying to HF Space: set server_name and server_port via env if you need
310
  iface.launch()
 
18
  PROCESSED_LOCK = threading.Lock()
19
 
20
  # ==========================================
21
+ # Robot Tools
22
  # ==========================================
23
def tool_speak(text: str, emotion: str = "neutral") -> dict:
    """Produce a 'speak' action result carrying the text and emotion to vocalize."""
    payload = {"text": text, "emotion": emotion}
    return {"status": "success", "action_executed": "speak", "payload": payload}
 
 
 
 
25
 
26
def tool_navigate(direction: str, distance_meters: float) -> dict:
    """Validate and execute a single navigation command.

    Args:
        direction: Direction to move (e.g. "forward", "left", "right";
            not validated here — presumably constrained by the prompt,
            TODO confirm against the system prompt's tool schema).
        distance_meters: Distance to travel; must be positive and at most 5 m.

    Returns:
        A success dict echoing the requested move, or an error dict when the
        distance violates a safety limit.
    """
    if distance_meters > 5.0:
        return {"status": "error", "message": "Safety limit: Cannot move more than 5m at once."}
    # Reject zero, negative, and NaN distances: previously anything <= 5.0
    # (including -100 or NaN) passed the safety gate unchecked.
    if not distance_meters > 0.0:
        return {"status": "error", "message": "Safety limit: Distance must be a positive number."}
    return {"status": "success", "action_executed": "navigate", "payload": {"direction": direction, "distance": distance_meters}}
 
 
 
 
30
 
31
def tool_scan_hazard(hazard_type: str, severity: str) -> dict:
    """Log a detected hazard and return the warning entry that was recorded."""
    stamp = datetime.now().isoformat()
    return {
        "status": "warning_logged",
        "log": f"[{stamp}] WARNING: {hazard_type} detected (Severity: {severity})",
    }
 
 
 
 
35
 
36
def tool_analyze_human(clothing_color: str, estimated_action: str) -> dict:
    """Summarize a tracked human by clothing color and inferred activity."""
    description = f"Human wearing {clothing_color} is likely {estimated_action}."
    return {"status": "human_tracked", "details": description}
 
 
 
38
 
39
  TOOL_REGISTRY = {
40
  "speak": tool_speak,
 
44
  }
45
 
46
  # ==========================================
47
+ # Helper: Save & Upload
48
  # ==========================================
49
  def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str], Optional[str], Optional[str], int]:
 
 
 
 
50
  try:
 
51
  image_bytes = base64.b64decode(image_b64)
52
  size_bytes = len(image_bytes)
 
53
  if size_bytes < 10:
54
  raise ValueError("Decoded image is too small or invalid base64")
55
 
 
56
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
57
  local_tmp_path = f"/tmp/robot_img_{timestamp}.jpg"
58
  with open(local_tmp_path, "wb") as f:
59
  f.write(image_bytes)
 
60
 
 
61
  filename = f"robot_{timestamp}.jpg"
62
  path_in_repo = filename
63
 
 
64
  upload_file(
65
  path_or_fileobj=local_tmp_path,
66
  path_in_repo=path_in_repo,
 
70
  )
71
 
72
  hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
 
73
  return local_tmp_path, hf_image_url, path_in_repo, size_bytes
74
 
75
  except Exception as e:
 
76
  traceback.print_exc()
77
  return None, None, None, 0
78
 
79
  # ==========================================
80
+ # JSON parsing helper
81
  # ==========================================
82
  def safe_parse_json_from_text(text: str) -> Optional[dict]:
 
 
 
 
 
83
  if not text:
84
  return None
 
85
  t = text.strip()
86
  if t.startswith("```") and "```" in t[3:]:
 
87
  t = t.strip("`")
 
88
  start = t.find("{")
89
  end = t.rfind("}")
90
  if start >= 0 and end > start:
 
92
  try:
93
  return json.loads(candidate)
94
  except Exception:
 
95
  try:
96
  return json.loads(t)
97
  except Exception:
 
102
  except Exception:
103
  return None
104
 
105
+ # ==========================================
106
+ # Tool executor
107
+ # ==========================================
108
def validate_and_call_tool(tool_name: str, tool_args: dict):
    """Dispatch *tool_name* through TOOL_REGISTRY with keyword args *tool_args*.

    Returns the tool's result dict, or an {"error": ...} dict when the name is
    empty/unknown or the call raises.
    """
    if not tool_name:
        return {"error": "No tool_name provided by VLM."}
    if tool_name not in TOOL_REGISTRY:
        return {"error": f"Tool '{tool_name}' not found in registry."}
    tool_fn = TOOL_REGISTRY[tool_name]
    try:
        return tool_fn(**tool_args)
    except TypeError as e:
        # Arguments did not match the tool's signature.
        return {"error": f"Tool call argument mismatch: {str(e)}"}
    except Exception as e:
        traceback.print_exc()
        return {"error": f"Tool execution failed: {str(e)}"}
120
 
121
+ # ==========================================
122
+ # Main logic
123
+ # ==========================================
124
+ def process_and_describe(payload):
125
+ # If payload is str, try to parse it
126
+ if isinstance(payload, str):
127
+ try:
128
+ payload = json.loads(payload)
129
+ except Exception as e:
130
+ return {"error": f"Invalid JSON string: {str(e)}"}
131
+
132
  vlm_text = ""
133
  tool_result = None
134
  action_data = {}
135
 
136
  try:
 
137
  hf_token = payload.get("hf_token")
138
  if not hf_token:
139
+ return {"error": "HF token not provided in payload."}
140
 
141
  request_id = payload.get("request_id") or payload.get("robot_id") or None
142
  if request_id:
143
  with PROCESSED_LOCK:
144
  if request_id in PROCESSED_REQUESTS:
 
145
  return PROCESSED_REQUESTS[request_id]
146
 
147
  robot_id = payload.get("robot_id", "unknown")
 
149
  if not image_b64:
150
  return {"error": "No image provided in payload."}
151
 
 
152
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
153
  if not hf_url:
154
+ return {"error": "Image upload failed.", "debug": {"local_tmp_path": local_tmp_path, "size_bytes": size_bytes}}
 
 
 
 
 
 
 
 
155
 
156
+ # Build system prompt
157
  tools_desc = json.dumps({
158
  "speak": {"text": "string", "emotion": "string"},
159
  "navigate": {"direction": "forward/left/right", "distance_meters": "float"},
 
179
  }}
180
  """
181
 
 
182
  messages_payload = [
183
  {"role": "system", "content": system_prompt},
184
  {"role": "user", "content": [
 
187
  ]}
188
  ]
189
 
 
190
  hf_client = InferenceClient(token=hf_token)
 
 
191
  chat_completion = hf_client.chat.completions.create(
192
  model=HF_VLM_MODEL,
193
  messages=messages_payload,
 
196
  )
197
 
198
  vlm_text = chat_completion.choices[0].message.content.strip()
 
 
 
199
  parsed = safe_parse_json_from_text(vlm_text)
200
  if parsed is None:
 
201
  result = {
202
  "status": "model_no_json",
203
  "robot_id": robot_id,
 
213
  action_data = parsed
214
  tool_name = action_data.get("tool_name")
215
  tool_args = action_data.get("arguments", {}) or {}
 
 
216
  if not isinstance(tool_args, dict):
217
  tool_args = {}
218
 
 
 
219
  tool_result = validate_and_call_tool(tool_name, tool_args)
220
 
221
  result = {
 
251
  )
252
 
253
  if __name__ == "__main__":
 
254
  iface.launch()