Forrest Wargo committed on
Commit
582f8ae
·
1 Parent(s): cde6e20

unification

Browse files
Files changed (1) hide show
  1. handler.py +29 -11
handler.py CHANGED
@@ -191,6 +191,8 @@ class EndpointHandler:
191
  )
192
  inputs = {k: (v.to(self.model.device) if hasattr(v, "to") else v) for k, v in inputs.items()}
193
 
 
 
194
  with torch.no_grad():
195
  out_ids = self.model.generate(
196
  **inputs,
@@ -202,6 +204,7 @@ class EndpointHandler:
202
  out_text = self.processor.batch_decode(
203
  trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
204
  )[0]
 
205
 
206
  # Extract coordinates from model output and rescale to original image
207
  def _extract_xy(s: str):
@@ -215,16 +218,31 @@ class EndpointHandler:
215
  return None
216
 
217
  pred = _extract_xy(out_text)
218
- payload: Dict[str, Any] = {"raw": out_text}
219
- if width and height:
220
- payload.update({"width": width, "height": height})
221
- if pred is not None and width and height:
222
- # The model returns pixel coordinates on the input image; we did not pre-resize
223
- px = max(0.0, min(float(pred[0]), float(width)))
224
- py = max(0.0, min(float(pred[1]), float(height)))
225
- nx = px / float(width)
226
- ny = py / float(height)
227
- payload.update({"x": nx, "y": ny})
228
- return payload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
 
 
191
  )
192
  inputs = {k: (v.to(self.model.device) if hasattr(v, "to") else v) for k, v in inputs.items()}
193
 
194
+ import time
195
+ t0 = time.time()
196
  with torch.no_grad():
197
  out_ids = self.model.generate(
198
  **inputs,
 
204
  out_text = self.processor.batch_decode(
205
  trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
206
  )[0]
207
+ t1 = time.time()
208
 
209
  # Extract coordinates from model output and rescale to original image
210
  def _extract_xy(s: str):
 
218
  return None
219
 
220
  pred = _extract_xy(out_text)
221
+ if user_text is not None:
222
+ try:
223
+ print(f"[gta1-endpoint] Prompt: {user_text}")
224
+ except Exception:
225
+ pass
226
+ try:
227
+ print(f"[gta1-endpoint] Raw output: {out_text}")
228
+ except Exception:
229
+ pass
230
+ try:
231
+ print(f"[gta1-endpoint] Inference time: {t1 - t0:.3f}s")
232
+ except Exception:
233
+ pass
234
+
235
+ if pred is None or not (width and height):
236
+ return {"error": "Failed to parse coordinates or missing image dimensions."}
237
+
238
+ # The model returns pixel coordinates on the input image; we did not pre-resize
239
+ px = max(0.0, min(float(pred[0]), float(width)))
240
+ py = max(0.0, min(float(pred[1]), float(height)))
241
+ nx = px / float(width)
242
+ ny = py / float(height)
243
+ return {
244
+ "points": [{"x": nx, "y": ny}],
245
+ "raw": out_text,
246
+ }
247
 
248