Forrest Wargo
committed on
Commit
·
582f8ae
1
Parent(s):
cde6e20
unification
Browse files - handler.py +29 -11
handler.py
CHANGED
|
@@ -191,6 +191,8 @@ class EndpointHandler:
|
|
| 191 |
)
|
| 192 |
inputs = {k: (v.to(self.model.device) if hasattr(v, "to") else v) for k, v in inputs.items()}
|
| 193 |
|
|
|
|
|
|
|
| 194 |
with torch.no_grad():
|
| 195 |
out_ids = self.model.generate(
|
| 196 |
**inputs,
|
|
@@ -202,6 +204,7 @@ class EndpointHandler:
|
|
| 202 |
out_text = self.processor.batch_decode(
|
| 203 |
trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
| 204 |
)[0]
|
|
|
|
| 205 |
|
| 206 |
# Extract coordinates from model output and rescale to original image
|
| 207 |
def _extract_xy(s: str):
|
|
@@ -215,16 +218,31 @@ class EndpointHandler:
|
|
| 215 |
return None
|
| 216 |
|
| 217 |
pred = _extract_xy(out_text)
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
|
|
|
|
| 191 |
)
|
| 192 |
inputs = {k: (v.to(self.model.device) if hasattr(v, "to") else v) for k, v in inputs.items()}
|
| 193 |
|
| 194 |
+
import time
|
| 195 |
+
t0 = time.time()
|
| 196 |
with torch.no_grad():
|
| 197 |
out_ids = self.model.generate(
|
| 198 |
**inputs,
|
|
|
|
| 204 |
out_text = self.processor.batch_decode(
|
| 205 |
trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
| 206 |
)[0]
|
| 207 |
+
t1 = time.time()
|
| 208 |
|
| 209 |
# Extract coordinates from model output and rescale to original image
|
| 210 |
def _extract_xy(s: str):
|
|
|
|
| 218 |
return None
|
| 219 |
|
| 220 |
pred = _extract_xy(out_text)
|
| 221 |
+
if user_text is not None:
|
| 222 |
+
try:
|
| 223 |
+
print(f"[gta1-endpoint] Prompt: {user_text}")
|
| 224 |
+
except Exception:
|
| 225 |
+
pass
|
| 226 |
+
try:
|
| 227 |
+
print(f"[gta1-endpoint] Raw output: {out_text}")
|
| 228 |
+
except Exception:
|
| 229 |
+
pass
|
| 230 |
+
try:
|
| 231 |
+
print(f"[gta1-endpoint] Inference time: {t1 - t0:.3f}s")
|
| 232 |
+
except Exception:
|
| 233 |
+
pass
|
| 234 |
+
|
| 235 |
+
if pred is None or not (width and height):
|
| 236 |
+
return {"error": "Failed to parse coordinates or missing image dimensions."}
|
| 237 |
+
|
| 238 |
+
# The model returns pixel coordinates on the input image; we did not pre-resize
|
| 239 |
+
px = max(0.0, min(float(pred[0]), float(width)))
|
| 240 |
+
py = max(0.0, min(float(pred[1]), float(height)))
|
| 241 |
+
nx = px / float(width)
|
| 242 |
+
ny = py / float(height)
|
| 243 |
+
return {
|
| 244 |
+
"points": [{"x": nx, "y": ny}],
|
| 245 |
+
"raw": out_text,
|
| 246 |
+
}
|
| 247 |
|
| 248 |
|