Spaces:
Running
Running
Cap 5B ORT cache + 6 more demo images
Browse files
- app.py +28 -2
- assets/images/pexels-alimuart-16152561.jpg +3 -0
- assets/images/pexels-manish-jain-1176829519-31654008.jpg +3 -0
- assets/images/pexels-mehmet-altintas-392989477-31615337.jpg +3 -0
- assets/images/pexels-mihaela-claudia-puscas-836545137-31903488.jpg +3 -0
- assets/images/pexels-moph-29727777.jpg +3 -0
- assets/images/pexels-ruslan-rozanov-174297693-11404049.jpg +3 -0
app.py
CHANGED
|
@@ -233,6 +233,12 @@ def _get_dense_model(task: str, size: str):
|
|
| 233 |
os.makedirs(local_dir, exist_ok=True)
|
| 234 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
| 235 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
model = init_model(config, ckpt, device="cpu")
|
| 237 |
|
| 238 |
while len(_MODELS) >= _MAX_CACHED:
|
|
@@ -273,6 +279,13 @@ def _get_pose_model(size: str):
|
|
| 273 |
local_dir = f"/tmp/sapiens_models/pose-{size}"
|
| 274 |
os.makedirs(local_dir, exist_ok=True)
|
| 275 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
model = init_model(config, ckpt, device="cpu")
|
| 277 |
|
| 278 |
codec_cfg = dict(model.cfg.codec)
|
|
@@ -396,7 +409,9 @@ def _infer_pointmap(image_bgr, model):
|
|
| 396 |
|
| 397 |
# --- 5B INT8 ONNX path -------------------------------------------------------
|
| 398 |
def _get_ort_session(task: str):
|
| 399 |
-
"""Lazy-load + cache an ORT session for {task}_5b_int8.onnx.
|
|
|
|
|
|
|
| 400 |
key = (task, "5b")
|
| 401 |
sess = _ORT_SESSIONS.get(key)
|
| 402 |
if sess is not None:
|
|
@@ -406,10 +421,21 @@ def _get_ort_session(task: str):
|
|
| 406 |
spec = VARIANTS[key]
|
| 407 |
cache_dir = os.environ.get("ONNX_5B_CACHE", "/app/onnx_5b")
|
| 408 |
os.makedirs(cache_dir, exist_ok=True)
|
| 409 |
-
# Download both the graph .onnx and its external-data sidecar.
|
| 410 |
fn = spec["onnx_filename"]
|
| 411 |
onnx_path = hf_hub_download(repo_id=spec["onnx_repo"], filename=fn, local_dir=cache_dir)
|
| 412 |
hf_hub_download(repo_id=spec["onnx_repo"], filename=fn + ".data", local_dir=cache_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
|
| 414 |
_ORT_SESSIONS[key] = sess
|
| 415 |
return sess
|
|
|
|
| 233 |
os.makedirs(local_dir, exist_ok=True)
|
| 234 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
| 235 |
|
| 236 |
+
# If a 5B ORT session (5-6 GB) is resident, evict it before init_model allocates the 1B dense weights.
|
| 237 |
+
if size == "1b" and _ORT_SESSIONS:
|
| 238 |
+
_ORT_SESSIONS.clear()
|
| 239 |
+
import gc
|
| 240 |
+
gc.collect()
|
| 241 |
+
|
| 242 |
model = init_model(config, ckpt, device="cpu")
|
| 243 |
|
| 244 |
while len(_MODELS) >= _MAX_CACHED:
|
|
|
|
| 279 |
local_dir = f"/tmp/sapiens_models/pose-{size}"
|
| 280 |
os.makedirs(local_dir, exist_ok=True)
|
| 281 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
| 282 |
+
|
| 283 |
+
# Drop a resident 5B ORT session before allocating the 1B pose weights.
|
| 284 |
+
if size == "1b" and _ORT_SESSIONS:
|
| 285 |
+
_ORT_SESSIONS.clear()
|
| 286 |
+
import gc
|
| 287 |
+
gc.collect()
|
| 288 |
+
|
| 289 |
model = init_model(config, ckpt, device="cpu")
|
| 290 |
|
| 291 |
codec_cfg = dict(model.cfg.codec)
|
|
|
|
| 409 |
|
| 410 |
# --- 5B INT8 ONNX path -------------------------------------------------------
|
| 411 |
def _get_ort_session(task: str):
|
| 412 |
+
"""Lazy-load + cache an ORT session for {task}_5b_int8.onnx.
|
| 413 |
+
Each 5B session is 5-6 GB RAM. cpu-basic has 16 GB total, so keep at most one
|
| 414 |
+
5B session live and evict cached dense/pose PyTorch models that would push us OOM."""
|
| 415 |
key = (task, "5b")
|
| 416 |
sess = _ORT_SESSIONS.get(key)
|
| 417 |
if sess is not None:
|
|
|
|
| 421 |
spec = VARIANTS[key]
|
| 422 |
cache_dir = os.environ.get("ONNX_5B_CACHE", "/app/onnx_5b")
|
| 423 |
os.makedirs(cache_dir, exist_ok=True)
|
|
|
|
| 424 |
fn = spec["onnx_filename"]
|
| 425 |
onnx_path = hf_hub_download(repo_id=spec["onnx_repo"], filename=fn, local_dir=cache_dir)
|
| 426 |
hf_hub_download(repo_id=spec["onnx_repo"], filename=fn + ".data", local_dir=cache_dir)
|
| 427 |
+
# Evict any prior 5B ORT session and any 1b dense models — they together exceed 16 GB.
|
| 428 |
+
import gc
|
| 429 |
+
if _ORT_SESSIONS:
|
| 430 |
+
_ORT_SESSIONS.clear()
|
| 431 |
+
gc.collect()
|
| 432 |
+
for k in list(_MODELS.keys()):
|
| 433 |
+
if k[1] in ("1b", "0.8b"):
|
| 434 |
+
del _MODELS[k]
|
| 435 |
+
for k in list(_POSE_MODELS.keys()):
|
| 436 |
+
if k[1] in ("1b", "0.8b"):
|
| 437 |
+
del _POSE_MODELS[k]
|
| 438 |
+
gc.collect()
|
| 439 |
sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
|
| 440 |
_ORT_SESSIONS[key] = sess
|
| 441 |
return sess
|
assets/images/pexels-alimuart-16152561.jpg
ADDED
|
Git LFS Details
|
assets/images/pexels-manish-jain-1176829519-31654008.jpg
ADDED
|
Git LFS Details
|
assets/images/pexels-mehmet-altintas-392989477-31615337.jpg
ADDED
|
Git LFS Details
|
assets/images/pexels-mihaela-claudia-puscas-836545137-31903488.jpg
ADDED
|
Git LFS Details
|
assets/images/pexels-moph-29727777.jpg
ADDED
|
Git LFS Details
|
assets/images/pexels-ruslan-rozanov-174297693-11404049.jpg
ADDED
|
Git LFS Details
|