Nekochu committed on
Commit
807e510
·
1 Parent(s): 8fb3a26

Cap 5B ORT cache + 6 more demo images

Browse files
app.py CHANGED
@@ -233,6 +233,12 @@ def _get_dense_model(task: str, size: str):
233
  os.makedirs(local_dir, exist_ok=True)
234
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
235
 
 
 
 
 
 
 
236
  model = init_model(config, ckpt, device="cpu")
237
 
238
  while len(_MODELS) >= _MAX_CACHED:
@@ -273,6 +279,13 @@ def _get_pose_model(size: str):
273
  local_dir = f"/tmp/sapiens_models/pose-{size}"
274
  os.makedirs(local_dir, exist_ok=True)
275
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
 
 
 
 
 
 
 
276
  model = init_model(config, ckpt, device="cpu")
277
 
278
  codec_cfg = dict(model.cfg.codec)
@@ -396,7 +409,9 @@ def _infer_pointmap(image_bgr, model):
396
 
397
  # --- 5B INT8 ONNX path -------------------------------------------------------
398
  def _get_ort_session(task: str):
399
- """Lazy-load + cache an ORT session for {task}_5b_int8.onnx."""
 
 
400
  key = (task, "5b")
401
  sess = _ORT_SESSIONS.get(key)
402
  if sess is not None:
@@ -406,10 +421,21 @@ def _get_ort_session(task: str):
406
  spec = VARIANTS[key]
407
  cache_dir = os.environ.get("ONNX_5B_CACHE", "/app/onnx_5b")
408
  os.makedirs(cache_dir, exist_ok=True)
409
- # Download both the graph .onnx and its external-data sidecar.
410
  fn = spec["onnx_filename"]
411
  onnx_path = hf_hub_download(repo_id=spec["onnx_repo"], filename=fn, local_dir=cache_dir)
412
  hf_hub_download(repo_id=spec["onnx_repo"], filename=fn + ".data", local_dir=cache_dir)
 
 
 
 
 
 
 
 
 
 
 
 
413
  sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
414
  _ORT_SESSIONS[key] = sess
415
  return sess
 
233
  os.makedirs(local_dir, exist_ok=True)
234
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
235
 
236
+ # If a 5B ORT session (5-6 GB) is resident, evict it before init_model allocates the 1B dense weights.
237
+ if size == "1b" and _ORT_SESSIONS:
238
+ _ORT_SESSIONS.clear()
239
+ import gc
240
+ gc.collect()
241
+
242
  model = init_model(config, ckpt, device="cpu")
243
 
244
  while len(_MODELS) >= _MAX_CACHED:
 
279
  local_dir = f"/tmp/sapiens_models/pose-{size}"
280
  os.makedirs(local_dir, exist_ok=True)
281
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
282
+
283
+ # Drop a resident 5B ORT session before allocating the 1B pose weights.
284
+ if size == "1b" and _ORT_SESSIONS:
285
+ _ORT_SESSIONS.clear()
286
+ import gc
287
+ gc.collect()
288
+
289
  model = init_model(config, ckpt, device="cpu")
290
 
291
  codec_cfg = dict(model.cfg.codec)
 
409
 
410
  # --- 5B INT8 ONNX path -------------------------------------------------------
411
  def _get_ort_session(task: str):
412
+ """Lazy-load + cache an ORT session for {task}_5b_int8.onnx.
413
+ Each 5B session is 5-6 GB RAM. cpu-basic has 16 GB total, so keep at most one
414
+ 5B session live and evict cached dense/pose PyTorch models that would push us OOM."""
415
  key = (task, "5b")
416
  sess = _ORT_SESSIONS.get(key)
417
  if sess is not None:
 
421
  spec = VARIANTS[key]
422
  cache_dir = os.environ.get("ONNX_5B_CACHE", "/app/onnx_5b")
423
  os.makedirs(cache_dir, exist_ok=True)
 
424
  fn = spec["onnx_filename"]
425
  onnx_path = hf_hub_download(repo_id=spec["onnx_repo"], filename=fn, local_dir=cache_dir)
426
  hf_hub_download(repo_id=spec["onnx_repo"], filename=fn + ".data", local_dir=cache_dir)
427
+ # Evict any prior 5B ORT session and any cached 1b/0.8b dense and pose models — together they exceed 16 GB.
428
+ import gc
429
+ if _ORT_SESSIONS:
430
+ _ORT_SESSIONS.clear()
431
+ gc.collect()
432
+ for k in list(_MODELS.keys()):
433
+ if k[1] in ("1b", "0.8b"):
434
+ del _MODELS[k]
435
+ for k in list(_POSE_MODELS.keys()):
436
+ if k[1] in ("1b", "0.8b"):
437
+ del _POSE_MODELS[k]
438
+ gc.collect()
439
  sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
440
  _ORT_SESSIONS[key] = sess
441
  return sess
assets/images/pexels-alimuart-16152561.jpg ADDED

Git LFS Details

  • SHA256: 7899a084900cd741959c886d2b7bad00e042d5def226bdb554cde28e7da98cdf
  • Pointer size: 132 Bytes
  • Size of remote file: 1.59 MB
assets/images/pexels-manish-jain-1176829519-31654008.jpg ADDED

Git LFS Details

  • SHA256: 6f6ae7b2e3e71304efdb26466689f2df4a1fe767d8f9c93e3dd79ffd6b4a6f50
  • Pointer size: 132 Bytes
  • Size of remote file: 2.39 MB
assets/images/pexels-mehmet-altintas-392989477-31615337.jpg ADDED

Git LFS Details

  • SHA256: 2986083665052e10bbefb7363c5d1e98c3fe0b8cc4e5beb6d6bb7e2cfd01fae4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.92 MB
assets/images/pexels-mihaela-claudia-puscas-836545137-31903488.jpg ADDED

Git LFS Details

  • SHA256: 1b9f09f929eb0eae0f559b766cbe86c01ae0510d36af5d33e776163197123850
  • Pointer size: 132 Bytes
  • Size of remote file: 1.11 MB
assets/images/pexels-moph-29727777.jpg ADDED

Git LFS Details

  • SHA256: 3afb657363bb4b689796d5f3bc47a9a16084921fbd81832bfffe37c08d3c1d86
  • Pointer size: 131 Bytes
  • Size of remote file: 364 kB
assets/images/pexels-ruslan-rozanov-174297693-11404049.jpg ADDED

Git LFS Details

  • SHA256: 717fc317962427bb90655269957ca1d8dbcaeba3f45c175e96ef539d385d6e9f
  • Pointer size: 132 Bytes
  • Size of remote file: 2.71 MB