fffiloni committed on
Commit
cb08e69
·
verified ·
1 Parent(s): c75edcb

Improve ZeroGPU duration estimation using video metadata and cache-awareness for faster, more accurate allocation

Browse files
Files changed (1) hide show
  1. app.py +51 -6
app.py CHANGED
@@ -219,7 +219,7 @@ def get_autogaze_setup(device: torch.device | str):
219
  return AUTOGAZE_SETUP
220
 
221
 
222
- def get_processor(
223
  num_video_frames: int,
224
  num_video_frames_thumbnail: int,
225
  max_tiles_video: int,
@@ -229,8 +229,8 @@ def get_processor(
229
  gazing_ratio_thumbnail: float,
230
  task_loss_requirement_thumbnail: float | None,
231
  max_batch_size_autogaze: int,
232
- ):
233
- key = (
234
  num_video_frames,
235
  num_video_frames_thumbnail,
236
  max_tiles_video,
@@ -244,6 +244,30 @@ def get_processor(
244
  max_batch_size_autogaze,
245
  )
246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  if key not in PROCESSOR_CACHE:
248
  debug_log(f"[NVILA] creating processor for key={key}")
249
  gazing_ratio_tile = [first_frame_ratio] + [other_frame_ratio] * 15
@@ -1418,6 +1442,18 @@ def estimate_duration(*args):
1418
  + max_new_tokens * 0.35
1419
  )
1420
 
 
 
 
 
 
 
 
 
 
 
 
 
1421
  if video_path and os.path.exists(video_path):
1422
  cache_key = make_nvila_video_cache_key(
1423
  video_path=video_path,
@@ -1474,15 +1510,24 @@ def estimate_duration(*args):
1474
  score -= 10
1475
 
1476
  cold_model = MODEL is None
1477
- cold_processor = len(PROCESSOR_CACHE) == 0
 
1478
 
1479
  if cold_model:
1480
  score += 90
1481
- if cold_processor:
1482
- score += 20
 
 
 
 
 
1483
  if cold_model and not nvila_cache_hit:
1484
  score += 20
1485
 
 
 
 
1486
  score = max(score, 20)
1487
 
1488
  if score <= 50:
 
219
  return AUTOGAZE_SETUP
220
 
221
 
222
+ def make_processor_cache_key(
223
  num_video_frames: int,
224
  num_video_frames_thumbnail: int,
225
  max_tiles_video: int,
 
229
  gazing_ratio_thumbnail: float,
230
  task_loss_requirement_thumbnail: float | None,
231
  max_batch_size_autogaze: int,
232
+ ) -> tuple:
233
+ return (
234
  num_video_frames,
235
  num_video_frames_thumbnail,
236
  max_tiles_video,
 
244
  max_batch_size_autogaze,
245
  )
246
 
247
+
248
+ def get_processor(
249
+ num_video_frames: int,
250
+ num_video_frames_thumbnail: int,
251
+ max_tiles_video: int,
252
+ first_frame_ratio: float,
253
+ other_frame_ratio: float,
254
+ task_loss_requirement_tile: float,
255
+ gazing_ratio_thumbnail: float,
256
+ task_loss_requirement_thumbnail: float | None,
257
+ max_batch_size_autogaze: int,
258
+ ):
259
+ key = make_processor_cache_key(
260
+ num_video_frames=num_video_frames,
261
+ num_video_frames_thumbnail=num_video_frames_thumbnail,
262
+ max_tiles_video=max_tiles_video,
263
+ first_frame_ratio=first_frame_ratio,
264
+ other_frame_ratio=other_frame_ratio,
265
+ task_loss_requirement_tile=task_loss_requirement_tile,
266
+ gazing_ratio_thumbnail=gazing_ratio_thumbnail,
267
+ task_loss_requirement_thumbnail=task_loss_requirement_thumbnail,
268
+ max_batch_size_autogaze=max_batch_size_autogaze,
269
+ )
270
+
271
  if key not in PROCESSOR_CACHE:
272
  debug_log(f"[NVILA] creating processor for key={key}")
273
  gazing_ratio_tile = [first_frame_ratio] + [other_frame_ratio] * 15
 
1442
  + max_new_tokens * 0.35
1443
  )
1444
 
1445
+ processor_key = make_processor_cache_key(
1446
+ num_video_frames=num_video_frames,
1447
+ num_video_frames_thumbnail=num_video_frames_thumbnail,
1448
+ max_tiles_video=max_tiles_video,
1449
+ first_frame_ratio=first_frame_ratio,
1450
+ other_frame_ratio=other_frame_ratio,
1451
+ task_loss_requirement_tile=task_loss_requirement_tile,
1452
+ gazing_ratio_thumbnail=gazing_ratio_thumbnail,
1453
+ task_loss_requirement_thumbnail=thumbnail_task_loss,
1454
+ max_batch_size_autogaze=max_batch_size_autogaze,
1455
+ )
1456
+
1457
  if video_path and os.path.exists(video_path):
1458
  cache_key = make_nvila_video_cache_key(
1459
  video_path=video_path,
 
1510
  score -= 10
1511
 
1512
  cold_model = MODEL is None
1513
+ processor_exact_hit = processor_key in PROCESSOR_CACHE
1514
+ any_processor_loaded = len(PROCESSOR_CACHE) > 0
1515
 
1516
  if cold_model:
1517
  score += 90
1518
+
1519
+ if not processor_exact_hit:
1520
+ if any_processor_loaded:
1521
+ score += 8
1522
+ else:
1523
+ score += 20
1524
+
1525
  if cold_model and not nvila_cache_hit:
1526
  score += 20
1527
 
1528
+ if processor_exact_hit:
1529
+ score -= 5
1530
+
1531
  score = max(score, 20)
1532
 
1533
  if score <= 50: