Spaces:

fffiloni
/

NVILA-HD-Video-AutoGaze

Running on Zero

App Files Files Community

fffiloni committed 26 days ago

Commit

cb08e69

verified ·

1 Parent(s): c75edcb

Improve ZeroGPU duration estimation using video metadata and cache-awareness for faster, more accurate allocation

Browse files

Files changed (1) hide show

app.py +51 -6

app.py CHANGED Viewed

@@ -219,7 +219,7 @@ def get_autogaze_setup(device: torch.device | str):
     return AUTOGAZE_SETUP
-def get_processor(
     num_video_frames: int,
     num_video_frames_thumbnail: int,
     max_tiles_video: int,
@@ -229,8 +229,8 @@ def get_processor(
     gazing_ratio_thumbnail: float,
     task_loss_requirement_thumbnail: float | None,
     max_batch_size_autogaze: int,
-):
-    key = (
         num_video_frames,
         num_video_frames_thumbnail,
         max_tiles_video,
@@ -244,6 +244,30 @@ def get_processor(
         max_batch_size_autogaze,
     )
     if key not in PROCESSOR_CACHE:
         debug_log(f"[NVILA] creating processor for key={key}")
         gazing_ratio_tile = [first_frame_ratio] + [other_frame_ratio] * 15
@@ -1418,6 +1442,18 @@ def estimate_duration(*args):
         + max_new_tokens * 0.35
     )
     if video_path and os.path.exists(video_path):
         cache_key = make_nvila_video_cache_key(
             video_path=video_path,
@@ -1474,15 +1510,24 @@ def estimate_duration(*args):
         score -= 10
     cold_model = MODEL is None
-    cold_processor = len(PROCESSOR_CACHE) == 0
     if cold_model:
         score += 90
-    if cold_processor:
-        score += 20
     if cold_model and not nvila_cache_hit:
         score += 20
     score = max(score, 20)
     if score <= 50:

     return AUTOGAZE_SETUP
+def make_processor_cache_key(
     num_video_frames: int,
     num_video_frames_thumbnail: int,
     max_tiles_video: int,
     gazing_ratio_thumbnail: float,
     task_loss_requirement_thumbnail: float | None,
     max_batch_size_autogaze: int,
+) -> tuple:
+    return (
         num_video_frames,
         num_video_frames_thumbnail,
         max_tiles_video,
         max_batch_size_autogaze,
     )
+def get_processor(
+    num_video_frames: int,
+    num_video_frames_thumbnail: int,
+    max_tiles_video: int,
+    first_frame_ratio: float,
+    other_frame_ratio: float,
+    task_loss_requirement_tile: float,
+    gazing_ratio_thumbnail: float,
+    task_loss_requirement_thumbnail: float | None,
+    max_batch_size_autogaze: int,
+):
+    key = make_processor_cache_key(
+        num_video_frames=num_video_frames,
+        num_video_frames_thumbnail=num_video_frames_thumbnail,
+        max_tiles_video=max_tiles_video,
+        first_frame_ratio=first_frame_ratio,
+        other_frame_ratio=other_frame_ratio,
+        task_loss_requirement_tile=task_loss_requirement_tile,
+        gazing_ratio_thumbnail=gazing_ratio_thumbnail,
+        task_loss_requirement_thumbnail=task_loss_requirement_thumbnail,
+        max_batch_size_autogaze=max_batch_size_autogaze,
+    )
     if key not in PROCESSOR_CACHE:
         debug_log(f"[NVILA] creating processor for key={key}")
         gazing_ratio_tile = [first_frame_ratio] + [other_frame_ratio] * 15
         + max_new_tokens * 0.35
     )
+    processor_key = make_processor_cache_key(
+        num_video_frames=num_video_frames,
+        num_video_frames_thumbnail=num_video_frames_thumbnail,
+        max_tiles_video=max_tiles_video,
+        first_frame_ratio=first_frame_ratio,
+        other_frame_ratio=other_frame_ratio,
+        task_loss_requirement_tile=task_loss_requirement_tile,
+        gazing_ratio_thumbnail=gazing_ratio_thumbnail,
+        task_loss_requirement_thumbnail=thumbnail_task_loss,
+        max_batch_size_autogaze=max_batch_size_autogaze,
+    )
     if video_path and os.path.exists(video_path):
         cache_key = make_nvila_video_cache_key(
             video_path=video_path,
         score -= 10
     cold_model = MODEL is None
+    processor_exact_hit = processor_key in PROCESSOR_CACHE
+    any_processor_loaded = len(PROCESSOR_CACHE) > 0
     if cold_model:
         score += 90
+    if not processor_exact_hit:
+        if any_processor_loaded:
+            score += 8
+        else:
+            score += 20
     if cold_model and not nvila_cache_hit:
         score += 20
+    if processor_exact_hit:
+        score -= 5
     score = max(score, 20)
     if score <= 50: