Spaces:
Running on Zero
Improve ZeroGPU duration estimation using video metadata and cache-awareness for faster, more accurate allocation
Browse files
app.py
CHANGED
|
@@ -219,7 +219,7 @@ def get_autogaze_setup(device: torch.device | str):
|
|
| 219 |
return AUTOGAZE_SETUP
|
| 220 |
|
| 221 |
|
| 222 |
-
def
|
| 223 |
num_video_frames: int,
|
| 224 |
num_video_frames_thumbnail: int,
|
| 225 |
max_tiles_video: int,
|
|
@@ -229,8 +229,8 @@ def get_processor(
|
|
| 229 |
gazing_ratio_thumbnail: float,
|
| 230 |
task_loss_requirement_thumbnail: float | None,
|
| 231 |
max_batch_size_autogaze: int,
|
| 232 |
-
):
|
| 233 |
-
|
| 234 |
num_video_frames,
|
| 235 |
num_video_frames_thumbnail,
|
| 236 |
max_tiles_video,
|
|
@@ -244,6 +244,30 @@ def get_processor(
|
|
| 244 |
max_batch_size_autogaze,
|
| 245 |
)
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
if key not in PROCESSOR_CACHE:
|
| 248 |
debug_log(f"[NVILA] creating processor for key={key}")
|
| 249 |
gazing_ratio_tile = [first_frame_ratio] + [other_frame_ratio] * 15
|
|
@@ -1418,6 +1442,18 @@ def estimate_duration(*args):
|
|
| 1418 |
+ max_new_tokens * 0.35
|
| 1419 |
)
|
| 1420 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1421 |
if video_path and os.path.exists(video_path):
|
| 1422 |
cache_key = make_nvila_video_cache_key(
|
| 1423 |
video_path=video_path,
|
|
@@ -1474,15 +1510,24 @@ def estimate_duration(*args):
|
|
| 1474 |
score -= 10
|
| 1475 |
|
| 1476 |
cold_model = MODEL is None
|
| 1477 |
-
|
|
|
|
| 1478 |
|
| 1479 |
if cold_model:
|
| 1480 |
score += 90
|
| 1481 |
-
|
| 1482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1483 |
if cold_model and not nvila_cache_hit:
|
| 1484 |
score += 20
|
| 1485 |
|
|
|
|
|
|
|
|
|
|
| 1486 |
score = max(score, 20)
|
| 1487 |
|
| 1488 |
if score <= 50:
|
|
|
|
| 219 |
return AUTOGAZE_SETUP
|
| 220 |
|
| 221 |
|
| 222 |
+
def make_processor_cache_key(
|
| 223 |
num_video_frames: int,
|
| 224 |
num_video_frames_thumbnail: int,
|
| 225 |
max_tiles_video: int,
|
|
|
|
| 229 |
gazing_ratio_thumbnail: float,
|
| 230 |
task_loss_requirement_thumbnail: float | None,
|
| 231 |
max_batch_size_autogaze: int,
|
| 232 |
+
) -> tuple:
|
| 233 |
+
return (
|
| 234 |
num_video_frames,
|
| 235 |
num_video_frames_thumbnail,
|
| 236 |
max_tiles_video,
|
|
|
|
| 244 |
max_batch_size_autogaze,
|
| 245 |
)
|
| 246 |
|
| 247 |
+
|
| 248 |
+
def get_processor(
|
| 249 |
+
num_video_frames: int,
|
| 250 |
+
num_video_frames_thumbnail: int,
|
| 251 |
+
max_tiles_video: int,
|
| 252 |
+
first_frame_ratio: float,
|
| 253 |
+
other_frame_ratio: float,
|
| 254 |
+
task_loss_requirement_tile: float,
|
| 255 |
+
gazing_ratio_thumbnail: float,
|
| 256 |
+
task_loss_requirement_thumbnail: float | None,
|
| 257 |
+
max_batch_size_autogaze: int,
|
| 258 |
+
):
|
| 259 |
+
key = make_processor_cache_key(
|
| 260 |
+
num_video_frames=num_video_frames,
|
| 261 |
+
num_video_frames_thumbnail=num_video_frames_thumbnail,
|
| 262 |
+
max_tiles_video=max_tiles_video,
|
| 263 |
+
first_frame_ratio=first_frame_ratio,
|
| 264 |
+
other_frame_ratio=other_frame_ratio,
|
| 265 |
+
task_loss_requirement_tile=task_loss_requirement_tile,
|
| 266 |
+
gazing_ratio_thumbnail=gazing_ratio_thumbnail,
|
| 267 |
+
task_loss_requirement_thumbnail=task_loss_requirement_thumbnail,
|
| 268 |
+
max_batch_size_autogaze=max_batch_size_autogaze,
|
| 269 |
+
)
|
| 270 |
+
|
| 271 |
if key not in PROCESSOR_CACHE:
|
| 272 |
debug_log(f"[NVILA] creating processor for key={key}")
|
| 273 |
gazing_ratio_tile = [first_frame_ratio] + [other_frame_ratio] * 15
|
|
|
|
| 1442 |
+ max_new_tokens * 0.35
|
| 1443 |
)
|
| 1444 |
|
| 1445 |
+
processor_key = make_processor_cache_key(
|
| 1446 |
+
num_video_frames=num_video_frames,
|
| 1447 |
+
num_video_frames_thumbnail=num_video_frames_thumbnail,
|
| 1448 |
+
max_tiles_video=max_tiles_video,
|
| 1449 |
+
first_frame_ratio=first_frame_ratio,
|
| 1450 |
+
other_frame_ratio=other_frame_ratio,
|
| 1451 |
+
task_loss_requirement_tile=task_loss_requirement_tile,
|
| 1452 |
+
gazing_ratio_thumbnail=gazing_ratio_thumbnail,
|
| 1453 |
+
task_loss_requirement_thumbnail=thumbnail_task_loss,
|
| 1454 |
+
max_batch_size_autogaze=max_batch_size_autogaze,
|
| 1455 |
+
)
|
| 1456 |
+
|
| 1457 |
if video_path and os.path.exists(video_path):
|
| 1458 |
cache_key = make_nvila_video_cache_key(
|
| 1459 |
video_path=video_path,
|
|
|
|
| 1510 |
score -= 10
|
| 1511 |
|
| 1512 |
cold_model = MODEL is None
|
| 1513 |
+
processor_exact_hit = processor_key in PROCESSOR_CACHE
|
| 1514 |
+
any_processor_loaded = len(PROCESSOR_CACHE) > 0
|
| 1515 |
|
| 1516 |
if cold_model:
|
| 1517 |
score += 90
|
| 1518 |
+
|
| 1519 |
+
if not processor_exact_hit:
|
| 1520 |
+
if any_processor_loaded:
|
| 1521 |
+
score += 8
|
| 1522 |
+
else:
|
| 1523 |
+
score += 20
|
| 1524 |
+
|
| 1525 |
if cold_model and not nvila_cache_hit:
|
| 1526 |
score += 20
|
| 1527 |
|
| 1528 |
+
if processor_exact_hit:
|
| 1529 |
+
score -= 5
|
| 1530 |
+
|
| 1531 |
score = max(score, 20)
|
| 1532 |
|
| 1533 |
if score <= 50:
|