Spaces:

Intel
/

low_bit_open_llm_leaderboard

Running

App Files Files Community

low_bit_open_llm_leaderboard / src /queue_eta.py

wenjiao

refactor repo code

b15b21e about 24 hours ago

raw

history blame contribute delete

3.61 kB

	"""Queue ETA estimation for the leaderboard.

	Provides a slot-simulation based ETA calculation:

	ETA(pos) = running_remaining + ⌈pos / concurrency⌉ × task_duration

	where task_duration depends on model size:
	- ≤ 30B params → 3 hours
	- > 30B params → 5 hours

	running_remaining is approximated as the average estimated task duration
	of currently active (Running/Waiting/Triggered) entries, because we don't
	know exactly when each one started.
	"""

	import json
	import logging
	import math
	import os
	from datetime import datetime, timedelta

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)

	# ── Task duration estimates (hours) ──────────────────────────────────────
	# Read by estimate_task_hours() to map a model size to a task duration.
	_SMALL_MODEL_HOURS = 3 # hours per task for models of ≤ 30B params (also the fallback for unknown size)
	_LARGE_MODEL_HOURS = 5 # hours per task for models of > 30B params
	_SIZE_THRESHOLD_B = 30.0 # size cut-off between "small" and "large", in billions of params


	def estimate_task_hours(model_params: float | None) -> float:
	    """Return the estimated task duration in hours for a model of the given size.

	    Args:
	        model_params: Parameter count in billions, or ``None`` when unknown.

	    Returns:
	        ``_LARGE_MODEL_HOURS`` when ``model_params`` exceeds
	        ``_SIZE_THRESHOLD_B``; otherwise ``_SMALL_MODEL_HOURS``, which is also
	        the fallback for a missing or non-positive size.
	    """
	    # Unknown or nonsensical sizes fall back to the small-model estimate.
	    if model_params is None or model_params <= 0:
	        return _SMALL_MODEL_HOURS
	    if model_params > _SIZE_THRESHOLD_B:
	        return _LARGE_MODEL_HOURS
	    return _SMALL_MODEL_HOURS


	def compute_single_eta(
	    status_path: str,
	    model_params: float | None,
	    concurrency: int = 2,
	) -> float:
	    """Compute ETA for a newly submitted model (it goes to the end of the queue).

	    Used by submit.py to show the user an immediate estimate.

	    The estimate is
	    ``running_remaining + ceil(queue_pos / concurrency) * task_hours`` where
	    ``running_remaining`` is the mean estimated duration of the active
	    (Running/Waiting/Triggered) entries and ``queue_pos`` is the number of
	    pending ``auto_quant``/``auto_eval`` entries (at least 1).

	    Args:
	        status_path: Directory that is walked recursively for ``*.json``
	            status files. A missing directory is treated as an empty queue.
	        model_params: Size of the submitted model in billions of parameters,
	            or ``None`` when unknown.
	        concurrency: Number of tasks processed in parallel. Values below 1
	            are treated as 1 so the estimate never divides by zero or goes
	            negative.

	    Returns:
	        The estimated wait in hours, rounded to one decimal place.
	    """
	    active_count = 0
	    pending_count = 0
	    active_hours_sum = 0.0

	    if os.path.isdir(status_path):
	        for data in _iter_status_entries(status_path):
	            status = data.get("status", "")
	            script = data.get("script", "")

	            if status in ("Running", "Waiting", "Triggered"):
	                active_count += 1
	                active_hours_sum += estimate_task_hours(_get_params(data))
	            elif status == "Pending" and script in ("auto_quant", "auto_eval"):
	                pending_count += 1

	    # The model's own status file has already been written by _upload_to_hub,
	    # so it is already included in pending_count.
	    queue_pos = max(pending_count, 1)
	    task_hours = estimate_task_hours(model_params)

	    # Start times of active tasks are unknown, so their mean estimated
	    # duration stands in for the time remaining until a slot frees up.
	    if active_count > 0:
	        running_remaining = active_hours_sum / active_count
	    else:
	        running_remaining = 0.0

	    # Guard against a zero or negative slot count.
	    slots = max(concurrency, 1)
	    eta = running_remaining + math.ceil(queue_pos / slots) * task_hours
	    return round(eta, 1)


	def _iter_status_entries(status_path: str):
	    """Yield the parsed JSON object of each readable ``*.json`` file under *status_path*.

	    Files that cannot be opened, decoded or parsed are logged and skipped, as
	    are files whose top-level JSON value is not an object, so one bad file
	    cannot break the ETA estimate.
	    """
	    for root, _dirs, files in os.walk(status_path):
	        for fname in files:
	            if not fname.endswith(".json"):
	                continue
	            fpath = os.path.join(root, fname)
	            try:
	                with open(fpath, encoding="utf-8") as f:
	                    data = json.load(f)
	            except (OSError, ValueError) as exc:
	                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	                logger.warning("Skipping unreadable status file %s: %s", fpath, exc)
	                continue
	            if not isinstance(data, dict):
	                logger.warning("Skipping status file %s: top-level JSON is not an object", fpath)
	                continue
	            yield data


	def format_eta(hours: float) -> str:
	    """Render an ETA given in hours as a short label such as '~6h' or '~1d 2h'.

	    Non-positive values give '< 1h'. Anything else is rounded up to whole
	    hours and shown as hours only (under a day), days only (exact multiple
	    of 24), or days plus leftover hours.
	    """
	    if hours <= 0:
	        return "< 1h"

	    whole_hours = int(math.ceil(hours))
	    days, leftover = divmod(whole_hours, 24)

	    if days == 0:
	        return f"~{whole_hours}h"
	    if leftover == 0:
	        return f"~{days}d"
	    return f"~{days}d {leftover}h"


	def _get_params(data: dict) -> float \| None:
	"""Extract model parameter count (in billions) from a status entry."""
	for key in ("model_params", "params"):
	val = data.get(key)
	if val is not None:
	try:
	return float(val)
	except (TypeError, ValueError):
	pass
	return None