# EvalsOnTheHub / utils.py
# Author: Clémentine — "change it into a command writer" (commit c97be0a)
import gradio as gr
import subprocess
import os
from huggingface_hub import list_models, model_info, run_job, inspect_job, whoami
from lighteval.tasks.registry import Registry
from lighteval.utils.utils import as_list
def get_models():
    """Get list of popular text generation models from Hugging Face"""
    try:
        deployable = []
        # Walk the most-liked text-generation models and keep only those that
        # at least one inference provider actually serves.
        for entry in list_models(
            filter="text-generation",
            sort="likes",
            direction=-1,
            limit=1000,
            expand="inferenceProviderMapping",
        ):
            if getattr(entry, "inference_provider_mapping", None):
                deployable.append(entry.id)
        return deployable
    except Exception as err:
        # Best-effort: an empty dropdown is better than a crashed UI.
        print(f"Error fetching models: {err}")
        return []
def get_providers_for_model(model_id):
    """Get inference providers for a specific model"""
    if not model_id:
        return gr.update(choices=["NO PROVIDER"], value=None)
    try:
        details = model_info(model_id, expand="inferenceProviderMapping")
        mapping = getattr(details, "inference_provider_mapping", None)
        if not mapping:
            # Model exists but nobody serves it: empty choices, no sentinel.
            return gr.update(choices=[], value=None)
        providers = [item.provider for item in mapping]
        default = providers[0] if providers else None
        return gr.update(choices=providers, value=default)
    except Exception as err:
        print(f"Error fetching providers for {model_id}: {err}")
        return gr.update(choices=["NO PROVIDER"], value=None)
# Cache tasks at module level to avoid reloading
_TASKS_CACHE = None


def get_available_tasks():
    """Get list of available lighteval tasks"""
    global _TASKS_CACHE
    if _TASKS_CACHE is not None:
        return _TASKS_CACHE
    try:
        print("Loading lighteval tasks...")
        loaded = Registry().load_tasks()
        # We only want generative metrics as the others won't run with inference providers
        generative = sorted(
            f"{cfg.suite[0]}|{task_name}"
            for task_name, cfg in loaded.items()
            if all(metric.category.value == "GENERATIVE" for metric in cfg.metrics)
        )
        _TASKS_CACHE = generative
        print(f"Loaded {len(generative)} tasks")
        return generative
    except Exception as err:
        # Cache stays None so a later call can retry the load.
        print(f"Error fetching tasks: {err}")
        return []
def build_lighteval_command(
    model="MODEL",
    provider="PROVIDER",
    tasks="TASKS",
    results_org="YOUR_ORG",
    username="YOUR_USERNAME",
):
    """Build a copy-pasteable ``run_job`` Python snippet from UI selections.

    Args:
        model: Model repo id to evaluate (e.g. ``"meta-llama/Llama-3.1-8B"``).
        provider: Inference provider to route the model's requests through.
        tasks: One ``"suite|task"`` string, or a sequence of them.
        results_org: Org/user the evaluation results are pushed to.
        username: Hub namespace the job is launched under.

    Returns:
        str: Python source the user can run themselves to launch the job.
    """
    # Coerce to a list locally instead of via lighteval's as_list helper:
    # callers pass either a single task string or a sequence of task strings.
    if isinstance(tasks, str):
        tasks = [tasks]
    else:
        tasks = list(tasks)
    model_provider_arg = f"model_name={model},provider={provider}"
    # lighteval takes comma-separated task specs as a single CLI argument.
    task_args = ",".join(tasks)
    # Build the Python code string shown to the user. run_job/inspect_job and
    # the HF_TOKEN comments are part of the emitted snippet, not of this app.
    command = f'''from huggingface_hub import run_job, inspect_job
import os

# The token must be able to call inference providers and write to the org you selected on your behalf
# Test the command with `"--max-samples", "10",` first, and remove to run a full evaluation job
job = run_job(
    image="hf.co/spaces/OpenEvals/EvalsOnTheHub",
    command=[
        "lighteval", "endpoint", "inference-providers",
        "{model_provider_arg}",
        "{task_args}",
        "--push-to-hub", "--save-details",
        "--results-org", "{results_org}"
    ],
    namespace="{username}",
    secrets={{"HF_TOKEN": os.getenv("HF_TOKEN")}},
    token=os.getenv("HF_TOKEN")
)'''
    return command