File size: 3,497 Bytes
70a6b6b
 
 
c97be0a
70a6b6b
c97be0a
70a6b6b
 
 
 
 
c97be0a
 
70a6b6b
c97be0a
 
70a6b6b
c97be0a
 
 
 
 
 
70a6b6b
 
 
c97be0a
70a6b6b
 
 
 
c97be0a
70a6b6b
 
 
 
 
 
 
 
 
 
c97be0a
70a6b6b
 
 
 
 
 
 
 
 
 
 
 
 
c97be0a
 
 
 
 
 
70a6b6b
 
 
 
 
 
 
 
c97be0a
 
 
 
 
 
 
 
 
70a6b6b
c97be0a
70a6b6b
c97be0a
 
 
70a6b6b
c97be0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70a6b6b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import gradio as gr
import subprocess
import os
from huggingface_hub import list_models, model_info, run_job, inspect_job, whoami
from lighteval.tasks.registry import Registry
from lighteval.utils.utils import as_list

def get_models():
    """Return IDs of popular text-generation models that expose at least one inference provider."""
    try:
        # Most-liked text-generation models first; provider mapping expanded so we can filter on it.
        candidates = list_models(
            filter="text-generation",
            sort="likes",
            direction=-1,
            limit=1000,
            expand="inferenceProviderMapping",
        )
        # Keep only models with a non-empty provider mapping.
        return [
            candidate.id
            for candidate in candidates
            if getattr(candidate, "inference_provider_mapping", None)
        ]
    except Exception as e:
        print(f"Error fetching models: {e}")
        return []

def get_providers_for_model(model_id):
    """Return a gr.update populating the provider dropdown for *model_id*.

    Falls back to a "NO PROVIDER" sentinel choice when no model is selected
    or when the Hub lookup fails; returns empty choices when the model has
    no provider mapping.
    """
    if not model_id:
        return gr.update(choices=["NO PROVIDER"], value=None)

    try:
        meta = model_info(model_id, expand="inferenceProviderMapping")
        mapping = getattr(meta, "inference_provider_mapping", None)
        if not mapping:
            return gr.update(choices=[], value=None)
        names = [entry.provider for entry in mapping]
        default = names[0] if names else None
        return gr.update(choices=names, value=default)
    except Exception as e:
        print(f"Error fetching providers for {model_id}: {e}")
        return gr.update(choices=["NO PROVIDER"], value=None)

# Cache tasks at module level to avoid reloading
# Populated lazily by get_available_tasks(); None means "not loaded yet".
_TASKS_CACHE: list[str] | None = None

def get_available_tasks():
    """Return the sorted list of lighteval task specs whose metrics are all generative.

    The result is cached in the module-level _TASKS_CACHE so the registry is
    only loaded once per process.
    """
    global _TASKS_CACHE
    if _TASKS_CACHE is not None:
        return _TASKS_CACHE

    try:
        print("Loading lighteval tasks...")
        registry = Registry()
        # We only want generative metrics as the others won't run with inference providers
        generative = []
        for task_name, task_config in registry.load_tasks().items():
            if all(metric.category.value == "GENERATIVE" for metric in task_config.metrics):
                generative.append(f"{task_config.suite[0]}|{task_name}")
        _TASKS_CACHE = sorted(generative)
        print(f"Loaded {len(_TASKS_CACHE)} tasks")
        return _TASKS_CACHE
    except Exception as e:
        print(f"Error fetching tasks: {e}")
        return []

def build_lighteval_command(
    model = "MODEL", 
    provider = "PROVIDER", 
    tasks = "TASKS", 
    results_org = "YOUR_ORG", 
    username="YOUR_USERNAME"
):
    """Build a self-contained Python snippet that launches a lighteval Hub job.

    Parameters
    ----------
    model : str
        Hub model id to evaluate (e.g. "meta-llama/Llama-3.1-8B-Instruct").
    provider : str
        Inference provider name to route the evaluation through.
    tasks : str | list | tuple
        One task spec or a collection of task specs ("suite|task" strings);
        a single string is wrapped in a list.
    results_org : str
        Hub org the results are pushed to.
    username : str
        Hub namespace the job runs under.

    Returns
    -------
    str
        Python source code the user can copy-paste to run the job.
    """
    # Coerce a lone task spec into a sequence (inlined stdlib check instead of
    # lighteval's as_list helper — keeps this function dependency-free).
    if not isinstance(tasks, (list, tuple)):
        tasks = [tasks]
    model_provider_arg = f'model_name={model},provider={provider}'
    # lighteval takes a single comma-separated task-list argument.
    task_args = ','.join(str(task) for task in tasks)

    # Build the Python code string
    command = f'''from huggingface_hub import run_job, inspect_job
import os

# The token must be able to call inference providers and write to the org you selected on your behalf
# Test the command with `"--max-samples", "10",` first, and remove to run a full evaluation job

job = run_job(
    image="hf.co/spaces/OpenEvals/EvalsOnTheHub",
    command=[
        "lighteval", "endpoint", "inference-providers",
        "{model_provider_arg}",
        "{task_args}",
        "--push-to-hub", "--save-details",
        "--results-org", "{results_org}"
    ],
    namespace="{username}",
    secrets={{"HF_TOKEN": os.getenv("HF_TOKEN")}},
    token=os.getenv("HF_TOKEN")
)'''
    return command