import gradio as gr
import subprocess
import os

from huggingface_hub import list_models, model_info, run_job, inspect_job, whoami
from lighteval.tasks.registry import Registry
from lighteval.utils.utils import as_list


def get_models():
    """Get list of popular text generation models from Hugging Face.

    Returns:
        list[str]: Model ids, sorted by likes (descending), restricted to
        models that have at least one inference-provider mapping (only those
        can be evaluated through inference providers). Returns an empty list
        if the Hub API call fails.
    """
    try:
        models = list_models(
            filter="text-generation",
            sort="likes",
            direction=-1,
            limit=1000,
            expand="inferenceProviderMapping",
        )
        # Keep only models an inference provider can actually serve.
        return [
            model.id
            for model in models
            if getattr(model, "inference_provider_mapping", None)
        ]
    except Exception as e:
        # Best-effort: the UI still renders with an empty model dropdown.
        print(f"Error fetching models: {e}")
        return []


def get_providers_for_model(model_id):
    """Get inference providers for a specific model.

    Args:
        model_id: Hub model id, e.g. ``"org/model"``. May be falsy when the
            dropdown has no selection yet.

    Returns:
        gr.update: Dropdown update with the provider names as choices and the
        first provider preselected. Choices are ``["NO PROVIDER"]`` when the
        model id is missing or the lookup fails, and ``[]`` when the model has
        no provider mapping.
    """
    if not model_id:
        return gr.update(choices=["NO PROVIDER"], value=None)
    try:
        info = model_info(model_id, expand="inferenceProviderMapping")
        mapping = getattr(info, "inference_provider_mapping", None)
        if mapping:
            providers = [p.provider for p in mapping]
            return gr.update(choices=providers, value=providers[0] if providers else None)
        return gr.update(choices=[], value=None)
    except Exception as e:
        print(f"Error fetching providers for {model_id}: {e}")
        return gr.update(choices=["NO PROVIDER"], value=None)


# Cache tasks at module level to avoid reloading
_TASKS_CACHE = None


def get_available_tasks():
    """Get list of available lighteval tasks.

    Loads the lighteval task registry once and memoizes the result in
    ``_TASKS_CACHE``; subsequent calls return the cached list.

    Returns:
        list[str]: Sorted ``"suite|task"`` names whose metrics are all
        generative (other metric categories cannot run with inference
        providers). Returns an empty list if loading the registry fails.
    """
    global _TASKS_CACHE
    if _TASKS_CACHE is not None:
        return _TASKS_CACHE
    try:
        print("Loading lighteval tasks...")
        registry = Registry()
        # We only want generative metrics as the others won't run with inference providers
        tasks_list = [
            f"{config.suite[0]}|{name}"
            for name, config in registry.load_tasks().items()
            if all(metric.category.value == "GENERATIVE" for metric in config.metrics)
        ]
        tasks = sorted(tasks_list)
        _TASKS_CACHE = tasks
        print(f"Loaded {len(tasks)} tasks")
        return tasks
    except Exception as e:
        print(f"Error fetching tasks: {e}")
        return []


def build_lighteval_command(
    model="MODEL",
    provider="PROVIDER",
    tasks="TASKS",
    results_org="YOUR_ORG",
    username="YOUR_USERNAME",
):
    """Build run_job command from selections.

    Args:
        model: Hub model id to evaluate.
        provider: Inference provider name for that model.
        tasks: One task name or a list of task names (normalized via
            ``as_list``).
        results_org: Org/namespace where results are pushed.
        username: Namespace the job runs under.

    Returns:
        str: A ready-to-run Python snippet that launches the evaluation job
        via ``huggingface_hub.run_job``.
    """
    tasks = as_list(tasks)
    model_provider_arg = f'model_name={model},provider={provider}'
    # Tasks are already strings; a plain join is all that is needed.
    task_args = ','.join(tasks)
    # Build the Python code string
    command = f'''from huggingface_hub import run_job, inspect_job
import os

# The token must be able to call inference providers and write to the org you selected on your behalf
# Test the command with `"--max-samples", "10",` first, and remove to run a full evaluation job
job = run_job(
    image="hf.co/spaces/OpenEvals/EvalsOnTheHub",
    command=[
        "lighteval",
        "endpoint",
        "inference-providers",
        "{model_provider_arg}",
        "{task_args}",
        "--push-to-hub",
        "--save-details",
        "--results-org", "{results_org}"
    ],
    namespace="{username}",
    secrets={{"HF_TOKEN": os.getenv("HF_TOKEN")}},
    token=os.getenv("HF_TOKEN")
)'''
    return command