Spaces:

OpenEvals
/

EvalsOnTheHub

Sleeping

App Files Files Community

Clémentine committed on Oct 10, 2025

Commit

c97be0a

1 Parent(s): 9c61c2c

change it into a command writer

Browse files

Files changed (3) hide show

Dockerfile +0 -19
app.py +26 -47
utils.py +51 -68

Dockerfile DELETED Viewed

@@ -1,19 +0,0 @@
-# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
-# you will also find guides on how best to write your Dockerfile
-FROM python:3.12
-RUN useradd -m -u 1000 user
-USER user
-ENV PATH="/home/user/.local/bin:$PATH"
-ENV HOME=/home/user
-WORKDIR $HOME
-COPY requirements.txt requirements.txt
-RUN pip install -r requirements.txt
-COPY app.py app.py
-COPY utils.py utils.py
-CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
-from utils import get_models, get_available_tasks, get_providers_for_model, build_lighteval_command, run_lighteval
 # Handle login and token storage
 def on_login(profile: gr.OAuthProfile):
@@ -9,13 +9,14 @@ def on_login(profile: gr.OAuthProfile):
     return f"Logged in as **{profile.name}** (@{profile.username})"
 # Update command when selections change
-def update_command_display(model, provider, tasks):
-    return build_lighteval_command(model, provider, tasks)
-with gr.Blocks(title="LightEval Job Runner") as demo:
-    gr.Markdown("# LightEval Job Runner")
-    gr.Markdown("Configure and run lighteval jobs on Hugging Face models")
     # Add login button and user info
     with gr.Row():
@@ -40,30 +41,26 @@ with gr.Blocks(title="LightEval Job Runner") as demo:
             tasks_dropdown = gr.Dropdown(
                 label="Tasks (searchable - type to filter)",
                 choices=get_available_tasks(),
-                value=None,
                 multiselect=True,
                 interactive=True,
                 allow_custom_value=False,
                 filterable=True
             )
-            command_display = gr.Textbox(
-                label="Generated LightEval Command",
-                placeholder="Command will be generated from your selections",
-                lines=4,
-                interactive=False,
-                show_copy_button=True
             )
-            run_btn = gr.Button("Run Command", variant="primary")
-    with gr.Row():
-        logs_output = gr.Textbox(
-            label="Logs",
-            lines=20,
-            max_lines=30,
-            show_copy_button=True
-        )
     # Link model to providers
     model_dropdown.change(
@@ -72,31 +69,13 @@ with gr.Blocks(title="LightEval Job Runner") as demo:
         outputs=provider_dropdown
     )
-    model_dropdown.change(
-        fn=update_command_display,
-        inputs=[model_dropdown, provider_dropdown, tasks_dropdown],
-        outputs=command_display
-    )
-    provider_dropdown.change(
-        fn=update_command_display,
-        inputs=[model_dropdown, provider_dropdown, tasks_dropdown],
-        outputs=command_display
-    )
-    tasks_dropdown.change(
-        fn=update_command_display,
-        inputs=[model_dropdown, provider_dropdown, tasks_dropdown],
-        outputs=command_display
-    )
-    # Run command
-    run_btn.click(
-        fn=run_lighteval,
-        inputs=[command_display],
-        outputs=logs_output
-    )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+from utils import get_models, get_available_tasks, get_providers_for_model, build_lighteval_command
 # Handle login and token storage
 def on_login(profile: gr.OAuthProfile):
     return f"Logged in as **{profile.name}** (@{profile.username})"
 # Update command when selections change
def update_command_display(model, provider, tasks, results_org, profile: gr.OAuthProfile | None):
    """Rebuild the generated snippet from the current UI selections.

    Args:
        model: Selected model id.
        provider: Selected inference provider.
        tasks: Selected task(s).
        results_org: Organization entered for saving detailed results.
        profile: OAuth profile supplied by Gradio, or ``None`` when nobody is
            logged in.

    Returns:
        The snippet produced by ``build_lighteval_command``.
    """
    # The annotation must include ``None``: with a bare ``gr.OAuthProfile``
    # Gradio refuses to run the callback for anonymous users, which made the
    # placeholder fallback below unreachable.
    username = profile.username if profile else "YOUR_USERNAME"
    return build_lighteval_command(model, provider, tasks, results_org, username)
+with gr.Blocks(title="LightEval ❤️ Jobs") as demo:
+    gr.Markdown("# Run evaluations in 5 lines of code!")
+    gr.Markdown("Generate your own snippet to run lighteval easily using jobs and inference providers")
     # Add login button and user info
     with gr.Row():
             tasks_dropdown = gr.Dropdown(
                 label="Tasks (searchable - type to filter)",
                 choices=get_available_tasks(),
+                value=get_available_tasks()[0],
                 multiselect=True,
                 interactive=True,
                 allow_custom_value=False,
                 filterable=True
             )
+            results_org = gr.Textbox(
+                label="Results Organization",
+                placeholder="Enter HF organization to save detailed results to",
+                interactive=True
             )
+            command_display = gr.Code(
+                label="Get the command for your model",
+                value=build_lighteval_command(),
+                language="python",
+                lines=15,
+                interactive=False
+            )
     # Link model to providers
     model_dropdown.change(
         outputs=provider_dropdown
     )
+    # Update command display when any input changes
+    for component in [model_dropdown, provider_dropdown, tasks_dropdown, results_org]:
+        component.change(
+            fn=update_command_display,
+            inputs=[model_dropdown, provider_dropdown, tasks_dropdown, results_org],
+            outputs=command_display
+        )
 if __name__ == "__main__":
     demo.launch()

utils.py CHANGED Viewed

@@ -1,28 +1,35 @@
 import gradio as gr
 import subprocess
 import os
-from huggingface_hub import list_models, model_info
 from lighteval.tasks.registry import Registry
 def get_models():
     """Get list of popular text generation models from Hugging Face"""
     try:
         models = list_models(
-            task="text-generation",
-            sort="downloads",
             direction=-1,
-            limit=100
         )
-        model_ids = [model.id for model in models if hasattr(model, 'id')]
         return model_ids
     except Exception as e:
         print(f"Error fetching models: {e}")
-        return ["meta-llama/Llama-2-7b-hf", "mistralai/Mistral-7B-v0.1"]
 def get_providers_for_model(model_id):
     """Get inference providers for a specific model"""
     if not model_id:
-        return gr.update(choices=[], value=None)
     try:
         info = model_info(model_id, expand="inferenceProviderMapping")
@@ -33,7 +40,7 @@ def get_providers_for_model(model_id):
             return gr.update(choices=[], value=None)
     except Exception as e:
         print(f"Error fetching providers for {model_id}: {e}")
-        return gr.update(choices=[], value=None)
 # Cache tasks at module level to avoid reloading
 _TASKS_CACHE = None
@@ -47,7 +54,12 @@ def get_available_tasks():
     try:
         print("Loading lighteval tasks...")
         registry = Registry()
-        tasks_list = [f"{v.suite[0]}|{k}" for k, v in registry.load_tasks().items()]
         tasks = sorted(tasks_list)
         _TASKS_CACHE = tasks
         print(f"Loaded {len(tasks)} tasks")
@@ -56,66 +68,37 @@ def get_available_tasks():
         print(f"Error fetching tasks: {e}")
         return []
-def build_lighteval_command(model, provider, tasks):
-    """Build lighteval command from selections"""
-    if not model:
-        return "Error: Please select a model"
-    if not provider:
-        return "Error: Please select a provider"
-    if not tasks or len(tasks) == 0:
-        return "Error: Please select at least one task"
-    # Format: lighteval endpoint inference-providers "model_name=MODEL,provider=PROVIDER" "TASK1|0" "TASK2|0"
     model_provider_arg = f'model_name={model},provider={provider}'
-    task_args = ','.join([f'{task}|0' for task in tasks])
-    command = f'lighteval endpoint inference-providers "{model_provider_arg}" "{task_args}"'
-    return command
-def run_lighteval(command, oauth_token: gr.OAuthToken):
-    """Run lighteval command and yield logs in real-time"""
-    if not command.strip():
-        yield "Error: Please enter a command"
-        return
-    if not oauth_token:
-        yield "Error: Please login first to authenticate with Hugging Face"
-        return
-    try:
-        # Set up environment with HF token
-        env = os.environ.copy()
-        env['HF_TOKEN'] = oauth_token.token
-        # Run the command
-        process = subprocess.Popen(
-            command,
-            shell=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-            universal_newlines=True,
-            env=env
-        )
-        output = ""
-        # Stream output line by line
-        for line in process.stdout:
-            output += line
-            yield output
-        # Wait for process to complete
-        process.wait()
-        if process.returncode != 0:
-            output += f"\n\nProcess exited with code {process.returncode}"
-            yield output
-        else:
-            output += "\n\nProcess completed successfully!"
-            yield output
-    except Exception as e:
-        yield f"Error running command: {str(e)}"

 import gradio as gr
 import subprocess
 import os
+from huggingface_hub import list_models, model_info, run_job, inspect_job, whoami
 from lighteval.tasks.registry import Registry
+from lighteval.utils.utils import as_list
 def get_models():
     """Get list of popular text generation models from Hugging Face"""
     try:
         models = list_models(
+            filter="text-generation",
+            sort="likes",
             direction=-1,
+            limit=1000,
+            expand="inferenceProviderMapping"
         )
+        model_ids = [
+            model.id
+            for model in models
+            if hasattr(model, 'inference_provider_mapping') and model.inference_provider_mapping
+        ]
         return model_ids
     except Exception as e:
         print(f"Error fetching models: {e}")
+        return []
 def get_providers_for_model(model_id):
     """Get inference providers for a specific model"""
     if not model_id:
+        return gr.update(choices=["NO PROVIDER"], value=None)
     try:
         info = model_info(model_id, expand="inferenceProviderMapping")
             return gr.update(choices=[], value=None)
     except Exception as e:
         print(f"Error fetching providers for {model_id}: {e}")
+        return gr.update(choices=["NO PROVIDER"], value=None)
 # Cache tasks at module level to avoid reloading
 _TASKS_CACHE = None
     try:
         print("Loading lighteval tasks...")
         registry = Registry()
+        # We only want generative metrics as the others won't run with inference providers
+        tasks_list = [
+            f"{config.suite[0]}|{name}"
+            for name, config in registry.load_tasks().items()
+            if all(metric.category.value == "GENERATIVE" for metric in config.metrics)
+        ]
         tasks = sorted(tasks_list)
         _TASKS_CACHE = tasks
         print(f"Loaded {len(tasks)} tasks")
         print(f"Error fetching tasks: {e}")
         return []
def build_lighteval_command(
    model="MODEL",
    provider="PROVIDER",
    tasks="TASKS",
    results_org="YOUR_ORG",
    username="YOUR_USERNAME",
):
    """Build a copy-pasteable ``run_job`` Python snippet from the UI selections.

    Every argument falls back to its placeholder when it is ``None``, empty or
    whitespace-only (a cleared dropdown or a blank textbox), so the snippet
    never contains ``None`` or an empty string where a value is expected.

    Args:
        model: Model id placed in ``model_name=...``.
        provider: Inference provider placed in ``provider=...``.
        tasks: One task string, or a list/tuple of them; joined with commas.
        results_org: Organization passed to ``--results-org``.
        username: Value used for the job ``namespace``.

    Returns:
        The generated Python source as a string.
    """
    import json  # function-scope so the block needs no new file-level import

    def _value_or(value, placeholder):
        # Treat None, "" and whitespace-only input as "nothing chosen yet".
        text = "" if value is None else str(value).strip()
        return text or placeholder

    def _literal(text):
        # Emit a double-quoted literal with quotes and backslashes escaped, so
        # free-text input cannot terminate the string in the generated code.
        return json.dumps(text, ensure_ascii=False)

    # Accept a single task as well as a list/tuple of tasks.
    task_items = tasks if isinstance(tasks, (list, tuple)) else [tasks]
    task_names = [_value_or(task, "") for task in task_items]
    task_args = ",".join(name for name in task_names if name) or "TASKS"

    model_name = _value_or(model, "MODEL")
    provider_name = _value_or(provider, "PROVIDER")
    model_provider_arg = f"model_name={model_name},provider={provider_name}"

    org_literal = _literal(_value_or(results_org, "YOUR_ORG"))
    namespace_literal = _literal(_value_or(username, "YOUR_USERNAME"))

    # Build the Python code string
    command = f'''from huggingface_hub import run_job, inspect_job
import os
# The token must be able to call inference providers and write to the org you selected on your behalf
# Test the command with `"--max-samples", "10",` first, and remove to run a full evaluation job
job = run_job(
    image="hf.co/spaces/OpenEvals/EvalsOnTheHub",
    command=[
        "lighteval", "endpoint", "inference-providers",
        {_literal(model_provider_arg)},
        {_literal(task_args)},
        "--push-to-hub", "--save-details",
        "--results-org", {org_literal}
    ],
    namespace={namespace_literal},
    secrets={{"HF_TOKEN": os.getenv("HF_TOKEN")}},
    token=os.getenv("HF_TOKEN")
)'''
    return command