Buckets:
| #!/usr/bin/env python3 | |
| """ | |
| Evals Leaderboard - Gradio app for displaying model evaluation scores. | |
| Reads leaderboard data from the hf-skills/evals-leaderboard dataset. | |
| Run collect_evals.py separately to update the dataset. | |
| Usage: | |
| python app.py | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import gradio as gr | |
| import requests | |
# Column titles of the leaderboard table, in display order.
TABLE_HEADERS = ["Model", "Benchmark", "Score", "Source"]

# Gradio datatype for each column above (same order as TABLE_HEADERS).
TABLE_DATATYPES = ["markdown", "text", "number", "markdown"]

# Hub dataset that holds the published leaderboard files.
DATASET_REPO = "hf-skills/evals-leaderboard"

# Raw-file URLs inside that dataset: one JSONL file with the entries and one
# JSON document with summary metadata.
LEADERBOARD_URL = (
    "https://huggingface.co/datasets/" + DATASET_REPO + "/raw/main/data/leaderboard.jsonl"
)
METADATA_URL = (
    "https://huggingface.co/datasets/" + DATASET_REPO + "/raw/main/data/metadata.json"
)
def format_model_link(model_id: str) -> str:
    """Return a Markdown link to the model's page on huggingface.co.

    The model ID is used both as the link text and as the URL path.
    """
    # The same positional field is reused for the label and the URL path.
    return "[{0}](https://huggingface.co/{0})".format(model_id)
def format_source_link(source_type: str, contributor: str, source_url: str) -> str:
    """Return the source cell text: ``<source_type> by [<contributor>](<source_url>)``.

    Only the contributor name is turned into a Markdown link; the source type
    is emitted as plain text in front of it.
    """
    contributor_link = "[{}]({})".format(contributor, source_url)
    return "{} by {}".format(source_type, contributor_link)
def fetch_leaderboard() -> tuple[list[dict], dict]:
    """Fetch leaderboard entries and summary metadata from the HF dataset.

    Downloads ``LEADERBOARD_URL`` (JSON Lines: one JSON document per line) and
    ``METADATA_URL`` (a single JSON document), each with a 30 second timeout.

    Returns:
        A ``(leaderboard, metadata)`` tuple: the parsed JSONL entries in file
        order, and the parsed metadata document.

    Raises:
        requests.RequestException: If either request fails, times out, or
            comes back with an HTTP error status.
        ValueError: If a non-blank leaderboard line or the metadata body is
            not valid JSON.
    """
    # Fetch leaderboard JSONL
    resp = requests.get(LEADERBOARD_URL, timeout=30)
    resp.raise_for_status()
    # JSON text is UTF-8. Pin it so a response served as text/* without a
    # declared charset is not decoded with requests' ISO-8859-1 fallback.
    resp.encoding = "utf-8"
    # Split on "\n" only (never on other Unicode line separators, which may
    # legally appear inside JSON strings) and skip whitespace-only lines such
    # as a lone "\r" from CRLF blank lines, which json.loads would reject.
    leaderboard = [
        json.loads(line)
        for line in resp.text.split("\n")
        if line.strip()
    ]

    # Fetch metadata
    resp = requests.get(METADATA_URL, timeout=30)
    resp.raise_for_status()
    metadata = resp.json()

    return leaderboard, metadata
def refresh_handler() -> tuple[str, list[list]]:
    """Reload the leaderboard and turn it into UI-ready values.

    Returns:
        A ``(status, rows)`` pair. ``status`` is a Markdown summary built from
        the dataset metadata; ``rows`` holds one
        ``[model link, benchmark, score, source link]`` list per entry.
        If anything fails while fetching or formatting, ``status`` is an
        error message and ``rows`` is empty.
    """
    try:
        entries, meta = fetch_leaderboard()

        # One table row per leaderboard entry, in dataset order.
        table = [
            [
                format_model_link(item["model_id"]),
                item["benchmark"],
                item["score"],
                format_source_link(
                    item["source_type"],
                    item["contributor"],
                    item["source_url"],
                ),
            ]
            for item in entries
        ]

        # Missing metadata fields fall back to 'unknown'; the total falls back
        # to the number of entries actually loaded.
        summary_lines = (
            f"**Data from:** [{DATASET_REPO}](https://huggingface.co/datasets/{DATASET_REPO})",
            f"**Last updated:** {meta.get('generated_at', 'unknown')}",
            f"**Models with scores:** {meta.get('models_with_scores', 'unknown')}",
            f"**Total entries:** {meta.get('total_entries', len(entries))}",
        )
        summary = "\n".join(summary_lines)
    except Exception as err:
        # UI boundary: report the problem in the status box instead of raising.
        return f"❌ Failed to load leaderboard: {err}", []
    else:
        return summary, table
# Page layout: intro text, status line, results table, footer links.
# The Markdown string bodies are kept flush-left so their lines can never be
# read as an indented Markdown code block.
with gr.Blocks() as demo:
    gr.Markdown(
        value="""
# 📊 HF Evaluation Leaderboard
Shows MMLU, BigCodeBench, and ARC MC scores pulled from model-index
metadata or their pull requests for trending text-generation models.
"""
    )

    # Placeholder text shown until the load event below replaces it.
    status_box = gr.Markdown(value="Loading leaderboard...")

    # Read-only table; column titles and datatypes come from the module constants.
    leaderboard_table = gr.Dataframe(
        interactive=False,
        wrap=True,
        headers=TABLE_HEADERS,
        datatype=TABLE_DATATYPES,
    )

    # Populate the status line and the table each time the page is loaded.
    demo.load(
        fn=refresh_handler,
        outputs=[status_box, leaderboard_table],
    )

    gr.Markdown(
        value=f"""
---
**Links:**
- [Dataset: {DATASET_REPO}](https://huggingface.co/datasets/{DATASET_REPO})
- [GitHub Repository](https://github.com/huggingface/skills)
"""
    )


if __name__ == "__main__":
    demo.launch()
Xet Storage Details
- Size:
- 3.63 kB
- Xet hash:
- 02fab4004e77d4d1b708f4e502267026194a4a7d046e6b16cfad65da6b766955
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.