# (removed: Hugging Face Spaces page residue — "Spaces: / Runtime error" — not part of the source)
# Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
import datetime
import os

import gradio as gr
import matplotlib.pyplot as plt
from apscheduler.schedulers.background import BackgroundScheduler
from dotenv import load_dotenv
from huggingface_hub import HfApi

import competitions
import utils
# Font link + page copy rendered at the top of the dashboard.
FONT = (
    """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
)
TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
# Fixed mismatched closing tag: the element opens with <h2> but closed with </h3>.
HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receive a steady emission of TAO.</h2>"""
EVALUATION_HEADER = """<h3 align="center">Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator</h3>"""

# Space that hosts this dashboard; restarted periodically to refresh data.
HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
# Bittensor block time, used to convert block counts into wall-clock estimates.
SECONDS_PER_BLOCK = 12

# Pull HF_TOKEN (and anything else) from a local .env file if present.
load_dotenv()

HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)
def get_next_update_div(current_block: int, next_update_block: int) -> str:
    """Render an HTML banner estimating when the next reward update lands.

    Args:
        current_block: The chain's current block number.
        next_update_block: The block at which the next update is expected.

    Returns:
        An HTML div with the remaining block count and an ETA in minutes.
    """
    remaining_blocks = next_update_block - current_block
    # ETA is simply blocks-to-go times the chain's block time.
    eta = datetime.timedelta(seconds=remaining_blocks * SECONDS_PER_BLOCK)
    minutes = int(eta.total_seconds() // 60)
    return (
        '<div align="center" style="font-size: larger;">'
        f"Next reward update: <b>{remaining_blocks}</b> blocks (~{minutes} minutes)</div>"
    )
def get_last_updated_div() -> str:
    """Render an HTML div stamped with the current UTC time.

    Uses the timezone-aware ``datetime.now(timezone.utc)`` instead of the
    deprecated ``datetime.utcnow()`` (deprecated since Python 3.12); the
    formatted output is identical.
    """
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    return f"""<div>Last Updated: {now_utc.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
def restart_space():
    """Restart the hosting Hugging Face Space so it reloads fresh data.

    Invoked on an interval by the BackgroundScheduler set up in main().
    """
    API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
def main():
    """Build and launch the Gradio leaderboard dashboard.

    Loads precomputed state via utils.load_state_vars(), renders the
    emissions label, per-competition leaderboards, optional benchmark plots,
    and validator stats, then schedules a periodic Space restart and launches
    the app.
    """
    # NOTE(review): an earlier comment claimed this "infinitely tries" until
    # data is available, but there is no retry loop here — load_state_vars()
    # is called exactly once; any retrying must happen inside utils.
    state_vars = utils.load_state_vars()
    model_data = state_vars["model_data"]
    # NOTE(review): vali_runs is loaded but never used below — confirm whether
    # it can be dropped from the state payload.
    vali_runs = state_vars["vali_runs"]
    scores = state_vars["scores"]
    validator_df = state_vars["validator_df"]
    benchmarks_df = state_vars["benchmarks_df"]
    benchmarks_targets = state_vars["benchmarks_targets"]
    demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
    with demo:
        gr.HTML(FONT)
        gr.HTML(TITLE)
        gr.HTML(HEADER)
        # Top-10 emissions summary: one entry per model that has incentive.
        gr.Label(
            label="Emissions",
            value={
                f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
                for c in model_data
                if c.incentive
            },
            num_top_classes=10,
        )
        # Competition ids currently shown on the dashboard.
        comp_ids = [2, 3]
        with gr.Accordion("Competition Results"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
            # One Dataframe per competition; kept so the show_stale toggle
            # can re-render all of them at once.
            competition_leaderboards = []
            for comp_id in comp_ids:
                details = competitions.COMPETITION_DETAILS[comp_id]
                with gr.Accordion(f"{details.name} Competition"):
                    gr.HTML(details.html_description)
                    competition_leaderboards.append(
                        gr.components.Dataframe(
                            value=utils.leaderboard_data(
                                model_data, scores, comp_id, show_stale.value
                            ),
                            headers=[
                                "Name",
                                "Win Rate",
                                "Score",
                                "Weight",
                                "UID",
                                "Block",
                            ],
                            datatype=[
                                "markdown",
                                "number",
                                "number",
                                "number",
                                "number",
                                "number",
                            ],
                            elem_id=f"comp{comp_id}-table",
                            interactive=False,
                            visible=True,
                        )
                    )
            gr.HTML(
                """
            <ul><li><b>Name:</b> the 🤗 Hugging Face repo (click to go to the model card)</li>
            <li><b>Win Rate:</b> % of head-to-head evals won vs. other eval'd models, given an epsilon advantage or disadvantage</li>
            <li><b>Score:</b> the combined model score as calculated by the OTF validator (lower is better)</li>
            <li><b>UID:</b> the Bittensor UID of the miner</li>
            <li><b>Weight:</b> the bittensor weight set for this model</li>
            <li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-37/" target="_blank">taostats</a>."""
            )
            # Toggling "Show Stale" regenerates every competition table.
            show_stale.change(
                lambda stale: [
                    utils.leaderboard_data(model_data, scores, id, stale)
                    for id in comp_ids
                ],
                inputs=[show_stale],
                outputs=competition_leaderboards,
            )
        if benchmarks_df is not None:

            def create_benchmark_plot(benchmark: str, comp_id: int):
                """Plot the given benchmark column over time for one competition,
                with dashed reference lines for well-known target models."""
                fig = plt.figure(figsize=(10, 8))
                # Filter to just entries for this competition.
                df = benchmarks_df[benchmarks_df["competition_id"] == comp_id]
                plt.plot(df["timestamp"], df[benchmark])
                # Adding horizontal dotted lines for various benchmark targets (well-known models)
                for model, score in benchmarks_targets[benchmark].items():
                    plt.axhline(y=score, linestyle="--", label=f"{model}")
                    # NOTE(review): label x-position uses the max timestamp of
                    # the UNFILTERED benchmarks_df, not the per-competition df —
                    # confirm this is intentional.
                    plt.text(
                        benchmarks_df["timestamp"].max(),
                        score,
                        f"{model}",
                        va="center",
                        ha="right",
                        backgroundcolor="white",
                    )
                # Adding labels and title
                plt.ylabel(benchmark.upper())
                plt.title(f"{benchmark.upper()} Over Time")
                plt.xticks(rotation=45)
                return fig

            with gr.Accordion("Top Model Benchmarks"):
                for comp_id in comp_ids:
                    details = competitions.COMPETITION_DETAILS[comp_id]
                    with gr.Accordion(f"{details.name} Benchmarks"):
                        mmlu = create_benchmark_plot("mmlu", comp_id)
                        mmlu_pro = create_benchmark_plot("mmlu_pro", comp_id)
                        gr.Plot(mmlu)
                        gr.Plot(mmlu_pro)
                gr.HTML(
                    """<div>Benchmarks computed using <a href='https://github.com/EleutherAI/lm-evaluation-harness'>lm-eval harness</a></div>"""
                )
                gr.HTML(
                    """<ul><li>MMLU: Raw score</li><li>MMLU Pro: Normalized score using <a href='https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization'>this</a> method</li></ul>"""
                )
        with gr.Accordion("Validator Stats"):
            gr.components.Dataframe(
                utils.make_validator_dataframe(validator_df, model_data),
                interactive=False,
                visible=True,
            )
        gr.HTML(value=get_last_updated_div())
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # Restart every 30 minutes so the Space reloads fresh data.
    scheduler.start()
    demo.launch()
| main() | |