Spaces:

RaoFoundation
/

pretraining-leaderboard

Runtime error

App Files Files Community

rusticluftig committed on Feb 13, 2024

Commit

4a5c2b7

1 Parent(s): 8c72a30

Add table for benchmarks

Browse files

Files changed (1) hide show

app.py +27 -6

app.py CHANGED Viewed

@@ -27,6 +27,7 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/Ra
 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
 H4_TOKEN = os.environ.get("H4_TOKEN", None)
 API = HfApi(token=H4_TOKEN)
 WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
@@ -157,14 +158,14 @@ def is_floatable(x) -> bool:
     ) or isinstance(x, int)
-def get_wandb_runs() -> List:
     """Get the latest runs from Wandb, retrying infinitely until we get them."""
     while True:
         api = wandb.Api(api_key=WANDB_TOKEN)
         runs = list(
             api.runs(
-                VALIDATOR_WANDB_PROJECT,
-                filters={"config.type": "validator", "config.uid": 238},
             )
         )
         if len(runs) > 0:
@@ -286,6 +287,18 @@ def leaderboard_data(
         for c in leaderboard
         if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
     ]
 def restart_space():
@@ -298,9 +311,9 @@ def main():
     model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
     model_data.sort(key=lambda x: x.incentive, reverse=True)
-    wandb_runs = get_wandb_runs()
-    scores = get_scores([x.uid for x in model_data], wandb_runs)
     current_block = metagraph.block.item()
     next_epoch_block = next_epoch(subtensor, current_block)
@@ -309,6 +322,8 @@ def main():
     weight_keys = set()
     for uid, stats in validator_df.items():
         weight_keys.update(stats[-1].keys())
     demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
     with demo:
@@ -326,6 +341,12 @@ def main():
             },
             num_top_classes=10,
         )
         with gr.Accordion("Evaluation Stats"):
             gr.HTML(EVALUATION_HEADER)
@@ -346,7 +367,7 @@ def main():
             )
             gr.LinePlot(
-                get_losses_over_time(wandb_runs),
                 x="timestamp",
                 x_title="Date",
                 y="best_loss",

 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
+BENCHMARK_WANDB_PROJECT = "raofoundation/pretraining-leaderboard-data"
 H4_TOKEN = os.environ.get("H4_TOKEN", None)
 API = HfApi(token=H4_TOKEN)
 WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
     ) or isinstance(x, int)
+def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
     """Get the latest runs from Wandb, retrying infinitely until we get them."""
     while True:
         api = wandb.Api(api_key=WANDB_TOKEN)
         runs = list(
             api.runs(
+                project,
+                filters=filters,
             )
         )
         if len(runs) > 0:
         for c in leaderboard
         if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
     ]
+def get_benchmarks() -> Tuple[pd.DataFrame, datetime.datetime]:
+    """Returns the latest benchmarks and the time they were run."""
+    runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)  # filters=None: no run filter is passed for the benchmark project
+    for run in runs:  # runs are scanned in the order returned; presumably newest first — TODO confirm against wandb ordering
+        artifacts = list(run.logged_artifacts())
+        if artifacts:
+            table = artifacts[-1].get("benchmarks")  # only the last logged artifact of each run is inspected
+            if table:
+                return table.get_dataframe(), datetime.datetime.strptime(run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f")  # NOTE(review): format requires fractional seconds in startedAt and yields a naive datetime — confirm
+    bt.logging.error("Failed to get benchmarks from Wandb.")
+    return None, None  # NOTE(review): (None, None) does not match the non-Optional Tuple annotation; callers must check for None
 def restart_space():
     model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
     model_data.sort(key=lambda x: x.incentive, reverse=True)
+    vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
+    scores = get_scores([x.uid for x in model_data], vali_runs)
     current_block = metagraph.block.item()
     next_epoch_block = next_epoch(subtensor, current_block)
     weight_keys = set()
     for uid, stats in validator_df.items():
         weight_keys.update(stats[-1].keys())
+    benchmarks, benchmark_timestamp = get_benchmarks()
     demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
     with demo:
             },
             num_top_classes=10,
         )
+        if benchmarks is not None:
+            with gr.Accordion("Top Model Benchmarks"):
+                gr.components.Dataframe(benchmarks)
+                gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/RaoFoundation/pretraining/blob/main/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
+                gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")
         with gr.Accordion("Evaluation Stats"):
             gr.HTML(EVALUATION_HEADER)
             )
             gr.LinePlot(
+                get_losses_over_time(vali_runs),
                 x="timestamp",
                 x_title="Date",
                 y="best_loss",