Spaces:
Runtime error
Runtime error
rusticluftig
committed on
Commit
·
4a5c2b7
1
Parent(s):
8c72a30
Add table for benchmarks
Browse files
app.py
CHANGED
|
@@ -27,6 +27,7 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/Ra
|
|
| 27 |
EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
|
| 28 |
EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
|
| 29 |
VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
|
|
|
|
| 30 |
H4_TOKEN = os.environ.get("H4_TOKEN", None)
|
| 31 |
API = HfApi(token=H4_TOKEN)
|
| 32 |
WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
|
|
@@ -157,14 +158,14 @@ def is_floatable(x) -> bool:
|
|
| 157 |
) or isinstance(x, int)
|
| 158 |
|
| 159 |
|
| 160 |
-
def get_wandb_runs() -> List:
|
| 161 |
"""Get the latest runs from Wandb, retrying infinitely until we get them."""
|
| 162 |
while True:
|
| 163 |
api = wandb.Api(api_key=WANDB_TOKEN)
|
| 164 |
runs = list(
|
| 165 |
api.runs(
|
| 166 |
-
|
| 167 |
-
filters=
|
| 168 |
)
|
| 169 |
)
|
| 170 |
if len(runs) > 0:
|
|
@@ -286,6 +287,18 @@ def leaderboard_data(
|
|
| 286 |
for c in leaderboard
|
| 287 |
if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
|
| 288 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
|
| 290 |
|
| 291 |
def restart_space():
|
|
@@ -298,9 +311,9 @@ def main():
|
|
| 298 |
model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
|
| 299 |
model_data.sort(key=lambda x: x.incentive, reverse=True)
|
| 300 |
|
| 301 |
-
|
| 302 |
|
| 303 |
-
scores = get_scores([x.uid for x in model_data],
|
| 304 |
|
| 305 |
current_block = metagraph.block.item()
|
| 306 |
next_epoch_block = next_epoch(subtensor, current_block)
|
|
@@ -309,6 +322,8 @@ def main():
|
|
| 309 |
weight_keys = set()
|
| 310 |
for uid, stats in validator_df.items():
|
| 311 |
weight_keys.update(stats[-1].keys())
|
|
|
|
|
|
|
| 312 |
|
| 313 |
demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
|
| 314 |
with demo:
|
|
@@ -326,6 +341,12 @@ def main():
|
|
| 326 |
},
|
| 327 |
num_top_classes=10,
|
| 328 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
with gr.Accordion("Evaluation Stats"):
|
| 331 |
gr.HTML(EVALUATION_HEADER)
|
|
@@ -346,7 +367,7 @@ def main():
|
|
| 346 |
)
|
| 347 |
|
| 348 |
gr.LinePlot(
|
| 349 |
-
get_losses_over_time(
|
| 350 |
x="timestamp",
|
| 351 |
x_title="Date",
|
| 352 |
y="best_loss",
|
|
|
|
| 27 |
EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
|
| 28 |
EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
|
| 29 |
VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
|
| 30 |
+
BENCHMARK_WANDB_PROJECT = "raofoundation/pretraining-leaderboard-data"
|
| 31 |
H4_TOKEN = os.environ.get("H4_TOKEN", None)
|
| 32 |
API = HfApi(token=H4_TOKEN)
|
| 33 |
WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
|
|
|
|
| 158 |
) or isinstance(x, int)
|
| 159 |
|
| 160 |
|
| 161 |
+
def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
|
| 162 |
"""Get the latest runs from Wandb, retrying infinitely until we get them."""
|
| 163 |
while True:
|
| 164 |
api = wandb.Api(api_key=WANDB_TOKEN)
|
| 165 |
runs = list(
|
| 166 |
api.runs(
|
| 167 |
+
project,
|
| 168 |
+
filters=filters,
|
| 169 |
)
|
| 170 |
)
|
| 171 |
if len(runs) > 0:
|
|
|
|
| 287 |
for c in leaderboard
|
| 288 |
if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
|
| 289 |
]
|
| 290 |
+
|
| 291 |
+
def get_benchmarks() -> Tuple[pd.DataFrame, datetime.datetime]:
    """Return the latest benchmark table and the time its run was started.

    Fetches the runs of ``BENCHMARK_WANDB_PROJECT`` via ``get_wandb_runs``
    and walks them in the order returned. The first run whose last logged
    artifact contains a "benchmarks" entry and whose "startedAt" metadata
    can be parsed is used.

    Returns:
        A ``(dataframe, started_at)`` pair for the first usable run.
        ``started_at`` is a naive ``datetime`` parsed from the run metadata.
        When no run is usable, ``(None, None)`` is returned (note that the
        annotation does not reflect this), so callers must check for
        ``None`` before using either value.
    """
    runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)
    for run in runs:
        artifacts = list(run.logged_artifacts())
        if not artifacts:
            continue

        # Only the most recently listed artifact of a run is inspected.
        table = artifacts[-1].get("benchmarks")
        if not table:
            continue

        # A run with missing/None metadata or an unexpected timestamp layout
        # must not abort the whole app; skip it and try the next run instead.
        # NOTE(review): the caller labels this value as UTC -- confirm that
        # "startedAt" is actually recorded in UTC.
        try:
            started_at = datetime.datetime.strptime(
                run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f"
            )
        except (KeyError, TypeError, ValueError) as err:
            bt.logging.error(
                f"Skipping benchmark run with unusable start time: {err}"
            )
            continue

        return table.get_dataframe(), started_at

    bt.logging.error("Failed to get benchmarks from Wandb.")
    return None, None
|
| 302 |
|
| 303 |
|
| 304 |
def restart_space():
|
|
|
|
| 311 |
model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
|
| 312 |
model_data.sort(key=lambda x: x.incentive, reverse=True)
|
| 313 |
|
| 314 |
+
vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
|
| 315 |
|
| 316 |
+
scores = get_scores([x.uid for x in model_data], vali_runs)
|
| 317 |
|
| 318 |
current_block = metagraph.block.item()
|
| 319 |
next_epoch_block = next_epoch(subtensor, current_block)
|
|
|
|
| 322 |
weight_keys = set()
|
| 323 |
for uid, stats in validator_df.items():
|
| 324 |
weight_keys.update(stats[-1].keys())
|
| 325 |
+
|
| 326 |
+
benchmarks, benchmark_timestamp = get_benchmarks()
|
| 327 |
|
| 328 |
demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
|
| 329 |
with demo:
|
|
|
|
| 341 |
},
|
| 342 |
num_top_classes=10,
|
| 343 |
)
|
| 344 |
+
|
| 345 |
+
if benchmarks is not None:
|
| 346 |
+
with gr.Accordion("Top Model Benchmarks"):
|
| 347 |
+
gr.components.Dataframe(benchmarks)
|
| 348 |
+
gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/RaoFoundation/pretraining/blob/main/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
|
| 349 |
+
gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")
|
| 350 |
|
| 351 |
with gr.Accordion("Evaluation Stats"):
|
| 352 |
gr.HTML(EVALUATION_HEADER)
|
|
|
|
| 367 |
)
|
| 368 |
|
| 369 |
gr.LinePlot(
|
| 370 |
+
get_losses_over_time(vali_runs),
|
| 371 |
x="timestamp",
|
| 372 |
x_title="Date",
|
| 373 |
y="best_loss",
|