mnemotree-root-v1 / training_metadata.json
kurcontko's picture
Upload training_metadata.json with huggingface_hub
d38b1f5 verified
{
"args": {
"model_name": "answerdotai/ModernBERT-base",
"data_dir": null,
"output_dir": null,
"epochs": 5,
"batch_size": 64,
"eval_batch_size": 512,
"lr": 2e-05,
"max_seq_len": 256,
"focal_gamma": 2.0,
"seed": 42,
"merge_val": true,
"wandb_project": "mnemotree-router",
"wandb_run_name": null
},
"train_metrics": {
"train_runtime": 7379.1115,
"train_samples_per_second": 40.472,
"train_steps_per_second": 0.633,
"total_flos": 5.08840181169408e+16,
"train_loss": 0.14782962053695123,
"epoch": 5.0
},
"test_evaluation": {
"best_threshold": 0.55,
"best_macro_f1": 0.6742,
"all": {
"0.3": {
"macro_f1": 0.5129,
"micro_f1": 0.5811,
"per_class": {
"episodic": {
"p": 0.381,
"r": 0.9829,
"f1": 0.5492
},
"semantic": {
"p": 0.6637,
"r": 0.9864,
"f1": 0.7935
},
"procedural": {
"p": 0.1087,
"r": 0.9887,
"f1": 0.1959
}
}
},
"0.35": {
"macro_f1": 0.547,
"micro_f1": 0.6288,
"per_class": {
"episodic": {
"p": 0.4245,
"r": 0.9631,
"f1": 0.5892
},
"semantic": {
"p": 0.6796,
"r": 0.9689,
"f1": 0.7988
},
"procedural": {
"p": 0.1455,
"r": 0.9649,
"f1": 0.2529
}
}
},
"0.4": {
"macro_f1": 0.5861,
"micro_f1": 0.6741,
"per_class": {
"episodic": {
"p": 0.4722,
"r": 0.9246,
"f1": 0.6251
},
"semantic": {
"p": 0.7028,
"r": 0.9361,
"f1": 0.8028
},
"procedural": {
"p": 0.2022,
"r": 0.9002,
"f1": 0.3303
}
}
},
"0.45": {
"macro_f1": 0.6298,
"micro_f1": 0.7076,
"per_class": {
"episodic": {
"p": 0.5229,
"r": 0.8622,
"f1": 0.651
},
"semantic": {
"p": 0.731,
"r": 0.8778,
"f1": 0.7977
},
"procedural": {
"p": 0.2992,
"r": 0.8356,
"f1": 0.4407
}
}
},
"0.5": {
"macro_f1": 0.6664,
"micro_f1": 0.7206,
"per_class": {
"episodic": {
"p": 0.5776,
"r": 0.7836,
"f1": 0.665
},
"semantic": {
"p": 0.768,
"r": 0.7953,
"f1": 0.7814
},
"procedural": {
"p": 0.4275,
"r": 0.7823,
"f1": 0.5529
}
}
},
"0.55": {
"macro_f1": 0.6742,
"micro_f1": 0.7025,
"per_class": {
"episodic": {
"p": 0.6265,
"r": 0.6683,
"f1": 0.6467
},
"semantic": {
"p": 0.8096,
"r": 0.6897,
"f1": 0.7448
},
"procedural": {
"p": 0.5518,
"r": 0.737,
"f1": 0.6311
}
}
},
"0.6": {
"macro_f1": 0.6499,
"micro_f1": 0.649,
"per_class": {
"episodic": {
"p": 0.6763,
"r": 0.5061,
"f1": 0.579
},
"semantic": {
"p": 0.8527,
"r": 0.5654,
"f1": 0.68
},
"procedural": {
"p": 0.6646,
"r": 0.7188,
"f1": 0.6906
}
}
}
}
}
}