| { | |
| "args": { | |
| "model_name": "answerdotai/ModernBERT-base", | |
| "data_dir": null, | |
| "output_dir": null, | |
| "epochs": 5, | |
| "batch_size": 64, | |
| "eval_batch_size": 512, | |
| "lr": 2e-05, | |
| "max_seq_len": 256, | |
| "focal_gamma": 2.0, | |
| "seed": 42, | |
| "merge_val": true, | |
| "wandb_project": "mnemotree-router", | |
| "wandb_run_name": null | |
| }, | |
| "train_metrics": { | |
| "train_runtime": 7379.1115, | |
| "train_samples_per_second": 40.472, | |
| "train_steps_per_second": 0.633, | |
| "total_flos": 5.08840181169408e+16, | |
| "train_loss": 0.14782962053695123, | |
| "epoch": 5.0 | |
| }, | |
| "test_evaluation": { | |
| "best_threshold": 0.55, | |
| "best_macro_f1": 0.6742, | |
| "all": { | |
| "0.3": { | |
| "macro_f1": 0.5129, | |
| "micro_f1": 0.5811, | |
| "per_class": { | |
| "episodic": { | |
| "p": 0.381, | |
| "r": 0.9829, | |
| "f1": 0.5492 | |
| }, | |
| "semantic": { | |
| "p": 0.6637, | |
| "r": 0.9864, | |
| "f1": 0.7935 | |
| }, | |
| "procedural": { | |
| "p": 0.1087, | |
| "r": 0.9887, | |
| "f1": 0.1959 | |
| } | |
| } | |
| }, | |
| "0.35": { | |
| "macro_f1": 0.547, | |
| "micro_f1": 0.6288, | |
| "per_class": { | |
| "episodic": { | |
| "p": 0.4245, | |
| "r": 0.9631, | |
| "f1": 0.5892 | |
| }, | |
| "semantic": { | |
| "p": 0.6796, | |
| "r": 0.9689, | |
| "f1": 0.7988 | |
| }, | |
| "procedural": { | |
| "p": 0.1455, | |
| "r": 0.9649, | |
| "f1": 0.2529 | |
| } | |
| } | |
| }, | |
| "0.4": { | |
| "macro_f1": 0.5861, | |
| "micro_f1": 0.6741, | |
| "per_class": { | |
| "episodic": { | |
| "p": 0.4722, | |
| "r": 0.9246, | |
| "f1": 0.6251 | |
| }, | |
| "semantic": { | |
| "p": 0.7028, | |
| "r": 0.9361, | |
| "f1": 0.8028 | |
| }, | |
| "procedural": { | |
| "p": 0.2022, | |
| "r": 0.9002, | |
| "f1": 0.3303 | |
| } | |
| } | |
| }, | |
| "0.45": { | |
| "macro_f1": 0.6298, | |
| "micro_f1": 0.7076, | |
| "per_class": { | |
| "episodic": { | |
| "p": 0.5229, | |
| "r": 0.8622, | |
| "f1": 0.651 | |
| }, | |
| "semantic": { | |
| "p": 0.731, | |
| "r": 0.8778, | |
| "f1": 0.7977 | |
| }, | |
| "procedural": { | |
| "p": 0.2992, | |
| "r": 0.8356, | |
| "f1": 0.4407 | |
| } | |
| } | |
| }, | |
| "0.5": { | |
| "macro_f1": 0.6664, | |
| "micro_f1": 0.7206, | |
| "per_class": { | |
| "episodic": { | |
| "p": 0.5776, | |
| "r": 0.7836, | |
| "f1": 0.665 | |
| }, | |
| "semantic": { | |
| "p": 0.768, | |
| "r": 0.7953, | |
| "f1": 0.7814 | |
| }, | |
| "procedural": { | |
| "p": 0.4275, | |
| "r": 0.7823, | |
| "f1": 0.5529 | |
| } | |
| } | |
| }, | |
| "0.55": { | |
| "macro_f1": 0.6742, | |
| "micro_f1": 0.7025, | |
| "per_class": { | |
| "episodic": { | |
| "p": 0.6265, | |
| "r": 0.6683, | |
| "f1": 0.6467 | |
| }, | |
| "semantic": { | |
| "p": 0.8096, | |
| "r": 0.6897, | |
| "f1": 0.7448 | |
| }, | |
| "procedural": { | |
| "p": 0.5518, | |
| "r": 0.737, | |
| "f1": 0.6311 | |
| } | |
| } | |
| }, | |
| "0.6": { | |
| "macro_f1": 0.6499, | |
| "micro_f1": 0.649, | |
| "per_class": { | |
| "episodic": { | |
| "p": 0.6763, | |
| "r": 0.5061, | |
| "f1": 0.579 | |
| }, | |
| "semantic": { | |
| "p": 0.8527, | |
| "r": 0.5654, | |
| "f1": 0.68 | |
| }, | |
| "procedural": { | |
| "p": 0.6646, | |
| "r": 0.7188, | |
| "f1": 0.6906 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |