| { | |
| "best_global_step": 8000, | |
| "best_metric": 0.8765376300834912, | |
| "best_model_checkpoint": "E:/ModernBERT_finetune_data/models/reranker-ModernBERT-base-s2orc\\checkpoint-8000", | |
| "epoch": 0.23087356786239935, | |
| "eval_steps": 4000, | |
| "global_step": 8000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.885919598279992e-05, | |
| "grad_norm": 7.188746452331543, | |
| "learning_rate": 0.0, | |
| "loss": 1.0682, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01442959799139996, | |
| "grad_norm": 13.780556678771973, | |
| "learning_rate": 2.8793998845931915e-06, | |
| "loss": 1.1555, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02885919598279992, | |
| "grad_norm": 52.33120346069336, | |
| "learning_rate": 5.76457010963647e-06, | |
| "loss": 0.7743, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04328879397419988, | |
| "grad_norm": 2.325308322906494, | |
| "learning_rate": 8.649740334679747e-06, | |
| "loss": 0.538, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.05771839196559984, | |
| "grad_norm": 7.630313396453857, | |
| "learning_rate": 1.1534910559723025e-05, | |
| "loss": 0.5771, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.0721479899569998, | |
| "grad_norm": 0.15915359556674957, | |
| "learning_rate": 1.4420080784766301e-05, | |
| "loss": 0.5345, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08657758794839976, | |
| "grad_norm": 45.458744049072266, | |
| "learning_rate": 1.730525100980958e-05, | |
| "loss": 0.4394, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.10100718593979972, | |
| "grad_norm": 1.0640169382095337, | |
| "learning_rate": 1.9978835978835978e-05, | |
| "loss": 0.4607, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.11543678393119967, | |
| "grad_norm": 0.10893326252698898, | |
| "learning_rate": 1.9658168991502328e-05, | |
| "loss": 0.3866, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.11543678393119967, | |
| "eval_NanoBEIR_R100_mean_base_map": 0.4895766320756843, | |
| "eval_NanoBEIR_R100_mean_base_mrr@10": 0.4775, | |
| "eval_NanoBEIR_R100_mean_base_ndcg@10": 0.5404259879670522, | |
| "eval_NanoBEIR_R100_mean_map": 0.49520150363958415, | |
| "eval_NanoBEIR_R100_mean_mrr@10": 0.48177777777777775, | |
| "eval_NanoBEIR_R100_mean_ndcg@10": 0.5468622576263062, | |
| "eval_NanoMSMARCO_R100_base_map": 0.4895766320756843, | |
| "eval_NanoMSMARCO_R100_base_mrr@10": 0.4775, | |
| "eval_NanoMSMARCO_R100_base_ndcg@10": 0.5404259879670522, | |
| "eval_NanoMSMARCO_R100_map": 0.49520150363958415, | |
| "eval_NanoMSMARCO_R100_mrr@10": 0.48177777777777775, | |
| "eval_NanoMSMARCO_R100_ndcg@10": 0.5468622576263062, | |
| "eval_runtime": 6161.5831, | |
| "eval_s2orc-dev_base_map": 0.7378771185598135, | |
| "eval_s2orc-dev_base_mrr@10": 0.7359269841269841, | |
| "eval_s2orc-dev_base_ndcg@10": 0.7659559959785154, | |
| "eval_s2orc-dev_map": 0.8617319597069597, | |
| "eval_s2orc-dev_mrr@10": 0.8613789682539682, | |
| "eval_s2orc-dev_ndcg@10": 0.8684642495142619, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5468622576263062, | |
| "eval_steps_per_second": 0.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.12986638192259964, | |
| "grad_norm": 3.2822253704071045, | |
| "learning_rate": 1.9337502004168674e-05, | |
| "loss": 0.4222, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.1442959799139996, | |
| "grad_norm": 0.163385272026062, | |
| "learning_rate": 1.9016835016835017e-05, | |
| "loss": 0.3734, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.15872557790539957, | |
| "grad_norm": 1.1301782131195068, | |
| "learning_rate": 1.8696168029501366e-05, | |
| "loss": 0.3558, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.17315517589679952, | |
| "grad_norm": 26.550891876220703, | |
| "learning_rate": 1.837550104216771e-05, | |
| "loss": 0.3968, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.18758477388819947, | |
| "grad_norm": 52.34437942504883, | |
| "learning_rate": 1.8054834054834055e-05, | |
| "loss": 0.3203, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.20201437187959945, | |
| "grad_norm": 0.1360524594783783, | |
| "learning_rate": 1.77341670675004e-05, | |
| "loss": 0.3354, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.2164439698709994, | |
| "grad_norm": 0.3569001853466034, | |
| "learning_rate": 1.7413500080166747e-05, | |
| "loss": 0.3579, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.23087356786239935, | |
| "grad_norm": 0.017899315804243088, | |
| "learning_rate": 1.7092833092833094e-05, | |
| "loss": 0.3349, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.23087356786239935, | |
| "eval_NanoBEIR_R100_mean_base_map": 0.4895766320756843, | |
| "eval_NanoBEIR_R100_mean_base_mrr@10": 0.4775, | |
| "eval_NanoBEIR_R100_mean_base_ndcg@10": 0.5404259879670522, | |
| "eval_NanoBEIR_R100_mean_map": 0.49410785900248205, | |
| "eval_NanoBEIR_R100_mean_mrr@10": 0.482047619047619, | |
| "eval_NanoBEIR_R100_mean_ndcg@10": 0.5528612484142966, | |
| "eval_NanoMSMARCO_R100_base_map": 0.4895766320756843, | |
| "eval_NanoMSMARCO_R100_base_mrr@10": 0.4775, | |
| "eval_NanoMSMARCO_R100_base_ndcg@10": 0.5404259879670522, | |
| "eval_NanoMSMARCO_R100_map": 0.49410785900248205, | |
| "eval_NanoMSMARCO_R100_mrr@10": 0.482047619047619, | |
| "eval_NanoMSMARCO_R100_ndcg@10": 0.5528612484142966, | |
| "eval_runtime": 4663.3621, | |
| "eval_s2orc-dev_base_map": 0.7378771185598135, | |
| "eval_s2orc-dev_base_mrr@10": 0.7359269841269841, | |
| "eval_s2orc-dev_base_ndcg@10": 0.7659559959785154, | |
| "eval_s2orc-dev_map": 0.8712208333333333, | |
| "eval_s2orc-dev_mrr@10": 0.871075, | |
| "eval_s2orc-dev_ndcg@10": 0.8765376300834912, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5528612484142966, | |
| "eval_steps_per_second": 0.0, | |
| "step": 8000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 34651, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 4000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |