{ "best_metric": 0.03481420502066612, "best_model_checkpoint": "results_bert-base-french-europeana-cased/epoch20_bs64/checkpoint-48", "epoch": 11.0, "eval_steps": 500, "global_step": 176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.47372615337371826, "learning_rate": 4.75e-05, "loss": 0.3883, "step": 16 }, { "epoch": 1.0, "eval_accurracy": { "accuracy": 0.9764285714285714 }, "eval_f1": [ 0.9799147900182593, 0.8114285714285714, 1.0 ], "eval_loss": 0.08146535605192184, "eval_precision": [ 0.971049457177322, 0.8875, 1.0 ], "eval_recall": [ 0.9889434889434889, 0.7473684210526316, 1.0 ], "eval_runtime": 4.1114, "eval_samples_per_second": 12.161, "eval_steps_per_second": 0.243, "step": 16 }, { "epoch": 2.0, "grad_norm": 0.4207443296909332, "learning_rate": 4.5e-05, "loss": 0.0566, "step": 32 }, { "epoch": 2.0, "eval_accurracy": { "accuracy": 0.9878571428571429 }, "eval_f1": [ 0.9895641497851443, 0.91005291005291, 1.0 ], "eval_loss": 0.04085615277290344, "eval_precision": [ 0.9889570552147239, 0.9148936170212766, 1.0 ], "eval_recall": [ 0.9901719901719902, 0.9052631578947369, 1.0 ], "eval_runtime": 3.9019, "eval_samples_per_second": 12.814, "eval_steps_per_second": 0.256, "step": 32 }, { "epoch": 3.0, "grad_norm": 0.4004434049129486, "learning_rate": 4.25e-05, "loss": 0.0291, "step": 48 }, { "epoch": 3.0, "eval_accurracy": { "accuracy": 0.99 }, "eval_f1": [ 0.991389913899139, 0.9270833333333334, 1.0 ], "eval_loss": 0.03481420502066612, "eval_precision": [ 0.9926108374384236, 0.9175257731958762, 1.0 ], "eval_recall": [ 0.9901719901719902, 0.9368421052631579, 1.0 ], "eval_runtime": 3.5383, "eval_samples_per_second": 14.131, "eval_steps_per_second": 0.283, "step": 48 }, { "epoch": 4.0, "grad_norm": 0.3703693747520447, "learning_rate": 4e-05, "loss": 0.0169, "step": 64 }, { "epoch": 4.0, "eval_accurracy": { "accuracy": 0.9885714285714285 }, "eval_f1": [ 0.9901719901719902, 0.9157894736842105, 1.0 ], "eval_loss": 0.03665424883365631, "eval_precision": [ 0.9901719901719902, 0.9157894736842105, 1.0 ], "eval_recall": [ 0.9901719901719902, 0.9157894736842105, 1.0 ], "eval_runtime": 3.4858, "eval_samples_per_second": 14.344, "eval_steps_per_second": 0.287, "step": 64 }, { "epoch": 5.0, "grad_norm": 0.3292807936668396, "learning_rate": 3.7500000000000003e-05, "loss": 0.0109, "step": 80 }, { "epoch": 5.0, "eval_accurracy": { "accuracy": 0.9878571428571429 }, "eval_f1": [ 0.9894867037724181, 0.9154228855721394, 1.0 ], "eval_loss": 0.03761892020702362, "eval_precision": [ 0.9962640099626401, 0.8679245283018868, 1.0 ], "eval_recall": [ 0.9828009828009828, 0.968421052631579, 1.0 ], "eval_runtime": 3.6656, "eval_samples_per_second": 13.64, "eval_steps_per_second": 0.273, "step": 80 }, { "epoch": 6.0, "grad_norm": 0.17665812373161316, "learning_rate": 3.5e-05, "loss": 0.0073, "step": 96 }, { "epoch": 6.0, "eval_accurracy": { "accuracy": 0.9878571428571429 }, "eval_f1": [ 0.9895897121861604, 0.9081081081081082, 1.0 ], "eval_loss": 0.04045535624027252, "eval_precision": [ 0.9865689865689866, 0.9333333333333333, 1.0 ], "eval_recall": [ 0.9926289926289926, 0.8842105263157894, 1.0 ], "eval_runtime": 3.1662, "eval_samples_per_second": 15.792, "eval_steps_per_second": 0.316, "step": 96 }, { "epoch": 7.0, "grad_norm": 0.46033716201782227, "learning_rate": 3.2500000000000004e-05, "loss": 0.0043, "step": 112 }, { "epoch": 7.0, "eval_accurracy": { "accuracy": 0.9885714285714285 }, "eval_f1": [ 0.9901840490797545, 0.9148936170212766, 1.0 ], "eval_loss": 0.0450633242726326, "eval_precision": [ 0.9889705882352942, 0.9247311827956989, 1.0 ], "eval_recall": [ 0.9914004914004914, 0.9052631578947369, 1.0 ], "eval_runtime": 3.1686, "eval_samples_per_second": 15.78, "eval_steps_per_second": 0.316, "step": 112 }, { "epoch": 8.0, "grad_norm": 0.7053983807563782, "learning_rate": 3e-05, "loss": 0.0029, "step": 128 }, { "epoch": 8.0, "eval_accurracy": { "accuracy": 0.9871428571428571 }, "eval_f1": [ 0.9889570552147239, 0.9042553191489362, 1.0 ], "eval_loss": 0.055573731660842896, "eval_precision": [ 0.9877450980392157, 0.9139784946236559, 1.0 ], "eval_recall": [ 0.9901719901719902, 0.8947368421052632, 1.0 ], "eval_runtime": 3.4353, "eval_samples_per_second": 14.555, "eval_steps_per_second": 0.291, "step": 128 }, { "epoch": 9.0, "grad_norm": 0.16288042068481445, "learning_rate": 2.7500000000000004e-05, "loss": 0.0023, "step": 144 }, { "epoch": 9.0, "eval_accurracy": { "accuracy": 0.9892857142857143 }, "eval_f1": [ 0.9908144519289651, 0.918918918918919, 1.0 ], "eval_loss": 0.05289832502603531, "eval_precision": [ 0.9877899877899878, 0.9444444444444444, 1.0 ], "eval_recall": [ 0.9938574938574939, 0.8947368421052632, 1.0 ], "eval_runtime": 3.8415, "eval_samples_per_second": 13.016, "eval_steps_per_second": 0.26, "step": 144 }, { "epoch": 10.0, "grad_norm": 0.19770997762680054, "learning_rate": 2.5e-05, "loss": 0.0021, "step": 160 }, { "epoch": 10.0, "eval_accurracy": { "accuracy": 0.9892857142857143 }, "eval_f1": [ 0.990791896869245, 0.9206349206349206, 1.0 ], "eval_loss": 0.04967246204614639, "eval_precision": [ 0.9901840490797545, 0.925531914893617, 1.0 ], "eval_recall": [ 0.9914004914004914, 0.9157894736842105, 1.0 ], "eval_runtime": 3.3345, "eval_samples_per_second": 14.995, "eval_steps_per_second": 0.3, "step": 160 }, { "epoch": 11.0, "grad_norm": 0.07273274660110474, "learning_rate": 2.25e-05, "loss": 0.0013, "step": 176 }, { "epoch": 11.0, "eval_accurracy": { "accuracy": 0.9921428571428571 }, "eval_f1": [ 0.9932390903503381, 0.9424083769633508, 1.0 ], "eval_loss": 0.04350681230425835, "eval_precision": [ 0.993849938499385, 0.9375, 1.0 ], "eval_recall": [ 0.9926289926289926, 0.9473684210526315, 1.0 ], "eval_runtime": 3.3327, "eval_samples_per_second": 15.003, "eval_steps_per_second": 0.3, "step": 176 } ], "logging_steps": 500, "max_steps": 320, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 168415447860000.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }