{ "best_metric": 0.02908700704574585, "best_model_checkpoint": "results_bert-base-italian-xxl-cased/epoch20_bs64/checkpoint-72", "epoch": 7.0, "eval_steps": 500, "global_step": 252, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.8602136373519897, "learning_rate": 4.75e-05, "loss": 0.1559, "step": 36 }, { "epoch": 1.0, "eval_accurracy": { "accuracy": 0.9891317050368119 }, "eval_f1": [ 0.9865353988707108, 0.8812260536398467, 1.0 ], "eval_loss": 0.030727442353963852, "eval_precision": [ 0.990119151409474, 0.8539603960396039, 1.0 ], "eval_recall": [ 0.9829774956722447, 0.9102902374670184, 1.0 ], "eval_runtime": 3.4924, "eval_samples_per_second": 56.981, "eval_steps_per_second": 1.145, "step": 36 }, { "epoch": 2.0, "grad_norm": 0.7721014618873596, "learning_rate": 4.5e-05, "loss": 0.0329, "step": 72 }, { "epoch": 2.0, "eval_accurracy": { "accuracy": 0.9892485684235129 }, "eval_f1": [ 0.9867511520737328, 0.8766756032171582, 1.0 ], "eval_loss": 0.02908700704574585, "eval_precision": [ 0.9850488786659, 0.8910081743869209, 1.0 ], "eval_recall": [ 0.9884593190998269, 0.862796833773087, 1.0 ], "eval_runtime": 3.6114, "eval_samples_per_second": 55.104, "eval_steps_per_second": 1.108, "step": 72 }, { "epoch": 3.0, "grad_norm": 0.09961529076099396, "learning_rate": 4.25e-05, "loss": 0.0245, "step": 108 }, { "epoch": 3.0, "eval_accurracy": { "accuracy": 0.9887811148767092 }, "eval_f1": [ 0.9860909881193857, 0.8781725888324873, 1.0 ], "eval_loss": 0.030218517407774925, "eval_precision": [ 0.990395809080326, 0.8459657701711492, 1.0 ], "eval_recall": [ 0.9818234275822274, 0.9129287598944591, 1.0 ], "eval_runtime": 4.3781, "eval_samples_per_second": 45.454, "eval_steps_per_second": 0.914, "step": 108 }, { "epoch": 4.0, "grad_norm": 0.6222161054611206, "learning_rate": 4e-05, "loss": 0.0193, "step": 144 }, { "epoch": 4.0, "eval_accurracy": { "accuracy": 0.9899497487437185 }, "eval_f1": [ 0.9876187733947596, 0.8844086021505376, 1.0 ], "eval_loss": 0.03514599800109863, "eval_precision": [ 0.985632183908046, 0.9013698630136986, 1.0 ], "eval_recall": [ 0.9896133871898442, 0.8680738786279684, 1.0 ], "eval_runtime": 4.0342, "eval_samples_per_second": 49.328, "eval_steps_per_second": 0.992, "step": 144 }, { "epoch": 5.0, "grad_norm": 0.7397810220718384, "learning_rate": 3.7500000000000003e-05, "loss": 0.016, "step": 180 }, { "epoch": 5.0, "eval_accurracy": { "accuracy": 0.9892485684235129 }, "eval_f1": [ 0.9867052023121388, 0.8805194805194805, 1.0 ], "eval_loss": 0.03297892585396767, "eval_precision": [ 0.9884192240880139, 0.8670076726342711, 1.0 ], "eval_recall": [ 0.9849971148297749, 0.8944591029023746, 1.0 ], "eval_runtime": 3.7497, "eval_samples_per_second": 53.071, "eval_steps_per_second": 1.067, "step": 180 }, { "epoch": 6.0, "grad_norm": 0.4494711756706238, "learning_rate": 3.5e-05, "loss": 0.0125, "step": 216 }, { "epoch": 6.0, "eval_accurracy": { "accuracy": 0.9891317050368119 }, "eval_f1": [ 0.9865781498051667, 0.8777923784494087, 1.0 ], "eval_loss": 0.03959830850362778, "eval_precision": [ 0.9870054865723361, 0.8743455497382199, 1.0 ], "eval_recall": [ 0.9861511829197923, 0.8812664907651715, 1.0 ], "eval_runtime": 3.3105, "eval_samples_per_second": 60.112, "eval_steps_per_second": 1.208, "step": 216 }, { "epoch": 7.0, "grad_norm": 0.7388216853141785, "learning_rate": 3.2500000000000004e-05, "loss": 0.0111, "step": 252 }, { "epoch": 7.0, "eval_accurracy": { "accuracy": 0.9893654318102139 }, "eval_f1": [ 0.9868211440984793, 0.8840764331210191, 1.0 ], "eval_loss": 0.04391070082783699, "eval_precision": [ 0.9906949694678686, 0.854679802955665, 1.0 ], "eval_recall": [ 0.9829774956722447, 0.9155672823218998, 1.0 ], "eval_runtime": 3.3481, "eval_samples_per_second": 59.437, "eval_steps_per_second": 1.195, "step": 252 } ], "logging_steps": 500, "max_steps": 720, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 328679588043576.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }