| { | |
| "best_metric": 0.02908700704574585, | |
| "best_model_checkpoint": "results_bert-base-italian-xxl-cased/epoch20_bs64/checkpoint-72", | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 252, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.8602136373519897, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.1559, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9891317050368119 | |
| }, | |
| "eval_f1": [ | |
| 0.9865353988707108, | |
| 0.8812260536398467, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.030727442353963852, | |
| "eval_precision": [ | |
| 0.990119151409474, | |
| 0.8539603960396039, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9829774956722447, | |
| 0.9102902374670184, | |
| 1.0 | |
| ], | |
| "eval_runtime": 3.4924, | |
| "eval_samples_per_second": 56.981, | |
| "eval_steps_per_second": 1.145, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.7721014618873596, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.0329, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9892485684235129 | |
| }, | |
| "eval_f1": [ | |
| 0.9867511520737328, | |
| 0.8766756032171582, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.02908700704574585, | |
| "eval_precision": [ | |
| 0.9850488786659, | |
| 0.8910081743869209, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9884593190998269, | |
| 0.862796833773087, | |
| 1.0 | |
| ], | |
| "eval_runtime": 3.6114, | |
| "eval_samples_per_second": 55.104, | |
| "eval_steps_per_second": 1.108, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.09961529076099396, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.0245, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9887811148767092 | |
| }, | |
| "eval_f1": [ | |
| 0.9860909881193857, | |
| 0.8781725888324873, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.030218517407774925, | |
| "eval_precision": [ | |
| 0.990395809080326, | |
| 0.8459657701711492, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9818234275822274, | |
| 0.9129287598944591, | |
| 1.0 | |
| ], | |
| "eval_runtime": 4.3781, | |
| "eval_samples_per_second": 45.454, | |
| "eval_steps_per_second": 0.914, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.6222161054611206, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0193, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9899497487437185 | |
| }, | |
| "eval_f1": [ | |
| 0.9876187733947596, | |
| 0.8844086021505376, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.03514599800109863, | |
| "eval_precision": [ | |
| 0.985632183908046, | |
| 0.9013698630136986, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9896133871898442, | |
| 0.8680738786279684, | |
| 1.0 | |
| ], | |
| "eval_runtime": 4.0342, | |
| "eval_samples_per_second": 49.328, | |
| "eval_steps_per_second": 0.992, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.7397810220718384, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.016, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9892485684235129 | |
| }, | |
| "eval_f1": [ | |
| 0.9867052023121388, | |
| 0.8805194805194805, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.03297892585396767, | |
| "eval_precision": [ | |
| 0.9884192240880139, | |
| 0.8670076726342711, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9849971148297749, | |
| 0.8944591029023746, | |
| 1.0 | |
| ], | |
| "eval_runtime": 3.7497, | |
| "eval_samples_per_second": 53.071, | |
| "eval_steps_per_second": 1.067, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.4494711756706238, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0125, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9891317050368119 | |
| }, | |
| "eval_f1": [ | |
| 0.9865781498051667, | |
| 0.8777923784494087, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.03959830850362778, | |
| "eval_precision": [ | |
| 0.9870054865723361, | |
| 0.8743455497382199, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9861511829197923, | |
| 0.8812664907651715, | |
| 1.0 | |
| ], | |
| "eval_runtime": 3.3105, | |
| "eval_samples_per_second": 60.112, | |
| "eval_steps_per_second": 1.208, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.7388216853141785, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.0111, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9893654318102139 | |
| }, | |
| "eval_f1": [ | |
| 0.9868211440984793, | |
| 0.8840764331210191, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.04391070082783699, | |
| "eval_precision": [ | |
| 0.9906949694678686, | |
| 0.854679802955665, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9829774956722447, | |
| 0.9155672823218998, | |
| 1.0 | |
| ], | |
| "eval_runtime": 3.3481, | |
| "eval_samples_per_second": 59.437, | |
| "eval_steps_per_second": 1.195, | |
| "step": 252 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 720, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 328679588043576.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |