{
  "best_global_step": 8265,
  "best_metric": 0.10672979801893234,
  "best_model_checkpoint": "modelos/treinados/modelo_bert_fato_teses_bert_multilingual_cased/checkpoint-8265",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 11020,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18148820326678766,
      "grad_norm": 0.9450770020484924,
      "learning_rate": 1.9094373865698732e-05,
      "loss": 0.3273,
      "step": 500
    },
    {
      "epoch": 0.3629764065335753,
      "grad_norm": 1.8763504028320312,
      "learning_rate": 1.8186932849364793e-05,
      "loss": 0.213,
      "step": 1000
    },
    {
      "epoch": 0.5444646098003629,
      "grad_norm": 6.744544982910156,
      "learning_rate": 1.7279491833030854e-05,
      "loss": 0.1823,
      "step": 1500
    },
    {
      "epoch": 0.7259528130671506,
      "grad_norm": 0.1076168492436409,
      "learning_rate": 1.6372050816696915e-05,
      "loss": 0.1568,
      "step": 2000
    },
    {
      "epoch": 0.9074410163339383,
      "grad_norm": 0.03117656148970127,
      "learning_rate": 1.5464609800362976e-05,
      "loss": 0.1619,
      "step": 2500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9738704409363091,
      "eval_loss": 0.11249715089797974,
      "eval_runtime": 700.568,
      "eval_samples_per_second": 7.866,
      "eval_steps_per_second": 0.983,
      "step": 2755
    },
    {
      "epoch": 1.0889292196007259,
      "grad_norm": 0.010204868391156197,
      "learning_rate": 1.4557168784029038e-05,
      "loss": 0.1101,
      "step": 3000
    },
    {
      "epoch": 1.2704174228675136,
      "grad_norm": 0.07266418635845184,
      "learning_rate": 1.36497277676951e-05,
      "loss": 0.1146,
      "step": 3500
    },
    {
      "epoch": 1.4519056261343013,
      "grad_norm": 16.94893455505371,
      "learning_rate": 1.2742286751361164e-05,
      "loss": 0.0961,
      "step": 4000
    },
    {
      "epoch": 1.633393829401089,
      "grad_norm": 4.739220142364502,
      "learning_rate": 1.1834845735027225e-05,
      "loss": 0.0783,
      "step": 4500
    },
    {
      "epoch": 1.8148820326678767,
      "grad_norm": 0.09760759770870209,
      "learning_rate": 1.0927404718693286e-05,
      "loss": 0.0901,
      "step": 5000
    },
    {
      "epoch": 1.9963702359346642,
      "grad_norm": 7.626439571380615,
      "learning_rate": 1.0019963702359348e-05,
      "loss": 0.0863,
      "step": 5500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9740518962075848,
      "eval_loss": 0.1354905664920807,
      "eval_runtime": 699.3375,
      "eval_samples_per_second": 7.88,
      "eval_steps_per_second": 0.985,
      "step": 5510
    },
    {
      "epoch": 2.1778584392014517,
      "grad_norm": 0.00856301560997963,
      "learning_rate": 9.11252268602541e-06,
      "loss": 0.0625,
      "step": 6000
    },
    {
      "epoch": 2.3593466424682394,
      "grad_norm": 0.054624781012535095,
      "learning_rate": 8.20508166969147e-06,
      "loss": 0.0416,
      "step": 6500
    },
    {
      "epoch": 2.540834845735027,
      "grad_norm": 1.524115800857544,
      "learning_rate": 7.297640653357533e-06,
      "loss": 0.0478,
      "step": 7000
    },
    {
      "epoch": 2.722323049001815,
      "grad_norm": 0.007021903060376644,
      "learning_rate": 6.390199637023594e-06,
      "loss": 0.0378,
      "step": 7500
    },
    {
      "epoch": 2.9038112522686026,
      "grad_norm": 0.003797353943809867,
      "learning_rate": 5.4827586206896556e-06,
      "loss": 0.0482,
      "step": 8000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9834875703139176,
      "eval_loss": 0.10672979801893234,
      "eval_runtime": 708.5009,
      "eval_samples_per_second": 7.778,
      "eval_steps_per_second": 0.972,
      "step": 8265
    },
    {
      "epoch": 3.0852994555353903,
      "grad_norm": 0.0551212877035141,
      "learning_rate": 4.575317604355717e-06,
      "loss": 0.0396,
      "step": 8500
    },
    {
      "epoch": 3.266787658802178,
      "grad_norm": 0.003503380110487342,
      "learning_rate": 3.6678765880217788e-06,
      "loss": 0.0258,
      "step": 9000
    },
    {
      "epoch": 3.4482758620689653,
      "grad_norm": 0.034748516976833344,
      "learning_rate": 2.7604355716878406e-06,
      "loss": 0.0168,
      "step": 9500
    },
    {
      "epoch": 3.629764065335753,
      "grad_norm": 0.28772714734077454,
      "learning_rate": 1.8529945553539021e-06,
      "loss": 0.0246,
      "step": 10000
    },
    {
      "epoch": 3.8112522686025407,
      "grad_norm": 0.008872357197105885,
      "learning_rate": 9.455535390199638e-07,
      "loss": 0.0172,
      "step": 10500
    },
    {
      "epoch": 3.9927404718693285,
      "grad_norm": 0.0023940089158713818,
      "learning_rate": 3.8112522686025416e-08,
      "loss": 0.0149,
      "step": 11000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9831246597713663,
      "eval_loss": 0.11994421482086182,
      "eval_runtime": 701.1801,
      "eval_samples_per_second": 7.86,
      "eval_steps_per_second": 0.983,
      "step": 11020
    }
  ],
  "logging_steps": 500,
  "max_steps": 11020,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.319607890690048e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}