| { |
| "best_global_step": 1560, |
| "best_metric": 0.2530927062034607, |
| "best_model_checkpoint": "/content/drive/MyDrive/train_results/results_t5/afriteva_small-lora/checkpoint-1560", |
| "epoch": 4.9856, |
| "eval_steps": 500, |
| "global_step": 1560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5537114143371582, |
| "learning_rate": 0.00019371794871794873, |
| "loss": 2.1644, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.45213383436203003, |
| "learning_rate": 0.00018730769230769232, |
| "loss": 0.5264, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5010946393013, |
| "learning_rate": 0.00018089743589743592, |
| "loss": 0.4825, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.564462423324585, |
| "learning_rate": 0.00017448717948717948, |
| "loss": 0.4543, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.4424746334552765, |
| "learning_rate": 0.00016807692307692308, |
| "loss": 0.4352, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.5562136173248291, |
| "learning_rate": 0.00016166666666666668, |
| "loss": 0.4672, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_bleu": 0.08270094772255246, |
| "eval_cer": 1.8354712455333493, |
| "eval_f1": 0.3748111649810917, |
| "eval_loss": 0.3291796147823334, |
| "eval_meteor": 0.23279765588863582, |
| "eval_runtime": 511.3288, |
| "eval_samples_per_second": 1.956, |
| "eval_steps_per_second": 0.244, |
| "eval_wer": 2.0786608462932272, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.1184, |
| "grad_norm": 0.5687721967697144, |
| "learning_rate": 0.00015525641025641027, |
| "loss": 0.4037, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.2784, |
| "grad_norm": 0.6549445390701294, |
| "learning_rate": 0.00014884615384615387, |
| "loss": 0.4223, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.4384000000000001, |
| "grad_norm": 0.4887824058532715, |
| "learning_rate": 0.00014243589743589746, |
| "loss": 0.4199, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.5984, |
| "grad_norm": 0.9961332082748413, |
| "learning_rate": 0.00013602564102564103, |
| "loss": 0.398, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.7584, |
| "grad_norm": 0.5644873380661011, |
| "learning_rate": 0.00012961538461538462, |
| "loss": 0.4103, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.9184, |
| "grad_norm": 0.657461404800415, |
| "learning_rate": 0.00012320512820512822, |
| "loss": 0.3996, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bleu": 0.1514995756426066, |
| "eval_cer": 1.0595959091289082, |
| "eval_f1": 0.4719226776684196, |
| "eval_loss": 0.28840696811676025, |
| "eval_meteor": 0.33743098014851813, |
| "eval_runtime": 423.4149, |
| "eval_samples_per_second": 2.362, |
| "eval_steps_per_second": 0.295, |
| "eval_wer": 1.215563584359524, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.0768, |
| "grad_norm": 0.5938355326652527, |
| "learning_rate": 0.00011679487179487181, |
| "loss": 0.3942, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.2368, |
| "grad_norm": 0.6994353532791138, |
| "learning_rate": 0.0001103846153846154, |
| "loss": 0.3769, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.3968, |
| "grad_norm": 0.5972464680671692, |
| "learning_rate": 0.00010397435897435899, |
| "loss": 0.3852, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.5568, |
| "grad_norm": 0.5828729867935181, |
| "learning_rate": 9.756410256410257e-05, |
| "loss": 0.3734, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.7168, |
| "grad_norm": 0.5360562205314636, |
| "learning_rate": 9.115384615384615e-05, |
| "loss": 0.3733, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.8768000000000002, |
| "grad_norm": 0.8126237392425537, |
| "learning_rate": 8.474358974358975e-05, |
| "loss": 0.3894, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_bleu": 0.21915803298526138, |
| "eval_cer": 0.7264520084511871, |
| "eval_f1": 0.5427453765644922, |
| "eval_loss": 0.26605311036109924, |
| "eval_meteor": 0.42146616013366456, |
| "eval_runtime": 356.3085, |
| "eval_samples_per_second": 2.807, |
| "eval_steps_per_second": 0.351, |
| "eval_wer": 0.899910051039441, |
| "step": 939 |
| }, |
| { |
| "epoch": 3.0352, |
| "grad_norm": 0.5388778448104858, |
| "learning_rate": 7.833333333333333e-05, |
| "loss": 0.3593, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.1952, |
| "grad_norm": 0.5763868689537048, |
| "learning_rate": 7.192307692307693e-05, |
| "loss": 0.3458, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.3552, |
| "grad_norm": 0.6290721893310547, |
| "learning_rate": 6.551282051282052e-05, |
| "loss": 0.3717, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.5152, |
| "grad_norm": 0.6682249903678894, |
| "learning_rate": 5.910256410256411e-05, |
| "loss": 0.3753, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.6752000000000002, |
| "grad_norm": 0.6957021951675415, |
| "learning_rate": 5.26923076923077e-05, |
| "loss": 0.3488, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.8352, |
| "grad_norm": 0.7361820936203003, |
| "learning_rate": 4.6282051282051287e-05, |
| "loss": 0.3457, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.9952, |
| "grad_norm": 0.8249253034591675, |
| "learning_rate": 3.9871794871794875e-05, |
| "loss": 0.3807, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_bleu": 0.24594694557092867, |
| "eval_cer": 0.6551059613798427, |
| "eval_f1": 0.568844715510053, |
| "eval_loss": 0.25580132007598877, |
| "eval_meteor": 0.4567788944718627, |
| "eval_runtime": 348.0291, |
| "eval_samples_per_second": 2.873, |
| "eval_steps_per_second": 0.359, |
| "eval_wer": 0.825159759352005, |
| "step": 1252 |
| }, |
| { |
| "epoch": 4.1536, |
| "grad_norm": 0.5967223644256592, |
| "learning_rate": 3.346153846153846e-05, |
| "loss": 0.3382, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.3136, |
| "grad_norm": 0.7052249312400818, |
| "learning_rate": 2.705128205128205e-05, |
| "loss": 0.3547, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.4736, |
| "grad_norm": 0.5930851697921753, |
| "learning_rate": 2.064102564102564e-05, |
| "loss": 0.3653, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.6336, |
| "grad_norm": 0.6970656514167786, |
| "learning_rate": 1.423076923076923e-05, |
| "loss": 0.3452, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.7936, |
| "grad_norm": 0.6488224267959595, |
| "learning_rate": 7.82051282051282e-06, |
| "loss": 0.3574, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.9536, |
| "grad_norm": 0.6191901564598083, |
| "learning_rate": 1.4102564102564104e-06, |
| "loss": 0.3494, |
| "step": 1550 |
| }, |
| { |
| "epoch": 4.9856, |
| "eval_bleu": 0.2540393999718943, |
| "eval_cer": 0.5949038795235092, |
| "eval_f1": 0.5773283287693705, |
| "eval_loss": 0.2530927062034607, |
| "eval_meteor": 0.46690054172490514, |
| "eval_runtime": 345.3776, |
| "eval_samples_per_second": 2.895, |
| "eval_steps_per_second": 0.362, |
| "eval_wer": 0.7849787451961843, |
| "step": 1560 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1709484225331200.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|