| { |
| "best_metric": 20.872765506693366, |
| "best_model_checkpoint": "./routed_longformer_docmt/checkpoint-2358", |
| "epoch": 2.9980928162746343, |
| "eval_steps": 500, |
| "global_step": 2358, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12714558169103624, |
| "grad_norm": 2.349264621734619, |
| "learning_rate": 2.875318066157761e-05, |
| "loss": 3.6286, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.25429116338207247, |
| "grad_norm": 1.9311673641204834, |
| "learning_rate": 2.7480916030534352e-05, |
| "loss": 2.9688, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3814367450731087, |
| "grad_norm": 3.087153673171997, |
| "learning_rate": 2.6208651399491097e-05, |
| "loss": 2.7041, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5085823267641449, |
| "grad_norm": 1.7739580869674683, |
| "learning_rate": 2.494910941475827e-05, |
| "loss": 2.5374, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6357279084551812, |
| "grad_norm": 1.8503930568695068, |
| "learning_rate": 2.3676844783715012e-05, |
| "loss": 2.4237, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7628734901462174, |
| "grad_norm": 1.7974401712417603, |
| "learning_rate": 2.2417302798982186e-05, |
| "loss": 2.3547, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8900190718372537, |
| "grad_norm": 1.7522088289260864, |
| "learning_rate": 2.1157760814249364e-05, |
| "loss": 2.2865, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9993642720915448, |
| "eval_bleu": 13.574521383946667, |
| "eval_comet": 0.3645593269124563, |
| "eval_loss": 2.1787571907043457, |
| "eval_meteor": 0.2980500580872142, |
| "eval_runtime": 789.446, |
| "eval_samples_per_second": 1.771, |
| "eval_steps_per_second": 0.222, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.0171646535282899, |
| "grad_norm": 1.9924530982971191, |
| "learning_rate": 1.989821882951654e-05, |
| "loss": 2.3328, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1443102352193262, |
| "grad_norm": 1.6780940294265747, |
| "learning_rate": 1.8625954198473282e-05, |
| "loss": 2.2451, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.2714558169103625, |
| "grad_norm": 1.6908459663391113, |
| "learning_rate": 1.7353689567430024e-05, |
| "loss": 2.1888, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.3986013986013985, |
| "grad_norm": 1.6930561065673828, |
| "learning_rate": 1.6094147582697204e-05, |
| "loss": 2.1707, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.5257469802924348, |
| "grad_norm": 1.614856243133545, |
| "learning_rate": 1.4834605597964377e-05, |
| "loss": 2.1682, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.6528925619834711, |
| "grad_norm": 1.6593618392944336, |
| "learning_rate": 1.356234096692112e-05, |
| "loss": 2.1013, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.7800381436745072, |
| "grad_norm": 1.5323368310928345, |
| "learning_rate": 1.2290076335877863e-05, |
| "loss": 2.05, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.9071837253655435, |
| "grad_norm": 1.511836051940918, |
| "learning_rate": 1.1030534351145039e-05, |
| "loss": 2.0517, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bleu": 19.3520092950325, |
| "eval_comet": 0.448593350012756, |
| "eval_loss": 1.8939152956008911, |
| "eval_meteor": 0.38384472834012545, |
| "eval_runtime": 785.5497, |
| "eval_samples_per_second": 1.78, |
| "eval_steps_per_second": 0.223, |
| "step": 1573 |
| }, |
| { |
| "epoch": 2.0343293070565798, |
| "grad_norm": 1.7614094018936157, |
| "learning_rate": 9.758269720101782e-06, |
| "loss": 2.0315, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.161474888747616, |
| "grad_norm": 1.6287394762039185, |
| "learning_rate": 8.486005089058525e-06, |
| "loss": 2.0192, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.2886204704386524, |
| "grad_norm": 1.5242124795913696, |
| "learning_rate": 7.213740458015268e-06, |
| "loss": 1.9957, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.4157660521296886, |
| "grad_norm": 1.535020351409912, |
| "learning_rate": 5.94147582697201e-06, |
| "loss": 1.9768, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.542911633820725, |
| "grad_norm": 1.5248209238052368, |
| "learning_rate": 4.669211195928753e-06, |
| "loss": 1.998, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.6700572155117612, |
| "grad_norm": 1.3780014514923096, |
| "learning_rate": 3.3969465648854963e-06, |
| "loss": 1.9695, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.797202797202797, |
| "grad_norm": 1.498318076133728, |
| "learning_rate": 2.124681933842239e-06, |
| "loss": 1.9396, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.9243483788938334, |
| "grad_norm": 1.5350223779678345, |
| "learning_rate": 8.524173027989822e-07, |
| "loss": 1.9429, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.9980928162746343, |
| "eval_bleu": 20.872765506693366, |
| "eval_comet": 0.4747417943821274, |
| "eval_loss": 1.8313450813293457, |
| "eval_meteor": 0.4023453750660353, |
| "eval_runtime": 787.214, |
| "eval_samples_per_second": 1.776, |
| "eval_steps_per_second": 0.222, |
| "step": 2358 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2358, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|