| { |
| "best_global_step": 326, |
| "best_metric": 1.5841516256332397, |
| "best_model_checkpoint": "results_t5base/checkpoint-326", |
| "epoch": 2.0, |
| "eval_steps": 200, |
| "global_step": 326, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06134969325153374, |
| "grad_norm": 116.47437286376953, |
| "learning_rate": 1.8e-05, |
| "loss": 15.8526, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12269938650306748, |
| "grad_norm": 36.57353591918945, |
| "learning_rate": 3.8e-05, |
| "loss": 11.7488, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.18404907975460122, |
| "grad_norm": 32.9762077331543, |
| "learning_rate": 5.800000000000001e-05, |
| "loss": 10.3287, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.24539877300613497, |
| "grad_norm": 23.83042335510254, |
| "learning_rate": 7.8e-05, |
| "loss": 6.4168, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3067484662576687, |
| "grad_norm": 4.865868091583252, |
| "learning_rate": 9.800000000000001e-05, |
| "loss": 3.5254, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.36809815950920244, |
| "grad_norm": 12.269708633422852, |
| "learning_rate": 0.000118, |
| "loss": 3.0389, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4294478527607362, |
| "grad_norm": 7.369626998901367, |
| "learning_rate": 0.00013800000000000002, |
| "loss": 2.5275, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.49079754601226994, |
| "grad_norm": 2.0186314582824707, |
| "learning_rate": 0.000158, |
| "loss": 2.1442, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5521472392638037, |
| "grad_norm": 1.231919527053833, |
| "learning_rate": 0.000178, |
| "loss": 1.8347, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6134969325153374, |
| "grad_norm": 1.3266997337341309, |
| "learning_rate": 0.00019800000000000002, |
| "loss": 1.7494, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6748466257668712, |
| "grad_norm": 1.498828649520874, |
| "learning_rate": 0.000218, |
| "loss": 1.8056, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.7361963190184049, |
| "grad_norm": 0.9476116299629211, |
| "learning_rate": 0.00023799999999999998, |
| "loss": 1.7621, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7975460122699386, |
| "grad_norm": 1.154313564300537, |
| "learning_rate": 0.00025800000000000004, |
| "loss": 1.7556, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.8588957055214724, |
| "grad_norm": 0.7778469920158386, |
| "learning_rate": 0.00027800000000000004, |
| "loss": 1.7215, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.9202453987730062, |
| "grad_norm": 0.8308514356613159, |
| "learning_rate": 0.000298, |
| "loss": 1.6761, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9815950920245399, |
| "grad_norm": 1.156718373298645, |
| "learning_rate": 0.00031800000000000003, |
| "loss": 1.6865, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 187.0146, |
| "eval_loss": 1.5963470935821533, |
| "eval_rouge1": 0.6129, |
| "eval_rouge2": 0.2686, |
| "eval_rougeL": 0.4794, |
| "eval_runtime": 30.837, |
| "eval_samples_per_second": 8.885, |
| "eval_steps_per_second": 1.135, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.0429447852760736, |
| "grad_norm": 0.8923042416572571, |
| "learning_rate": 0.00033800000000000003, |
| "loss": 1.6244, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.1042944785276074, |
| "grad_norm": 1.0038191080093384, |
| "learning_rate": 0.000358, |
| "loss": 1.6075, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.165644171779141, |
| "grad_norm": 0.9305262565612793, |
| "learning_rate": 0.000378, |
| "loss": 1.6722, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.2269938650306749, |
| "grad_norm": 1.2464691400527954, |
| "learning_rate": 0.000398, |
| "loss": 1.5723, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.2883435582822087, |
| "grad_norm": 1.017632246017456, |
| "learning_rate": 0.00041799999999999997, |
| "loss": 1.5711, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.3496932515337423, |
| "grad_norm": 1.2981927394866943, |
| "learning_rate": 0.000438, |
| "loss": 1.5017, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.4110429447852761, |
| "grad_norm": 0.7919584512710571, |
| "learning_rate": 0.000458, |
| "loss": 1.5671, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.4723926380368098, |
| "grad_norm": 1.0223811864852905, |
| "learning_rate": 0.00047799999999999996, |
| "loss": 1.5162, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.5337423312883436, |
| "grad_norm": 0.8784969449043274, |
| "learning_rate": 0.000498, |
| "loss": 1.5689, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.5950920245398774, |
| "grad_norm": 0.8144865036010742, |
| "learning_rate": 0.000518, |
| "loss": 1.6243, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.656441717791411, |
| "grad_norm": 0.846225380897522, |
| "learning_rate": 0.0005380000000000001, |
| "loss": 1.5596, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.7177914110429446, |
| "grad_norm": 0.9448590278625488, |
| "learning_rate": 0.000558, |
| "loss": 1.399, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.7791411042944785, |
| "grad_norm": 0.9131848812103271, |
| "learning_rate": 0.000578, |
| "loss": 1.6336, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.8404907975460123, |
| "grad_norm": 0.8644697070121765, |
| "learning_rate": 0.000598, |
| "loss": 1.6112, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.9018404907975461, |
| "grad_norm": 0.964495062828064, |
| "learning_rate": 0.0006180000000000001, |
| "loss": 1.5901, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.9631901840490797, |
| "grad_norm": 0.8454228043556213, |
| "learning_rate": 0.000638, |
| "loss": 1.609, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 187.0146, |
| "eval_loss": 1.5841516256332397, |
| "eval_rouge1": 0.6218, |
| "eval_rouge2": 0.2773, |
| "eval_rougeL": 0.4814, |
| "eval_runtime": 30.1539, |
| "eval_samples_per_second": 9.087, |
| "eval_steps_per_second": 1.161, |
| "step": 326 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 815, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1780369116364800.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|