| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9840546697038723, |
| "eval_steps": 500, |
| "global_step": 327, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09111617312072894, |
| "grad_norm": 1.0032079586895355, |
| "learning_rate": 5e-06, |
| "loss": 0.7518, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.18223234624145787, |
| "grad_norm": 0.8916375795993198, |
| "learning_rate": 5e-06, |
| "loss": 0.6972, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2733485193621868, |
| "grad_norm": 0.859373901904076, |
| "learning_rate": 5e-06, |
| "loss": 0.6728, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.36446469248291574, |
| "grad_norm": 0.7508318921954407, |
| "learning_rate": 5e-06, |
| "loss": 0.668, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.45558086560364464, |
| "grad_norm": 0.7932399981252415, |
| "learning_rate": 5e-06, |
| "loss": 0.658, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5466970387243736, |
| "grad_norm": 0.6171331932517196, |
| "learning_rate": 5e-06, |
| "loss": 0.6521, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6378132118451025, |
| "grad_norm": 0.4020639858989144, |
| "learning_rate": 5e-06, |
| "loss": 0.6491, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7289293849658315, |
| "grad_norm": 0.2399377151309131, |
| "learning_rate": 5e-06, |
| "loss": 0.6404, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8200455580865603, |
| "grad_norm": 0.20345732615286874, |
| "learning_rate": 5e-06, |
| "loss": 0.642, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.9111617312072893, |
| "grad_norm": 0.2021457864159628, |
| "learning_rate": 5e-06, |
| "loss": 0.6435, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9931662870159453, |
| "eval_loss": 0.6352784037590027, |
| "eval_runtime": 113.0634, |
| "eval_samples_per_second": 104.57, |
| "eval_steps_per_second": 0.416, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.0045558086560364, |
| "grad_norm": 0.286103565654699, |
| "learning_rate": 5e-06, |
| "loss": 0.6685, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.0956719817767653, |
| "grad_norm": 0.18833303865470652, |
| "learning_rate": 5e-06, |
| "loss": 0.6237, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.1867881548974943, |
| "grad_norm": 0.16783561580473474, |
| "learning_rate": 5e-06, |
| "loss": 0.6246, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2779043280182232, |
| "grad_norm": 0.16206257154887665, |
| "learning_rate": 5e-06, |
| "loss": 0.6241, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.3690205011389522, |
| "grad_norm": 0.1947988949018225, |
| "learning_rate": 5e-06, |
| "loss": 0.6193, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.460136674259681, |
| "grad_norm": 0.1738333917201715, |
| "learning_rate": 5e-06, |
| "loss": 0.6227, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.5512528473804101, |
| "grad_norm": 0.14885151419707351, |
| "learning_rate": 5e-06, |
| "loss": 0.6173, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.642369020501139, |
| "grad_norm": 0.20398057021638838, |
| "learning_rate": 5e-06, |
| "loss": 0.6218, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.7334851936218678, |
| "grad_norm": 0.16009763606133326, |
| "learning_rate": 5e-06, |
| "loss": 0.6158, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.8246013667425967, |
| "grad_norm": 0.21016461721812518, |
| "learning_rate": 5e-06, |
| "loss": 0.615, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.9157175398633257, |
| "grad_norm": 0.1653615384724387, |
| "learning_rate": 5e-06, |
| "loss": 0.6146, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.997722095671982, |
| "eval_loss": 0.6264419555664062, |
| "eval_runtime": 112.952, |
| "eval_samples_per_second": 104.673, |
| "eval_steps_per_second": 0.416, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.009111617312073, |
| "grad_norm": 0.22236652778604762, |
| "learning_rate": 5e-06, |
| "loss": 0.6452, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.1002277904328017, |
| "grad_norm": 0.17479703968490523, |
| "learning_rate": 5e-06, |
| "loss": 0.6004, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.1913439635535306, |
| "grad_norm": 0.16834890964824797, |
| "learning_rate": 5e-06, |
| "loss": 0.6008, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.28246013667426, |
| "grad_norm": 0.17448916375775142, |
| "learning_rate": 5e-06, |
| "loss": 0.5979, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.3735763097949887, |
| "grad_norm": 0.1551086822119296, |
| "learning_rate": 5e-06, |
| "loss": 0.5996, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.4646924829157175, |
| "grad_norm": 0.17363176109388573, |
| "learning_rate": 5e-06, |
| "loss": 0.6067, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.5558086560364464, |
| "grad_norm": 0.16838126885978782, |
| "learning_rate": 5e-06, |
| "loss": 0.6034, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.646924829157175, |
| "grad_norm": 0.17833393348363288, |
| "learning_rate": 5e-06, |
| "loss": 0.5997, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.7380410022779045, |
| "grad_norm": 0.18346978760704183, |
| "learning_rate": 5e-06, |
| "loss": 0.6011, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.8291571753986333, |
| "grad_norm": 0.17686880454079934, |
| "learning_rate": 5e-06, |
| "loss": 0.6015, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.920273348519362, |
| "grad_norm": 0.17062928967728816, |
| "learning_rate": 5e-06, |
| "loss": 0.6015, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.9840546697038723, |
| "eval_loss": 0.6222477555274963, |
| "eval_runtime": 112.5243, |
| "eval_samples_per_second": 105.071, |
| "eval_steps_per_second": 0.418, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.9840546697038723, |
| "step": 327, |
| "total_flos": 2741850750320640.0, |
| "train_loss": 0.630699354574221, |
| "train_runtime": 17986.3583, |
| "train_samples_per_second": 37.465, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 327, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2741850750320640.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|