| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9310344827586206, |
| "eval_steps": 500, |
| "global_step": 14, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13793103448275862, |
| "grad_norm": 0.4272040277310641, |
| "learning_rate": 9.874639560909118e-06, |
| "loss": 0.1155, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 0.33370397348921355, |
| "learning_rate": 9.504844339512096e-06, |
| "loss": 0.1122, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 0.2722318279876112, |
| "learning_rate": 8.90915741234015e-06, |
| "loss": 0.1111, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 0.2450831855602725, |
| "learning_rate": 8.117449009293668e-06, |
| "loss": 0.1049, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 0.19900057099760882, |
| "learning_rate": 7.169418695587791e-06, |
| "loss": 0.0975, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.8275862068965517, |
| "grad_norm": 0.18810176402486756, |
| "learning_rate": 6.112604669781572e-06, |
| "loss": 0.0939, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.9655172413793104, |
| "grad_norm": 0.17410072415985545, |
| "learning_rate": 5e-06, |
| "loss": 0.0941, |
| "step": 7 |
| }, |
| { |
| "epoch": 1.103448275862069, |
| "grad_norm": 0.2902949965960051, |
| "learning_rate": 3.887395330218429e-06, |
| "loss": 0.1563, |
| "step": 8 |
| }, |
| { |
| "epoch": 1.2413793103448276, |
| "grad_norm": 0.1157793373052799, |
| "learning_rate": 2.83058130441221e-06, |
| "loss": 0.0807, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.3793103448275863, |
| "grad_norm": 0.1269089918618396, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 0.0818, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.5172413793103448, |
| "grad_norm": 0.13718545049887143, |
| "learning_rate": 1.0908425876598512e-06, |
| "loss": 0.0826, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.6551724137931034, |
| "grad_norm": 0.13360227237878022, |
| "learning_rate": 4.951556604879049e-07, |
| "loss": 0.0805, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.793103448275862, |
| "grad_norm": 0.1373240247538753, |
| "learning_rate": 1.253604390908819e-07, |
| "loss": 0.0843, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.9310344827586206, |
| "grad_norm": 0.13563216403584644, |
| "learning_rate": 0.0, |
| "loss": 0.0789, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.9310344827586206, |
| "step": 14, |
| "total_flos": 3917060505600.0, |
| "train_loss": 0.09816603043249675, |
| "train_runtime": 131.6086, |
| "train_samples_per_second": 8.753, |
| "train_steps_per_second": 0.106 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 14, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3917060505600.0, |
| "train_batch_size": 5, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|