| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 15.8, |
| "eval_steps": 1000, |
| "global_step": 47, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.4, |
| "grad_norm": 3.62260103225708, |
| "learning_rate": 0.0, |
| "loss": 3.8984, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 4.146153450012207, |
| "learning_rate": 8.613531161467861e-05, |
| "loss": 4.0859, |
| "step": 2 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.146153450012207, |
| "learning_rate": 8.613531161467861e-05, |
| "loss": 2.2656, |
| "step": 3 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 3.7563395500183105, |
| "learning_rate": 0.00013652123889719707, |
| "loss": 3.9453, |
| "step": 4 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 3.5408530235290527, |
| "learning_rate": 0.00017227062322935723, |
| "loss": 4.1719, |
| "step": 5 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.279040575027466, |
| "learning_rate": 0.0002, |
| "loss": 1.8281, |
| "step": 6 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 2.5531609058380127, |
| "learning_rate": 0.0002, |
| "loss": 3.4062, |
| "step": 7 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 2.6214144229888916, |
| "learning_rate": 0.00019523809523809525, |
| "loss": 3.4219, |
| "step": 8 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 2.6214144229888916, |
| "learning_rate": 0.00019523809523809525, |
| "loss": 1.3828, |
| "step": 9 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 3.3282411098480225, |
| "learning_rate": 0.00019047619047619048, |
| "loss": 3.25, |
| "step": 10 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 4.45896577835083, |
| "learning_rate": 0.00018571428571428572, |
| "loss": 2.4922, |
| "step": 11 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 2.991671085357666, |
| "learning_rate": 0.00018095238095238095, |
| "loss": 1.1719, |
| "step": 12 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 2.8518142700195312, |
| "learning_rate": 0.0001761904761904762, |
| "loss": 2.3516, |
| "step": 13 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 3.275972843170166, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 2.2422, |
| "step": 14 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 3.275972843170166, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 0.9141, |
| "step": 15 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 3.3755874633789062, |
| "learning_rate": 0.0001666666666666667, |
| "loss": 1.9297, |
| "step": 16 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 3.0458221435546875, |
| "learning_rate": 0.00016190476190476192, |
| "loss": 1.6328, |
| "step": 17 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 3.3989930152893066, |
| "learning_rate": 0.00015714285714285716, |
| "loss": 0.8516, |
| "step": 18 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 3.48042893409729, |
| "learning_rate": 0.00015238095238095237, |
| "loss": 1.4141, |
| "step": 19 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 3.6217923164367676, |
| "learning_rate": 0.00014761904761904763, |
| "loss": 1.3867, |
| "step": 20 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 3.6217923164367676, |
| "learning_rate": 0.00014761904761904763, |
| "loss": 0.5625, |
| "step": 21 |
| }, |
| { |
| "epoch": 7.4, |
| "grad_norm": 3.5909311771392822, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 0.9629, |
| "step": 22 |
| }, |
| { |
| "epoch": 7.8, |
| "grad_norm": 3.7631545066833496, |
| "learning_rate": 0.0001380952380952381, |
| "loss": 1.1055, |
| "step": 23 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 3.9868695735931396, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 0.4961, |
| "step": 24 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 4.413502216339111, |
| "learning_rate": 0.00012857142857142858, |
| "loss": 0.8281, |
| "step": 25 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 3.732664108276367, |
| "learning_rate": 0.0001238095238095238, |
| "loss": 0.7305, |
| "step": 26 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 3.732664108276367, |
| "learning_rate": 0.0001238095238095238, |
| "loss": 0.3672, |
| "step": 27 |
| }, |
| { |
| "epoch": 9.4, |
| "grad_norm": 2.9861297607421875, |
| "learning_rate": 0.00011904761904761905, |
| "loss": 0.5273, |
| "step": 28 |
| }, |
| { |
| "epoch": 9.8, |
| "grad_norm": 3.490156888961792, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 0.499, |
| "step": 29 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 4.10427188873291, |
| "learning_rate": 0.00010952380952380953, |
| "loss": 0.25, |
| "step": 30 |
| }, |
| { |
| "epoch": 10.4, |
| "grad_norm": 3.142503499984741, |
| "learning_rate": 0.00010476190476190477, |
| "loss": 0.4053, |
| "step": 31 |
| }, |
| { |
| "epoch": 10.8, |
| "grad_norm": 2.9335830211639404, |
| "learning_rate": 0.0001, |
| "loss": 0.3467, |
| "step": 32 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 2.9335830211639404, |
| "learning_rate": 0.0001, |
| "loss": 0.1235, |
| "step": 33 |
| }, |
| { |
| "epoch": 11.4, |
| "grad_norm": 3.8848934173583984, |
| "learning_rate": 9.523809523809524e-05, |
| "loss": 0.2607, |
| "step": 34 |
| }, |
| { |
| "epoch": 11.8, |
| "grad_norm": 3.1604433059692383, |
| "learning_rate": 9.047619047619048e-05, |
| "loss": 0.187, |
| "step": 35 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 2.25864839553833, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 0.1621, |
| "step": 36 |
| }, |
| { |
| "epoch": 12.4, |
| "grad_norm": 2.059041976928711, |
| "learning_rate": 8.095238095238096e-05, |
| "loss": 0.1548, |
| "step": 37 |
| }, |
| { |
| "epoch": 12.8, |
| "grad_norm": 2.72867751121521, |
| "learning_rate": 7.619047619047618e-05, |
| "loss": 0.2002, |
| "step": 38 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 2.72867751121521, |
| "learning_rate": 7.619047619047618e-05, |
| "loss": 0.0879, |
| "step": 39 |
| }, |
| { |
| "epoch": 13.4, |
| "grad_norm": 1.2181490659713745, |
| "learning_rate": 7.142857142857143e-05, |
| "loss": 0.1421, |
| "step": 40 |
| }, |
| { |
| "epoch": 13.8, |
| "grad_norm": 1.1512566804885864, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.0862, |
| "step": 41 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.9324586391448975, |
| "learning_rate": 6.19047619047619e-05, |
| "loss": 0.0447, |
| "step": 42 |
| }, |
| { |
| "epoch": 14.4, |
| "grad_norm": 0.8047900795936584, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 0.0883, |
| "step": 43 |
| }, |
| { |
| "epoch": 14.8, |
| "grad_norm": 0.9313658475875854, |
| "learning_rate": 5.2380952380952384e-05, |
| "loss": 0.1011, |
| "step": 44 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.9313658475875854, |
| "learning_rate": 5.2380952380952384e-05, |
| "loss": 0.0596, |
| "step": 45 |
| }, |
| { |
| "epoch": 15.4, |
| "grad_norm": 0.9806156158447266, |
| "learning_rate": 4.761904761904762e-05, |
| "loss": 0.0974, |
| "step": 46 |
| }, |
| { |
| "epoch": 15.8, |
| "grad_norm": 1.0772619247436523, |
| "learning_rate": 4.2857142857142856e-05, |
| "loss": 0.1035, |
| "step": 47 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 47, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 16, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 835189824552960.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|