| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.875912408759124, |
| "eval_steps": 1000, |
| "global_step": 51, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.058394160583941604, |
| "grad_norm": 4.656679619700339, |
| "learning_rate": 0.0, |
| "loss": 0.4427, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.11678832116788321, |
| "grad_norm": 4.860123771865997, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.4479, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.17518248175182483, |
| "grad_norm": 2.242956104917747, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.4042, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.23357664233576642, |
| "grad_norm": 1.6608015519980013, |
| "learning_rate": 2.5e-05, |
| "loss": 0.3715, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.291970802919708, |
| "grad_norm": 2.824986830456239, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.3487, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.35036496350364965, |
| "grad_norm": 2.360090022024097, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.3541, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.40875912408759124, |
| "grad_norm": 1.862994452379866, |
| "learning_rate": 5e-05, |
| "loss": 0.3371, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.46715328467153283, |
| "grad_norm": 1.0596405353524974, |
| "learning_rate": 4.993910125649561e-05, |
| "loss": 0.328, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.5255474452554745, |
| "grad_norm": 1.1579746499672514, |
| "learning_rate": 4.975670171853926e-05, |
| "loss": 0.3027, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.583941605839416, |
| "grad_norm": 0.7003699851250199, |
| "learning_rate": 4.9453690018345144e-05, |
| "loss": 0.2944, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6423357664233577, |
| "grad_norm": 0.6949287751145983, |
| "learning_rate": 4.9031542398457974e-05, |
| "loss": 0.2966, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.7007299270072993, |
| "grad_norm": 0.5584013509075192, |
| "learning_rate": 4.849231551964771e-05, |
| "loss": 0.29, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.7591240875912408, |
| "grad_norm": 0.4653652334700578, |
| "learning_rate": 4.783863644106502e-05, |
| "loss": 0.2852, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.8175182481751825, |
| "grad_norm": 0.4915332959658402, |
| "learning_rate": 4.707368982147318e-05, |
| "loss": 0.2793, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.8759124087591241, |
| "grad_norm": 0.38234504853027035, |
| "learning_rate": 4.620120240391065e-05, |
| "loss": 0.2744, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.9343065693430657, |
| "grad_norm": 0.4184062685451573, |
| "learning_rate": 4.522542485937369e-05, |
| "loss": 0.2768, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.9927007299270073, |
| "grad_norm": 0.3704530279967013, |
| "learning_rate": 4.415111107797445e-05, |
| "loss": 0.2731, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3704530279967013, |
| "learning_rate": 4.2983495008466276e-05, |
| "loss": 0.268, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.0583941605839415, |
| "grad_norm": 0.7313410375782172, |
| "learning_rate": 4.172826515897146e-05, |
| "loss": 0.2522, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.1167883211678833, |
| "grad_norm": 0.42450006771608867, |
| "learning_rate": 4.039153688314145e-05, |
| "loss": 0.2535, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.1751824817518248, |
| "grad_norm": 0.35572497872199876, |
| "learning_rate": 3.897982258676867e-05, |
| "loss": 0.2468, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.2335766423357664, |
| "grad_norm": 0.3840433540509707, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.2487, |
| "step": 22 |
| }, |
| { |
| "epoch": 1.2919708029197081, |
| "grad_norm": 0.3225052917728175, |
| "learning_rate": 3.5959278669726935e-05, |
| "loss": 0.2476, |
| "step": 23 |
| }, |
| { |
| "epoch": 1.3503649635036497, |
| "grad_norm": 0.3308473331591701, |
| "learning_rate": 3.436516483539781e-05, |
| "loss": 0.2437, |
| "step": 24 |
| }, |
| { |
| "epoch": 1.4087591240875912, |
| "grad_norm": 0.2817912935418844, |
| "learning_rate": 3.272542485937369e-05, |
| "loss": 0.2401, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.4671532846715327, |
| "grad_norm": 0.3043640344168492, |
| "learning_rate": 3.104804738999169e-05, |
| "loss": 0.2443, |
| "step": 26 |
| }, |
| { |
| "epoch": 1.5255474452554745, |
| "grad_norm": 0.2334772712760174, |
| "learning_rate": 2.9341204441673266e-05, |
| "loss": 0.2428, |
| "step": 27 |
| }, |
| { |
| "epoch": 1.583941605839416, |
| "grad_norm": 0.252898158010675, |
| "learning_rate": 2.761321158169134e-05, |
| "loss": 0.2393, |
| "step": 28 |
| }, |
| { |
| "epoch": 1.6423357664233578, |
| "grad_norm": 0.19140920695753572, |
| "learning_rate": 2.587248741756253e-05, |
| "loss": 0.2341, |
| "step": 29 |
| }, |
| { |
| "epoch": 1.7007299270072993, |
| "grad_norm": 0.20503408790448174, |
| "learning_rate": 2.4127512582437485e-05, |
| "loss": 0.2364, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.7591240875912408, |
| "grad_norm": 0.16892330641174735, |
| "learning_rate": 2.238678841830867e-05, |
| "loss": 0.2317, |
| "step": 31 |
| }, |
| { |
| "epoch": 1.8175182481751824, |
| "grad_norm": 0.18596878772651007, |
| "learning_rate": 2.0658795558326743e-05, |
| "loss": 0.2338, |
| "step": 32 |
| }, |
| { |
| "epoch": 1.8759124087591241, |
| "grad_norm": 0.19674830584353212, |
| "learning_rate": 1.895195261000831e-05, |
| "loss": 0.2385, |
| "step": 33 |
| }, |
| { |
| "epoch": 1.9343065693430657, |
| "grad_norm": 0.13767693529332986, |
| "learning_rate": 1.7274575140626318e-05, |
| "loss": 0.2254, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.9927007299270074, |
| "grad_norm": 0.16469118823509277, |
| "learning_rate": 1.56348351646022e-05, |
| "loss": 0.2341, |
| "step": 35 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.16469118823509277, |
| "learning_rate": 1.4040721330273062e-05, |
| "loss": 0.2609, |
| "step": 36 |
| }, |
| { |
| "epoch": 2.0583941605839415, |
| "grad_norm": 0.5065177528025752, |
| "learning_rate": 1.2500000000000006e-05, |
| "loss": 0.2222, |
| "step": 37 |
| }, |
| { |
| "epoch": 2.116788321167883, |
| "grad_norm": 0.14060766874048272, |
| "learning_rate": 1.1020177413231334e-05, |
| "loss": 0.2206, |
| "step": 38 |
| }, |
| { |
| "epoch": 2.1751824817518246, |
| "grad_norm": 0.1675154659569595, |
| "learning_rate": 9.608463116858542e-06, |
| "loss": 0.2257, |
| "step": 39 |
| }, |
| { |
| "epoch": 2.2335766423357666, |
| "grad_norm": 0.16579788203125537, |
| "learning_rate": 8.271734841028553e-06, |
| "loss": 0.2211, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.291970802919708, |
| "grad_norm": 0.14053569348297154, |
| "learning_rate": 7.016504991533726e-06, |
| "loss": 0.2185, |
| "step": 41 |
| }, |
| { |
| "epoch": 2.3503649635036497, |
| "grad_norm": 0.1314158148612366, |
| "learning_rate": 5.848888922025553e-06, |
| "loss": 0.221, |
| "step": 42 |
| }, |
| { |
| "epoch": 2.408759124087591, |
| "grad_norm": 0.14777050841668893, |
| "learning_rate": 4.7745751406263165e-06, |
| "loss": 0.2219, |
| "step": 43 |
| }, |
| { |
| "epoch": 2.4671532846715327, |
| "grad_norm": 0.12484750949794649, |
| "learning_rate": 3.798797596089351e-06, |
| "loss": 0.2159, |
| "step": 44 |
| }, |
| { |
| "epoch": 2.5255474452554747, |
| "grad_norm": 0.11913135271242134, |
| "learning_rate": 2.9263101785268254e-06, |
| "loss": 0.2188, |
| "step": 45 |
| }, |
| { |
| "epoch": 2.5839416058394162, |
| "grad_norm": 0.11303754361927708, |
| "learning_rate": 2.1613635589349756e-06, |
| "loss": 0.222, |
| "step": 46 |
| }, |
| { |
| "epoch": 2.6423357664233578, |
| "grad_norm": 0.1181946037992105, |
| "learning_rate": 1.5076844803522922e-06, |
| "loss": 0.2218, |
| "step": 47 |
| }, |
| { |
| "epoch": 2.7007299270072993, |
| "grad_norm": 0.12180986717544033, |
| "learning_rate": 9.684576015420278e-07, |
| "loss": 0.2234, |
| "step": 48 |
| }, |
| { |
| "epoch": 2.759124087591241, |
| "grad_norm": 0.11708758326837156, |
| "learning_rate": 5.463099816548579e-07, |
| "loss": 0.2155, |
| "step": 49 |
| }, |
| { |
| "epoch": 2.8175182481751824, |
| "grad_norm": 0.11769771487209676, |
| "learning_rate": 2.4329828146074095e-07, |
| "loss": 0.2088, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.875912408759124, |
| "grad_norm": 0.11487124367774278, |
| "learning_rate": 6.089874350439506e-08, |
| "loss": 0.2137, |
| "step": 51 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 51, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 121368125374464.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|