| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6843455945252352, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01710863986313088, |
| "grad_norm": 0.5642565488815308, |
| "learning_rate": 9.8e-05, |
| "loss": 4.3835, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03421727972626176, |
| "grad_norm": 0.5467566251754761, |
| "learning_rate": 0.00019800000000000002, |
| "loss": 3.749, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05132591958939264, |
| "grad_norm": 0.6452801823616028, |
| "learning_rate": 0.00019985136108184378, |
| "loss": 3.6567, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06843455945252352, |
| "grad_norm": 0.5236774682998657, |
| "learning_rate": 0.00019939371189454347, |
| "loss": 3.6156, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0855431993156544, |
| "grad_norm": 0.5809645652770996, |
| "learning_rate": 0.00019862840708762515, |
| "loss": 3.5816, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10265183917878529, |
| "grad_norm": 0.5521299839019775, |
| "learning_rate": 0.00019755781552648373, |
| "loss": 3.5869, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11976047904191617, |
| "grad_norm": 0.6748865246772766, |
| "learning_rate": 0.0001961852510375556, |
| "loss": 3.5605, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13686911890504705, |
| "grad_norm": 0.5228627920150757, |
| "learning_rate": 0.00019451496215095671, |
| "loss": 3.5609, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15397775876817793, |
| "grad_norm": 0.6597743034362793, |
| "learning_rate": 0.0001925521189499101, |
| "loss": 3.5223, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1710863986313088, |
| "grad_norm": 0.6093740463256836, |
| "learning_rate": 0.00019030279706766984, |
| "loss": 3.4853, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1881950384944397, |
| "grad_norm": 0.602202832698822, |
| "learning_rate": 0.00018777395888147495, |
| "loss": 3.5089, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.20530367835757057, |
| "grad_norm": 0.6418355703353882, |
| "learning_rate": 0.00018497343196174478, |
| "loss": 3.5126, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22241231822070145, |
| "grad_norm": 0.6478594541549683, |
| "learning_rate": 0.0001819098848432218, |
| "loss": 3.506, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.23952095808383234, |
| "grad_norm": 0.5717478394508362, |
| "learning_rate": 0.00017859280019305883, |
| "loss": 3.4606, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2566295979469632, |
| "grad_norm": 0.7009835839271545, |
| "learning_rate": 0.00017503244545890345, |
| "loss": 3.4376, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2737382378100941, |
| "grad_norm": 0.6202488541603088, |
| "learning_rate": 0.00017123984108783336, |
| "loss": 3.4626, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.290846877673225, |
| "grad_norm": 0.6825055480003357, |
| "learning_rate": 0.0001672267264145158, |
| "loss": 3.4384, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.30795551753635586, |
| "grad_norm": 0.6829890608787537, |
| "learning_rate": 0.00016300552332417753, |
| "loss": 3.4205, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32506415739948674, |
| "grad_norm": 0.6822900176048279, |
| "learning_rate": 0.00015858929780286074, |
| "loss": 3.4484, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3421727972626176, |
| "grad_norm": 0.6704617142677307, |
| "learning_rate": 0.00015399171949397882, |
| "loss": 3.3913, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3592814371257485, |
| "grad_norm": 0.6927918791770935, |
| "learning_rate": 0.00014922701938635793, |
| "loss": 3.3897, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3763900769888794, |
| "grad_norm": 0.7238638401031494, |
| "learning_rate": 0.0001443099457647332, |
| "loss": 3.3743, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.39349871685201027, |
| "grad_norm": 0.7207512855529785, |
| "learning_rate": 0.00013925571855904722, |
| "loss": 3.3237, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.41060735671514115, |
| "grad_norm": 0.7312901616096497, |
| "learning_rate": 0.0001340799822338543, |
| "loss": 3.3378, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.42771599657827203, |
| "grad_norm": 0.761360228061676, |
| "learning_rate": 0.00012879875736365314, |
| "loss": 3.3589, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4448246364414029, |
| "grad_norm": 0.7985053658485413, |
| "learning_rate": 0.0001234283910440377, |
| "loss": 3.3407, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4619332763045338, |
| "grad_norm": 0.8397980332374573, |
| "learning_rate": 0.00011798550629216014, |
| "loss": 3.3116, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.47904191616766467, |
| "grad_norm": 0.8010730147361755, |
| "learning_rate": 0.00011248695059312721, |
| "loss": 3.2978, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.49615055603079555, |
| "grad_norm": 0.9002817273139954, |
| "learning_rate": 0.000106949743751596, |
| "loss": 3.2647, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5132591958939264, |
| "grad_norm": 0.9080646634101868, |
| "learning_rate": 0.00010139102520998512, |
| "loss": 3.2929, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5303678357570573, |
| "grad_norm": 0.9352893829345703, |
| "learning_rate": 9.582800099636817e-05, |
| "loss": 3.2759, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5474764756201882, |
| "grad_norm": 1.0353022813796997, |
| "learning_rate": 9.027789046626338e-05, |
| "loss": 3.2525, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.564585115483319, |
| "grad_norm": 0.9121481776237488, |
| "learning_rate": 8.475787300317043e-05, |
| "loss": 3.2432, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.58169375534645, |
| "grad_norm": 0.9885613918304443, |
| "learning_rate": 7.928503484283338e-05, |
| "loss": 3.2033, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5988023952095808, |
| "grad_norm": 0.6203758120536804, |
| "learning_rate": 7.387631618582624e-05, |
| "loss": 3.1833, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6159110350727117, |
| "grad_norm": 1.0348588228225708, |
| "learning_rate": 6.854845876216432e-05, |
| "loss": 3.1845, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6330196749358425, |
| "grad_norm": 1.0622155666351318, |
| "learning_rate": 6.331795401024621e-05, |
| "loss": 3.2105, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6501283147989735, |
| "grad_norm": 0.9984121322631836, |
| "learning_rate": 5.8200992030528875e-05, |
| "loss": 3.1651, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6672369546621043, |
| "grad_norm": 1.0703413486480713, |
| "learning_rate": 5.3213411471941155e-05, |
| "loss": 3.1492, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6843455945252352, |
| "grad_norm": 1.0491068363189697, |
| "learning_rate": 4.8370650506153093e-05, |
| "loss": 3.1829, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2923, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8590532124459008e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|