{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.1008142690965492,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07754943776657619,
      "grad_norm": 0.41543584930406274,
      "learning_rate": 1.984472049689441e-05,
      "loss": 0.3571,
      "step": 50
    },
    {
      "epoch": 0.15509887553315238,
      "grad_norm": 0.2360007761618504,
      "learning_rate": 1.9689440993788823e-05,
      "loss": 0.1481,
      "step": 100
    },
    {
      "epoch": 0.23264831329972857,
      "grad_norm": 0.2459469865789585,
      "learning_rate": 1.9534161490683232e-05,
      "loss": 0.1413,
      "step": 150
    },
    {
      "epoch": 0.31019775106630476,
      "grad_norm": 0.1836701486527724,
      "learning_rate": 1.937888198757764e-05,
      "loss": 0.139,
      "step": 200
    },
    {
      "epoch": 0.38774718883288095,
      "grad_norm": 0.2053715213473303,
      "learning_rate": 1.922360248447205e-05,
      "loss": 0.1359,
      "step": 250
    },
    {
      "epoch": 0.46529662659945714,
      "grad_norm": 0.20210517044823975,
      "learning_rate": 1.906832298136646e-05,
      "loss": 0.1343,
      "step": 300
    },
    {
      "epoch": 0.5428460643660333,
      "grad_norm": 0.17133333163173686,
      "learning_rate": 1.891304347826087e-05,
      "loss": 0.1336,
      "step": 350
    },
    {
      "epoch": 0.6203955021326095,
      "grad_norm": 0.16513031282114732,
      "learning_rate": 1.875776397515528e-05,
      "loss": 0.1323,
      "step": 400
    },
    {
      "epoch": 0.6979449398991857,
      "grad_norm": 0.16414324471781971,
      "learning_rate": 1.8602484472049693e-05,
      "loss": 0.1318,
      "step": 450
    },
    {
      "epoch": 0.7754943776657619,
      "grad_norm": 0.16672201846671922,
      "learning_rate": 1.84472049689441e-05,
      "loss": 0.1307,
      "step": 500
    },
    {
      "epoch": 0.8530438154323381,
      "grad_norm": 0.1588831815209266,
      "learning_rate": 1.829192546583851e-05,
      "loss": 0.1301,
      "step": 550
    },
    {
      "epoch": 0.9305932531989143,
      "grad_norm": 0.17229438485787515,
      "learning_rate": 1.8136645962732923e-05,
      "loss": 0.13,
      "step": 600
    },
    {
      "epoch": 1.0077549437766575,
      "grad_norm": 0.1626649495069495,
      "learning_rate": 1.798136645962733e-05,
      "loss": 0.1284,
      "step": 650
    },
    {
      "epoch": 1.0853043815432337,
      "grad_norm": 0.16302373242598406,
      "learning_rate": 1.782608695652174e-05,
      "loss": 0.1256,
      "step": 700
    },
    {
      "epoch": 1.16285381930981,
      "grad_norm": 0.15938032051749196,
      "learning_rate": 1.767080745341615e-05,
      "loss": 0.1252,
      "step": 750
    },
    {
      "epoch": 1.240403257076386,
      "grad_norm": 0.19138770209482472,
      "learning_rate": 1.751552795031056e-05,
      "loss": 0.1244,
      "step": 800
    },
    {
      "epoch": 1.3179526948429623,
      "grad_norm": 0.15984339587089894,
      "learning_rate": 1.736024844720497e-05,
      "loss": 0.1253,
      "step": 850
    },
    {
      "epoch": 1.3955021326095385,
      "grad_norm": 0.1502502706654219,
      "learning_rate": 1.720496894409938e-05,
      "loss": 0.1248,
      "step": 900
    },
    {
      "epoch": 1.4730515703761147,
      "grad_norm": 0.13661135477508957,
      "learning_rate": 1.704968944099379e-05,
      "loss": 0.125,
      "step": 950
    },
    {
      "epoch": 1.5506010081426909,
      "grad_norm": 0.24839381800982097,
      "learning_rate": 1.68944099378882e-05,
      "loss": 0.1253,
      "step": 1000
    },
    {
      "epoch": 1.628150445909267,
      "grad_norm": 0.12815184233515442,
      "learning_rate": 1.673913043478261e-05,
      "loss": 0.1243,
      "step": 1050
    },
    {
      "epoch": 1.7056998836758432,
      "grad_norm": 0.13153520379094585,
      "learning_rate": 1.658385093167702e-05,
      "loss": 0.1237,
      "step": 1100
    },
    {
      "epoch": 1.7832493214424194,
      "grad_norm": 0.1189084339669079,
      "learning_rate": 1.642857142857143e-05,
      "loss": 0.1245,
      "step": 1150
    },
    {
      "epoch": 1.8607987592089956,
      "grad_norm": 0.15491708781159905,
      "learning_rate": 1.627329192546584e-05,
      "loss": 0.1235,
      "step": 1200
    },
    {
      "epoch": 1.9383481969755718,
      "grad_norm": 0.12739351593431672,
      "learning_rate": 1.611801242236025e-05,
      "loss": 0.1243,
      "step": 1250
    },
    {
      "epoch": 2.015509887553315,
      "grad_norm": 0.12465194041174449,
      "learning_rate": 1.596273291925466e-05,
      "loss": 0.1219,
      "step": 1300
    },
    {
      "epoch": 2.0930593253198913,
      "grad_norm": 0.1404295274618665,
      "learning_rate": 1.580745341614907e-05,
      "loss": 0.1186,
      "step": 1350
    },
    {
      "epoch": 2.1706087630864674,
      "grad_norm": 0.1359342551816161,
      "learning_rate": 1.565217391304348e-05,
      "loss": 0.1179,
      "step": 1400
    },
    {
      "epoch": 2.2481582008530436,
      "grad_norm": 0.15332233562241915,
      "learning_rate": 1.549689440993789e-05,
      "loss": 0.1185,
      "step": 1450
    },
    {
      "epoch": 2.32570763861962,
      "grad_norm": 0.11859966428735469,
      "learning_rate": 1.5341614906832298e-05,
      "loss": 0.1185,
      "step": 1500
    },
    {
      "epoch": 2.403257076386196,
      "grad_norm": 0.1493931915889296,
      "learning_rate": 1.5186335403726709e-05,
      "loss": 0.1186,
      "step": 1550
    },
    {
      "epoch": 2.480806514152772,
      "grad_norm": 0.1319324405407719,
      "learning_rate": 1.5031055900621118e-05,
      "loss": 0.1189,
      "step": 1600
    },
    {
      "epoch": 2.5583559519193484,
      "grad_norm": 0.12024679968154829,
      "learning_rate": 1.4875776397515529e-05,
      "loss": 0.1187,
      "step": 1650
    },
    {
      "epoch": 2.6359053896859246,
      "grad_norm": 0.11739796193835754,
      "learning_rate": 1.472049689440994e-05,
      "loss": 0.1187,
      "step": 1700
    },
    {
      "epoch": 2.7134548274525008,
      "grad_norm": 0.13178912063786213,
      "learning_rate": 1.456521739130435e-05,
      "loss": 0.1187,
      "step": 1750
    },
    {
      "epoch": 2.791004265219077,
      "grad_norm": 0.11853885559187641,
      "learning_rate": 1.4409937888198759e-05,
      "loss": 0.119,
      "step": 1800
    },
    {
      "epoch": 2.868553702985653,
      "grad_norm": 0.12827528414635014,
      "learning_rate": 1.425465838509317e-05,
      "loss": 0.1187,
      "step": 1850
    },
    {
      "epoch": 2.9461031407522293,
      "grad_norm": 0.13153122815676796,
      "learning_rate": 1.409937888198758e-05,
      "loss": 0.1188,
      "step": 1900
    },
    {
      "epoch": 3.023264831329973,
      "grad_norm": 0.15315655570709347,
      "learning_rate": 1.3944099378881988e-05,
      "loss": 0.1156,
      "step": 1950
    },
    {
      "epoch": 3.1008142690965492,
      "grad_norm": 0.11997442542975086,
      "learning_rate": 1.3788819875776398e-05,
      "loss": 0.1105,
      "step": 2000
    }
  ],
  "logging_steps": 50,
  "max_steps": 6440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0334530111995904e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|