| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 980, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04094165813715456, |
| "grad_norm": 12.65462875366211, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 0.84, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08188331627430911, |
| "grad_norm": 15.826253890991211, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.9075, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12282497441146366, |
| "grad_norm": 12.505751609802246, |
| "learning_rate": 2.9e-06, |
| "loss": 0.9015, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16376663254861823, |
| "grad_norm": 15.053168296813965, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.8386, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2047082906857728, |
| "grad_norm": 15.304803848266602, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.8111, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.24564994882292732, |
| "grad_norm": 11.536665916442871, |
| "learning_rate": 5.9e-06, |
| "loss": 1.0011, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2865916069600819, |
| "grad_norm": 15.474630355834961, |
| "learning_rate": 6.9e-06, |
| "loss": 0.9621, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.32753326509723646, |
| "grad_norm": 16.180992126464844, |
| "learning_rate": 7.9e-06, |
| "loss": 0.9414, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.368474923234391, |
| "grad_norm": 19.92721939086914, |
| "learning_rate": 8.900000000000001e-06, |
| "loss": 0.8467, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4094165813715456, |
| "grad_norm": 13.27595043182373, |
| "learning_rate": 9.9e-06, |
| "loss": 0.8855, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4503582395087001, |
| "grad_norm": 17.830875396728516, |
| "learning_rate": 9.961702127659575e-06, |
| "loss": 0.8637, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.49129989764585463, |
| "grad_norm": 13.432883262634277, |
| "learning_rate": 9.919148936170213e-06, |
| "loss": 0.8807, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5322415557830092, |
| "grad_norm": 19.632150650024414, |
| "learning_rate": 9.876595744680851e-06, |
| "loss": 0.7876, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5731832139201638, |
| "grad_norm": 8.750787734985352, |
| "learning_rate": 9.834042553191491e-06, |
| "loss": 0.8812, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6141248720573184, |
| "grad_norm": 11.828136444091797, |
| "learning_rate": 9.79148936170213e-06, |
| "loss": 0.7476, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6550665301944729, |
| "grad_norm": 8.025004386901855, |
| "learning_rate": 9.748936170212768e-06, |
| "loss": 0.7658, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6960081883316275, |
| "grad_norm": 17.197458267211914, |
| "learning_rate": 9.706382978723406e-06, |
| "loss": 0.8227, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.736949846468782, |
| "grad_norm": 18.768630981445312, |
| "learning_rate": 9.663829787234044e-06, |
| "loss": 0.8761, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7778915046059366, |
| "grad_norm": 11.378251075744629, |
| "learning_rate": 9.621276595744682e-06, |
| "loss": 0.7956, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8188331627430911, |
| "grad_norm": 12.888134956359863, |
| "learning_rate": 9.57872340425532e-06, |
| "loss": 0.7741, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8597748208802457, |
| "grad_norm": 9.256698608398438, |
| "learning_rate": 9.536170212765959e-06, |
| "loss": 0.8193, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9007164790174002, |
| "grad_norm": 9.790871620178223, |
| "learning_rate": 9.493617021276597e-06, |
| "loss": 0.8279, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9416581371545547, |
| "grad_norm": 17.922643661499023, |
| "learning_rate": 9.451063829787235e-06, |
| "loss": 0.8167, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.9825997952917093, |
| "grad_norm": 15.21545696258545, |
| "learning_rate": 9.408510638297873e-06, |
| "loss": 0.678, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_general_loss": 0.7405520677566528, |
| "eval_general_runtime": 257.7451, |
| "eval_general_samples_per_second": 3.55, |
| "eval_general_steps_per_second": 0.888, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_code_loss": 0.8061306476593018, |
| "eval_code_runtime": 300.2792, |
| "eval_code_samples_per_second": 3.057, |
| "eval_code_steps_per_second": 0.766, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_stem_loss": 0.7527948021888733, |
| "eval_stem_runtime": 253.8295, |
| "eval_stem_samples_per_second": 3.601, |
| "eval_stem_steps_per_second": 0.902, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.0204708290685773, |
| "grad_norm": 9.183212280273438, |
| "learning_rate": 9.365957446808511e-06, |
| "loss": 0.6701, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0614124872057318, |
| "grad_norm": 11.502631187438965, |
| "learning_rate": 9.32340425531915e-06, |
| "loss": 0.6915, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.1023541453428864, |
| "grad_norm": 14.637332916259766, |
| "learning_rate": 9.280851063829788e-06, |
| "loss": 0.7034, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.143295803480041, |
| "grad_norm": 22.042236328125, |
| "learning_rate": 9.238297872340426e-06, |
| "loss": 0.7613, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.1842374616171956, |
| "grad_norm": 10.717690467834473, |
| "learning_rate": 9.195744680851064e-06, |
| "loss": 0.7115, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.22517911975435, |
| "grad_norm": 12.595451354980469, |
| "learning_rate": 9.153191489361702e-06, |
| "loss": 0.6753, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.2661207778915047, |
| "grad_norm": 15.185698509216309, |
| "learning_rate": 9.11063829787234e-06, |
| "loss": 0.6543, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3070624360286591, |
| "grad_norm": 8.96109676361084, |
| "learning_rate": 9.06808510638298e-06, |
| "loss": 0.7118, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.3480040941658138, |
| "grad_norm": 11.143041610717773, |
| "learning_rate": 9.025531914893619e-06, |
| "loss": 0.7155, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.3889457523029682, |
| "grad_norm": 13.331513404846191, |
| "learning_rate": 8.982978723404257e-06, |
| "loss": 0.5873, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.429887410440123, |
| "grad_norm": 7.453923225402832, |
| "learning_rate": 8.940425531914895e-06, |
| "loss": 0.6085, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.4708290685772774, |
| "grad_norm": 10.874267578125, |
| "learning_rate": 8.897872340425533e-06, |
| "loss": 0.7046, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.511770726714432, |
| "grad_norm": 18.965225219726562, |
| "learning_rate": 8.855319148936171e-06, |
| "loss": 0.7275, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.5527123848515865, |
| "grad_norm": 11.133731842041016, |
| "learning_rate": 8.81276595744681e-06, |
| "loss": 0.7185, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.593654042988741, |
| "grad_norm": 9.591411590576172, |
| "learning_rate": 8.770212765957448e-06, |
| "loss": 0.6325, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.6345957011258956, |
| "grad_norm": 9.676285743713379, |
| "learning_rate": 8.727659574468086e-06, |
| "loss": 0.5229, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.6755373592630503, |
| "grad_norm": 17.216745376586914, |
| "learning_rate": 8.685106382978724e-06, |
| "loss": 0.5627, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.7164790174002047, |
| "grad_norm": 12.413490295410156, |
| "learning_rate": 8.642553191489362e-06, |
| "loss": 0.6627, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.7574206755373591, |
| "grad_norm": 18.200937271118164, |
| "learning_rate": 8.6e-06, |
| "loss": 0.821, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.7983623336745138, |
| "grad_norm": 7.938803195953369, |
| "learning_rate": 8.557446808510639e-06, |
| "loss": 0.5181, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.8393039918116685, |
| "grad_norm": 16.005313873291016, |
| "learning_rate": 8.514893617021277e-06, |
| "loss": 0.5963, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.880245649948823, |
| "grad_norm": 7.592184066772461, |
| "learning_rate": 8.472340425531915e-06, |
| "loss": 0.5118, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.9211873080859774, |
| "grad_norm": 7.5901384353637695, |
| "learning_rate": 8.429787234042553e-06, |
| "loss": 0.6951, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.962128966223132, |
| "grad_norm": 15.962983131408691, |
| "learning_rate": 8.387234042553192e-06, |
| "loss": 0.5939, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 21.49174690246582, |
| "learning_rate": 8.34468085106383e-06, |
| "loss": 0.5632, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_general_loss": 0.6747614741325378, |
| "eval_general_runtime": 258.8315, |
| "eval_general_samples_per_second": 3.535, |
| "eval_general_steps_per_second": 0.885, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_code_loss": 0.7175714373588562, |
| "eval_code_runtime": 301.597, |
| "eval_code_samples_per_second": 3.044, |
| "eval_code_steps_per_second": 0.763, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_stem_loss": 0.6549679040908813, |
| "eval_stem_runtime": 254.6647, |
| "eval_stem_samples_per_second": 3.589, |
| "eval_stem_steps_per_second": 0.899, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0409416581371547, |
| "grad_norm": 15.108848571777344, |
| "learning_rate": 8.30212765957447e-06, |
| "loss": 0.5936, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0818833162743093, |
| "grad_norm": 5.768886566162109, |
| "learning_rate": 8.259574468085108e-06, |
| "loss": 0.567, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.1228249744114636, |
| "grad_norm": 7.63210916519165, |
| "learning_rate": 8.217021276595746e-06, |
| "loss": 0.4894, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.1637666325486182, |
| "grad_norm": 19.224069595336914, |
| "learning_rate": 8.174468085106384e-06, |
| "loss": 0.5285, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.204708290685773, |
| "grad_norm": 9.942646026611328, |
| "learning_rate": 8.131914893617023e-06, |
| "loss": 0.4813, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.245649948822927, |
| "grad_norm": 9.9766206741333, |
| "learning_rate": 8.08936170212766e-06, |
| "loss": 0.5319, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.286591606960082, |
| "grad_norm": 10.004892349243164, |
| "learning_rate": 8.046808510638299e-06, |
| "loss": 0.4969, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.3275332650972365, |
| "grad_norm": 10.710165977478027, |
| "learning_rate": 8.004255319148937e-06, |
| "loss": 0.5878, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.368474923234391, |
| "grad_norm": 12.612700462341309, |
| "learning_rate": 7.961702127659575e-06, |
| "loss": 0.479, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.409416581371546, |
| "grad_norm": 9.927840232849121, |
| "learning_rate": 7.919148936170214e-06, |
| "loss": 0.4759, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.4503582395087, |
| "grad_norm": 20.39271354675293, |
| "learning_rate": 7.876595744680852e-06, |
| "loss": 0.5486, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.4912998976458547, |
| "grad_norm": 10.480104446411133, |
| "learning_rate": 7.83404255319149e-06, |
| "loss": 0.4948, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.5322415557830094, |
| "grad_norm": 7.528670310974121, |
| "learning_rate": 7.791489361702128e-06, |
| "loss": 0.4786, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.5731832139201636, |
| "grad_norm": 13.00081729888916, |
| "learning_rate": 7.748936170212766e-06, |
| "loss": 0.6303, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.6141248720573182, |
| "grad_norm": 15.72313404083252, |
| "learning_rate": 7.706382978723405e-06, |
| "loss": 0.5574, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.655066530194473, |
| "grad_norm": 7.435535907745361, |
| "learning_rate": 7.663829787234043e-06, |
| "loss": 0.6302, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.6960081883316276, |
| "grad_norm": 12.462494850158691, |
| "learning_rate": 7.621276595744681e-06, |
| "loss": 0.5318, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.7369498464687823, |
| "grad_norm": 13.541357040405273, |
| "learning_rate": 7.578723404255319e-06, |
| "loss": 0.5983, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.7778915046059365, |
| "grad_norm": 10.254051208496094, |
| "learning_rate": 7.536170212765958e-06, |
| "loss": 0.5733, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.818833162743091, |
| "grad_norm": 9.48480224609375, |
| "learning_rate": 7.4936170212765964e-06, |
| "loss": 0.4989, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.859774820880246, |
| "grad_norm": 15.369186401367188, |
| "learning_rate": 7.4510638297872355e-06, |
| "loss": 0.5001, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.9007164790174, |
| "grad_norm": 19.331863403320312, |
| "learning_rate": 7.408510638297874e-06, |
| "loss": 0.5833, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.9416581371545547, |
| "grad_norm": 8.6927490234375, |
| "learning_rate": 7.365957446808512e-06, |
| "loss": 0.5061, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.9825997952917094, |
| "grad_norm": 10.688565254211426, |
| "learning_rate": 7.32340425531915e-06, |
| "loss": 0.5222, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_general_loss": 0.672736406326294, |
| "eval_general_runtime": 258.2016, |
| "eval_general_samples_per_second": 3.544, |
| "eval_general_steps_per_second": 0.887, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_code_loss": 0.6921422481536865, |
| "eval_code_runtime": 300.8317, |
| "eval_code_samples_per_second": 3.052, |
| "eval_code_steps_per_second": 0.765, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_stem_loss": 0.6068028211593628, |
| "eval_stem_runtime": 254.127, |
| "eval_stem_samples_per_second": 3.597, |
| "eval_stem_steps_per_second": 0.901, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.0204708290685773, |
| "grad_norm": 18.046438217163086, |
| "learning_rate": 7.280851063829788e-06, |
| "loss": 0.6208, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.061412487205732, |
| "grad_norm": 9.861907005310059, |
| "learning_rate": 7.2382978723404265e-06, |
| "loss": 0.4168, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.1023541453428862, |
| "grad_norm": 12.162917137145996, |
| "learning_rate": 7.195744680851065e-06, |
| "loss": 0.4801, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.143295803480041, |
| "grad_norm": 8.550954818725586, |
| "learning_rate": 7.153191489361703e-06, |
| "loss": 0.4002, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.1842374616171956, |
| "grad_norm": 13.538202285766602, |
| "learning_rate": 7.110638297872341e-06, |
| "loss": 0.4252, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.2251791197543502, |
| "grad_norm": 16.993202209472656, |
| "learning_rate": 7.068085106382979e-06, |
| "loss": 0.4298, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.2661207778915045, |
| "grad_norm": 8.579163551330566, |
| "learning_rate": 7.0255319148936175e-06, |
| "loss": 0.4718, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.307062436028659, |
| "grad_norm": 11.938567161560059, |
| "learning_rate": 6.982978723404256e-06, |
| "loss": 0.4885, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.348004094165814, |
| "grad_norm": 10.950126647949219, |
| "learning_rate": 6.940425531914894e-06, |
| "loss": 0.525, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.3889457523029685, |
| "grad_norm": 11.802833557128906, |
| "learning_rate": 6.897872340425532e-06, |
| "loss": 0.4151, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.4298874104401227, |
| "grad_norm": 11.907119750976562, |
| "learning_rate": 6.85531914893617e-06, |
| "loss": 0.4043, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.4708290685772774, |
| "grad_norm": 16.365446090698242, |
| "learning_rate": 6.8127659574468085e-06, |
| "loss": 0.4343, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.511770726714432, |
| "grad_norm": 15.271421432495117, |
| "learning_rate": 6.770212765957447e-06, |
| "loss": 0.3902, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.5527123848515867, |
| "grad_norm": 13.381245613098145, |
| "learning_rate": 6.727659574468086e-06, |
| "loss": 0.3752, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.593654042988741, |
| "grad_norm": 10.570069313049316, |
| "learning_rate": 6.685106382978725e-06, |
| "loss": 0.4237, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.6345957011258956, |
| "grad_norm": 11.140630722045898, |
| "learning_rate": 6.642553191489363e-06, |
| "loss": 0.4037, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.6755373592630503, |
| "grad_norm": 10.076521873474121, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 0.3291, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.7164790174002045, |
| "grad_norm": 21.271434783935547, |
| "learning_rate": 6.557446808510639e-06, |
| "loss": 0.513, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.757420675537359, |
| "grad_norm": 16.138254165649414, |
| "learning_rate": 6.514893617021278e-06, |
| "loss": 0.438, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.798362333674514, |
| "grad_norm": 12.374245643615723, |
| "learning_rate": 6.472340425531916e-06, |
| "loss": 0.4408, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.8393039918116685, |
| "grad_norm": 10.709432601928711, |
| "learning_rate": 6.429787234042554e-06, |
| "loss": 0.4978, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.880245649948823, |
| "grad_norm": 19.277435302734375, |
| "learning_rate": 6.387234042553192e-06, |
| "loss": 0.4056, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.9211873080859774, |
| "grad_norm": 15.328817367553711, |
| "learning_rate": 6.34468085106383e-06, |
| "loss": 0.3557, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.962128966223132, |
| "grad_norm": 10.352958679199219, |
| "learning_rate": 6.302127659574469e-06, |
| "loss": 0.3695, |
| "step": 970 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 12.802416801452637, |
| "learning_rate": 6.259574468085107e-06, |
| "loss": 0.3172, |
| "step": 980 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_general_loss": 0.7220921516418457, |
| "eval_general_runtime": 258.5979, |
| "eval_general_samples_per_second": 3.538, |
| "eval_general_steps_per_second": 0.886, |
| "step": 980 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_code_loss": 0.7049041390419006, |
| "eval_code_runtime": 301.544, |
| "eval_code_samples_per_second": 3.044, |
| "eval_code_steps_per_second": 0.763, |
| "step": 980 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_stem_loss": 0.5855021476745605, |
| "eval_stem_runtime": 254.8479, |
| "eval_stem_samples_per_second": 3.586, |
| "eval_stem_steps_per_second": 0.899, |
| "step": 980 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2450, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.1350506468917903e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|