| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 165, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0060790273556231, |
| "grad_norm": 1.133876085281372, |
| "learning_rate": 0.0, |
| "loss": 2.3054, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0121580547112462, |
| "grad_norm": 1.1317460536956787, |
| "learning_rate": 4e-05, |
| "loss": 2.3129, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0182370820668693, |
| "grad_norm": 1.030227780342102, |
| "learning_rate": 8e-05, |
| "loss": 2.2997, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0243161094224924, |
| "grad_norm": 0.7727698087692261, |
| "learning_rate": 0.00012, |
| "loss": 2.0895, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.030395136778115502, |
| "grad_norm": 0.6157111525535583, |
| "learning_rate": 0.00016, |
| "loss": 1.9285, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0364741641337386, |
| "grad_norm": 0.45515450835227966, |
| "learning_rate": 0.0002, |
| "loss": 1.6672, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 0.3927247226238251, |
| "learning_rate": 0.0001999953280342959, |
| "loss": 1.4969, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0486322188449848, |
| "grad_norm": 0.36588212847709656, |
| "learning_rate": 0.00019998131257372876, |
| "loss": 1.3842, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0547112462006079, |
| "grad_norm": 0.29039424657821655, |
| "learning_rate": 0.0001999579549278937, |
| "loss": 1.2499, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.060790273556231005, |
| "grad_norm": 0.279226690530777, |
| "learning_rate": 0.00019992525727931303, |
| "loss": 1.1685, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0668693009118541, |
| "grad_norm": 0.272398978471756, |
| "learning_rate": 0.00019988322268323268, |
| "loss": 1.0638, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0729483282674772, |
| "grad_norm": 0.24636079370975494, |
| "learning_rate": 0.0001998318550673364, |
| "loss": 0.9584, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0790273556231003, |
| "grad_norm": 0.22141209244728088, |
| "learning_rate": 0.00019977115923137912, |
| "loss": 0.9138, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 0.17475497722625732, |
| "learning_rate": 0.00019970114084673796, |
| "loss": 0.8515, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0911854103343465, |
| "grad_norm": 0.15117621421813965, |
| "learning_rate": 0.0001996218064558829, |
| "loss": 0.8233, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0972644376899696, |
| "grad_norm": 0.11647526919841766, |
| "learning_rate": 0.00019953316347176488, |
| "loss": 0.8347, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.1033434650455927, |
| "grad_norm": 0.10608438402414322, |
| "learning_rate": 0.00019943522017712358, |
| "loss": 0.7888, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1094224924012158, |
| "grad_norm": 0.10528700798749924, |
| "learning_rate": 0.0001993279857237133, |
| "loss": 0.7655, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.11550151975683891, |
| "grad_norm": 0.09133293479681015, |
| "learning_rate": 0.0001992114701314478, |
| "loss": 0.7585, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.12158054711246201, |
| "grad_norm": 0.09861589223146439, |
| "learning_rate": 0.0001990856842874641, |
| "loss": 0.7616, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 0.07328511029481888, |
| "learning_rate": 0.0001989506399451051, |
| "loss": 0.746, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1337386018237082, |
| "grad_norm": 0.08320208638906479, |
| "learning_rate": 0.00019880634972282166, |
| "loss": 0.729, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1398176291793313, |
| "grad_norm": 0.08053930848836899, |
| "learning_rate": 0.0001986528271029931, |
| "loss": 0.7052, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.1458966565349544, |
| "grad_norm": 0.08553291857242584, |
| "learning_rate": 0.00019849008643066772, |
| "loss": 0.7167, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1519756838905775, |
| "grad_norm": 0.07856528460979462, |
| "learning_rate": 0.00019831814291222232, |
| "loss": 0.707, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1580547112462006, |
| "grad_norm": 0.0697573572397232, |
| "learning_rate": 0.00019813701261394136, |
| "loss": 0.706, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.1641337386018237, |
| "grad_norm": 0.07752593606710434, |
| "learning_rate": 0.0001979467124605156, |
| "loss": 0.702, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 0.06536618620157242, |
| "learning_rate": 0.0001977472602334609, |
| "loss": 0.6612, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1762917933130699, |
| "grad_norm": 0.061260443180799484, |
| "learning_rate": 0.0001975386745694565, |
| "loss": 0.6432, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.182370820668693, |
| "grad_norm": 0.04959681257605553, |
| "learning_rate": 0.00019732097495860386, |
| "loss": 0.6545, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1884498480243161, |
| "grad_norm": 0.05118938162922859, |
| "learning_rate": 0.0001970941817426052, |
| "loss": 0.6564, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1945288753799392, |
| "grad_norm": 0.05098890885710716, |
| "learning_rate": 0.0001968583161128631, |
| "loss": 0.6751, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.2006079027355623, |
| "grad_norm": 0.049598027020692825, |
| "learning_rate": 0.00019661340010850026, |
| "loss": 0.6582, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.2066869300911854, |
| "grad_norm": 0.052051279693841934, |
| "learning_rate": 0.00019635945661430006, |
| "loss": 0.6279, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 0.04154731333255768, |
| "learning_rate": 0.00019609650935856844, |
| "loss": 0.6344, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2188449848024316, |
| "grad_norm": 0.045474398881196976, |
| "learning_rate": 0.00019582458291091663, |
| "loss": 0.6195, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.22492401215805471, |
| "grad_norm": 0.04392531141638756, |
| "learning_rate": 0.00019554370267996538, |
| "loss": 0.6394, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.23100303951367782, |
| "grad_norm": 0.04103963077068329, |
| "learning_rate": 0.0001952538949109708, |
| "loss": 0.6334, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.23708206686930092, |
| "grad_norm": 0.042054690420627594, |
| "learning_rate": 0.00019495518668337201, |
| "loss": 0.6239, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.24316109422492402, |
| "grad_norm": 0.0393654890358448, |
| "learning_rate": 0.00019464760590826098, |
| "loss": 0.6054, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.24924012158054712, |
| "grad_norm": 0.042832907289266586, |
| "learning_rate": 0.0001943311813257743, |
| "loss": 0.5769, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 0.03982216864824295, |
| "learning_rate": 0.00019400594250240798, |
| "loss": 0.5921, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2613981762917933, |
| "grad_norm": 0.03954484313726425, |
| "learning_rate": 0.0001936719198282545, |
| "loss": 0.5976, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2674772036474164, |
| "grad_norm": 0.042563363909721375, |
| "learning_rate": 0.00019332914451416347, |
| "loss": 0.6016, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.2735562310030395, |
| "grad_norm": 0.04000777006149292, |
| "learning_rate": 0.00019297764858882514, |
| "loss": 0.5822, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2796352583586626, |
| "grad_norm": 0.04084771126508713, |
| "learning_rate": 0.00019261746489577765, |
| "loss": 0.6122, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.040883008390665054, |
| "learning_rate": 0.00019224862709033824, |
| "loss": 0.5898, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2917933130699088, |
| "grad_norm": 0.04313352331519127, |
| "learning_rate": 0.00019187116963645842, |
| "loss": 0.5852, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 0.04391175135970116, |
| "learning_rate": 0.00019148512780350384, |
| "loss": 0.5901, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.303951367781155, |
| "grad_norm": 0.041302260011434555, |
| "learning_rate": 0.0001910905376629585, |
| "loss": 0.5725, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3100303951367781, |
| "grad_norm": 0.044717274606227875, |
| "learning_rate": 0.00019068743608505455, |
| "loss": 0.5396, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.3161094224924012, |
| "grad_norm": 0.04290296137332916, |
| "learning_rate": 0.0001902758607353269, |
| "loss": 0.5888, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.3221884498480243, |
| "grad_norm": 0.04425125569105148, |
| "learning_rate": 0.0001898558500710939, |
| "loss": 0.5614, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3282674772036474, |
| "grad_norm": 0.047949157655239105, |
| "learning_rate": 0.00018942744333786397, |
| "loss": 0.5434, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3343465045592705, |
| "grad_norm": 0.0461716391146183, |
| "learning_rate": 0.0001889906805656684, |
| "loss": 0.5788, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 0.04746852442622185, |
| "learning_rate": 0.000188545602565321, |
| "loss": 0.5431, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.3465045592705167, |
| "grad_norm": 0.04825344309210777, |
| "learning_rate": 0.00018809225092460488, |
| "loss": 0.5372, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3525835866261398, |
| "grad_norm": 0.046712443232536316, |
| "learning_rate": 0.00018763066800438636, |
| "loss": 0.5721, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3586626139817629, |
| "grad_norm": 0.045053232461214066, |
| "learning_rate": 0.00018716089693465696, |
| "loss": 0.5632, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.364741641337386, |
| "grad_norm": 0.04859543964266777, |
| "learning_rate": 0.00018668298161050309, |
| "loss": 0.579, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3708206686930091, |
| "grad_norm": 0.05270425230264664, |
| "learning_rate": 0.00018619696668800492, |
| "loss": 0.5625, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.3768996960486322, |
| "grad_norm": 0.05261973291635513, |
| "learning_rate": 0.00018570289758006346, |
| "loss": 0.5377, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 0.04979139566421509, |
| "learning_rate": 0.0001852008204521572, |
| "loss": 0.5403, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.3890577507598784, |
| "grad_norm": 0.05422956123948097, |
| "learning_rate": 0.0001846907822180286, |
| "loss": 0.5251, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3951367781155015, |
| "grad_norm": 0.057373154908418655, |
| "learning_rate": 0.00018417283053530044, |
| "loss": 0.5237, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.4012158054711246, |
| "grad_norm": 0.054097920656204224, |
| "learning_rate": 0.00018364701380102266, |
| "loss": 0.5083, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.4072948328267477, |
| "grad_norm": 0.05596352368593216, |
| "learning_rate": 0.0001831133811471503, |
| "loss": 0.5185, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.4133738601823708, |
| "grad_norm": 0.054655492305755615, |
| "learning_rate": 0.0001825719824359524, |
| "loss": 0.5103, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.4194528875379939, |
| "grad_norm": 0.056628547608852386, |
| "learning_rate": 0.0001820228682553533, |
| "loss": 0.5402, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 0.058673664927482605, |
| "learning_rate": 0.00018146608991420534, |
| "loss": 0.5156, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4316109422492401, |
| "grad_norm": 0.0580279715359211, |
| "learning_rate": 0.00018090169943749476, |
| "loss": 0.5077, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.4376899696048632, |
| "grad_norm": 0.06008626148104668, |
| "learning_rate": 0.00018032974956148063, |
| "loss": 0.4982, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.44376899696048633, |
| "grad_norm": 0.06102442368865013, |
| "learning_rate": 0.00017975029372876706, |
| "loss": 0.5117, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.44984802431610943, |
| "grad_norm": 0.061185382306575775, |
| "learning_rate": 0.0001791633860833096, |
| "loss": 0.4933, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.45592705167173253, |
| "grad_norm": 0.06262445449829102, |
| "learning_rate": 0.00017856908146535603, |
| "loss": 0.4907, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.46200607902735563, |
| "grad_norm": 0.06194239482283592, |
| "learning_rate": 0.00017796743540632223, |
| "loss": 0.4722, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 0.06256049871444702, |
| "learning_rate": 0.00017735850412360331, |
| "loss": 0.4935, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.47416413373860183, |
| "grad_norm": 0.06734279543161392, |
| "learning_rate": 0.00017674234451532065, |
| "loss": 0.4767, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.48024316109422494, |
| "grad_norm": 0.06772830337285995, |
| "learning_rate": 0.00017611901415500535, |
| "loss": 0.4915, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.48632218844984804, |
| "grad_norm": 0.06995881348848343, |
| "learning_rate": 0.00017548857128621875, |
| "loss": 0.4723, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.49240121580547114, |
| "grad_norm": 0.06601176410913467, |
| "learning_rate": 0.00017485107481711012, |
| "loss": 0.4831, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.49848024316109424, |
| "grad_norm": 0.06836414337158203, |
| "learning_rate": 0.00017420658431491223, |
| "loss": 0.4585, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.5045592705167173, |
| "grad_norm": 0.06948156654834747, |
| "learning_rate": 0.00017355516000037554, |
| "loss": 0.4624, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 0.06856788694858551, |
| "learning_rate": 0.00017289686274214118, |
| "loss": 0.4497, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.5167173252279635, |
| "grad_norm": 0.07304105907678604, |
| "learning_rate": 0.0001722317540510534, |
| "loss": 0.4697, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5227963525835866, |
| "grad_norm": 0.07297949492931366, |
| "learning_rate": 0.00017155989607441213, |
| "loss": 0.4376, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.5288753799392097, |
| "grad_norm": 0.07408228516578674, |
| "learning_rate": 0.00017088135159016584, |
| "loss": 0.4493, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.5349544072948328, |
| "grad_norm": 0.07207636535167694, |
| "learning_rate": 0.00017019618400104572, |
| "loss": 0.4612, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.541033434650456, |
| "grad_norm": 0.07454758137464523, |
| "learning_rate": 0.00016950445732864127, |
| "loss": 0.4123, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.547112462006079, |
| "grad_norm": 0.07566685974597931, |
| "learning_rate": 0.00016880623620741842, |
| "loss": 0.4632, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 0.07650725543498993, |
| "learning_rate": 0.00016810158587867973, |
| "loss": 0.4153, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.5592705167173252, |
| "grad_norm": 0.07634485512971878, |
| "learning_rate": 0.0001673905721844686, |
| "loss": 0.4402, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5653495440729484, |
| "grad_norm": 0.07571671158075333, |
| "learning_rate": 0.00016667326156141692, |
| "loss": 0.4308, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.08016683161258698, |
| "learning_rate": 0.00016594972103453726, |
| "loss": 0.4213, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5775075987841946, |
| "grad_norm": 0.07403143495321274, |
| "learning_rate": 0.0001652200182109602, |
| "loss": 0.4485, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5835866261398176, |
| "grad_norm": 0.08598003536462784, |
| "learning_rate": 0.00016448422127361706, |
| "loss": 0.4044, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5896656534954408, |
| "grad_norm": 0.0837491899728775, |
| "learning_rate": 0.000163742398974869, |
| "loss": 0.3965, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 0.08433262258768082, |
| "learning_rate": 0.00016299462063008272, |
| "loss": 0.4267, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.601823708206687, |
| "grad_norm": 0.07738591730594635, |
| "learning_rate": 0.00016224095611115384, |
| "loss": 0.4175, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.60790273556231, |
| "grad_norm": 0.08330941945314407, |
| "learning_rate": 0.00016148147583997812, |
| "loss": 0.4324, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6139817629179332, |
| "grad_norm": 0.08629601448774338, |
| "learning_rate": 0.00016071625078187114, |
| "loss": 0.3949, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.6200607902735562, |
| "grad_norm": 0.08496759831905365, |
| "learning_rate": 0.0001599453524389374, |
| "loss": 0.4181, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.6261398176291794, |
| "grad_norm": 0.08593132346868515, |
| "learning_rate": 0.00015916885284338937, |
| "loss": 0.3979, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.6322188449848024, |
| "grad_norm": 0.08198531717061996, |
| "learning_rate": 0.00015838682455081657, |
| "loss": 0.396, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 0.08349744975566864, |
| "learning_rate": 0.00015759934063340627, |
| "loss": 0.3773, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6443768996960486, |
| "grad_norm": 0.08445355296134949, |
| "learning_rate": 0.00015680647467311557, |
| "loss": 0.3946, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.6504559270516718, |
| "grad_norm": 0.0808950737118721, |
| "learning_rate": 0.00015600830075479603, |
| "loss": 0.3926, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.6565349544072948, |
| "grad_norm": 0.08728586137294769, |
| "learning_rate": 0.00015520489345927096, |
| "loss": 0.4248, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.662613981762918, |
| "grad_norm": 0.08645470440387726, |
| "learning_rate": 0.00015439632785636706, |
| "loss": 0.4051, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.668693009118541, |
| "grad_norm": 0.07910045236349106, |
| "learning_rate": 0.00015358267949789966, |
| "loss": 0.4167, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6747720364741642, |
| "grad_norm": 0.08322255313396454, |
| "learning_rate": 0.0001527640244106133, |
| "loss": 0.3842, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 0.07981768995523453, |
| "learning_rate": 0.00015194043908907775, |
| "loss": 0.3942, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6869300911854104, |
| "grad_norm": 0.08817645907402039, |
| "learning_rate": 0.00015111200048854056, |
| "loss": 0.3739, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6930091185410334, |
| "grad_norm": 0.09770014137029648, |
| "learning_rate": 0.00015027878601773633, |
| "loss": 0.3781, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6990881458966566, |
| "grad_norm": 0.09042941778898239, |
| "learning_rate": 0.0001494408735316537, |
| "loss": 0.3897, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.7051671732522796, |
| "grad_norm": 0.08232049643993378, |
| "learning_rate": 0.0001485983413242606, |
| "loss": 0.3566, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.7112462006079028, |
| "grad_norm": 0.08187402784824371, |
| "learning_rate": 0.00014775126812118864, |
| "loss": 0.3559, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.7173252279635258, |
| "grad_norm": 0.08564823865890503, |
| "learning_rate": 0.00014689973307237687, |
| "loss": 0.37, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 0.09082309901714325, |
| "learning_rate": 0.00014604381574467615, |
| "loss": 0.3962, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.729483282674772, |
| "grad_norm": 0.09014427661895752, |
| "learning_rate": 0.0001451835961144145, |
| "loss": 0.3391, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7355623100303952, |
| "grad_norm": 0.08382211625576019, |
| "learning_rate": 0.00014431915455992414, |
| "loss": 0.3547, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.7416413373860182, |
| "grad_norm": 0.08397499471902847, |
| "learning_rate": 0.000143450571854031, |
| "loss": 0.3479, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.7477203647416414, |
| "grad_norm": 0.08822325617074966, |
| "learning_rate": 0.00014257792915650728, |
| "loss": 0.3824, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.7537993920972644, |
| "grad_norm": 0.08629824221134186, |
| "learning_rate": 0.00014170130800648814, |
| "loss": 0.3548, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7598784194528876, |
| "grad_norm": 0.09454140067100525, |
| "learning_rate": 0.0001408207903148525, |
| "loss": 0.3524, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 0.08818770945072174, |
| "learning_rate": 0.00013993645835656953, |
| "loss": 0.388, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7720364741641338, |
| "grad_norm": 0.08908054232597351, |
| "learning_rate": 0.0001390483947630109, |
| "loss": 0.3548, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7781155015197568, |
| "grad_norm": 0.08153887093067169, |
| "learning_rate": 0.00013815668251422952, |
| "loss": 0.3545, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.78419452887538, |
| "grad_norm": 0.08948613703250885, |
| "learning_rate": 0.0001372614049312064, |
| "loss": 0.3483, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.790273556231003, |
| "grad_norm": 0.08655694872140884, |
| "learning_rate": 0.0001363626456680647, |
| "loss": 0.321, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7963525835866262, |
| "grad_norm": 0.09818850457668304, |
| "learning_rate": 0.00013546048870425356, |
| "loss": 0.3647, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.8024316109422492, |
| "grad_norm": 0.0866372138261795, |
| "learning_rate": 0.00013455501833670088, |
| "loss": 0.3604, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 0.09384460002183914, |
| "learning_rate": 0.0001336463191719367, |
| "loss": 0.357, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.8145896656534954, |
| "grad_norm": 0.09135902673006058, |
| "learning_rate": 0.00013273447611818767, |
| "loss": 0.3406, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.8206686930091185, |
| "grad_norm": 0.08802594989538193, |
| "learning_rate": 0.00013181957437744332, |
| "loss": 0.3548, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.8267477203647416, |
| "grad_norm": 0.09025990217924118, |
| "learning_rate": 0.00013090169943749476, |
| "loss": 0.3505, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.8328267477203647, |
| "grad_norm": 0.09130747616291046, |
| "learning_rate": 0.00012998093706394675, |
| "loss": 0.3159, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.8389057750759878, |
| "grad_norm": 0.09548977762460709, |
| "learning_rate": 0.00012905737329220392, |
| "loss": 0.3473, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.8449848024316109, |
| "grad_norm": 0.08869072049856186, |
| "learning_rate": 0.00012813109441943166, |
| "loss": 0.3379, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.09292670339345932, |
| "learning_rate": 0.00012720218699649243, |
| "loss": 0.3252, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.08339939266443253, |
| "learning_rate": 0.0001262707378198587, |
| "loss": 0.2908, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.8632218844984803, |
| "grad_norm": 0.0901230052113533, |
| "learning_rate": 0.00012533683392350263, |
| "loss": 0.3346, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.8693009118541033, |
| "grad_norm": 0.09229591488838196, |
| "learning_rate": 0.00012440056257076375, |
| "loss": 0.3523, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8753799392097265, |
| "grad_norm": 0.09824731945991516, |
| "learning_rate": 0.00012346201124619502, |
| "loss": 0.3416, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.8814589665653495, |
| "grad_norm": 0.09328845143318176, |
| "learning_rate": 0.00012252126764738844, |
| "loss": 0.3211, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8875379939209727, |
| "grad_norm": 0.09492561221122742, |
| "learning_rate": 0.00012157841967678063, |
| "loss": 0.3404, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 0.09995546191930771, |
| "learning_rate": 0.00012063355543343924, |
| "loss": 0.3188, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.8996960486322189, |
| "grad_norm": 0.09694822877645493, |
| "learning_rate": 0.00011968676320483103, |
| "loss": 0.3269, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.9057750759878419, |
| "grad_norm": 0.11103739589452744, |
| "learning_rate": 0.00011873813145857249, |
| "loss": 0.3228, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.9118541033434651, |
| "grad_norm": 0.10173939168453217, |
| "learning_rate": 0.00011778774883416323, |
| "loss": 0.3033, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9179331306990881, |
| "grad_norm": 0.10053914040327072, |
| "learning_rate": 0.00011683570413470383, |
| "loss": 0.3357, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.9240121580547113, |
| "grad_norm": 0.09117776155471802, |
| "learning_rate": 0.00011588208631859807, |
| "loss": 0.3288, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.9300911854103343, |
| "grad_norm": 0.08972764015197754, |
| "learning_rate": 0.00011492698449124042, |
| "loss": 0.3134, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.09842713177204132, |
| "learning_rate": 0.0001139704878966906, |
| "loss": 0.3258, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.9422492401215805, |
| "grad_norm": 0.08585759252309799, |
| "learning_rate": 0.00011301268590933434, |
| "loss": 0.3121, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.9483282674772037, |
| "grad_norm": 0.10005568712949753, |
| "learning_rate": 0.0001120536680255323, |
| "loss": 0.3199, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.9544072948328267, |
| "grad_norm": 0.10690546780824661, |
| "learning_rate": 0.00011109352385525783, |
| "loss": 0.3008, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.9604863221884499, |
| "grad_norm": 0.0924290269613266, |
| "learning_rate": 0.00011013234311372353, |
| "loss": 0.339, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.9665653495440729, |
| "grad_norm": 0.09407492727041245, |
| "learning_rate": 0.00010917021561299863, |
| "loss": 0.3282, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.9726443768996961, |
| "grad_norm": 0.09656916558742523, |
| "learning_rate": 0.00010820723125361684, |
| "loss": 0.285, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 0.0891689881682396, |
| "learning_rate": 0.00010724348001617625, |
| "loss": 0.3134, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9848024316109423, |
| "grad_norm": 0.09076245874166489, |
| "learning_rate": 0.00010627905195293135, |
| "loss": 0.3014, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9908814589665653, |
| "grad_norm": 0.09239046275615692, |
| "learning_rate": 0.00010531403717937887, |
| "loss": 0.2828, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.9969604863221885, |
| "grad_norm": 0.10273317247629166, |
| "learning_rate": 0.00010434852586583736, |
| "loss": 0.3159, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1356026530265808, |
| "learning_rate": 0.00010338260822902167, |
| "loss": 0.2756, |
| "step": 165 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 330, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 15, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.452241334344024e+18, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|