{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9978118161925602, "eval_steps": 500, "global_step": 171, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005835156819839533, "grad_norm": 3.0967645370477803, "learning_rate": 0.0, "loss": 1.0698, "step": 1 }, { "epoch": 0.011670313639679067, "grad_norm": 3.240964653147952, "learning_rate": 5.555555555555555e-07, "loss": 1.0714, "step": 2 }, { "epoch": 0.0175054704595186, "grad_norm": 3.286415256381705, "learning_rate": 1.111111111111111e-06, "loss": 1.1575, "step": 3 }, { "epoch": 0.023340627279358133, "grad_norm": 4.198907257722359, "learning_rate": 1.6666666666666667e-06, "loss": 1.359, "step": 4 }, { "epoch": 0.029175784099197667, "grad_norm": 3.4693235279579637, "learning_rate": 2.222222222222222e-06, "loss": 0.9649, "step": 5 }, { "epoch": 0.0350109409190372, "grad_norm": 3.0568945268705883, "learning_rate": 2.7777777777777783e-06, "loss": 1.0989, "step": 6 }, { "epoch": 0.040846097738876735, "grad_norm": 2.2641958909637157, "learning_rate": 3.3333333333333333e-06, "loss": 0.9482, "step": 7 }, { "epoch": 0.046681254558716266, "grad_norm": 2.5090273909864367, "learning_rate": 3.88888888888889e-06, "loss": 1.1167, "step": 8 }, { "epoch": 0.0525164113785558, "grad_norm": 1.976815998901389, "learning_rate": 4.444444444444444e-06, "loss": 1.0178, "step": 9 }, { "epoch": 0.058351568198395334, "grad_norm": 2.1819711414316108, "learning_rate": 5e-06, "loss": 1.2776, "step": 10 }, { "epoch": 0.06418672501823487, "grad_norm": 1.8201699012478207, "learning_rate": 5.555555555555557e-06, "loss": 1.1955, "step": 11 }, { "epoch": 0.0700218818380744, "grad_norm": 1.5462205611641233, "learning_rate": 6.111111111111112e-06, "loss": 0.9814, "step": 12 }, { "epoch": 0.07585703865791393, "grad_norm": 1.8781521337402638, "learning_rate": 6.666666666666667e-06, "loss": 1.0493, "step": 13 }, { "epoch": 0.08169219547775347, "grad_norm": 1.7561324099023312, "learning_rate": 7.222222222222223e-06, "loss": 0.9224, "step": 14 }, { "epoch": 0.087527352297593, "grad_norm": 1.4418880184516636, "learning_rate": 7.77777777777778e-06, "loss": 0.8082, "step": 15 }, { "epoch": 0.09336250911743253, "grad_norm": 1.8346753334483505, "learning_rate": 8.333333333333334e-06, "loss": 1.0255, "step": 16 }, { "epoch": 0.09919766593727207, "grad_norm": 1.5851713574171153, "learning_rate": 8.888888888888888e-06, "loss": 0.9778, "step": 17 }, { "epoch": 0.1050328227571116, "grad_norm": 1.670718527610444, "learning_rate": 9.444444444444445e-06, "loss": 0.9377, "step": 18 }, { "epoch": 0.11086797957695113, "grad_norm": 1.6015249574772341, "learning_rate": 1e-05, "loss": 0.9287, "step": 19 }, { "epoch": 0.11670313639679067, "grad_norm": 1.6989482004469072, "learning_rate": 9.998945997517957e-06, "loss": 1.152, "step": 20 }, { "epoch": 0.12253829321663019, "grad_norm": 1.3452249233397808, "learning_rate": 9.99578443444032e-06, "loss": 0.8807, "step": 21 }, { "epoch": 0.12837345003646974, "grad_norm": 2.1487234818996686, "learning_rate": 9.990516643685222e-06, "loss": 0.7627, "step": 22 }, { "epoch": 0.13420860685630925, "grad_norm": 1.923473946779632, "learning_rate": 9.983144846158472e-06, "loss": 1.2667, "step": 23 }, { "epoch": 0.1400437636761488, "grad_norm": 1.1277306360256565, "learning_rate": 9.973672149817232e-06, "loss": 0.7689, "step": 24 }, { "epoch": 0.14587892049598833, "grad_norm": 1.1475575701592111, "learning_rate": 9.96210254835968e-06, "loss": 0.7796, "step": 25 }, { "epoch": 0.15171407731582787, "grad_norm": 1.423352108857882, "learning_rate": 9.948440919541277e-06, "loss": 1.0162, "step": 26 }, { "epoch": 0.1575492341356674, "grad_norm": 1.3087161085162522, "learning_rate": 9.932693023118299e-06, "loss": 0.9945, "step": 27 }, { "epoch": 0.16338439095550694, "grad_norm": 1.3363607553089747, "learning_rate": 9.91486549841951e-06, "loss": 0.8672, "step": 28 }, { "epoch": 0.16921954777534645, "grad_norm": 1.1887359433001867, "learning_rate": 9.894965861547023e-06, "loss": 0.9, "step": 29 }, { "epoch": 0.175054704595186, "grad_norm": 1.3150047218140335, "learning_rate": 9.873002502207502e-06, "loss": 0.786, "step": 30 }, { "epoch": 0.18088986141502553, "grad_norm": 1.3895338060465705, "learning_rate": 9.848984680175049e-06, "loss": 0.935, "step": 31 }, { "epoch": 0.18672501823486506, "grad_norm": 1.3275940096513228, "learning_rate": 9.822922521387277e-06, "loss": 0.804, "step": 32 }, { "epoch": 0.1925601750547046, "grad_norm": 1.0560366942912123, "learning_rate": 9.794827013676206e-06, "loss": 0.7541, "step": 33 }, { "epoch": 0.19839533187454414, "grad_norm": 0.9888394869609451, "learning_rate": 9.764710002135784e-06, "loss": 0.6956, "step": 34 }, { "epoch": 0.20423048869438365, "grad_norm": 1.5276601674154802, "learning_rate": 9.732584184127973e-06, "loss": 1.1283, "step": 35 }, { "epoch": 0.2100656455142232, "grad_norm": 1.0255635415282915, "learning_rate": 9.698463103929542e-06, "loss": 0.7307, "step": 36 }, { "epoch": 0.21590080233406272, "grad_norm": 1.3172712637289348, "learning_rate": 9.66236114702178e-06, "loss": 0.912, "step": 37 }, { "epoch": 0.22173595915390226, "grad_norm": 1.2265556745233968, "learning_rate": 9.62429353402556e-06, "loss": 0.8612, "step": 38 }, { "epoch": 0.2275711159737418, "grad_norm": 1.382293666822052, "learning_rate": 9.584276314284316e-06, "loss": 0.9029, "step": 39 }, { "epoch": 0.23340627279358134, "grad_norm": 1.4564772263747658, "learning_rate": 9.542326359097619e-06, "loss": 0.9388, "step": 40 }, { "epoch": 0.23924142961342085, "grad_norm": 1.0351934922881194, "learning_rate": 9.498461354608228e-06, "loss": 0.7048, "step": 41 }, { "epoch": 0.24507658643326038, "grad_norm": 1.1855152897517165, "learning_rate": 9.452699794345583e-06, "loss": 0.8375, "step": 42 }, { "epoch": 0.25091174325309995, "grad_norm": 0.9858455620030542, "learning_rate": 9.405060971428924e-06, "loss": 0.6869, "step": 43 }, { "epoch": 0.2567469000729395, "grad_norm": 0.9902232882820138, "learning_rate": 9.355564970433288e-06, "loss": 0.7302, "step": 44 }, { "epoch": 0.26258205689277897, "grad_norm": 1.0675035604533485, "learning_rate": 9.30423265892184e-06, "loss": 0.8118, "step": 45 }, { "epoch": 0.2684172137126185, "grad_norm": 1.3867123886210095, "learning_rate": 9.251085678648072e-06, "loss": 1.0246, "step": 46 }, { "epoch": 0.27425237053245805, "grad_norm": 1.5259388197713282, "learning_rate": 9.196146436431635e-06, "loss": 1.1016, "step": 47 }, { "epoch": 0.2800875273522976, "grad_norm": 1.2982224709981671, "learning_rate": 9.13943809471159e-06, "loss": 0.9131, "step": 48 }, { "epoch": 0.2859226841721371, "grad_norm": 1.0192488730773452, "learning_rate": 9.08098456178111e-06, "loss": 0.6878, "step": 49 }, { "epoch": 0.29175784099197666, "grad_norm": 1.3595848189757955, "learning_rate": 9.020810481707709e-06, "loss": 0.9331, "step": 50 }, { "epoch": 0.2975929978118162, "grad_norm": 0.8812342597637117, "learning_rate": 8.958941223943292e-06, "loss": 0.6287, "step": 51 }, { "epoch": 0.30342815463165573, "grad_norm": 1.2990640418383663, "learning_rate": 8.895402872628352e-06, "loss": 0.9154, "step": 52 }, { "epoch": 0.30926331145149527, "grad_norm": 1.2083829619551534, "learning_rate": 8.83022221559489e-06, "loss": 0.8133, "step": 53 }, { "epoch": 0.3150984682713348, "grad_norm": 1.1431625625586879, "learning_rate": 8.763426733072624e-06, "loss": 0.7709, "step": 54 }, { "epoch": 0.32093362509117435, "grad_norm": 1.0434131741083061, "learning_rate": 8.695044586103297e-06, "loss": 0.8007, "step": 55 }, { "epoch": 0.3267687819110139, "grad_norm": 1.0904984432710232, "learning_rate": 8.625104604667965e-06, "loss": 0.8123, "step": 56 }, { "epoch": 0.33260393873085337, "grad_norm": 0.920394581392736, "learning_rate": 8.553636275532236e-06, "loss": 0.6425, "step": 57 }, { "epoch": 0.3384390955506929, "grad_norm": 1.1503386459221627, "learning_rate": 8.480669729814635e-06, "loss": 0.7206, "step": 58 }, { "epoch": 0.34427425237053244, "grad_norm": 1.090627383298351, "learning_rate": 8.40623573028327e-06, "loss": 0.7557, "step": 59 }, { "epoch": 0.350109409190372, "grad_norm": 1.2240349403722315, "learning_rate": 8.330365658386252e-06, "loss": 0.896, "step": 60 }, { "epoch": 0.3559445660102115, "grad_norm": 1.0765178282667567, "learning_rate": 8.25309150102121e-06, "loss": 0.7681, "step": 61 }, { "epoch": 0.36177972283005105, "grad_norm": 0.9793957317021386, "learning_rate": 8.174445837049614e-06, "loss": 0.7504, "step": 62 }, { "epoch": 0.3676148796498906, "grad_norm": 1.2232761418608094, "learning_rate": 8.094461823561473e-06, "loss": 0.8488, "step": 63 }, { "epoch": 0.37345003646973013, "grad_norm": 1.2088998059050347, "learning_rate": 8.013173181896283e-06, "loss": 0.8523, "step": 64 }, { "epoch": 0.37928519328956967, "grad_norm": 1.2900325123864722, "learning_rate": 7.930614183426074e-06, "loss": 0.8181, "step": 65 }, { "epoch": 0.3851203501094092, "grad_norm": 0.9454791838776316, "learning_rate": 7.846819635106569e-06, "loss": 0.7437, "step": 66 }, { "epoch": 0.39095550692924874, "grad_norm": 0.9144587470068872, "learning_rate": 7.76182486480253e-06, "loss": 0.628, "step": 67 }, { "epoch": 0.3967906637490883, "grad_norm": 1.15724937547755, "learning_rate": 7.675665706393502e-06, "loss": 0.871, "step": 68 }, { "epoch": 0.4026258205689278, "grad_norm": 1.5015191452686714, "learning_rate": 7.588378484666214e-06, "loss": 1.0221, "step": 69 }, { "epoch": 0.4084609773887673, "grad_norm": 1.39998905729372, "learning_rate": 7.500000000000001e-06, "loss": 0.9229, "step": 70 }, { "epoch": 0.41429613420860684, "grad_norm": 0.948492711117696, "learning_rate": 7.4105675128517456e-06, "loss": 0.6874, "step": 71 }, { "epoch": 0.4201312910284464, "grad_norm": 1.258374093991861, "learning_rate": 7.320118728046818e-06, "loss": 0.9105, "step": 72 }, { "epoch": 0.4259664478482859, "grad_norm": 0.9884107530136707, "learning_rate": 7.2286917788826926e-06, "loss": 0.6701, "step": 73 }, { "epoch": 0.43180160466812545, "grad_norm": 1.4165000586156353, "learning_rate": 7.136325211051905e-06, "loss": 1.0041, "step": 74 }, { "epoch": 0.437636761487965, "grad_norm": 1.1602616170268807, "learning_rate": 7.043057966391158e-06, "loss": 0.8137, "step": 75 }, { "epoch": 0.4434719183078045, "grad_norm": 0.8628006157003906, "learning_rate": 6.948929366463397e-06, "loss": 0.6294, "step": 76 }, { "epoch": 0.44930707512764406, "grad_norm": 1.0039942958404424, "learning_rate": 6.8539790959798045e-06, "loss": 0.7109, "step": 77 }, { "epoch": 0.4551422319474836, "grad_norm": 1.3931149797330953, "learning_rate": 6.758247186068684e-06, "loss": 0.9198, "step": 78 }, { "epoch": 0.46097738876732314, "grad_norm": 1.0513701294639524, "learning_rate": 6.6617739973982985e-06, "loss": 0.7459, "step": 79 }, { "epoch": 0.4668125455871627, "grad_norm": 1.028695671279348, "learning_rate": 6.5646002031607726e-06, "loss": 0.746, "step": 80 }, { "epoch": 0.4726477024070022, "grad_norm": 0.9959898755507389, "learning_rate": 6.466766771924231e-06, "loss": 0.8316, "step": 81 }, { "epoch": 0.4784828592268417, "grad_norm": 1.0084464738139896, "learning_rate": 6.368314950360416e-06, "loss": 0.6801, "step": 82 }, { "epoch": 0.48431801604668123, "grad_norm": 1.2115089332593143, "learning_rate": 6.269286245855039e-06, "loss": 0.9926, "step": 83 }, { "epoch": 0.49015317286652077, "grad_norm": 1.0623534367007055, "learning_rate": 6.169722409008244e-06, "loss": 0.7846, "step": 84 }, { "epoch": 0.4959883296863603, "grad_norm": 0.9618691552575331, "learning_rate": 6.0696654160324875e-06, "loss": 0.7579, "step": 85 }, { "epoch": 0.5018234865061999, "grad_norm": 1.4236190190616003, "learning_rate": 5.9691574510553505e-06, "loss": 0.9145, "step": 86 }, { "epoch": 0.5076586433260394, "grad_norm": 0.9781934808099418, "learning_rate": 5.8682408883346535e-06, "loss": 0.7268, "step": 87 }, { "epoch": 0.513493800145879, "grad_norm": 1.4459804636005047, "learning_rate": 5.766958274393428e-06, "loss": 0.9873, "step": 88 }, { "epoch": 0.5193289569657185, "grad_norm": 1.402656043092622, "learning_rate": 5.66535231008227e-06, "loss": 0.9982, "step": 89 }, { "epoch": 0.5251641137855579, "grad_norm": 1.0200801826456665, "learning_rate": 5.5634658325766066e-06, "loss": 0.678, "step": 90 }, { "epoch": 0.5309992706053975, "grad_norm": 0.9733189988828815, "learning_rate": 5.46134179731651e-06, "loss": 0.7651, "step": 91 }, { "epoch": 0.536834427425237, "grad_norm": 1.4664206961259343, "learning_rate": 5.359023259896638e-06, "loss": 1.1306, "step": 92 }, { "epoch": 0.5426695842450766, "grad_norm": 0.8504168428338762, "learning_rate": 5.2565533579139484e-06, "loss": 0.63, "step": 93 }, { "epoch": 0.5485047410649161, "grad_norm": 0.9453356240059136, "learning_rate": 5.153975292780852e-06, "loss": 0.7414, "step": 94 }, { "epoch": 0.5543398978847557, "grad_norm": 1.2873508014079351, "learning_rate": 5.05133231151145e-06, "loss": 0.9491, "step": 95 }, { "epoch": 0.5601750547045952, "grad_norm": 1.120596468774601, "learning_rate": 4.948667688488552e-06, "loss": 0.8434, "step": 96 }, { "epoch": 0.5660102115244348, "grad_norm": 0.933260578220278, "learning_rate": 4.846024707219149e-06, "loss": 0.6954, "step": 97 }, { "epoch": 0.5718453683442742, "grad_norm": 1.1505604485932606, "learning_rate": 4.7434466420860515e-06, "loss": 0.8766, "step": 98 }, { "epoch": 0.5776805251641138, "grad_norm": 1.3190064743230776, "learning_rate": 4.640976740103363e-06, "loss": 0.9601, "step": 99 }, { "epoch": 0.5835156819839533, "grad_norm": 1.1031574587195832, "learning_rate": 4.53865820268349e-06, "loss": 0.8262, "step": 100 }, { "epoch": 0.5893508388037928, "grad_norm": 1.2140720848727085, "learning_rate": 4.436534167423395e-06, "loss": 0.8474, "step": 101 }, { "epoch": 0.5951859956236324, "grad_norm": 1.0868094387995848, "learning_rate": 4.334647689917734e-06, "loss": 0.7998, "step": 102 }, { "epoch": 0.6010211524434719, "grad_norm": 1.0979874833235823, "learning_rate": 4.233041725606573e-06, "loss": 0.7538, "step": 103 }, { "epoch": 0.6068563092633115, "grad_norm": 0.8239976607859979, "learning_rate": 4.131759111665349e-06, "loss": 0.6354, "step": 104 }, { "epoch": 0.612691466083151, "grad_norm": 1.2986211049877099, "learning_rate": 4.03084254894465e-06, "loss": 0.8842, "step": 105 }, { "epoch": 0.6185266229029905, "grad_norm": 1.4668023858002492, "learning_rate": 3.930334583967514e-06, "loss": 1.1808, "step": 106 }, { "epoch": 0.62436177972283, "grad_norm": 0.9088094469191689, "learning_rate": 3.8302775909917585e-06, "loss": 0.7396, "step": 107 }, { "epoch": 0.6301969365426696, "grad_norm": 1.1598630921383477, "learning_rate": 3.730713754144961e-06, "loss": 0.8171, "step": 108 }, { "epoch": 0.6360320933625091, "grad_norm": 0.9110392040510746, "learning_rate": 3.6316850496395863e-06, "loss": 0.7386, "step": 109 }, { "epoch": 0.6418672501823487, "grad_norm": 1.4073871245462084, "learning_rate": 3.5332332280757706e-06, "loss": 0.8708, "step": 110 }, { "epoch": 0.6477024070021882, "grad_norm": 1.0974400375996476, "learning_rate": 3.4353997968392295e-06, "loss": 0.7883, "step": 111 }, { "epoch": 0.6535375638220278, "grad_norm": 0.9061825638441261, "learning_rate": 3.3382260026017027e-06, "loss": 0.6915, "step": 112 }, { "epoch": 0.6593727206418672, "grad_norm": 1.0771803335915262, "learning_rate": 3.241752813931316e-06, "loss": 0.8647, "step": 113 }, { "epoch": 0.6652078774617067, "grad_norm": 0.9568570846689433, "learning_rate": 3.1460209040201967e-06, "loss": 0.743, "step": 114 }, { "epoch": 0.6710430342815463, "grad_norm": 0.8038754456744998, "learning_rate": 3.0510706335366034e-06, "loss": 0.6186, "step": 115 }, { "epoch": 0.6768781911013858, "grad_norm": 1.0490487646998612, "learning_rate": 2.956942033608843e-06, "loss": 0.7842, "step": 116 }, { "epoch": 0.6827133479212254, "grad_norm": 1.1528124191592586, "learning_rate": 2.863674788948097e-06, "loss": 0.8293, "step": 117 }, { "epoch": 0.6885485047410649, "grad_norm": 1.1151481901518225, "learning_rate": 2.771308221117309e-06, "loss": 0.8212, "step": 118 }, { "epoch": 0.6943836615609045, "grad_norm": 0.9417128722043704, "learning_rate": 2.6798812719531843e-06, "loss": 0.6844, "step": 119 }, { "epoch": 0.700218818380744, "grad_norm": 1.032191371370444, "learning_rate": 2.5894324871482557e-06, "loss": 0.8145, "step": 120 }, { "epoch": 0.7060539752005836, "grad_norm": 0.8894619973494138, "learning_rate": 2.5000000000000015e-06, "loss": 0.7, "step": 121 }, { "epoch": 0.711889132020423, "grad_norm": 1.0056726773650362, "learning_rate": 2.411621515333788e-06, "loss": 0.7701, "step": 122 }, { "epoch": 0.7177242888402626, "grad_norm": 1.0869409622185178, "learning_rate": 2.324334293606499e-06, "loss": 0.8878, "step": 123 }, { "epoch": 0.7235594456601021, "grad_norm": 0.9975686046452821, "learning_rate": 2.238175135197471e-06, "loss": 0.7396, "step": 124 }, { "epoch": 0.7293946024799417, "grad_norm": 0.9651532423367729, "learning_rate": 2.1531803648934333e-06, "loss": 0.7326, "step": 125 }, { "epoch": 0.7352297592997812, "grad_norm": 1.2022500141898183, "learning_rate": 2.069385816573928e-06, "loss": 0.8903, "step": 126 }, { "epoch": 0.7410649161196207, "grad_norm": 1.1194585298700934, "learning_rate": 1.9868268181037186e-06, "loss": 0.7492, "step": 127 }, { "epoch": 0.7469000729394603, "grad_norm": 0.9563972874909548, "learning_rate": 1.9055381764385272e-06, "loss": 0.7019, "step": 128 }, { "epoch": 0.7527352297592997, "grad_norm": 1.1116002934004348, "learning_rate": 1.8255541629503865e-06, "loss": 0.8464, "step": 129 }, { "epoch": 0.7585703865791393, "grad_norm": 0.9834353364914193, "learning_rate": 1.746908498978791e-06, "loss": 0.7098, "step": 130 }, { "epoch": 0.7644055433989788, "grad_norm": 0.9133138576459355, "learning_rate": 1.6696343416137495e-06, "loss": 0.7116, "step": 131 }, { "epoch": 0.7702407002188184, "grad_norm": 1.1533333715544398, "learning_rate": 1.5937642697167288e-06, "loss": 0.8582, "step": 132 }, { "epoch": 0.7760758570386579, "grad_norm": 1.1643056283458053, "learning_rate": 1.5193302701853674e-06, "loss": 0.8634, "step": 133 }, { "epoch": 0.7819110138584975, "grad_norm": 1.2470889663194495, "learning_rate": 1.4463637244677648e-06, "loss": 1.0618, "step": 134 }, { "epoch": 0.787746170678337, "grad_norm": 1.137971799681056, "learning_rate": 1.374895395332037e-06, "loss": 0.7659, "step": 135 }, { "epoch": 0.7935813274981766, "grad_norm": 0.8296991248159793, "learning_rate": 1.3049554138967052e-06, "loss": 0.6202, "step": 136 }, { "epoch": 0.799416484318016, "grad_norm": 1.267967690991332, "learning_rate": 1.2365732669273778e-06, "loss": 0.7476, "step": 137 }, { "epoch": 0.8052516411378556, "grad_norm": 1.0879072186919871, "learning_rate": 1.1697777844051105e-06, "loss": 0.7832, "step": 138 }, { "epoch": 0.8110867979576951, "grad_norm": 0.9784037046492985, "learning_rate": 1.1045971273716476e-06, "loss": 0.75, "step": 139 }, { "epoch": 0.8169219547775346, "grad_norm": 0.969625700866293, "learning_rate": 1.0410587760567104e-06, "loss": 0.8109, "step": 140 }, { "epoch": 0.8227571115973742, "grad_norm": 1.0370004026477495, "learning_rate": 9.791895182922911e-07, "loss": 0.7486, "step": 141 }, { "epoch": 0.8285922684172137, "grad_norm": 1.313976521343513, "learning_rate": 9.190154382188921e-07, "loss": 0.9392, "step": 142 }, { "epoch": 0.8344274252370533, "grad_norm": 1.3968351116673179, "learning_rate": 8.605619052884106e-07, "loss": 1.0106, "step": 143 }, { "epoch": 0.8402625820568927, "grad_norm": 0.9927938469465771, "learning_rate": 8.03853563568367e-07, "loss": 0.7784, "step": 144 }, { "epoch": 0.8460977388767323, "grad_norm": 0.9788007150864164, "learning_rate": 7.489143213519301e-07, "loss": 0.8396, "step": 145 }, { "epoch": 0.8519328956965718, "grad_norm": 1.0639499300007744, "learning_rate": 6.957673410781617e-07, "loss": 0.7964, "step": 146 }, { "epoch": 0.8577680525164114, "grad_norm": 1.2358144380534066, "learning_rate": 6.444350295667112e-07, "loss": 0.8458, "step": 147 }, { "epoch": 0.8636032093362509, "grad_norm": 1.1284354446219842, "learning_rate": 5.949390285710777e-07, "loss": 0.8905, "step": 148 }, { "epoch": 0.8694383661560905, "grad_norm": 0.9040214380343765, "learning_rate": 5.473002056544191e-07, "loss": 0.7118, "step": 149 }, { "epoch": 0.87527352297593, "grad_norm": 0.9693967414133844, "learning_rate": 5.015386453917742e-07, "loss": 0.7273, "step": 150 }, { "epoch": 0.8811086797957695, "grad_norm": 1.08806034257909, "learning_rate": 4.576736409023813e-07, "loss": 0.762, "step": 151 }, { "epoch": 0.886943836615609, "grad_norm": 1.0602639261817013, "learning_rate": 4.15723685715686e-07, "loss": 0.8823, "step": 152 }, { "epoch": 0.8927789934354485, "grad_norm": 1.015659074679919, "learning_rate": 3.7570646597444196e-07, "loss": 0.8069, "step": 153 }, { "epoch": 0.8986141502552881, "grad_norm": 1.1556479384409875, "learning_rate": 3.3763885297822153e-07, "loss": 0.9008, "step": 154 }, { "epoch": 0.9044493070751276, "grad_norm": 1.1155559090709997, "learning_rate": 3.015368960704584e-07, "loss": 0.7752, "step": 155 }, { "epoch": 0.9102844638949672, "grad_norm": 1.1402114085019883, "learning_rate": 2.6741581587202747e-07, "loss": 0.849, "step": 156 }, { "epoch": 0.9161196207148067, "grad_norm": 1.0419920776286093, "learning_rate": 2.3528999786421758e-07, "loss": 0.8142, "step": 157 }, { "epoch": 0.9219547775346463, "grad_norm": 1.1306470417466037, "learning_rate": 2.0517298632379445e-07, "loss": 0.8408, "step": 158 }, { "epoch": 0.9277899343544858, "grad_norm": 1.125391381754142, "learning_rate": 1.770774786127244e-07, "loss": 0.7793, "step": 159 }, { "epoch": 0.9336250911743253, "grad_norm": 0.8803202289480606, "learning_rate": 1.510153198249531e-07, "loss": 0.7106, "step": 160 }, { "epoch": 0.9394602479941648, "grad_norm": 0.9210847874254954, "learning_rate": 1.2699749779249926e-07, "loss": 0.6821, "step": 161 }, { "epoch": 0.9452954048140044, "grad_norm": 1.0075752224421093, "learning_rate": 1.0503413845297739e-07, "loss": 0.8513, "step": 162 }, { "epoch": 0.9511305616338439, "grad_norm": 0.8750161290165441, "learning_rate": 8.513450158049109e-08, "loss": 0.7585, "step": 163 }, { "epoch": 0.9569657184536834, "grad_norm": 1.1096291136415941, "learning_rate": 6.730697688170251e-08, "loss": 0.8467, "step": 164 }, { "epoch": 0.962800875273523, "grad_norm": 0.9247194424342008, "learning_rate": 5.155908045872349e-08, "loss": 0.7033, "step": 165 }, { "epoch": 0.9686360320933625, "grad_norm": 0.9831793236532468, "learning_rate": 3.7897451640321326e-08, "loss": 0.7581, "step": 166 }, { "epoch": 0.9744711889132021, "grad_norm": 0.9263535840213669, "learning_rate": 2.6327850182769065e-08, "loss": 0.697, "step": 167 }, { "epoch": 0.9803063457330415, "grad_norm": 0.9615311264525579, "learning_rate": 1.6855153841527915e-08, "loss": 0.6953, "step": 168 }, { "epoch": 0.9861415025528811, "grad_norm": 0.9734970339085126, "learning_rate": 9.48335631477948e-09, "loss": 0.8321, "step": 169 }, { "epoch": 0.9919766593727206, "grad_norm": 0.8205128623545712, "learning_rate": 4.2155655596809455e-09, "loss": 0.6526, "step": 170 }, { "epoch": 0.9978118161925602, "grad_norm": 0.8811350978732005, "learning_rate": 1.054002482043237e-09, "loss": 0.7098, "step": 171 } ], "logging_steps": 1, "max_steps": 171, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4050, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 18593527627776.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }