{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997915363769022, "eval_steps": 400, "global_step": 1199, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008338544923910778, "grad_norm": 0.35252463817596436, "learning_rate": 2e-08, "loss": 0.8273, "step": 1 }, { "epoch": 0.0008338544923910778, "eval_loss": 0.861491858959198, "eval_runtime": 319.044, "eval_samples_per_second": 16.872, "eval_steps_per_second": 2.815, "step": 1 }, { "epoch": 0.0016677089847821555, "grad_norm": 0.3189675807952881, "learning_rate": 4e-08, "loss": 0.8572, "step": 2 }, { "epoch": 0.0025015634771732333, "grad_norm": 0.3477325141429901, "learning_rate": 6e-08, "loss": 0.9059, "step": 3 }, { "epoch": 0.003335417969564311, "grad_norm": 0.32423585653305054, "learning_rate": 8e-08, "loss": 0.8386, "step": 4 }, { "epoch": 0.004169272461955389, "grad_norm": 0.32164493203163147, "learning_rate": 1e-07, "loss": 0.8753, "step": 5 }, { "epoch": 0.0050031269543464665, "grad_norm": 0.33231157064437866, "learning_rate": 1.2e-07, "loss": 0.8665, "step": 6 }, { "epoch": 0.005836981446737544, "grad_norm": 0.31154102087020874, "learning_rate": 1.4e-07, "loss": 0.8813, "step": 7 }, { "epoch": 0.006670835939128622, "grad_norm": 0.3176712393760681, "learning_rate": 1.6e-07, "loss": 0.8683, "step": 8 }, { "epoch": 0.0075046904315197, "grad_norm": 0.3419969975948334, "learning_rate": 1.8e-07, "loss": 0.849, "step": 9 }, { "epoch": 0.008338544923910778, "grad_norm": 0.33408281207084656, "learning_rate": 2e-07, "loss": 0.8778, "step": 10 }, { "epoch": 0.009172399416301855, "grad_norm": 0.3125899136066437, "learning_rate": 2.1999999999999998e-07, "loss": 0.8051, "step": 11 }, { "epoch": 0.010006253908692933, "grad_norm": 0.32648083567619324, "learning_rate": 2.4e-07, "loss": 0.8297, "step": 12 }, { "epoch": 0.01084010840108401, "grad_norm": 0.3143678605556488, "learning_rate": 2.6e-07, "loss": 0.8401, "step": 13 }, { "epoch": 0.011673962893475089, "grad_norm": 0.3291407525539398, "learning_rate": 2.8e-07, "loss": 0.8668, "step": 14 }, { "epoch": 0.012507817385866166, "grad_norm": 0.3211856484413147, "learning_rate": 3e-07, "loss": 0.84, "step": 15 }, { "epoch": 0.013341671878257244, "grad_norm": 0.31169289350509644, "learning_rate": 3.2e-07, "loss": 0.8659, "step": 16 }, { "epoch": 0.014175526370648322, "grad_norm": 0.3279392719268799, "learning_rate": 3.4000000000000003e-07, "loss": 0.8317, "step": 17 }, { "epoch": 0.0150093808630394, "grad_norm": 0.3028814494609833, "learning_rate": 3.6e-07, "loss": 0.8246, "step": 18 }, { "epoch": 0.015843235355430477, "grad_norm": 0.307066410779953, "learning_rate": 3.7999999999999996e-07, "loss": 0.8025, "step": 19 }, { "epoch": 0.016677089847821555, "grad_norm": 0.3293515741825104, "learning_rate": 4e-07, "loss": 0.8674, "step": 20 }, { "epoch": 0.017510944340212633, "grad_norm": 0.3215535581111908, "learning_rate": 4.1999999999999995e-07, "loss": 0.8533, "step": 21 }, { "epoch": 0.01834479883260371, "grad_norm": 0.3236245810985565, "learning_rate": 4.3999999999999997e-07, "loss": 0.8957, "step": 22 }, { "epoch": 0.01917865332499479, "grad_norm": 0.33956846594810486, "learning_rate": 4.6e-07, "loss": 0.8293, "step": 23 }, { "epoch": 0.020012507817385866, "grad_norm": 0.33766457438468933, "learning_rate": 4.8e-07, "loss": 0.8381, "step": 24 }, { "epoch": 0.020846362309776944, "grad_norm": 0.3336811065673828, "learning_rate": 5e-07, "loss": 0.8475, "step": 25 }, { "epoch": 0.02168021680216802, "grad_norm": 0.307309627532959, "learning_rate": 5.2e-07, "loss": 0.852, "step": 26 }, { "epoch": 0.0225140712945591, "grad_norm": 0.29773110151290894, "learning_rate": 5.4e-07, "loss": 0.8042, "step": 27 }, { "epoch": 0.023347925786950177, "grad_norm": 0.3210948407649994, "learning_rate": 5.6e-07, "loss": 0.9099, "step": 28 }, { "epoch": 0.024181780279341255, "grad_norm": 0.3149116337299347, "learning_rate": 5.8e-07, "loss": 0.8, "step": 29 }, { "epoch": 0.025015634771732333, "grad_norm": 0.29307302832603455, "learning_rate": 6e-07, "loss": 0.8583, "step": 30 }, { "epoch": 0.02584948926412341, "grad_norm": 0.3131772577762604, "learning_rate": 6.2e-07, "loss": 0.8432, "step": 31 }, { "epoch": 0.026683343756514488, "grad_norm": 0.28058937191963196, "learning_rate": 6.4e-07, "loss": 0.8099, "step": 32 }, { "epoch": 0.027517198248905566, "grad_norm": 0.2649301290512085, "learning_rate": 6.6e-07, "loss": 0.8715, "step": 33 }, { "epoch": 0.028351052741296644, "grad_norm": 0.2659442126750946, "learning_rate": 6.800000000000001e-07, "loss": 0.7432, "step": 34 }, { "epoch": 0.02918490723368772, "grad_norm": 0.24875527620315552, "learning_rate": 7e-07, "loss": 0.8502, "step": 35 }, { "epoch": 0.0300187617260788, "grad_norm": 0.21730971336364746, "learning_rate": 7.2e-07, "loss": 0.813, "step": 36 }, { "epoch": 0.030852616218469877, "grad_norm": 0.19625824689865112, "learning_rate": 7.4e-07, "loss": 0.7839, "step": 37 }, { "epoch": 0.031686470710860955, "grad_norm": 0.19287347793579102, "learning_rate": 7.599999999999999e-07, "loss": 0.7949, "step": 38 }, { "epoch": 0.032520325203252036, "grad_norm": 0.1588095873594284, "learning_rate": 7.799999999999999e-07, "loss": 0.7137, "step": 39 }, { "epoch": 0.03335417969564311, "grad_norm": 0.17520244419574738, "learning_rate": 8e-07, "loss": 0.8112, "step": 40 }, { "epoch": 0.03418803418803419, "grad_norm": 0.16394659876823425, "learning_rate": 8.199999999999999e-07, "loss": 0.8096, "step": 41 }, { "epoch": 0.035021888680425266, "grad_norm": 0.1522587239742279, "learning_rate": 8.399999999999999e-07, "loss": 0.708, "step": 42 }, { "epoch": 0.03585574317281635, "grad_norm": 0.16340652108192444, "learning_rate": 8.599999999999999e-07, "loss": 0.7816, "step": 43 }, { "epoch": 0.03668959766520742, "grad_norm": 0.15452717244625092, "learning_rate": 8.799999999999999e-07, "loss": 0.7946, "step": 44 }, { "epoch": 0.0375234521575985, "grad_norm": 0.1545872688293457, "learning_rate": 9e-07, "loss": 0.7908, "step": 45 }, { "epoch": 0.03835730664998958, "grad_norm": 0.13799017667770386, "learning_rate": 9.2e-07, "loss": 0.7056, "step": 46 }, { "epoch": 0.03919116114238066, "grad_norm": 0.14599646627902985, "learning_rate": 9.399999999999999e-07, "loss": 0.8098, "step": 47 }, { "epoch": 0.04002501563477173, "grad_norm": 0.13951805233955383, "learning_rate": 9.6e-07, "loss": 0.7938, "step": 48 }, { "epoch": 0.040858870127162814, "grad_norm": 0.14058919250965118, "learning_rate": 9.8e-07, "loss": 0.7806, "step": 49 }, { "epoch": 0.04169272461955389, "grad_norm": 0.14300380647182465, "learning_rate": 1e-06, "loss": 0.8192, "step": 50 }, { "epoch": 0.04252657911194497, "grad_norm": 0.13719584047794342, "learning_rate": 1.02e-06, "loss": 0.7939, "step": 51 }, { "epoch": 0.04336043360433604, "grad_norm": 0.14002980291843414, "learning_rate": 1.04e-06, "loss": 0.8163, "step": 52 }, { "epoch": 0.044194288096727125, "grad_norm": 0.13815677165985107, "learning_rate": 1.06e-06, "loss": 0.8137, "step": 53 }, { "epoch": 0.0450281425891182, "grad_norm": 0.13772878050804138, "learning_rate": 1.08e-06, "loss": 0.771, "step": 54 }, { "epoch": 0.04586199708150928, "grad_norm": 0.1414576917886734, "learning_rate": 1.1e-06, "loss": 0.7989, "step": 55 }, { "epoch": 0.046695851573900354, "grad_norm": 0.11953306943178177, "learning_rate": 1.12e-06, "loss": 0.7074, "step": 56 }, { "epoch": 0.047529706066291436, "grad_norm": 0.13051201403141022, "learning_rate": 1.1399999999999999e-06, "loss": 0.8034, "step": 57 }, { "epoch": 0.04836356055868251, "grad_norm": 0.12802985310554504, "learning_rate": 1.16e-06, "loss": 0.7581, "step": 58 }, { "epoch": 0.04919741505107359, "grad_norm": 0.13593435287475586, "learning_rate": 1.18e-06, "loss": 0.7978, "step": 59 }, { "epoch": 0.050031269543464665, "grad_norm": 0.12595658004283905, "learning_rate": 1.2e-06, "loss": 0.7997, "step": 60 }, { "epoch": 0.05086512403585575, "grad_norm": 0.1257220059633255, "learning_rate": 1.22e-06, "loss": 0.7194, "step": 61 }, { "epoch": 0.05169897852824682, "grad_norm": 0.12169249355792999, "learning_rate": 1.24e-06, "loss": 0.7412, "step": 62 }, { "epoch": 0.0525328330206379, "grad_norm": 0.12391054630279541, "learning_rate": 1.26e-06, "loss": 0.7849, "step": 63 }, { "epoch": 0.053366687513028976, "grad_norm": 0.12134575098752975, "learning_rate": 1.28e-06, "loss": 0.6859, "step": 64 }, { "epoch": 0.05420054200542006, "grad_norm": 0.11605742573738098, "learning_rate": 1.3e-06, "loss": 0.8112, "step": 65 }, { "epoch": 0.05503439649781113, "grad_norm": 0.11593819409608841, "learning_rate": 1.32e-06, "loss": 0.6993, "step": 66 }, { "epoch": 0.05586825099020221, "grad_norm": 0.12103109061717987, "learning_rate": 1.34e-06, "loss": 0.7595, "step": 67 }, { "epoch": 0.05670210548259329, "grad_norm": 0.12391602247953415, "learning_rate": 1.3600000000000001e-06, "loss": 0.7676, "step": 68 }, { "epoch": 0.05753595997498437, "grad_norm": 0.10895267128944397, "learning_rate": 1.38e-06, "loss": 0.7256, "step": 69 }, { "epoch": 0.05836981446737544, "grad_norm": 0.1129627376794815, "learning_rate": 1.4e-06, "loss": 0.7097, "step": 70 }, { "epoch": 0.059203668959766524, "grad_norm": 0.1078682616353035, "learning_rate": 1.42e-06, "loss": 0.7052, "step": 71 }, { "epoch": 0.0600375234521576, "grad_norm": 0.1140650063753128, "learning_rate": 1.44e-06, "loss": 0.8067, "step": 72 }, { "epoch": 0.06087137794454868, "grad_norm": 0.10528790205717087, "learning_rate": 1.46e-06, "loss": 0.7241, "step": 73 }, { "epoch": 0.061705232436939754, "grad_norm": 0.11815836280584335, "learning_rate": 1.48e-06, "loss": 0.7344, "step": 74 }, { "epoch": 0.06253908692933083, "grad_norm": 0.11140462756156921, "learning_rate": 1.5e-06, "loss": 0.7664, "step": 75 }, { "epoch": 0.06337294142172191, "grad_norm": 0.11665944010019302, "learning_rate": 1.5199999999999998e-06, "loss": 0.7178, "step": 76 }, { "epoch": 0.06420679591411299, "grad_norm": 0.10751160234212875, "learning_rate": 1.5399999999999999e-06, "loss": 0.7269, "step": 77 }, { "epoch": 0.06504065040650407, "grad_norm": 0.09768356382846832, "learning_rate": 1.5599999999999999e-06, "loss": 0.7449, "step": 78 }, { "epoch": 0.06587450489889514, "grad_norm": 0.09686972200870514, "learning_rate": 1.58e-06, "loss": 0.7097, "step": 79 }, { "epoch": 0.06670835939128622, "grad_norm": 0.09527275711297989, "learning_rate": 1.6e-06, "loss": 0.7301, "step": 80 }, { "epoch": 0.0675422138836773, "grad_norm": 0.0931195393204689, "learning_rate": 1.62e-06, "loss": 0.7524, "step": 81 }, { "epoch": 0.06837606837606838, "grad_norm": 0.0892946794629097, "learning_rate": 1.6399999999999998e-06, "loss": 0.7156, "step": 82 }, { "epoch": 0.06920992286845945, "grad_norm": 0.08845791220664978, "learning_rate": 1.6599999999999998e-06, "loss": 0.7349, "step": 83 }, { "epoch": 0.07004377736085053, "grad_norm": 0.0896732360124588, "learning_rate": 1.6799999999999998e-06, "loss": 0.7145, "step": 84 }, { "epoch": 0.07087763185324161, "grad_norm": 0.09997335076332092, "learning_rate": 1.6999999999999998e-06, "loss": 0.7103, "step": 85 }, { "epoch": 0.0717114863456327, "grad_norm": 0.08821084350347519, "learning_rate": 1.7199999999999998e-06, "loss": 0.725, "step": 86 }, { "epoch": 0.07254534083802376, "grad_norm": 0.08937980234622955, "learning_rate": 1.7399999999999999e-06, "loss": 0.7194, "step": 87 }, { "epoch": 0.07337919533041484, "grad_norm": 0.09307857602834702, "learning_rate": 1.7599999999999999e-06, "loss": 0.7595, "step": 88 }, { "epoch": 0.07421304982280592, "grad_norm": 0.08865829557180405, "learning_rate": 1.78e-06, "loss": 0.7464, "step": 89 }, { "epoch": 0.075046904315197, "grad_norm": 0.09038495272397995, "learning_rate": 1.8e-06, "loss": 0.6698, "step": 90 }, { "epoch": 0.07588075880758807, "grad_norm": 0.08254078030586243, "learning_rate": 1.82e-06, "loss": 0.7426, "step": 91 }, { "epoch": 0.07671461329997915, "grad_norm": 0.08807505667209625, "learning_rate": 1.84e-06, "loss": 0.6789, "step": 92 }, { "epoch": 0.07754846779237023, "grad_norm": 0.09523889422416687, "learning_rate": 1.86e-06, "loss": 0.7218, "step": 93 }, { "epoch": 0.07838232228476132, "grad_norm": 0.08782277256250381, "learning_rate": 1.8799999999999998e-06, "loss": 0.7599, "step": 94 }, { "epoch": 0.07921617677715238, "grad_norm": 0.08968065679073334, "learning_rate": 1.8999999999999998e-06, "loss": 0.7971, "step": 95 }, { "epoch": 0.08005003126954346, "grad_norm": 0.08655782043933868, "learning_rate": 1.92e-06, "loss": 0.7479, "step": 96 }, { "epoch": 0.08088388576193455, "grad_norm": 0.0854155421257019, "learning_rate": 1.94e-06, "loss": 0.7327, "step": 97 }, { "epoch": 0.08171774025432563, "grad_norm": 0.08121508359909058, "learning_rate": 1.96e-06, "loss": 0.6749, "step": 98 }, { "epoch": 0.0825515947467167, "grad_norm": 0.08429264277219772, "learning_rate": 1.98e-06, "loss": 0.6402, "step": 99 }, { "epoch": 0.08338544923910778, "grad_norm": 0.08390086144208908, "learning_rate": 2e-06, "loss": 0.7434, "step": 100 }, { "epoch": 0.08421930373149886, "grad_norm": 0.08662284910678864, "learning_rate": 1.9999995964675577e-06, "loss": 0.6681, "step": 101 }, { "epoch": 0.08505315822388994, "grad_norm": 0.08700251579284668, "learning_rate": 1.9999983858705566e-06, "loss": 0.763, "step": 102 }, { "epoch": 0.085887012716281, "grad_norm": 0.08504343777894974, "learning_rate": 1.9999963682099734e-06, "loss": 0.77, "step": 103 }, { "epoch": 0.08672086720867209, "grad_norm": 0.08222745358943939, "learning_rate": 1.999993543487437e-06, "loss": 0.7103, "step": 104 }, { "epoch": 0.08755472170106317, "grad_norm": 0.08124719560146332, "learning_rate": 1.9999899117052263e-06, "loss": 0.6913, "step": 105 }, { "epoch": 0.08838857619345425, "grad_norm": 0.09033836424350739, "learning_rate": 1.9999854728662734e-06, "loss": 0.7065, "step": 106 }, { "epoch": 0.08922243068584532, "grad_norm": 0.09045730531215668, "learning_rate": 1.99998022697416e-06, "loss": 0.7624, "step": 107 }, { "epoch": 0.0900562851782364, "grad_norm": 0.0832991823554039, "learning_rate": 1.9999741740331203e-06, "loss": 0.7399, "step": 108 }, { "epoch": 0.09089013967062748, "grad_norm": 0.07666011154651642, "learning_rate": 1.9999673140480388e-06, "loss": 0.6477, "step": 109 }, { "epoch": 0.09172399416301856, "grad_norm": 0.08195007592439651, "learning_rate": 1.9999596470244527e-06, "loss": 0.7068, "step": 110 }, { "epoch": 0.09255784865540963, "grad_norm": 0.07755164802074432, "learning_rate": 1.99995117296855e-06, "loss": 0.7021, "step": 111 }, { "epoch": 0.09339170314780071, "grad_norm": 0.0782230794429779, "learning_rate": 1.9999418918871685e-06, "loss": 0.7039, "step": 112 }, { "epoch": 0.09422555764019179, "grad_norm": 0.08311248570680618, "learning_rate": 1.9999318037877995e-06, "loss": 0.7238, "step": 113 }, { "epoch": 0.09505941213258287, "grad_norm": 0.08455490320920944, "learning_rate": 1.999920908678585e-06, "loss": 0.7184, "step": 114 }, { "epoch": 0.09589326662497394, "grad_norm": 0.08463476598262787, "learning_rate": 1.9999092065683174e-06, "loss": 0.6162, "step": 115 }, { "epoch": 0.09672712111736502, "grad_norm": 0.07902689278125763, "learning_rate": 1.999896697466442e-06, "loss": 0.6649, "step": 116 }, { "epoch": 0.0975609756097561, "grad_norm": 0.08383440971374512, "learning_rate": 1.999883381383053e-06, "loss": 0.703, "step": 117 }, { "epoch": 0.09839483010214718, "grad_norm": 0.07288578897714615, "learning_rate": 1.999869258328899e-06, "loss": 0.7286, "step": 118 }, { "epoch": 0.09922868459453825, "grad_norm": 0.08177065849304199, "learning_rate": 1.999854328315377e-06, "loss": 0.6899, "step": 119 }, { "epoch": 0.10006253908692933, "grad_norm": 0.07529062032699585, "learning_rate": 1.999838591354537e-06, "loss": 0.6781, "step": 120 }, { "epoch": 0.10089639357932041, "grad_norm": 0.08230678737163544, "learning_rate": 1.9998220474590795e-06, "loss": 0.7059, "step": 121 }, { "epoch": 0.1017302480717115, "grad_norm": 0.08000528067350388, "learning_rate": 1.9998046966423567e-06, "loss": 0.6561, "step": 122 }, { "epoch": 0.10256410256410256, "grad_norm": 0.07669328153133392, "learning_rate": 1.9997865389183717e-06, "loss": 0.6452, "step": 123 }, { "epoch": 0.10339795705649364, "grad_norm": 0.07872146368026733, "learning_rate": 1.9997675743017794e-06, "loss": 0.7337, "step": 124 }, { "epoch": 0.10423181154888472, "grad_norm": 0.07842138409614563, "learning_rate": 1.9997478028078853e-06, "loss": 0.7126, "step": 125 }, { "epoch": 0.1050656660412758, "grad_norm": 0.07890679687261581, "learning_rate": 1.9997272244526453e-06, "loss": 0.7049, "step": 126 }, { "epoch": 0.10589952053366687, "grad_norm": 0.07983887195587158, "learning_rate": 1.999705839252669e-06, "loss": 0.7578, "step": 127 }, { "epoch": 0.10673337502605795, "grad_norm": 0.07721222192049026, "learning_rate": 1.9996836472252144e-06, "loss": 0.678, "step": 128 }, { "epoch": 0.10756722951844903, "grad_norm": 0.07825150340795517, "learning_rate": 1.9996606483881927e-06, "loss": 0.6763, "step": 129 }, { "epoch": 0.10840108401084012, "grad_norm": 0.08063995838165283, "learning_rate": 1.999636842760165e-06, "loss": 0.758, "step": 130 }, { "epoch": 0.10923493850323118, "grad_norm": 0.0724928081035614, "learning_rate": 1.9996122303603443e-06, "loss": 0.7312, "step": 131 }, { "epoch": 0.11006879299562226, "grad_norm": 0.08180397748947144, "learning_rate": 1.999586811208594e-06, "loss": 0.6794, "step": 132 }, { "epoch": 0.11090264748801335, "grad_norm": 0.0856177881360054, "learning_rate": 1.99956058532543e-06, "loss": 0.6885, "step": 133 }, { "epoch": 0.11173650198040443, "grad_norm": 0.0763324424624443, "learning_rate": 1.999533552732017e-06, "loss": 0.6821, "step": 134 }, { "epoch": 0.1125703564727955, "grad_norm": 0.07964632660150528, "learning_rate": 1.9995057134501725e-06, "loss": 0.7362, "step": 135 }, { "epoch": 0.11340421096518657, "grad_norm": 0.07633720338344574, "learning_rate": 1.999477067502365e-06, "loss": 0.7396, "step": 136 }, { "epoch": 0.11423806545757766, "grad_norm": 0.07655656337738037, "learning_rate": 1.9994476149117132e-06, "loss": 0.728, "step": 137 }, { "epoch": 0.11507191994996874, "grad_norm": 0.07770884782075882, "learning_rate": 1.9994173557019877e-06, "loss": 0.6878, "step": 138 }, { "epoch": 0.1159057744423598, "grad_norm": 0.07332731038331985, "learning_rate": 1.999386289897609e-06, "loss": 0.7115, "step": 139 }, { "epoch": 0.11673962893475089, "grad_norm": 0.0784095972776413, "learning_rate": 1.9993544175236497e-06, "loss": 0.6653, "step": 140 }, { "epoch": 0.11757348342714197, "grad_norm": 0.08311771601438522, "learning_rate": 1.9993217386058326e-06, "loss": 0.6504, "step": 141 }, { "epoch": 0.11840733791953305, "grad_norm": 0.07529395818710327, "learning_rate": 1.999288253170532e-06, "loss": 0.7083, "step": 142 }, { "epoch": 0.11924119241192412, "grad_norm": 0.07608146965503693, "learning_rate": 1.999253961244773e-06, "loss": 0.6856, "step": 143 }, { "epoch": 0.1200750469043152, "grad_norm": 0.07125360518693924, "learning_rate": 1.9992188628562303e-06, "loss": 0.6095, "step": 144 }, { "epoch": 0.12090890139670628, "grad_norm": 0.07715660333633423, "learning_rate": 1.999182958033232e-06, "loss": 0.7337, "step": 145 }, { "epoch": 0.12174275588909736, "grad_norm": 0.07756403833627701, "learning_rate": 1.999146246804755e-06, "loss": 0.6929, "step": 146 }, { "epoch": 0.12257661038148843, "grad_norm": 0.07667449861764908, "learning_rate": 1.9991087292004273e-06, "loss": 0.67, "step": 147 }, { "epoch": 0.12341046487387951, "grad_norm": 0.08285968005657196, "learning_rate": 1.9990704052505284e-06, "loss": 0.7483, "step": 148 }, { "epoch": 0.12424431936627059, "grad_norm": 0.07476554065942764, "learning_rate": 1.9990312749859887e-06, "loss": 0.6846, "step": 149 }, { "epoch": 0.12507817385866166, "grad_norm": 0.0818118005990982, "learning_rate": 1.998991338438388e-06, "loss": 0.6709, "step": 150 }, { "epoch": 0.12591202835105275, "grad_norm": 0.08350253850221634, "learning_rate": 1.998950595639958e-06, "loss": 0.7379, "step": 151 }, { "epoch": 0.12674588284344382, "grad_norm": 0.07916589826345444, "learning_rate": 1.9989090466235806e-06, "loss": 0.7206, "step": 152 }, { "epoch": 0.1275797373358349, "grad_norm": 0.07860454171895981, "learning_rate": 1.998866691422789e-06, "loss": 0.6777, "step": 153 }, { "epoch": 0.12841359182822598, "grad_norm": 0.07861131429672241, "learning_rate": 1.998823530071766e-06, "loss": 0.6938, "step": 154 }, { "epoch": 0.12924744632061705, "grad_norm": 0.07819496840238571, "learning_rate": 1.9987795626053465e-06, "loss": 0.7193, "step": 155 }, { "epoch": 0.13008130081300814, "grad_norm": 0.07828415185213089, "learning_rate": 1.9987347890590144e-06, "loss": 0.6809, "step": 156 }, { "epoch": 0.1309151553053992, "grad_norm": 0.07755222916603088, "learning_rate": 1.998689209468905e-06, "loss": 0.7213, "step": 157 }, { "epoch": 0.13174900979779028, "grad_norm": 0.07672612369060516, "learning_rate": 1.998642823871804e-06, "loss": 0.7196, "step": 158 }, { "epoch": 0.13258286429018137, "grad_norm": 0.07963719964027405, "learning_rate": 1.9985956323051475e-06, "loss": 0.7465, "step": 159 }, { "epoch": 0.13341671878257244, "grad_norm": 0.0795946940779686, "learning_rate": 1.9985476348070223e-06, "loss": 0.717, "step": 160 }, { "epoch": 0.1342505732749635, "grad_norm": 0.07344063371419907, "learning_rate": 1.9984988314161656e-06, "loss": 0.6861, "step": 161 }, { "epoch": 0.1350844277673546, "grad_norm": 0.07722171396017075, "learning_rate": 1.9984492221719645e-06, "loss": 0.6825, "step": 162 }, { "epoch": 0.13591828225974567, "grad_norm": 0.0812341496348381, "learning_rate": 1.9983988071144572e-06, "loss": 0.7245, "step": 163 }, { "epoch": 0.13675213675213677, "grad_norm": 0.08472032845020294, "learning_rate": 1.998347586284332e-06, "loss": 0.7315, "step": 164 }, { "epoch": 0.13758599124452783, "grad_norm": 0.07785464823246002, "learning_rate": 1.9982955597229275e-06, "loss": 0.6906, "step": 165 }, { "epoch": 0.1384198457369189, "grad_norm": 0.07382892072200775, "learning_rate": 1.998242727472232e-06, "loss": 0.6488, "step": 166 }, { "epoch": 0.13925370022931, "grad_norm": 0.07445723563432693, "learning_rate": 1.9981890895748845e-06, "loss": 0.692, "step": 167 }, { "epoch": 0.14008755472170106, "grad_norm": 0.07889281213283539, "learning_rate": 1.998134646074175e-06, "loss": 0.6438, "step": 168 }, { "epoch": 0.14092140921409213, "grad_norm": 0.07664698362350464, "learning_rate": 1.9980793970140426e-06, "loss": 0.6993, "step": 169 }, { "epoch": 0.14175526370648323, "grad_norm": 0.07807335257530212, "learning_rate": 1.9980233424390773e-06, "loss": 0.7334, "step": 170 }, { "epoch": 0.1425891181988743, "grad_norm": 0.0683249682188034, "learning_rate": 1.9979664823945174e-06, "loss": 0.6325, "step": 171 }, { "epoch": 0.1434229726912654, "grad_norm": 0.08339341729879379, "learning_rate": 1.997908816926254e-06, "loss": 0.697, "step": 172 }, { "epoch": 0.14425682718365646, "grad_norm": 0.07781082391738892, "learning_rate": 1.997850346080827e-06, "loss": 0.6748, "step": 173 }, { "epoch": 0.14509068167604752, "grad_norm": 0.07718750089406967, "learning_rate": 1.9977910699054247e-06, "loss": 0.7002, "step": 174 }, { "epoch": 0.14592453616843862, "grad_norm": 0.07779830694198608, "learning_rate": 1.997730988447888e-06, "loss": 0.723, "step": 175 }, { "epoch": 0.14675839066082969, "grad_norm": 0.07958701252937317, "learning_rate": 1.997670101756706e-06, "loss": 0.6651, "step": 176 }, { "epoch": 0.14759224515322075, "grad_norm": 0.08142640441656113, "learning_rate": 1.997608409881019e-06, "loss": 0.6652, "step": 177 }, { "epoch": 0.14842609964561185, "grad_norm": 0.07898704707622528, "learning_rate": 1.9975459128706152e-06, "loss": 0.6842, "step": 178 }, { "epoch": 0.14925995413800291, "grad_norm": 0.07870691269636154, "learning_rate": 1.997482610775935e-06, "loss": 0.7167, "step": 179 }, { "epoch": 0.150093808630394, "grad_norm": 0.07766727358102798, "learning_rate": 1.9974185036480658e-06, "loss": 0.6145, "step": 180 }, { "epoch": 0.15092766312278508, "grad_norm": 0.07913653552532196, "learning_rate": 1.997353591538748e-06, "loss": 0.7598, "step": 181 }, { "epoch": 0.15176151761517614, "grad_norm": 0.07850881665945053, "learning_rate": 1.9972878745003684e-06, "loss": 0.7492, "step": 182 }, { "epoch": 0.15259537210756724, "grad_norm": 0.07599500566720963, "learning_rate": 1.9972213525859656e-06, "loss": 0.6802, "step": 183 }, { "epoch": 0.1534292265999583, "grad_norm": 0.08165914565324783, "learning_rate": 1.997154025849227e-06, "loss": 0.6343, "step": 184 }, { "epoch": 0.15426308109234937, "grad_norm": 0.0763566642999649, "learning_rate": 1.9970858943444893e-06, "loss": 0.7214, "step": 185 }, { "epoch": 0.15509693558474047, "grad_norm": 0.07993835210800171, "learning_rate": 1.99701695812674e-06, "loss": 0.6975, "step": 186 }, { "epoch": 0.15593079007713154, "grad_norm": 0.07139826565980911, "learning_rate": 1.996947217251614e-06, "loss": 0.6714, "step": 187 }, { "epoch": 0.15676464456952263, "grad_norm": 0.0777943804860115, "learning_rate": 1.996876671775397e-06, "loss": 0.6684, "step": 188 }, { "epoch": 0.1575984990619137, "grad_norm": 0.07822203636169434, "learning_rate": 1.9968053217550242e-06, "loss": 0.7161, "step": 189 }, { "epoch": 0.15843235355430477, "grad_norm": 0.07645302265882492, "learning_rate": 1.9967331672480794e-06, "loss": 0.647, "step": 190 }, { "epoch": 0.15926620804669586, "grad_norm": 0.07744666188955307, "learning_rate": 1.996660208312796e-06, "loss": 0.6887, "step": 191 }, { "epoch": 0.16010006253908693, "grad_norm": 0.07616739720106125, "learning_rate": 1.9965864450080565e-06, "loss": 0.7209, "step": 192 }, { "epoch": 0.160933917031478, "grad_norm": 0.0808212161064148, "learning_rate": 1.996511877393393e-06, "loss": 0.7229, "step": 193 }, { "epoch": 0.1617677715238691, "grad_norm": 0.07879424095153809, "learning_rate": 1.996436505528986e-06, "loss": 0.6753, "step": 194 }, { "epoch": 0.16260162601626016, "grad_norm": 0.07811419665813446, "learning_rate": 1.9963603294756654e-06, "loss": 0.6427, "step": 195 }, { "epoch": 0.16343548050865125, "grad_norm": 0.08347994834184647, "learning_rate": 1.996283349294911e-06, "loss": 0.6575, "step": 196 }, { "epoch": 0.16426933500104232, "grad_norm": 0.08415097743272781, "learning_rate": 1.99620556504885e-06, "loss": 0.6859, "step": 197 }, { "epoch": 0.1651031894934334, "grad_norm": 0.08112502098083496, "learning_rate": 1.9961269768002596e-06, "loss": 0.6601, "step": 198 }, { "epoch": 0.16593704398582448, "grad_norm": 0.07908611744642258, "learning_rate": 1.9960475846125656e-06, "loss": 0.6697, "step": 199 }, { "epoch": 0.16677089847821555, "grad_norm": 0.07735829800367355, "learning_rate": 1.995967388549843e-06, "loss": 0.6631, "step": 200 }, { "epoch": 0.16760475297060662, "grad_norm": 0.07564505934715271, "learning_rate": 1.9958863886768145e-06, "loss": 0.7004, "step": 201 }, { "epoch": 0.1684386074629977, "grad_norm": 0.08117896318435669, "learning_rate": 1.9958045850588527e-06, "loss": 0.62, "step": 202 }, { "epoch": 0.16927246195538878, "grad_norm": 0.0761483758687973, "learning_rate": 1.9957219777619784e-06, "loss": 0.7145, "step": 203 }, { "epoch": 0.17010631644777988, "grad_norm": 0.08249073475599289, "learning_rate": 1.995638566852861e-06, "loss": 0.703, "step": 204 }, { "epoch": 0.17094017094017094, "grad_norm": 0.08197219669818878, "learning_rate": 1.9955543523988187e-06, "loss": 0.695, "step": 205 }, { "epoch": 0.171774025432562, "grad_norm": 0.07933253049850464, "learning_rate": 1.995469334467818e-06, "loss": 0.6575, "step": 206 }, { "epoch": 0.1726078799249531, "grad_norm": 0.07888033986091614, "learning_rate": 1.9953835131284735e-06, "loss": 0.6504, "step": 207 }, { "epoch": 0.17344173441734417, "grad_norm": 0.080783411860466, "learning_rate": 1.995296888450049e-06, "loss": 0.6586, "step": 208 }, { "epoch": 0.17427558890973524, "grad_norm": 0.07863239198923111, "learning_rate": 1.995209460502456e-06, "loss": 0.7204, "step": 209 }, { "epoch": 0.17510944340212634, "grad_norm": 0.07864314317703247, "learning_rate": 1.9951212293562544e-06, "loss": 0.6867, "step": 210 }, { "epoch": 0.1759432978945174, "grad_norm": 0.07574496418237686, "learning_rate": 1.995032195082653e-06, "loss": 0.6806, "step": 211 }, { "epoch": 0.1767771523869085, "grad_norm": 0.08079942315816879, "learning_rate": 1.9949423577535078e-06, "loss": 0.6929, "step": 212 }, { "epoch": 0.17761100687929957, "grad_norm": 0.08361431211233139, "learning_rate": 1.9948517174413235e-06, "loss": 0.6948, "step": 213 }, { "epoch": 0.17844486137169063, "grad_norm": 0.07974692434072495, "learning_rate": 1.994760274219253e-06, "loss": 0.7269, "step": 214 }, { "epoch": 0.17927871586408173, "grad_norm": 0.08275672793388367, "learning_rate": 1.9946680281610957e-06, "loss": 0.6268, "step": 215 }, { "epoch": 0.1801125703564728, "grad_norm": 0.08361789584159851, "learning_rate": 1.9945749793413017e-06, "loss": 0.6145, "step": 216 }, { "epoch": 0.18094642484886386, "grad_norm": 0.0829552635550499, "learning_rate": 1.9944811278349665e-06, "loss": 0.7203, "step": 217 }, { "epoch": 0.18178027934125496, "grad_norm": 0.07961665093898773, "learning_rate": 1.994386473717835e-06, "loss": 0.6598, "step": 218 }, { "epoch": 0.18261413383364603, "grad_norm": 0.08015939593315125, "learning_rate": 1.9942910170662986e-06, "loss": 0.6465, "step": 219 }, { "epoch": 0.18344798832603712, "grad_norm": 0.08055105805397034, "learning_rate": 1.994194757957397e-06, "loss": 0.6543, "step": 220 }, { "epoch": 0.1842818428184282, "grad_norm": 0.07763723284006119, "learning_rate": 1.994097696468818e-06, "loss": 0.6984, "step": 221 }, { "epoch": 0.18511569731081925, "grad_norm": 0.07882793247699738, "learning_rate": 1.9939998326788966e-06, "loss": 0.6672, "step": 222 }, { "epoch": 0.18594955180321035, "grad_norm": 0.08098764717578888, "learning_rate": 1.993901166666615e-06, "loss": 0.65, "step": 223 }, { "epoch": 0.18678340629560142, "grad_norm": 0.07791118323802948, "learning_rate": 1.9938016985116027e-06, "loss": 0.6954, "step": 224 }, { "epoch": 0.18761726078799248, "grad_norm": 0.07814016193151474, "learning_rate": 1.993701428294137e-06, "loss": 0.6788, "step": 225 }, { "epoch": 0.18845111528038358, "grad_norm": 0.07884248346090317, "learning_rate": 1.9936003560951433e-06, "loss": 0.6577, "step": 226 }, { "epoch": 0.18928496977277465, "grad_norm": 0.07807621359825134, "learning_rate": 1.9934984819961924e-06, "loss": 0.6424, "step": 227 }, { "epoch": 0.19011882426516574, "grad_norm": 0.08191139996051788, "learning_rate": 1.9933958060795043e-06, "loss": 0.6809, "step": 228 }, { "epoch": 0.1909526787575568, "grad_norm": 0.07597807049751282, "learning_rate": 1.9932923284279445e-06, "loss": 0.7109, "step": 229 }, { "epoch": 0.19178653324994788, "grad_norm": 0.08211328089237213, "learning_rate": 1.9931880491250263e-06, "loss": 0.6916, "step": 230 }, { "epoch": 0.19262038774233897, "grad_norm": 0.07775567471981049, "learning_rate": 1.993082968254909e-06, "loss": 0.6458, "step": 231 }, { "epoch": 0.19345424223473004, "grad_norm": 0.08438362181186676, "learning_rate": 1.992977085902402e-06, "loss": 0.6946, "step": 232 }, { "epoch": 0.1942880967271211, "grad_norm": 0.08631843328475952, "learning_rate": 1.9928704021529564e-06, "loss": 0.6643, "step": 233 }, { "epoch": 0.1951219512195122, "grad_norm": 0.07766929268836975, "learning_rate": 1.992762917092675e-06, "loss": 0.6753, "step": 234 }, { "epoch": 0.19595580571190327, "grad_norm": 0.07574877887964249, "learning_rate": 1.9926546308083044e-06, "loss": 0.6346, "step": 235 }, { "epoch": 0.19678966020429436, "grad_norm": 0.07815206050872803, "learning_rate": 1.9925455433872386e-06, "loss": 0.6946, "step": 236 }, { "epoch": 0.19762351469668543, "grad_norm": 0.07457253336906433, "learning_rate": 1.9924356549175185e-06, "loss": 0.7108, "step": 237 }, { "epoch": 0.1984573691890765, "grad_norm": 0.07688765227794647, "learning_rate": 1.992324965487831e-06, "loss": 0.6764, "step": 238 }, { "epoch": 0.1992912236814676, "grad_norm": 0.08050046861171722, "learning_rate": 1.99221347518751e-06, "loss": 0.6263, "step": 239 }, { "epoch": 0.20012507817385866, "grad_norm": 0.07763144373893738, "learning_rate": 1.9921011841065347e-06, "loss": 0.6618, "step": 240 }, { "epoch": 0.20095893266624973, "grad_norm": 0.07744456082582474, "learning_rate": 1.991988092335532e-06, "loss": 0.6922, "step": 241 }, { "epoch": 0.20179278715864082, "grad_norm": 0.07596256583929062, "learning_rate": 1.9918741999657743e-06, "loss": 0.7267, "step": 242 }, { "epoch": 0.2026266416510319, "grad_norm": 0.08003415167331696, "learning_rate": 1.9917595070891793e-06, "loss": 0.6796, "step": 243 }, { "epoch": 0.203460496143423, "grad_norm": 0.08418845385313034, "learning_rate": 1.9916440137983125e-06, "loss": 0.6815, "step": 244 }, { "epoch": 0.20429435063581405, "grad_norm": 0.0794699415564537, "learning_rate": 1.991527720186384e-06, "loss": 0.6962, "step": 245 }, { "epoch": 0.20512820512820512, "grad_norm": 0.07894127070903778, "learning_rate": 1.991410626347251e-06, "loss": 0.6898, "step": 246 }, { "epoch": 0.20596205962059622, "grad_norm": 0.0819648951292038, "learning_rate": 1.9912927323754145e-06, "loss": 0.726, "step": 247 }, { "epoch": 0.20679591411298728, "grad_norm": 0.08042652159929276, "learning_rate": 1.991174038366024e-06, "loss": 0.6398, "step": 248 }, { "epoch": 0.20762976860537835, "grad_norm": 0.07848809659481049, "learning_rate": 1.9910545444148722e-06, "loss": 0.6562, "step": 249 }, { "epoch": 0.20846362309776945, "grad_norm": 0.08295590430498123, "learning_rate": 1.9909342506183987e-06, "loss": 0.6767, "step": 250 }, { "epoch": 0.2092974775901605, "grad_norm": 0.0840587317943573, "learning_rate": 1.9908131570736887e-06, "loss": 0.7274, "step": 251 }, { "epoch": 0.2101313320825516, "grad_norm": 0.08375398814678192, "learning_rate": 1.9906912638784727e-06, "loss": 0.6716, "step": 252 }, { "epoch": 0.21096518657494268, "grad_norm": 0.08229872584342957, "learning_rate": 1.990568571131126e-06, "loss": 0.7385, "step": 253 }, { "epoch": 0.21179904106733374, "grad_norm": 0.07736154645681381, "learning_rate": 1.99044507893067e-06, "loss": 0.71, "step": 254 }, { "epoch": 0.21263289555972484, "grad_norm": 0.08812221139669418, "learning_rate": 1.9903207873767704e-06, "loss": 0.7045, "step": 255 }, { "epoch": 0.2134667500521159, "grad_norm": 0.08660853654146194, "learning_rate": 1.9901956965697386e-06, "loss": 0.7218, "step": 256 }, { "epoch": 0.21430060454450697, "grad_norm": 0.08562801033258438, "learning_rate": 1.9900698066105318e-06, "loss": 0.6376, "step": 257 }, { "epoch": 0.21513445903689807, "grad_norm": 0.07764414697885513, "learning_rate": 1.9899431176007503e-06, "loss": 0.6629, "step": 258 }, { "epoch": 0.21596831352928914, "grad_norm": 0.07547292113304138, "learning_rate": 1.989815629642641e-06, "loss": 0.6887, "step": 259 }, { "epoch": 0.21680216802168023, "grad_norm": 0.08463111519813538, "learning_rate": 1.9896873428390948e-06, "loss": 0.6169, "step": 260 }, { "epoch": 0.2176360225140713, "grad_norm": 0.08064530789852142, "learning_rate": 1.9895582572936473e-06, "loss": 0.6905, "step": 261 }, { "epoch": 0.21846987700646237, "grad_norm": 0.07929642498493195, "learning_rate": 1.9894283731104796e-06, "loss": 0.6415, "step": 262 }, { "epoch": 0.21930373149885346, "grad_norm": 0.07858303934335709, "learning_rate": 1.9892976903944154e-06, "loss": 0.6955, "step": 263 }, { "epoch": 0.22013758599124453, "grad_norm": 0.07967871427536011, "learning_rate": 1.9891662092509255e-06, "loss": 0.7004, "step": 264 }, { "epoch": 0.2209714404836356, "grad_norm": 0.08009492605924606, "learning_rate": 1.989033929786123e-06, "loss": 0.6206, "step": 265 }, { "epoch": 0.2218052949760267, "grad_norm": 0.08245829492807388, "learning_rate": 1.988900852106766e-06, "loss": 0.7182, "step": 266 }, { "epoch": 0.22263914946841776, "grad_norm": 0.07844138890504837, "learning_rate": 1.9887669763202568e-06, "loss": 0.6372, "step": 267 }, { "epoch": 0.22347300396080885, "grad_norm": 0.0783441960811615, "learning_rate": 1.9886323025346417e-06, "loss": 0.6182, "step": 268 }, { "epoch": 0.22430685845319992, "grad_norm": 0.0803346335887909, "learning_rate": 1.9884968308586117e-06, "loss": 0.6436, "step": 269 }, { "epoch": 0.225140712945591, "grad_norm": 0.0813342034816742, "learning_rate": 1.9883605614015015e-06, "loss": 0.6472, "step": 270 }, { "epoch": 0.22597456743798208, "grad_norm": 0.07891476154327393, "learning_rate": 1.988223494273288e-06, "loss": 0.6625, "step": 271 }, { "epoch": 0.22680842193037315, "grad_norm": 0.08087664842605591, "learning_rate": 1.9880856295845945e-06, "loss": 0.7226, "step": 272 }, { "epoch": 0.22764227642276422, "grad_norm": 0.08254389464855194, "learning_rate": 1.9879469674466865e-06, "loss": 0.6992, "step": 273 }, { "epoch": 0.2284761309151553, "grad_norm": 0.08207474648952484, "learning_rate": 1.987807507971473e-06, "loss": 0.6827, "step": 274 }, { "epoch": 0.22930998540754638, "grad_norm": 0.08394856005907059, "learning_rate": 1.9876672512715075e-06, "loss": 0.7445, "step": 275 }, { "epoch": 0.23014383989993747, "grad_norm": 0.07934897392988205, "learning_rate": 1.9875261974599856e-06, "loss": 0.6988, "step": 276 }, { "epoch": 0.23097769439232854, "grad_norm": 0.08756977319717407, "learning_rate": 1.987384346650747e-06, "loss": 0.6588, "step": 277 }, { "epoch": 0.2318115488847196, "grad_norm": 0.08198294788599014, "learning_rate": 1.9872416989582747e-06, "loss": 0.6729, "step": 278 }, { "epoch": 0.2326454033771107, "grad_norm": 0.07671192288398743, "learning_rate": 1.9870982544976948e-06, "loss": 0.6685, "step": 279 }, { "epoch": 0.23347925786950177, "grad_norm": 0.07943634688854218, "learning_rate": 1.986954013384776e-06, "loss": 0.7352, "step": 280 }, { "epoch": 0.23431311236189284, "grad_norm": 0.0829697921872139, "learning_rate": 1.98680897573593e-06, "loss": 0.6645, "step": 281 }, { "epoch": 0.23514696685428393, "grad_norm": 0.08583538234233856, "learning_rate": 1.986663141668212e-06, "loss": 0.713, "step": 282 }, { "epoch": 0.235980821346675, "grad_norm": 0.08206543326377869, "learning_rate": 1.9865165112993192e-06, "loss": 0.7211, "step": 283 }, { "epoch": 0.2368146758390661, "grad_norm": 0.07899665087461472, "learning_rate": 1.9863690847475924e-06, "loss": 0.6757, "step": 284 }, { "epoch": 0.23764853033145716, "grad_norm": 0.0813852995634079, "learning_rate": 1.986220862132014e-06, "loss": 0.692, "step": 285 }, { "epoch": 0.23848238482384823, "grad_norm": 0.09079395979642868, "learning_rate": 1.9860718435722093e-06, "loss": 0.6756, "step": 286 }, { "epoch": 0.23931623931623933, "grad_norm": 0.07518535107374191, "learning_rate": 1.985922029188446e-06, "loss": 0.626, "step": 287 }, { "epoch": 0.2401500938086304, "grad_norm": 0.08228084444999695, "learning_rate": 1.9857714191016337e-06, "loss": 0.7394, "step": 288 }, { "epoch": 0.24098394830102146, "grad_norm": 0.08036590367555618, "learning_rate": 1.985620013433325e-06, "loss": 0.6012, "step": 289 }, { "epoch": 0.24181780279341256, "grad_norm": 0.08429574221372604, "learning_rate": 1.985467812305714e-06, "loss": 0.6881, "step": 290 }, { "epoch": 0.24265165728580362, "grad_norm": 0.07866602391004562, "learning_rate": 1.9853148158416364e-06, "loss": 0.6708, "step": 291 }, { "epoch": 0.24348551177819472, "grad_norm": 0.0796622484922409, "learning_rate": 1.9851610241645714e-06, "loss": 0.7129, "step": 292 }, { "epoch": 0.24431936627058579, "grad_norm": 0.08136799931526184, "learning_rate": 1.9850064373986374e-06, "loss": 0.6806, "step": 293 }, { "epoch": 0.24515322076297685, "grad_norm": 0.07808342576026917, "learning_rate": 1.9848510556685973e-06, "loss": 0.6217, "step": 294 }, { "epoch": 0.24598707525536795, "grad_norm": 0.07293733209371567, "learning_rate": 1.984694879099853e-06, "loss": 0.662, "step": 295 }, { "epoch": 0.24682092974775902, "grad_norm": 0.07730741053819656, "learning_rate": 1.98453790781845e-06, "loss": 0.6386, "step": 296 }, { "epoch": 0.24765478424015008, "grad_norm": 0.08601437509059906, "learning_rate": 1.9843801419510743e-06, "loss": 0.6639, "step": 297 }, { "epoch": 0.24848863873254118, "grad_norm": 0.0815718024969101, "learning_rate": 1.9842215816250525e-06, "loss": 0.6891, "step": 298 }, { "epoch": 0.24932249322493225, "grad_norm": 0.08793067187070847, "learning_rate": 1.9840622269683537e-06, "loss": 0.684, "step": 299 }, { "epoch": 0.2501563477173233, "grad_norm": 0.07922165095806122, "learning_rate": 1.9839020781095873e-06, "loss": 0.6913, "step": 300 }, { "epoch": 0.2509902022097144, "grad_norm": 0.0789838656783104, "learning_rate": 1.9837411351780035e-06, "loss": 0.6556, "step": 301 }, { "epoch": 0.2518240567021055, "grad_norm": 0.07958388328552246, "learning_rate": 1.9835793983034944e-06, "loss": 0.6301, "step": 302 }, { "epoch": 0.25265791119449654, "grad_norm": 0.08036024868488312, "learning_rate": 1.9834168676165915e-06, "loss": 0.592, "step": 303 }, { "epoch": 0.25349176568688764, "grad_norm": 0.08134333044290543, "learning_rate": 1.9832535432484677e-06, "loss": 0.7142, "step": 304 }, { "epoch": 0.25432562017927873, "grad_norm": 0.08333199471235275, "learning_rate": 1.983089425330937e-06, "loss": 0.7021, "step": 305 }, { "epoch": 0.2551594746716698, "grad_norm": 0.08486991375684738, "learning_rate": 1.982924513996452e-06, "loss": 0.6724, "step": 306 }, { "epoch": 0.25599332916406087, "grad_norm": 0.08286510407924652, "learning_rate": 1.982758809378108e-06, "loss": 0.6856, "step": 307 }, { "epoch": 0.25682718365645196, "grad_norm": 0.08175141364336014, "learning_rate": 1.982592311609639e-06, "loss": 0.6605, "step": 308 }, { "epoch": 0.257661038148843, "grad_norm": 0.08094872534275055, "learning_rate": 1.982425020825419e-06, "loss": 0.7136, "step": 309 }, { "epoch": 0.2584948926412341, "grad_norm": 0.07797659933567047, "learning_rate": 1.9822569371604632e-06, "loss": 0.6975, "step": 310 }, { "epoch": 0.2593287471336252, "grad_norm": 0.08523422479629517, "learning_rate": 1.982088060750426e-06, "loss": 0.6487, "step": 311 }, { "epoch": 0.2601626016260163, "grad_norm": 0.09064790606498718, "learning_rate": 1.981918391731601e-06, "loss": 0.7006, "step": 312 }, { "epoch": 0.2609964561184073, "grad_norm": 0.08136545121669769, "learning_rate": 1.9817479302409225e-06, "loss": 0.67, "step": 313 }, { "epoch": 0.2618303106107984, "grad_norm": 0.07847245037555695, "learning_rate": 1.9815766764159642e-06, "loss": 0.6692, "step": 314 }, { "epoch": 0.2626641651031895, "grad_norm": 0.07690361142158508, "learning_rate": 1.9814046303949387e-06, "loss": 0.5999, "step": 315 }, { "epoch": 0.26349801959558056, "grad_norm": 0.08166715502738953, "learning_rate": 1.9812317923166987e-06, "loss": 0.6666, "step": 316 }, { "epoch": 0.26433187408797165, "grad_norm": 0.08074827492237091, "learning_rate": 1.981058162320735e-06, "loss": 0.657, "step": 317 }, { "epoch": 0.26516572858036275, "grad_norm": 0.08311966806650162, "learning_rate": 1.980883740547179e-06, "loss": 0.6679, "step": 318 }, { "epoch": 0.2659995830727538, "grad_norm": 0.08272409439086914, "learning_rate": 1.9807085271368e-06, "loss": 0.6467, "step": 319 }, { "epoch": 0.2668334375651449, "grad_norm": 0.08190209418535233, "learning_rate": 1.980532522231007e-06, "loss": 0.623, "step": 320 }, { "epoch": 0.267667292057536, "grad_norm": 0.0819978415966034, "learning_rate": 1.980355725971847e-06, "loss": 0.677, "step": 321 }, { "epoch": 0.268501146549927, "grad_norm": 0.07838036119937897, "learning_rate": 1.980178138502006e-06, "loss": 0.6545, "step": 322 }, { "epoch": 0.2693350010423181, "grad_norm": 0.08094768226146698, "learning_rate": 1.979999759964809e-06, "loss": 0.6429, "step": 323 }, { "epoch": 0.2701688555347092, "grad_norm": 0.0796407088637352, "learning_rate": 1.9798205905042184e-06, "loss": 0.7114, "step": 324 }, { "epoch": 0.27100271002710025, "grad_norm": 0.07667295634746552, "learning_rate": 1.9796406302648366e-06, "loss": 0.7059, "step": 325 }, { "epoch": 0.27183656451949134, "grad_norm": 0.07456690818071365, "learning_rate": 1.9794598793919023e-06, "loss": 0.5275, "step": 326 }, { "epoch": 0.27267041901188244, "grad_norm": 0.08084844797849655, "learning_rate": 1.9792783380312935e-06, "loss": 0.6449, "step": 327 }, { "epoch": 0.27350427350427353, "grad_norm": 0.08692745119333267, "learning_rate": 1.9790960063295254e-06, "loss": 0.6831, "step": 328 }, { "epoch": 0.27433812799666457, "grad_norm": 0.0852961540222168, "learning_rate": 1.9789128844337524e-06, "loss": 0.6518, "step": 329 }, { "epoch": 0.27517198248905567, "grad_norm": 0.08140264451503754, "learning_rate": 1.978728972491765e-06, "loss": 0.606, "step": 330 }, { "epoch": 0.27600583698144676, "grad_norm": 0.08339618146419525, "learning_rate": 1.9785442706519927e-06, "loss": 0.6792, "step": 331 }, { "epoch": 0.2768396914738378, "grad_norm": 0.08711510896682739, "learning_rate": 1.9783587790635017e-06, "loss": 0.6903, "step": 332 }, { "epoch": 0.2776735459662289, "grad_norm": 0.08612138777971268, "learning_rate": 1.978172497875995e-06, "loss": 0.6998, "step": 333 }, { "epoch": 0.27850740045862, "grad_norm": 0.07922092080116272, "learning_rate": 1.9779854272398146e-06, "loss": 0.654, "step": 334 }, { "epoch": 0.27934125495101103, "grad_norm": 0.08098642528057098, "learning_rate": 1.9777975673059382e-06, "loss": 0.6706, "step": 335 }, { "epoch": 0.2801751094434021, "grad_norm": 0.0873970165848732, "learning_rate": 1.977608918225981e-06, "loss": 0.6856, "step": 336 }, { "epoch": 0.2810089639357932, "grad_norm": 0.082435242831707, "learning_rate": 1.9774194801521947e-06, "loss": 0.6375, "step": 337 }, { "epoch": 0.28184281842818426, "grad_norm": 0.08199653774499893, "learning_rate": 1.9772292532374687e-06, "loss": 0.6785, "step": 338 }, { "epoch": 0.28267667292057536, "grad_norm": 0.08817990124225616, "learning_rate": 1.9770382376353285e-06, "loss": 0.6642, "step": 339 }, { "epoch": 0.28351052741296645, "grad_norm": 0.08359472453594208, "learning_rate": 1.976846433499935e-06, "loss": 0.6555, "step": 340 }, { "epoch": 0.2843443819053575, "grad_norm": 0.08499454706907272, "learning_rate": 1.976653840986088e-06, "loss": 0.687, "step": 341 }, { "epoch": 0.2851782363977486, "grad_norm": 0.08286130428314209, "learning_rate": 1.976460460249222e-06, "loss": 0.6904, "step": 342 }, { "epoch": 0.2860120908901397, "grad_norm": 0.08244482427835464, "learning_rate": 1.976266291445406e-06, "loss": 0.6546, "step": 343 }, { "epoch": 0.2868459453825308, "grad_norm": 0.07950432598590851, "learning_rate": 1.976071334731349e-06, "loss": 0.6683, "step": 344 }, { "epoch": 0.2876797998749218, "grad_norm": 0.0854944959282875, "learning_rate": 1.975875590264393e-06, "loss": 0.6947, "step": 345 }, { "epoch": 0.2885136543673129, "grad_norm": 0.08143036812543869, "learning_rate": 1.975679058202516e-06, "loss": 0.6422, "step": 346 }, { "epoch": 0.289347508859704, "grad_norm": 0.0797310546040535, "learning_rate": 1.9754817387043325e-06, "loss": 0.6939, "step": 347 }, { "epoch": 0.29018136335209505, "grad_norm": 0.08961952477693558, "learning_rate": 1.9752836319290926e-06, "loss": 0.6848, "step": 348 }, { "epoch": 0.29101521784448614, "grad_norm": 0.08825525641441345, "learning_rate": 1.9750847380366806e-06, "loss": 0.7088, "step": 349 }, { "epoch": 0.29184907233687724, "grad_norm": 0.08601095527410507, "learning_rate": 1.9748850571876168e-06, "loss": 0.6283, "step": 350 }, { "epoch": 0.2926829268292683, "grad_norm": 0.08371152728796005, "learning_rate": 1.9746845895430568e-06, "loss": 0.6755, "step": 351 }, { "epoch": 0.29351678132165937, "grad_norm": 0.08786465227603912, "learning_rate": 1.974483335264791e-06, "loss": 0.6149, "step": 352 }, { "epoch": 0.29435063581405047, "grad_norm": 0.083786241710186, "learning_rate": 1.974281294515245e-06, "loss": 0.7104, "step": 353 }, { "epoch": 0.2951844903064415, "grad_norm": 0.0846792683005333, "learning_rate": 1.9740784674574785e-06, "loss": 0.7032, "step": 354 }, { "epoch": 0.2960183447988326, "grad_norm": 0.08477747440338135, "learning_rate": 1.973874854255186e-06, "loss": 0.6244, "step": 355 }, { "epoch": 0.2968521992912237, "grad_norm": 0.09119998663663864, "learning_rate": 1.9736704550726965e-06, "loss": 0.6373, "step": 356 }, { "epoch": 0.29768605378361473, "grad_norm": 0.08474856615066528, "learning_rate": 1.9734652700749733e-06, "loss": 0.736, "step": 357 }, { "epoch": 0.29851990827600583, "grad_norm": 0.08070901036262512, "learning_rate": 1.9732592994276145e-06, "loss": 0.6442, "step": 358 }, { "epoch": 0.2993537627683969, "grad_norm": 0.08306030929088593, "learning_rate": 1.9730525432968517e-06, "loss": 0.6863, "step": 359 }, { "epoch": 0.300187617260788, "grad_norm": 0.08554356545209885, "learning_rate": 1.97284500184955e-06, "loss": 0.6782, "step": 360 }, { "epoch": 0.30102147175317906, "grad_norm": 0.08350827544927597, "learning_rate": 1.9726366752532096e-06, "loss": 0.6656, "step": 361 }, { "epoch": 0.30185532624557015, "grad_norm": 0.08501608669757843, "learning_rate": 1.972427563675963e-06, "loss": 0.6651, "step": 362 }, { "epoch": 0.30268918073796125, "grad_norm": 0.08401606976985931, "learning_rate": 1.972217667286577e-06, "loss": 0.7153, "step": 363 }, { "epoch": 0.3035230352303523, "grad_norm": 0.08713418990373611, "learning_rate": 1.9720069862544513e-06, "loss": 0.6623, "step": 364 }, { "epoch": 0.3043568897227434, "grad_norm": 0.0871342197060585, "learning_rate": 1.9717955207496197e-06, "loss": 0.6571, "step": 365 }, { "epoch": 0.3051907442151345, "grad_norm": 0.08501847088336945, "learning_rate": 1.9715832709427477e-06, "loss": 0.6714, "step": 366 }, { "epoch": 0.3060245987075255, "grad_norm": 0.07894369214773178, "learning_rate": 1.971370237005136e-06, "loss": 0.6737, "step": 367 }, { "epoch": 0.3068584531999166, "grad_norm": 0.08392394334077835, "learning_rate": 1.9711564191087157e-06, "loss": 0.6807, "step": 368 }, { "epoch": 0.3076923076923077, "grad_norm": 0.08396563678979874, "learning_rate": 1.970941817426052e-06, "loss": 0.6766, "step": 369 }, { "epoch": 0.30852616218469875, "grad_norm": 0.07790801674127579, "learning_rate": 1.9707264321303425e-06, "loss": 0.6293, "step": 370 }, { "epoch": 0.30936001667708984, "grad_norm": 0.08699779212474823, "learning_rate": 1.970510263395417e-06, "loss": 0.681, "step": 371 }, { "epoch": 0.31019387116948094, "grad_norm": 0.08176835626363754, "learning_rate": 1.9702933113957384e-06, "loss": 0.6931, "step": 372 }, { "epoch": 0.311027725661872, "grad_norm": 0.08946527540683746, "learning_rate": 1.9700755763063997e-06, "loss": 0.6062, "step": 373 }, { "epoch": 0.3118615801542631, "grad_norm": 0.08260657638311386, "learning_rate": 1.9698570583031284e-06, "loss": 0.6466, "step": 374 }, { "epoch": 0.31269543464665417, "grad_norm": 0.08951833099126816, "learning_rate": 1.969637757562282e-06, "loss": 0.6458, "step": 375 }, { "epoch": 0.31352928913904526, "grad_norm": 0.08605215698480606, "learning_rate": 1.9694176742608507e-06, "loss": 0.6919, "step": 376 }, { "epoch": 0.3143631436314363, "grad_norm": 0.08389750868082047, "learning_rate": 1.969196808576456e-06, "loss": 0.6941, "step": 377 }, { "epoch": 0.3151969981238274, "grad_norm": 0.08429732173681259, "learning_rate": 1.968975160687351e-06, "loss": 0.622, "step": 378 }, { "epoch": 0.3160308526162185, "grad_norm": 0.08689334988594055, "learning_rate": 1.9687527307724195e-06, "loss": 0.6235, "step": 379 }, { "epoch": 0.31686470710860953, "grad_norm": 0.08494516462087631, "learning_rate": 1.9685295190111774e-06, "loss": 0.6491, "step": 380 }, { "epoch": 0.31769856160100063, "grad_norm": 0.0890749841928482, "learning_rate": 1.9683055255837708e-06, "loss": 0.6791, "step": 381 }, { "epoch": 0.3185324160933917, "grad_norm": 0.07894483208656311, "learning_rate": 1.968080750670977e-06, "loss": 0.7075, "step": 382 }, { "epoch": 0.31936627058578276, "grad_norm": 0.08575093746185303, "learning_rate": 1.9678551944542033e-06, "loss": 0.7027, "step": 383 }, { "epoch": 0.32020012507817386, "grad_norm": 0.08360203355550766, "learning_rate": 1.9676288571154895e-06, "loss": 0.5894, "step": 384 }, { "epoch": 0.32103397957056495, "grad_norm": 0.08341158181428909, "learning_rate": 1.9674017388375036e-06, "loss": 0.658, "step": 385 }, { "epoch": 0.321867834062956, "grad_norm": 0.0844089537858963, "learning_rate": 1.9671738398035446e-06, "loss": 0.6691, "step": 386 }, { "epoch": 0.3227016885553471, "grad_norm": 0.08446256816387177, "learning_rate": 1.9669451601975426e-06, "loss": 0.7211, "step": 387 }, { "epoch": 0.3235355430477382, "grad_norm": 0.08008047193288803, "learning_rate": 1.9667157002040565e-06, "loss": 0.5843, "step": 388 }, { "epoch": 0.3243693975401292, "grad_norm": 0.08821803331375122, "learning_rate": 1.9664854600082754e-06, "loss": 0.6444, "step": 389 }, { "epoch": 0.3252032520325203, "grad_norm": 0.08548033237457275, "learning_rate": 1.966254439796018e-06, "loss": 0.6643, "step": 390 }, { "epoch": 0.3260371065249114, "grad_norm": 0.08261854946613312, "learning_rate": 1.9660226397537322e-06, "loss": 0.6469, "step": 391 }, { "epoch": 0.3268709610173025, "grad_norm": 0.07960055768489838, "learning_rate": 1.965790060068497e-06, "loss": 0.6149, "step": 392 }, { "epoch": 0.32770481550969355, "grad_norm": 0.0834561213850975, "learning_rate": 1.9655567009280177e-06, "loss": 0.6798, "step": 393 }, { "epoch": 0.32853867000208464, "grad_norm": 0.08524155616760254, "learning_rate": 1.9653225625206313e-06, "loss": 0.6999, "step": 394 }, { "epoch": 0.32937252449447574, "grad_norm": 0.08503536134958267, "learning_rate": 1.965087645035302e-06, "loss": 0.6674, "step": 395 }, { "epoch": 0.3302063789868668, "grad_norm": 0.09243112802505493, "learning_rate": 1.964851948661624e-06, "loss": 0.6391, "step": 396 }, { "epoch": 0.3310402334792579, "grad_norm": 0.08015627413988113, "learning_rate": 1.96461547358982e-06, "loss": 0.611, "step": 397 }, { "epoch": 0.33187408797164897, "grad_norm": 0.08681736886501312, "learning_rate": 1.9643782200107395e-06, "loss": 0.6967, "step": 398 }, { "epoch": 0.33270794246404, "grad_norm": 0.08576110005378723, "learning_rate": 1.9641401881158623e-06, "loss": 0.6753, "step": 399 }, { "epoch": 0.3335417969564311, "grad_norm": 0.08432309329509735, "learning_rate": 1.9639013780972954e-06, "loss": 0.6312, "step": 400 }, { "epoch": 0.3335417969564311, "eval_loss": 0.6676867008209229, "eval_runtime": 321.9965, "eval_samples_per_second": 16.718, "eval_steps_per_second": 2.789, "step": 400 }, { "epoch": 0.3343756514488222, "grad_norm": 0.08781838417053223, "learning_rate": 1.9636617901477742e-06, "loss": 0.698, "step": 401 }, { "epoch": 0.33520950594121324, "grad_norm": 0.08723490685224533, "learning_rate": 1.9634214244606613e-06, "loss": 0.6642, "step": 402 }, { "epoch": 0.33604336043360433, "grad_norm": 0.08249269425868988, "learning_rate": 1.963180281229948e-06, "loss": 0.705, "step": 403 }, { "epoch": 0.3368772149259954, "grad_norm": 0.08193965256214142, "learning_rate": 1.962938360650252e-06, "loss": 0.6813, "step": 404 }, { "epoch": 0.33771106941838647, "grad_norm": 0.08125729858875275, "learning_rate": 1.962695662916819e-06, "loss": 0.6571, "step": 405 }, { "epoch": 0.33854492391077756, "grad_norm": 0.08508846163749695, "learning_rate": 1.962452188225522e-06, "loss": 0.6212, "step": 406 }, { "epoch": 0.33937877840316866, "grad_norm": 0.0881478488445282, "learning_rate": 1.962207936772861e-06, "loss": 0.6582, "step": 407 }, { "epoch": 0.34021263289555975, "grad_norm": 0.08948707580566406, "learning_rate": 1.9619629087559622e-06, "loss": 0.6924, "step": 408 }, { "epoch": 0.3410464873879508, "grad_norm": 0.08214866369962692, "learning_rate": 1.9617171043725797e-06, "loss": 0.6266, "step": 409 }, { "epoch": 0.3418803418803419, "grad_norm": 0.09406815469264984, "learning_rate": 1.961470523821093e-06, "loss": 0.6754, "step": 410 }, { "epoch": 0.342714196372733, "grad_norm": 0.08819446712732315, "learning_rate": 1.961223167300509e-06, "loss": 0.6858, "step": 411 }, { "epoch": 0.343548050865124, "grad_norm": 0.0902935117483139, "learning_rate": 1.9609750350104606e-06, "loss": 0.6654, "step": 412 }, { "epoch": 0.3443819053575151, "grad_norm": 0.0852714329957962, "learning_rate": 1.9607261271512065e-06, "loss": 0.6604, "step": 413 }, { "epoch": 0.3452157598499062, "grad_norm": 0.08480080962181091, "learning_rate": 1.9604764439236313e-06, "loss": 0.7252, "step": 414 }, { "epoch": 0.34604961434229725, "grad_norm": 0.08467400074005127, "learning_rate": 1.960225985529246e-06, "loss": 0.6182, "step": 415 }, { "epoch": 0.34688346883468835, "grad_norm": 0.09190120548009872, "learning_rate": 1.9599747521701865e-06, "loss": 0.6605, "step": 416 }, { "epoch": 0.34771732332707944, "grad_norm": 0.08994212001562119, "learning_rate": 1.9597227440492143e-06, "loss": 0.6674, "step": 417 }, { "epoch": 0.3485511778194705, "grad_norm": 0.08530712872743607, "learning_rate": 1.959469961369716e-06, "loss": 0.673, "step": 418 }, { "epoch": 0.3493850323118616, "grad_norm": 0.0814647376537323, "learning_rate": 1.9592164043357046e-06, "loss": 0.6224, "step": 419 }, { "epoch": 0.35021888680425267, "grad_norm": 0.08769946545362473, "learning_rate": 1.9589620731518164e-06, "loss": 0.6446, "step": 420 }, { "epoch": 0.3510527412966437, "grad_norm": 0.0855315700173378, "learning_rate": 1.958706968023313e-06, "loss": 0.6665, "step": 421 }, { "epoch": 0.3518865957890348, "grad_norm": 0.08764835447072983, "learning_rate": 1.958451089156082e-06, "loss": 0.6888, "step": 422 }, { "epoch": 0.3527204502814259, "grad_norm": 0.08674878627061844, "learning_rate": 1.9581944367566324e-06, "loss": 0.6894, "step": 423 }, { "epoch": 0.353554304773817, "grad_norm": 0.07654520124197006, "learning_rate": 1.9579370110321005e-06, "loss": 0.5972, "step": 424 }, { "epoch": 0.35438815926620804, "grad_norm": 0.08671235293149948, "learning_rate": 1.9576788121902454e-06, "loss": 0.6705, "step": 425 }, { "epoch": 0.35522201375859913, "grad_norm": 0.08799296617507935, "learning_rate": 1.9574198404394505e-06, "loss": 0.66, "step": 426 }, { "epoch": 0.3560558682509902, "grad_norm": 0.08908937871456146, "learning_rate": 1.9571600959887224e-06, "loss": 0.6687, "step": 427 }, { "epoch": 0.35688972274338127, "grad_norm": 0.08397683501243591, "learning_rate": 1.9568995790476915e-06, "loss": 0.5915, "step": 428 }, { "epoch": 0.35772357723577236, "grad_norm": 0.08805875480175018, "learning_rate": 1.9566382898266126e-06, "loss": 0.6706, "step": 429 }, { "epoch": 0.35855743172816346, "grad_norm": 0.08281008154153824, "learning_rate": 1.956376228536363e-06, "loss": 0.5692, "step": 430 }, { "epoch": 0.3593912862205545, "grad_norm": 0.08514556288719177, "learning_rate": 1.9561133953884427e-06, "loss": 0.5938, "step": 431 }, { "epoch": 0.3602251407129456, "grad_norm": 0.09112891554832458, "learning_rate": 1.955849790594975e-06, "loss": 0.7044, "step": 432 }, { "epoch": 0.3610589952053367, "grad_norm": 0.08578234165906906, "learning_rate": 1.9555854143687064e-06, "loss": 0.6901, "step": 433 }, { "epoch": 0.3618928496977277, "grad_norm": 0.08405105024576187, "learning_rate": 1.955320266923006e-06, "loss": 0.6307, "step": 434 }, { "epoch": 0.3627267041901188, "grad_norm": 0.085452601313591, "learning_rate": 1.9550543484718644e-06, "loss": 0.6147, "step": 435 }, { "epoch": 0.3635605586825099, "grad_norm": 0.0871538370847702, "learning_rate": 1.9547876592298955e-06, "loss": 0.6415, "step": 436 }, { "epoch": 0.36439441317490096, "grad_norm": 0.08855723589658737, "learning_rate": 1.954520199412334e-06, "loss": 0.7026, "step": 437 }, { "epoch": 0.36522826766729205, "grad_norm": 0.08221881091594696, "learning_rate": 1.9542519692350387e-06, "loss": 0.6813, "step": 438 }, { "epoch": 0.36606212215968315, "grad_norm": 0.08595414459705353, "learning_rate": 1.9539829689144876e-06, "loss": 0.6809, "step": 439 }, { "epoch": 0.36689597665207424, "grad_norm": 0.08485814183950424, "learning_rate": 1.9537131986677816e-06, "loss": 0.6352, "step": 440 }, { "epoch": 0.3677298311444653, "grad_norm": 0.08620929718017578, "learning_rate": 1.9534426587126433e-06, "loss": 0.7072, "step": 441 }, { "epoch": 0.3685636856368564, "grad_norm": 0.0919066071510315, "learning_rate": 1.9531713492674154e-06, "loss": 0.6809, "step": 442 }, { "epoch": 0.36939754012924747, "grad_norm": 0.08376951515674591, "learning_rate": 1.9528992705510628e-06, "loss": 0.6486, "step": 443 }, { "epoch": 0.3702313946216385, "grad_norm": 0.088445745408535, "learning_rate": 1.95262642278317e-06, "loss": 0.709, "step": 444 }, { "epoch": 0.3710652491140296, "grad_norm": 0.08183503895998001, "learning_rate": 1.952352806183943e-06, "loss": 0.6277, "step": 445 }, { "epoch": 0.3718991036064207, "grad_norm": 0.08638439327478409, "learning_rate": 1.9520784209742094e-06, "loss": 0.6665, "step": 446 }, { "epoch": 0.37273295809881174, "grad_norm": 0.08837208896875381, "learning_rate": 1.951803267375414e-06, "loss": 0.6103, "step": 447 }, { "epoch": 0.37356681259120283, "grad_norm": 0.08010973036289215, "learning_rate": 1.9515273456096247e-06, "loss": 0.662, "step": 448 }, { "epoch": 0.37440066708359393, "grad_norm": 0.08395659178495407, "learning_rate": 1.9512506558995283e-06, "loss": 0.687, "step": 449 }, { "epoch": 0.37523452157598497, "grad_norm": 0.08506524562835693, "learning_rate": 1.950973198468431e-06, "loss": 0.6695, "step": 450 }, { "epoch": 0.37606837606837606, "grad_norm": 0.08150207251310349, "learning_rate": 1.950694973540259e-06, "loss": 0.6258, "step": 451 }, { "epoch": 0.37690223056076716, "grad_norm": 0.08509568125009537, "learning_rate": 1.9504159813395576e-06, "loss": 0.5774, "step": 452 }, { "epoch": 0.3777360850531582, "grad_norm": 0.08139500021934509, "learning_rate": 1.950136222091492e-06, "loss": 0.6652, "step": 453 }, { "epoch": 0.3785699395455493, "grad_norm": 0.08891302347183228, "learning_rate": 1.949855696021846e-06, "loss": 0.6602, "step": 454 }, { "epoch": 0.3794037940379404, "grad_norm": 0.08729292452335358, "learning_rate": 1.949574403357022e-06, "loss": 0.6232, "step": 455 }, { "epoch": 0.3802376485303315, "grad_norm": 0.0855722650885582, "learning_rate": 1.9492923443240425e-06, "loss": 0.7249, "step": 456 }, { "epoch": 0.3810715030227225, "grad_norm": 0.09225167334079742, "learning_rate": 1.949009519150546e-06, "loss": 0.7061, "step": 457 }, { "epoch": 0.3819053575151136, "grad_norm": 0.08833880722522736, "learning_rate": 1.9487259280647918e-06, "loss": 0.6683, "step": 458 }, { "epoch": 0.3827392120075047, "grad_norm": 0.08648336678743362, "learning_rate": 1.948441571295656e-06, "loss": 0.6716, "step": 459 }, { "epoch": 0.38357306649989575, "grad_norm": 0.08678124845027924, "learning_rate": 1.9481564490726324e-06, "loss": 0.6869, "step": 460 }, { "epoch": 0.38440692099228685, "grad_norm": 0.0866696834564209, "learning_rate": 1.9478705616258344e-06, "loss": 0.6346, "step": 461 }, { "epoch": 0.38524077548467794, "grad_norm": 0.09009408950805664, "learning_rate": 1.9475839091859904e-06, "loss": 0.6507, "step": 462 }, { "epoch": 0.386074629977069, "grad_norm": 0.09393912553787231, "learning_rate": 1.9472964919844484e-06, "loss": 0.6624, "step": 463 }, { "epoch": 0.3869084844694601, "grad_norm": 0.09663153439760208, "learning_rate": 1.9470083102531722e-06, "loss": 0.6378, "step": 464 }, { "epoch": 0.3877423389618512, "grad_norm": 0.08551555126905441, "learning_rate": 1.9467193642247435e-06, "loss": 0.6717, "step": 465 }, { "epoch": 0.3885761934542422, "grad_norm": 0.08986209332942963, "learning_rate": 1.9464296541323602e-06, "loss": 0.7313, "step": 466 }, { "epoch": 0.3894100479466333, "grad_norm": 0.08851300179958344, "learning_rate": 1.9461391802098373e-06, "loss": 0.6783, "step": 467 }, { "epoch": 0.3902439024390244, "grad_norm": 0.09088604897260666, "learning_rate": 1.9458479426916068e-06, "loss": 0.5952, "step": 468 }, { "epoch": 0.39107775693141544, "grad_norm": 0.08506208658218384, "learning_rate": 1.9455559418127144e-06, "loss": 0.6912, "step": 469 }, { "epoch": 0.39191161142380654, "grad_norm": 0.087041936814785, "learning_rate": 1.945263177808826e-06, "loss": 0.6722, "step": 470 }, { "epoch": 0.39274546591619763, "grad_norm": 0.09510099142789841, "learning_rate": 1.9449696509162193e-06, "loss": 0.697, "step": 471 }, { "epoch": 0.39357932040858873, "grad_norm": 0.0864386260509491, "learning_rate": 1.944675361371791e-06, "loss": 0.6882, "step": 472 }, { "epoch": 0.39441317490097977, "grad_norm": 0.08964356780052185, "learning_rate": 1.944380309413051e-06, "loss": 0.6403, "step": 473 }, { "epoch": 0.39524702939337086, "grad_norm": 0.09786627441644669, "learning_rate": 1.9440844952781253e-06, "loss": 0.7037, "step": 474 }, { "epoch": 0.39608088388576196, "grad_norm": 0.08709276467561722, "learning_rate": 1.9437879192057556e-06, "loss": 0.6619, "step": 475 }, { "epoch": 0.396914738378153, "grad_norm": 0.08677015453577042, "learning_rate": 1.9434905814352976e-06, "loss": 0.6609, "step": 476 }, { "epoch": 0.3977485928705441, "grad_norm": 0.08886487782001495, "learning_rate": 1.9431924822067225e-06, "loss": 0.6045, "step": 477 }, { "epoch": 0.3985824473629352, "grad_norm": 0.08575446903705597, "learning_rate": 1.942893621760616e-06, "loss": 0.6886, "step": 478 }, { "epoch": 0.39941630185532623, "grad_norm": 0.08386967331171036, "learning_rate": 1.9425940003381767e-06, "loss": 0.6721, "step": 479 }, { "epoch": 0.4002501563477173, "grad_norm": 0.08702324330806732, "learning_rate": 1.9422936181812197e-06, "loss": 0.6959, "step": 480 }, { "epoch": 0.4010840108401084, "grad_norm": 0.09090246260166168, "learning_rate": 1.9419924755321727e-06, "loss": 0.7094, "step": 481 }, { "epoch": 0.40191786533249946, "grad_norm": 0.09213659167289734, "learning_rate": 1.9416905726340767e-06, "loss": 0.6446, "step": 482 }, { "epoch": 0.40275171982489055, "grad_norm": 0.09240783751010895, "learning_rate": 1.9413879097305878e-06, "loss": 0.6401, "step": 483 }, { "epoch": 0.40358557431728165, "grad_norm": 0.09058364480733871, "learning_rate": 1.941084487065974e-06, "loss": 0.7035, "step": 484 }, { "epoch": 0.4044194288096727, "grad_norm": 0.0906047448515892, "learning_rate": 1.940780304885117e-06, "loss": 0.6519, "step": 485 }, { "epoch": 0.4052532833020638, "grad_norm": 0.09101078659296036, "learning_rate": 1.940475363433512e-06, "loss": 0.6934, "step": 486 }, { "epoch": 0.4060871377944549, "grad_norm": 0.08729363977909088, "learning_rate": 1.940169662957266e-06, "loss": 0.5822, "step": 487 }, { "epoch": 0.406920992286846, "grad_norm": 0.0946895033121109, "learning_rate": 1.9398632037031003e-06, "loss": 0.6534, "step": 488 }, { "epoch": 0.407754846779237, "grad_norm": 0.09069128334522247, "learning_rate": 1.9395559859183463e-06, "loss": 0.6706, "step": 489 }, { "epoch": 0.4085887012716281, "grad_norm": 0.08318338543176651, "learning_rate": 1.9392480098509483e-06, "loss": 0.6368, "step": 490 }, { "epoch": 0.4094225557640192, "grad_norm": 0.0826449766755104, "learning_rate": 1.9389392757494645e-06, "loss": 0.667, "step": 491 }, { "epoch": 0.41025641025641024, "grad_norm": 0.09276958554983139, "learning_rate": 1.938629783863062e-06, "loss": 0.6709, "step": 492 }, { "epoch": 0.41109026474880134, "grad_norm": 0.08281219005584717, "learning_rate": 1.9383195344415215e-06, "loss": 0.584, "step": 493 }, { "epoch": 0.41192411924119243, "grad_norm": 0.08632051944732666, "learning_rate": 1.938008527735234e-06, "loss": 0.6743, "step": 494 }, { "epoch": 0.41275797373358347, "grad_norm": 0.08970591425895691, "learning_rate": 1.937696763995202e-06, "loss": 0.6611, "step": 495 }, { "epoch": 0.41359182822597457, "grad_norm": 0.0902903825044632, "learning_rate": 1.93738424347304e-06, "loss": 0.6455, "step": 496 }, { "epoch": 0.41442568271836566, "grad_norm": 0.08964331448078156, "learning_rate": 1.9370709664209715e-06, "loss": 0.6561, "step": 497 }, { "epoch": 0.4152595372107567, "grad_norm": 0.08478616923093796, "learning_rate": 1.9367569330918317e-06, "loss": 0.595, "step": 498 }, { "epoch": 0.4160933917031478, "grad_norm": 0.09221872687339783, "learning_rate": 1.9364421437390658e-06, "loss": 0.6363, "step": 499 }, { "epoch": 0.4169272461955389, "grad_norm": 0.09467694163322449, "learning_rate": 1.936126598616729e-06, "loss": 0.6696, "step": 500 }, { "epoch": 0.41776110068792993, "grad_norm": 0.09464087337255478, "learning_rate": 1.935810297979487e-06, "loss": 0.6224, "step": 501 }, { "epoch": 0.418594955180321, "grad_norm": 0.08994987607002258, "learning_rate": 1.9354932420826147e-06, "loss": 0.6383, "step": 502 }, { "epoch": 0.4194288096727121, "grad_norm": 0.0921832025051117, "learning_rate": 1.9351754311819974e-06, "loss": 0.685, "step": 503 }, { "epoch": 0.4202626641651032, "grad_norm": 0.08833765983581543, "learning_rate": 1.934856865534129e-06, "loss": 0.6233, "step": 504 }, { "epoch": 0.42109651865749426, "grad_norm": 0.08863073587417603, "learning_rate": 1.934537545396111e-06, "loss": 0.6756, "step": 505 }, { "epoch": 0.42193037314988535, "grad_norm": 0.08703204244375229, "learning_rate": 1.934217471025658e-06, "loss": 0.6372, "step": 506 }, { "epoch": 0.42276422764227645, "grad_norm": 0.09010464698076248, "learning_rate": 1.9338966426810887e-06, "loss": 0.6677, "step": 507 }, { "epoch": 0.4235980821346675, "grad_norm": 0.09014829248189926, "learning_rate": 1.9335750606213336e-06, "loss": 0.6839, "step": 508 }, { "epoch": 0.4244319366270586, "grad_norm": 0.08871164172887802, "learning_rate": 1.93325272510593e-06, "loss": 0.7012, "step": 509 }, { "epoch": 0.4252657911194497, "grad_norm": 0.08788250386714935, "learning_rate": 1.9329296363950236e-06, "loss": 0.6169, "step": 510 }, { "epoch": 0.4260996456118407, "grad_norm": 0.08845999836921692, "learning_rate": 1.9326057947493675e-06, "loss": 0.6424, "step": 511 }, { "epoch": 0.4269335001042318, "grad_norm": 0.0858997106552124, "learning_rate": 1.9322812004303238e-06, "loss": 0.6389, "step": 512 }, { "epoch": 0.4277673545966229, "grad_norm": 0.08850055187940598, "learning_rate": 1.9319558536998603e-06, "loss": 0.6975, "step": 513 }, { "epoch": 0.42860120908901395, "grad_norm": 0.09360900521278381, "learning_rate": 1.9316297548205534e-06, "loss": 0.686, "step": 514 }, { "epoch": 0.42943506358140504, "grad_norm": 0.08473866432905197, "learning_rate": 1.931302904055586e-06, "loss": 0.6517, "step": 515 }, { "epoch": 0.43026891807379614, "grad_norm": 0.08656331151723862, "learning_rate": 1.9309753016687477e-06, "loss": 0.7003, "step": 516 }, { "epoch": 0.4311027725661872, "grad_norm": 0.08407936990261078, "learning_rate": 1.9306469479244347e-06, "loss": 0.6611, "step": 517 }, { "epoch": 0.43193662705857827, "grad_norm": 0.09026416391134262, "learning_rate": 1.930317843087651e-06, "loss": 0.6749, "step": 518 }, { "epoch": 0.43277048155096937, "grad_norm": 0.10031198710203171, "learning_rate": 1.929987987424004e-06, "loss": 0.6403, "step": 519 }, { "epoch": 0.43360433604336046, "grad_norm": 0.09739411622285843, "learning_rate": 1.9296573811997086e-06, "loss": 0.6893, "step": 520 }, { "epoch": 0.4344381905357515, "grad_norm": 0.08792299032211304, "learning_rate": 1.929326024681587e-06, "loss": 0.6268, "step": 521 }, { "epoch": 0.4352720450281426, "grad_norm": 0.0874318853020668, "learning_rate": 1.9289939181370646e-06, "loss": 0.6659, "step": 522 }, { "epoch": 0.4361058995205337, "grad_norm": 0.09086845815181732, "learning_rate": 1.9286610618341724e-06, "loss": 0.6399, "step": 523 }, { "epoch": 0.43693975401292473, "grad_norm": 0.09616074711084366, "learning_rate": 1.9283274560415477e-06, "loss": 0.6325, "step": 524 }, { "epoch": 0.4377736085053158, "grad_norm": 0.08674857765436172, "learning_rate": 1.9279931010284322e-06, "loss": 0.6487, "step": 525 }, { "epoch": 0.4386074629977069, "grad_norm": 0.09142550826072693, "learning_rate": 1.927657997064671e-06, "loss": 0.6509, "step": 526 }, { "epoch": 0.43944131749009796, "grad_norm": 0.0838690921664238, "learning_rate": 1.9273221444207158e-06, "loss": 0.6087, "step": 527 }, { "epoch": 0.44027517198248906, "grad_norm": 0.0922529399394989, "learning_rate": 1.9269855433676213e-06, "loss": 0.6815, "step": 528 }, { "epoch": 0.44110902647488015, "grad_norm": 0.08945809304714203, "learning_rate": 1.926648194177046e-06, "loss": 0.623, "step": 529 }, { "epoch": 0.4419428809672712, "grad_norm": 0.09663840383291245, "learning_rate": 1.926310097121253e-06, "loss": 0.6244, "step": 530 }, { "epoch": 0.4427767354596623, "grad_norm": 0.0872710794210434, "learning_rate": 1.9259712524731083e-06, "loss": 0.6877, "step": 531 }, { "epoch": 0.4436105899520534, "grad_norm": 0.0860145092010498, "learning_rate": 1.9256316605060813e-06, "loss": 0.6458, "step": 532 }, { "epoch": 0.4444444444444444, "grad_norm": 0.08691411465406418, "learning_rate": 1.9252913214942456e-06, "loss": 0.6286, "step": 533 }, { "epoch": 0.4452782989368355, "grad_norm": 0.0899488553404808, "learning_rate": 1.9249502357122757e-06, "loss": 0.6154, "step": 534 }, { "epoch": 0.4461121534292266, "grad_norm": 0.08670518547296524, "learning_rate": 1.924608403435451e-06, "loss": 0.6189, "step": 535 }, { "epoch": 0.4469460079216177, "grad_norm": 0.08927959948778152, "learning_rate": 1.9242658249396514e-06, "loss": 0.6591, "step": 536 }, { "epoch": 0.44777986241400874, "grad_norm": 0.09092804044485092, "learning_rate": 1.9239225005013607e-06, "loss": 0.6756, "step": 537 }, { "epoch": 0.44861371690639984, "grad_norm": 0.08876843005418777, "learning_rate": 1.923578430397664e-06, "loss": 0.6221, "step": 538 }, { "epoch": 0.44944757139879093, "grad_norm": 0.09023214131593704, "learning_rate": 1.923233614906248e-06, "loss": 0.6478, "step": 539 }, { "epoch": 0.450281425891182, "grad_norm": 0.09291035681962967, "learning_rate": 1.9228880543054006e-06, "loss": 0.6568, "step": 540 }, { "epoch": 0.45111528038357307, "grad_norm": 0.09169748425483704, "learning_rate": 1.9225417488740127e-06, "loss": 0.6966, "step": 541 }, { "epoch": 0.45194913487596416, "grad_norm": 0.0960812196135521, "learning_rate": 1.922194698891574e-06, "loss": 0.6426, "step": 542 }, { "epoch": 0.4527829893683552, "grad_norm": 0.09381034225225449, "learning_rate": 1.9218469046381778e-06, "loss": 0.7138, "step": 543 }, { "epoch": 0.4536168438607463, "grad_norm": 0.09605712443590164, "learning_rate": 1.9214983663945157e-06, "loss": 0.6677, "step": 544 }, { "epoch": 0.4544506983531374, "grad_norm": 0.09246627241373062, "learning_rate": 1.921149084441881e-06, "loss": 0.5995, "step": 545 }, { "epoch": 0.45528455284552843, "grad_norm": 0.09615227580070496, "learning_rate": 1.9207990590621663e-06, "loss": 0.6543, "step": 546 }, { "epoch": 0.45611840733791953, "grad_norm": 0.09620420634746552, "learning_rate": 1.9204482905378654e-06, "loss": 0.5895, "step": 547 }, { "epoch": 0.4569522618303106, "grad_norm": 0.08878765255212784, "learning_rate": 1.920096779152071e-06, "loss": 0.599, "step": 548 }, { "epoch": 0.45778611632270166, "grad_norm": 0.09067462384700775, "learning_rate": 1.9197445251884763e-06, "loss": 0.6897, "step": 549 }, { "epoch": 0.45861997081509276, "grad_norm": 0.09180627018213272, "learning_rate": 1.9193915289313724e-06, "loss": 0.6647, "step": 550 }, { "epoch": 0.45945382530748385, "grad_norm": 0.08824802190065384, "learning_rate": 1.91903779066565e-06, "loss": 0.6376, "step": 551 }, { "epoch": 0.46028767979987495, "grad_norm": 0.08835819363594055, "learning_rate": 1.918683310676799e-06, "loss": 0.6922, "step": 552 }, { "epoch": 0.461121534292266, "grad_norm": 0.09096652269363403, "learning_rate": 1.918328089250908e-06, "loss": 0.7169, "step": 553 }, { "epoch": 0.4619553887846571, "grad_norm": 0.09415201842784882, "learning_rate": 1.917972126674664e-06, "loss": 0.659, "step": 554 }, { "epoch": 0.4627892432770482, "grad_norm": 0.08964891731739044, "learning_rate": 1.9176154232353513e-06, "loss": 0.6869, "step": 555 }, { "epoch": 0.4636230977694392, "grad_norm": 0.08688930422067642, "learning_rate": 1.917257979220853e-06, "loss": 0.627, "step": 556 }, { "epoch": 0.4644569522618303, "grad_norm": 0.09115028381347656, "learning_rate": 1.9168997949196496e-06, "loss": 0.64, "step": 557 }, { "epoch": 0.4652908067542214, "grad_norm": 0.09430664777755737, "learning_rate": 1.9165408706208184e-06, "loss": 0.6646, "step": 558 }, { "epoch": 0.46612466124661245, "grad_norm": 0.09062688797712326, "learning_rate": 1.916181206614036e-06, "loss": 0.6392, "step": 559 }, { "epoch": 0.46695851573900354, "grad_norm": 0.09099205583333969, "learning_rate": 1.9158208031895737e-06, "loss": 0.7133, "step": 560 }, { "epoch": 0.46779237023139464, "grad_norm": 0.09944413602352142, "learning_rate": 1.9154596606383002e-06, "loss": 0.6958, "step": 561 }, { "epoch": 0.4686262247237857, "grad_norm": 0.09643740206956863, "learning_rate": 1.9150977792516816e-06, "loss": 0.6813, "step": 562 }, { "epoch": 0.4694600792161768, "grad_norm": 0.09532240033149719, "learning_rate": 1.914735159321779e-06, "loss": 0.6528, "step": 563 }, { "epoch": 0.47029393370856787, "grad_norm": 0.08927876502275467, "learning_rate": 1.914371801141251e-06, "loss": 0.639, "step": 564 }, { "epoch": 0.4711277882009589, "grad_norm": 0.09561195224523544, "learning_rate": 1.914007705003351e-06, "loss": 0.6795, "step": 565 }, { "epoch": 0.47196164269335, "grad_norm": 0.0882195308804512, "learning_rate": 1.9136428712019275e-06, "loss": 0.6437, "step": 566 }, { "epoch": 0.4727954971857411, "grad_norm": 0.08940508961677551, "learning_rate": 1.913277300031426e-06, "loss": 0.6485, "step": 567 }, { "epoch": 0.4736293516781322, "grad_norm": 0.0933694839477539, "learning_rate": 1.912910991786886e-06, "loss": 0.6876, "step": 568 }, { "epoch": 0.47446320617052323, "grad_norm": 0.09212841838598251, "learning_rate": 1.9125439467639414e-06, "loss": 0.5669, "step": 569 }, { "epoch": 0.47529706066291433, "grad_norm": 0.09037458896636963, "learning_rate": 1.9121761652588214e-06, "loss": 0.6489, "step": 570 }, { "epoch": 0.4761309151553054, "grad_norm": 0.08847213536500931, "learning_rate": 1.9118076475683506e-06, "loss": 0.6456, "step": 571 }, { "epoch": 0.47696476964769646, "grad_norm": 0.08496098965406418, "learning_rate": 1.9114383939899455e-06, "loss": 0.6174, "step": 572 }, { "epoch": 0.47779862414008756, "grad_norm": 0.09430284053087234, "learning_rate": 1.911068404821618e-06, "loss": 0.7102, "step": 573 }, { "epoch": 0.47863247863247865, "grad_norm": 0.09211906790733337, "learning_rate": 1.910697680361974e-06, "loss": 0.6161, "step": 574 }, { "epoch": 0.4794663331248697, "grad_norm": 0.09092090278863907, "learning_rate": 1.910326220910211e-06, "loss": 0.6595, "step": 575 }, { "epoch": 0.4803001876172608, "grad_norm": 0.09773701429367065, "learning_rate": 1.909954026766122e-06, "loss": 0.6303, "step": 576 }, { "epoch": 0.4811340421096519, "grad_norm": 0.09343478083610535, "learning_rate": 1.909581098230091e-06, "loss": 0.6578, "step": 577 }, { "epoch": 0.4819678966020429, "grad_norm": 0.09686136245727539, "learning_rate": 1.9092074356030966e-06, "loss": 0.6504, "step": 578 }, { "epoch": 0.482801751094434, "grad_norm": 0.09207551926374435, "learning_rate": 1.9088330391867076e-06, "loss": 0.6092, "step": 579 }, { "epoch": 0.4836356055868251, "grad_norm": 0.093394935131073, "learning_rate": 1.908457909283087e-06, "loss": 0.645, "step": 580 }, { "epoch": 0.48446946007921615, "grad_norm": 0.10280878096818924, "learning_rate": 1.9080820461949886e-06, "loss": 0.7288, "step": 581 }, { "epoch": 0.48530331457160725, "grad_norm": 0.09714383631944656, "learning_rate": 1.9077054502257585e-06, "loss": 0.6213, "step": 582 }, { "epoch": 0.48613716906399834, "grad_norm": 0.09004565328359604, "learning_rate": 1.9073281216793337e-06, "loss": 0.6459, "step": 583 }, { "epoch": 0.48697102355638944, "grad_norm": 0.12440581619739532, "learning_rate": 1.906950060860243e-06, "loss": 0.6823, "step": 584 }, { "epoch": 0.4878048780487805, "grad_norm": 0.09314379096031189, "learning_rate": 1.9065712680736066e-06, "loss": 0.6475, "step": 585 }, { "epoch": 0.48863873254117157, "grad_norm": 0.09512604027986526, "learning_rate": 1.906191743625134e-06, "loss": 0.5932, "step": 586 }, { "epoch": 0.48947258703356267, "grad_norm": 0.08977750688791275, "learning_rate": 1.9058114878211265e-06, "loss": 0.6546, "step": 587 }, { "epoch": 0.4903064415259537, "grad_norm": 0.09166161715984344, "learning_rate": 1.905430500968475e-06, "loss": 0.6772, "step": 588 }, { "epoch": 0.4911402960183448, "grad_norm": 0.10014794021844864, "learning_rate": 1.9050487833746609e-06, "loss": 0.6945, "step": 589 }, { "epoch": 0.4919741505107359, "grad_norm": 0.09376100450754166, "learning_rate": 1.9046663353477548e-06, "loss": 0.6581, "step": 590 }, { "epoch": 0.49280800500312694, "grad_norm": 0.09505487233400345, "learning_rate": 1.9042831571964171e-06, "loss": 0.6785, "step": 591 }, { "epoch": 0.49364185949551803, "grad_norm": 0.09371201694011688, "learning_rate": 1.9038992492298975e-06, "loss": 0.6331, "step": 592 }, { "epoch": 0.4944757139879091, "grad_norm": 0.09442009776830673, "learning_rate": 1.9035146117580348e-06, "loss": 0.5728, "step": 593 }, { "epoch": 0.49530956848030017, "grad_norm": 0.09772226214408875, "learning_rate": 1.903129245091256e-06, "loss": 0.6635, "step": 594 }, { "epoch": 0.49614342297269126, "grad_norm": 0.09484317898750305, "learning_rate": 1.9027431495405776e-06, "loss": 0.6327, "step": 595 }, { "epoch": 0.49697727746508236, "grad_norm": 0.09658980369567871, "learning_rate": 1.9023563254176032e-06, "loss": 0.6608, "step": 596 }, { "epoch": 0.4978111319574734, "grad_norm": 0.09717069566249847, "learning_rate": 1.9019687730345251e-06, "loss": 0.6605, "step": 597 }, { "epoch": 0.4986449864498645, "grad_norm": 0.09019803255796432, "learning_rate": 1.9015804927041233e-06, "loss": 0.6867, "step": 598 }, { "epoch": 0.4994788409422556, "grad_norm": 0.09377589821815491, "learning_rate": 1.9011914847397653e-06, "loss": 0.6419, "step": 599 }, { "epoch": 0.5003126954346466, "grad_norm": 0.09151824563741684, "learning_rate": 1.9008017494554055e-06, "loss": 0.7184, "step": 600 }, { "epoch": 0.5011465499270378, "grad_norm": 0.0963352620601654, "learning_rate": 1.900411287165586e-06, "loss": 0.6897, "step": 601 }, { "epoch": 0.5019804044194288, "grad_norm": 0.08974531292915344, "learning_rate": 1.9000200981854347e-06, "loss": 0.5847, "step": 602 }, { "epoch": 0.5028142589118199, "grad_norm": 0.09220883995294571, "learning_rate": 1.8996281828306665e-06, "loss": 0.6652, "step": 603 }, { "epoch": 0.503648113404211, "grad_norm": 0.09491855651140213, "learning_rate": 1.899235541417583e-06, "loss": 0.595, "step": 604 }, { "epoch": 0.504481967896602, "grad_norm": 0.09616294503211975, "learning_rate": 1.898842174263071e-06, "loss": 0.6378, "step": 605 }, { "epoch": 0.5053158223889931, "grad_norm": 0.08942185342311859, "learning_rate": 1.8984480816846035e-06, "loss": 0.6459, "step": 606 }, { "epoch": 0.5061496768813842, "grad_norm": 0.09059803187847137, "learning_rate": 1.8980532640002388e-06, "loss": 0.6019, "step": 607 }, { "epoch": 0.5069835313737753, "grad_norm": 0.09913185238838196, "learning_rate": 1.89765772152862e-06, "loss": 0.669, "step": 608 }, { "epoch": 0.5078173858661663, "grad_norm": 0.10223888605833054, "learning_rate": 1.8972614545889756e-06, "loss": 0.6454, "step": 609 }, { "epoch": 0.5086512403585575, "grad_norm": 0.09271295368671417, "learning_rate": 1.896864463501119e-06, "loss": 0.6467, "step": 610 }, { "epoch": 0.5094850948509485, "grad_norm": 0.08862827718257904, "learning_rate": 1.896466748585448e-06, "loss": 0.6343, "step": 611 }, { "epoch": 0.5103189493433395, "grad_norm": 0.09303930401802063, "learning_rate": 1.8960683101629435e-06, "loss": 0.6389, "step": 612 }, { "epoch": 0.5111528038357307, "grad_norm": 0.09331949055194855, "learning_rate": 1.8956691485551719e-06, "loss": 0.6507, "step": 613 }, { "epoch": 0.5119866583281217, "grad_norm": 0.0993635281920433, "learning_rate": 1.8952692640842825e-06, "loss": 0.63, "step": 614 }, { "epoch": 0.5128205128205128, "grad_norm": 0.09348301589488983, "learning_rate": 1.8948686570730074e-06, "loss": 0.676, "step": 615 }, { "epoch": 0.5136543673129039, "grad_norm": 0.0950765460729599, "learning_rate": 1.894467327844663e-06, "loss": 0.7035, "step": 616 }, { "epoch": 0.514488221805295, "grad_norm": 0.08676007390022278, "learning_rate": 1.8940652767231483e-06, "loss": 0.6067, "step": 617 }, { "epoch": 0.515322076297686, "grad_norm": 0.09285329282283783, "learning_rate": 1.8936625040329438e-06, "loss": 0.6441, "step": 618 }, { "epoch": 0.5161559307900772, "grad_norm": 0.09361077845096588, "learning_rate": 1.8932590100991136e-06, "loss": 0.6767, "step": 619 }, { "epoch": 0.5169897852824682, "grad_norm": 0.09173277020454407, "learning_rate": 1.8928547952473033e-06, "loss": 0.6395, "step": 620 }, { "epoch": 0.5178236397748592, "grad_norm": 0.09198316186666489, "learning_rate": 1.892449859803741e-06, "loss": 0.7006, "step": 621 }, { "epoch": 0.5186574942672504, "grad_norm": 0.09348779171705246, "learning_rate": 1.8920442040952356e-06, "loss": 0.6641, "step": 622 }, { "epoch": 0.5194913487596414, "grad_norm": 0.08798382431268692, "learning_rate": 1.8916378284491774e-06, "loss": 0.6478, "step": 623 }, { "epoch": 0.5203252032520326, "grad_norm": 0.09442038089036942, "learning_rate": 1.8912307331935383e-06, "loss": 0.6201, "step": 624 }, { "epoch": 0.5211590577444236, "grad_norm": 0.09519796818494797, "learning_rate": 1.8908229186568705e-06, "loss": 0.5917, "step": 625 }, { "epoch": 0.5219929122368147, "grad_norm": 0.09593921154737473, "learning_rate": 1.8904143851683062e-06, "loss": 0.6859, "step": 626 }, { "epoch": 0.5228267667292058, "grad_norm": 0.09510339796543121, "learning_rate": 1.890005133057559e-06, "loss": 0.6679, "step": 627 }, { "epoch": 0.5236606212215968, "grad_norm": 0.09473798424005508, "learning_rate": 1.8895951626549222e-06, "loss": 0.6108, "step": 628 }, { "epoch": 0.5244944757139879, "grad_norm": 0.10544616729021072, "learning_rate": 1.8891844742912678e-06, "loss": 0.6007, "step": 629 }, { "epoch": 0.525328330206379, "grad_norm": 0.09652596712112427, "learning_rate": 1.8887730682980482e-06, "loss": 0.6858, "step": 630 }, { "epoch": 0.5261621846987701, "grad_norm": 0.09385030716657639, "learning_rate": 1.888360945007295e-06, "loss": 0.692, "step": 631 }, { "epoch": 0.5269960391911611, "grad_norm": 0.10086268931627274, "learning_rate": 1.887948104751618e-06, "loss": 0.7302, "step": 632 }, { "epoch": 0.5278298936835523, "grad_norm": 0.09164178371429443, "learning_rate": 1.8875345478642065e-06, "loss": 0.5793, "step": 633 }, { "epoch": 0.5286637481759433, "grad_norm": 0.08686941117048264, "learning_rate": 1.8871202746788274e-06, "loss": 0.6304, "step": 634 }, { "epoch": 0.5294976026683343, "grad_norm": 0.09579890966415405, "learning_rate": 1.8867052855298264e-06, "loss": 0.6663, "step": 635 }, { "epoch": 0.5303314571607255, "grad_norm": 0.09054259210824966, "learning_rate": 1.8862895807521264e-06, "loss": 0.6395, "step": 636 }, { "epoch": 0.5311653116531165, "grad_norm": 0.0913679376244545, "learning_rate": 1.8858731606812284e-06, "loss": 0.6481, "step": 637 }, { "epoch": 0.5319991661455076, "grad_norm": 0.09573955833911896, "learning_rate": 1.8854560256532098e-06, "loss": 0.6543, "step": 638 }, { "epoch": 0.5328330206378987, "grad_norm": 0.09667269140481949, "learning_rate": 1.8850381760047262e-06, "loss": 0.6532, "step": 639 }, { "epoch": 0.5336668751302898, "grad_norm": 0.09536033123731613, "learning_rate": 1.8846196120730093e-06, "loss": 0.6429, "step": 640 }, { "epoch": 0.5345007296226808, "grad_norm": 0.09058564901351929, "learning_rate": 1.8842003341958673e-06, "loss": 0.579, "step": 641 }, { "epoch": 0.535334584115072, "grad_norm": 0.09930071234703064, "learning_rate": 1.8837803427116844e-06, "loss": 0.5868, "step": 642 }, { "epoch": 0.536168438607463, "grad_norm": 0.0978417843580246, "learning_rate": 1.8833596379594211e-06, "loss": 0.6822, "step": 643 }, { "epoch": 0.537002293099854, "grad_norm": 0.09346287697553635, "learning_rate": 1.882938220278614e-06, "loss": 0.6116, "step": 644 }, { "epoch": 0.5378361475922452, "grad_norm": 0.09151905030012131, "learning_rate": 1.8825160900093738e-06, "loss": 0.6496, "step": 645 }, { "epoch": 0.5386700020846362, "grad_norm": 0.0970907062292099, "learning_rate": 1.8820932474923871e-06, "loss": 0.6504, "step": 646 }, { "epoch": 0.5395038565770273, "grad_norm": 0.09408881515264511, "learning_rate": 1.8816696930689159e-06, "loss": 0.6829, "step": 647 }, { "epoch": 0.5403377110694184, "grad_norm": 0.09966862946748734, "learning_rate": 1.8812454270807951e-06, "loss": 0.5898, "step": 648 }, { "epoch": 0.5411715655618095, "grad_norm": 0.0900130420923233, "learning_rate": 1.8808204498704356e-06, "loss": 0.6042, "step": 649 }, { "epoch": 0.5420054200542005, "grad_norm": 0.09694640338420868, "learning_rate": 1.8803947617808214e-06, "loss": 0.6727, "step": 650 }, { "epoch": 0.5428392745465916, "grad_norm": 0.09688873589038849, "learning_rate": 1.8799683631555103e-06, "loss": 0.6137, "step": 651 }, { "epoch": 0.5436731290389827, "grad_norm": 0.09676310420036316, "learning_rate": 1.8795412543386337e-06, "loss": 0.612, "step": 652 }, { "epoch": 0.5445069835313737, "grad_norm": 0.09581848233938217, "learning_rate": 1.8791134356748962e-06, "loss": 0.6632, "step": 653 }, { "epoch": 0.5453408380237649, "grad_norm": 0.099350206553936, "learning_rate": 1.8786849075095753e-06, "loss": 0.6652, "step": 654 }, { "epoch": 0.5461746925161559, "grad_norm": 0.09312117099761963, "learning_rate": 1.878255670188521e-06, "loss": 0.6184, "step": 655 }, { "epoch": 0.5470085470085471, "grad_norm": 0.09312383830547333, "learning_rate": 1.8778257240581554e-06, "loss": 0.6595, "step": 656 }, { "epoch": 0.5478424015009381, "grad_norm": 0.09485237300395966, "learning_rate": 1.8773950694654732e-06, "loss": 0.716, "step": 657 }, { "epoch": 0.5486762559933291, "grad_norm": 0.09585438668727875, "learning_rate": 1.8769637067580405e-06, "loss": 0.7058, "step": 658 }, { "epoch": 0.5495101104857203, "grad_norm": 0.09024535119533539, "learning_rate": 1.876531636283995e-06, "loss": 0.594, "step": 659 }, { "epoch": 0.5503439649781113, "grad_norm": 0.09285992383956909, "learning_rate": 1.8760988583920456e-06, "loss": 0.648, "step": 660 }, { "epoch": 0.5511778194705024, "grad_norm": 0.08939266204833984, "learning_rate": 1.8756653734314722e-06, "loss": 0.5866, "step": 661 }, { "epoch": 0.5520116739628935, "grad_norm": 0.10212475061416626, "learning_rate": 1.8752311817521254e-06, "loss": 0.5485, "step": 662 }, { "epoch": 0.5528455284552846, "grad_norm": 0.0938698872923851, "learning_rate": 1.8747962837044256e-06, "loss": 0.692, "step": 663 }, { "epoch": 0.5536793829476756, "grad_norm": 0.09794861823320389, "learning_rate": 1.8743606796393644e-06, "loss": 0.6449, "step": 664 }, { "epoch": 0.5545132374400668, "grad_norm": 0.09574401378631592, "learning_rate": 1.8739243699085019e-06, "loss": 0.5878, "step": 665 }, { "epoch": 0.5553470919324578, "grad_norm": 0.10169863700866699, "learning_rate": 1.8734873548639687e-06, "loss": 0.6968, "step": 666 }, { "epoch": 0.5561809464248488, "grad_norm": 0.09560302644968033, "learning_rate": 1.8730496348584642e-06, "loss": 0.6448, "step": 667 }, { "epoch": 0.55701480091724, "grad_norm": 0.10035623610019684, "learning_rate": 1.8726112102452572e-06, "loss": 0.6893, "step": 668 }, { "epoch": 0.557848655409631, "grad_norm": 0.093927301466465, "learning_rate": 1.872172081378184e-06, "loss": 0.6541, "step": 669 }, { "epoch": 0.5586825099020221, "grad_norm": 0.0979999378323555, "learning_rate": 1.8717322486116508e-06, "loss": 0.6651, "step": 670 }, { "epoch": 0.5595163643944132, "grad_norm": 0.10020069032907486, "learning_rate": 1.8712917123006312e-06, "loss": 0.6289, "step": 671 }, { "epoch": 0.5603502188868043, "grad_norm": 0.10121606290340424, "learning_rate": 1.8708504728006664e-06, "loss": 0.6547, "step": 672 }, { "epoch": 0.5611840733791953, "grad_norm": 0.09832029789686203, "learning_rate": 1.870408530467865e-06, "loss": 0.651, "step": 673 }, { "epoch": 0.5620179278715864, "grad_norm": 0.0978146642446518, "learning_rate": 1.8699658856589037e-06, "loss": 0.6453, "step": 674 }, { "epoch": 0.5628517823639775, "grad_norm": 0.10008279979228973, "learning_rate": 1.8695225387310249e-06, "loss": 0.6945, "step": 675 }, { "epoch": 0.5636856368563685, "grad_norm": 0.09760496020317078, "learning_rate": 1.8690784900420388e-06, "loss": 0.6222, "step": 676 }, { "epoch": 0.5645194913487597, "grad_norm": 0.09725689142942429, "learning_rate": 1.8686337399503215e-06, "loss": 0.6464, "step": 677 }, { "epoch": 0.5653533458411507, "grad_norm": 0.09347565472126007, "learning_rate": 1.8681882888148152e-06, "loss": 0.6282, "step": 678 }, { "epoch": 0.5661872003335418, "grad_norm": 0.09704628586769104, "learning_rate": 1.867742136995028e-06, "loss": 0.6529, "step": 679 }, { "epoch": 0.5670210548259329, "grad_norm": 0.09918136149644852, "learning_rate": 1.8672952848510328e-06, "loss": 0.739, "step": 680 }, { "epoch": 0.5678549093183239, "grad_norm": 0.09772541373968124, "learning_rate": 1.8668477327434686e-06, "loss": 0.6383, "step": 681 }, { "epoch": 0.568688763810715, "grad_norm": 0.09866555780172348, "learning_rate": 1.866399481033539e-06, "loss": 0.6721, "step": 682 }, { "epoch": 0.5695226183031061, "grad_norm": 0.09793423861265182, "learning_rate": 1.8659505300830123e-06, "loss": 0.6055, "step": 683 }, { "epoch": 0.5703564727954972, "grad_norm": 0.10061061382293701, "learning_rate": 1.865500880254221e-06, "loss": 0.6544, "step": 684 }, { "epoch": 0.5711903272878882, "grad_norm": 0.09208130091428757, "learning_rate": 1.8650505319100617e-06, "loss": 0.6014, "step": 685 }, { "epoch": 0.5720241817802794, "grad_norm": 0.0995427742600441, "learning_rate": 1.8645994854139948e-06, "loss": 0.6965, "step": 686 }, { "epoch": 0.5728580362726704, "grad_norm": 0.09244947880506516, "learning_rate": 1.8641477411300438e-06, "loss": 0.6272, "step": 687 }, { "epoch": 0.5736918907650616, "grad_norm": 0.09937774389982224, "learning_rate": 1.863695299422796e-06, "loss": 0.6648, "step": 688 }, { "epoch": 0.5745257452574526, "grad_norm": 0.09111293405294418, "learning_rate": 1.8632421606574007e-06, "loss": 0.5476, "step": 689 }, { "epoch": 0.5753595997498436, "grad_norm": 0.10068730264902115, "learning_rate": 1.8627883251995708e-06, "loss": 0.6535, "step": 690 }, { "epoch": 0.5761934542422348, "grad_norm": 0.09240923076868057, "learning_rate": 1.862333793415581e-06, "loss": 0.575, "step": 691 }, { "epoch": 0.5770273087346258, "grad_norm": 0.09765233844518661, "learning_rate": 1.8618785656722676e-06, "loss": 0.6323, "step": 692 }, { "epoch": 0.5778611632270169, "grad_norm": 0.09996625781059265, "learning_rate": 1.861422642337029e-06, "loss": 0.639, "step": 693 }, { "epoch": 0.578695017719408, "grad_norm": 0.10148533433675766, "learning_rate": 1.860966023777825e-06, "loss": 0.7196, "step": 694 }, { "epoch": 0.579528872211799, "grad_norm": 0.09370430558919907, "learning_rate": 1.8605087103631764e-06, "loss": 0.6385, "step": 695 }, { "epoch": 0.5803627267041901, "grad_norm": 0.09228570014238358, "learning_rate": 1.8600507024621648e-06, "loss": 0.6242, "step": 696 }, { "epoch": 0.5811965811965812, "grad_norm": 0.09689746052026749, "learning_rate": 1.8595920004444324e-06, "loss": 0.6135, "step": 697 }, { "epoch": 0.5820304356889723, "grad_norm": 0.09802035987377167, "learning_rate": 1.8591326046801812e-06, "loss": 0.5789, "step": 698 }, { "epoch": 0.5828642901813633, "grad_norm": 0.10172217339277267, "learning_rate": 1.8586725155401735e-06, "loss": 0.5921, "step": 699 }, { "epoch": 0.5836981446737545, "grad_norm": 0.10300835222005844, "learning_rate": 1.858211733395731e-06, "loss": 0.6647, "step": 700 }, { "epoch": 0.5845319991661455, "grad_norm": 0.09017598628997803, "learning_rate": 1.8577502586187353e-06, "loss": 0.6409, "step": 701 }, { "epoch": 0.5853658536585366, "grad_norm": 0.10058888792991638, "learning_rate": 1.8572880915816259e-06, "loss": 0.5902, "step": 702 }, { "epoch": 0.5861997081509277, "grad_norm": 0.10125189274549484, "learning_rate": 1.856825232657402e-06, "loss": 0.6392, "step": 703 }, { "epoch": 0.5870335626433187, "grad_norm": 0.0983993262052536, "learning_rate": 1.8563616822196204e-06, "loss": 0.5915, "step": 704 }, { "epoch": 0.5878674171357098, "grad_norm": 0.09559126943349838, "learning_rate": 1.8558974406423965e-06, "loss": 0.6386, "step": 705 }, { "epoch": 0.5887012716281009, "grad_norm": 0.09088745713233948, "learning_rate": 1.8554325083004034e-06, "loss": 0.6483, "step": 706 }, { "epoch": 0.589535126120492, "grad_norm": 0.09516370296478271, "learning_rate": 1.854966885568872e-06, "loss": 0.6556, "step": 707 }, { "epoch": 0.590368980612883, "grad_norm": 0.09569456428289413, "learning_rate": 1.8545005728235896e-06, "loss": 0.6394, "step": 708 }, { "epoch": 0.5912028351052742, "grad_norm": 0.0980261042714119, "learning_rate": 1.854033570440901e-06, "loss": 0.6415, "step": 709 }, { "epoch": 0.5920366895976652, "grad_norm": 0.09796703606843948, "learning_rate": 1.8535658787977075e-06, "loss": 0.6736, "step": 710 }, { "epoch": 0.5928705440900562, "grad_norm": 0.09802737832069397, "learning_rate": 1.8530974982714664e-06, "loss": 0.5802, "step": 711 }, { "epoch": 0.5937043985824474, "grad_norm": 0.09888147562742233, "learning_rate": 1.8526284292401914e-06, "loss": 0.6648, "step": 712 }, { "epoch": 0.5945382530748384, "grad_norm": 0.10190586000680923, "learning_rate": 1.8521586720824515e-06, "loss": 0.7032, "step": 713 }, { "epoch": 0.5953721075672295, "grad_norm": 0.09422854334115982, "learning_rate": 1.8516882271773712e-06, "loss": 0.6551, "step": 714 }, { "epoch": 0.5962059620596206, "grad_norm": 0.09025522321462631, "learning_rate": 1.8512170949046305e-06, "loss": 0.6231, "step": 715 }, { "epoch": 0.5970398165520117, "grad_norm": 0.102678582072258, "learning_rate": 1.850745275644463e-06, "loss": 0.6924, "step": 716 }, { "epoch": 0.5978736710444027, "grad_norm": 0.09951562434434891, "learning_rate": 1.8502727697776579e-06, "loss": 0.5763, "step": 717 }, { "epoch": 0.5987075255367938, "grad_norm": 0.10048934072256088, "learning_rate": 1.8497995776855579e-06, "loss": 0.6369, "step": 718 }, { "epoch": 0.5995413800291849, "grad_norm": 0.11139318346977234, "learning_rate": 1.84932569975006e-06, "loss": 0.6086, "step": 719 }, { "epoch": 0.600375234521576, "grad_norm": 0.10424143821001053, "learning_rate": 1.8488511363536138e-06, "loss": 0.6394, "step": 720 }, { "epoch": 0.6012090890139671, "grad_norm": 0.09213969111442566, "learning_rate": 1.8483758878792236e-06, "loss": 0.6554, "step": 721 }, { "epoch": 0.6020429435063581, "grad_norm": 0.0998111367225647, "learning_rate": 1.8478999547104452e-06, "loss": 0.6717, "step": 722 }, { "epoch": 0.6028767979987493, "grad_norm": 0.09211792796850204, "learning_rate": 1.8474233372313877e-06, "loss": 0.5694, "step": 723 }, { "epoch": 0.6037106524911403, "grad_norm": 0.10054522007703781, "learning_rate": 1.8469460358267127e-06, "loss": 0.6501, "step": 724 }, { "epoch": 0.6045445069835313, "grad_norm": 0.09979543089866638, "learning_rate": 1.8464680508816323e-06, "loss": 0.6056, "step": 725 }, { "epoch": 0.6053783614759225, "grad_norm": 0.1000729575753212, "learning_rate": 1.8459893827819126e-06, "loss": 0.6302, "step": 726 }, { "epoch": 0.6062122159683135, "grad_norm": 0.09924089163541794, "learning_rate": 1.8455100319138692e-06, "loss": 0.6432, "step": 727 }, { "epoch": 0.6070460704607046, "grad_norm": 0.0920461118221283, "learning_rate": 1.8450299986643695e-06, "loss": 0.6081, "step": 728 }, { "epoch": 0.6078799249530957, "grad_norm": 0.10430511087179184, "learning_rate": 1.8445492834208316e-06, "loss": 0.6586, "step": 729 }, { "epoch": 0.6087137794454868, "grad_norm": 0.09923563152551651, "learning_rate": 1.8440678865712236e-06, "loss": 0.6166, "step": 730 }, { "epoch": 0.6095476339378778, "grad_norm": 0.1012277603149414, "learning_rate": 1.843585808504064e-06, "loss": 0.6843, "step": 731 }, { "epoch": 0.610381488430269, "grad_norm": 0.09866297990083694, "learning_rate": 1.8431030496084214e-06, "loss": 0.6556, "step": 732 }, { "epoch": 0.61121534292266, "grad_norm": 0.09874875843524933, "learning_rate": 1.8426196102739132e-06, "loss": 0.5993, "step": 733 }, { "epoch": 0.612049197415051, "grad_norm": 0.09275837987661362, "learning_rate": 1.8421354908907067e-06, "loss": 0.6326, "step": 734 }, { "epoch": 0.6128830519074422, "grad_norm": 0.09502032399177551, "learning_rate": 1.8416506918495173e-06, "loss": 0.6077, "step": 735 }, { "epoch": 0.6137169063998332, "grad_norm": 0.09185368567705154, "learning_rate": 1.8411652135416093e-06, "loss": 0.6064, "step": 736 }, { "epoch": 0.6145507608922243, "grad_norm": 0.09815992414951324, "learning_rate": 1.8406790563587956e-06, "loss": 0.6313, "step": 737 }, { "epoch": 0.6153846153846154, "grad_norm": 0.09629546850919724, "learning_rate": 1.8401922206934361e-06, "loss": 0.6106, "step": 738 }, { "epoch": 0.6162184698770065, "grad_norm": 0.09635171294212341, "learning_rate": 1.839704706938439e-06, "loss": 0.6523, "step": 739 }, { "epoch": 0.6170523243693975, "grad_norm": 0.10179682075977325, "learning_rate": 1.8392165154872595e-06, "loss": 0.5873, "step": 740 }, { "epoch": 0.6178861788617886, "grad_norm": 0.10270754247903824, "learning_rate": 1.8387276467338996e-06, "loss": 0.673, "step": 741 }, { "epoch": 0.6187200333541797, "grad_norm": 0.09746810793876648, "learning_rate": 1.8382381010729086e-06, "loss": 0.6564, "step": 742 }, { "epoch": 0.6195538878465707, "grad_norm": 0.0962262824177742, "learning_rate": 1.8377478788993813e-06, "loss": 0.5738, "step": 743 }, { "epoch": 0.6203877423389619, "grad_norm": 0.09518938511610031, "learning_rate": 1.8372569806089586e-06, "loss": 0.6345, "step": 744 }, { "epoch": 0.6212215968313529, "grad_norm": 0.09188467264175415, "learning_rate": 1.8367654065978276e-06, "loss": 0.6112, "step": 745 }, { "epoch": 0.622055451323744, "grad_norm": 0.09746599197387695, "learning_rate": 1.8362731572627202e-06, "loss": 0.6349, "step": 746 }, { "epoch": 0.6228893058161351, "grad_norm": 0.10009641200304031, "learning_rate": 1.8357802330009136e-06, "loss": 0.6502, "step": 747 }, { "epoch": 0.6237231603085261, "grad_norm": 0.10499203950166702, "learning_rate": 1.8352866342102296e-06, "loss": 0.6812, "step": 748 }, { "epoch": 0.6245570148009172, "grad_norm": 0.09757328033447266, "learning_rate": 1.8347923612890349e-06, "loss": 0.6025, "step": 749 }, { "epoch": 0.6253908692933083, "grad_norm": 0.10616346448659897, "learning_rate": 1.8342974146362394e-06, "loss": 0.6638, "step": 750 }, { "epoch": 0.6262247237856994, "grad_norm": 0.10131774097681046, "learning_rate": 1.8338017946512968e-06, "loss": 0.5456, "step": 751 }, { "epoch": 0.6270585782780905, "grad_norm": 0.10244927555322647, "learning_rate": 1.8333055017342054e-06, "loss": 0.6751, "step": 752 }, { "epoch": 0.6278924327704816, "grad_norm": 0.10338141769170761, "learning_rate": 1.8328085362855055e-06, "loss": 0.6647, "step": 753 }, { "epoch": 0.6287262872628726, "grad_norm": 0.10026909410953522, "learning_rate": 1.8323108987062802e-06, "loss": 0.6543, "step": 754 }, { "epoch": 0.6295601417552638, "grad_norm": 0.10641954839229584, "learning_rate": 1.8318125893981553e-06, "loss": 0.6425, "step": 755 }, { "epoch": 0.6303939962476548, "grad_norm": 0.09939148277044296, "learning_rate": 1.8313136087632995e-06, "loss": 0.6531, "step": 756 }, { "epoch": 0.6312278507400458, "grad_norm": 0.10437260568141937, "learning_rate": 1.8308139572044215e-06, "loss": 0.6783, "step": 757 }, { "epoch": 0.632061705232437, "grad_norm": 0.09760341793298721, "learning_rate": 1.830313635124773e-06, "loss": 0.6734, "step": 758 }, { "epoch": 0.632895559724828, "grad_norm": 0.1017068475484848, "learning_rate": 1.8298126429281467e-06, "loss": 0.6171, "step": 759 }, { "epoch": 0.6337294142172191, "grad_norm": 0.09881062060594559, "learning_rate": 1.8293109810188754e-06, "loss": 0.6309, "step": 760 }, { "epoch": 0.6345632687096102, "grad_norm": 0.11102797091007233, "learning_rate": 1.8288086498018326e-06, "loss": 0.6361, "step": 761 }, { "epoch": 0.6353971232020013, "grad_norm": 0.09473263472318649, "learning_rate": 1.828305649682433e-06, "loss": 0.5787, "step": 762 }, { "epoch": 0.6362309776943923, "grad_norm": 0.10742990672588348, "learning_rate": 1.8278019810666293e-06, "loss": 0.6638, "step": 763 }, { "epoch": 0.6370648321867834, "grad_norm": 0.09987856447696686, "learning_rate": 1.8272976443609156e-06, "loss": 0.6054, "step": 764 }, { "epoch": 0.6378986866791745, "grad_norm": 0.09839458018541336, "learning_rate": 1.8267926399723238e-06, "loss": 0.6258, "step": 765 }, { "epoch": 0.6387325411715655, "grad_norm": 0.09883236885070801, "learning_rate": 1.8262869683084258e-06, "loss": 0.6695, "step": 766 }, { "epoch": 0.6395663956639567, "grad_norm": 0.10263945162296295, "learning_rate": 1.825780629777331e-06, "loss": 0.6032, "step": 767 }, { "epoch": 0.6404002501563477, "grad_norm": 0.09623769670724869, "learning_rate": 1.8252736247876874e-06, "loss": 0.6794, "step": 768 }, { "epoch": 0.6412341046487388, "grad_norm": 0.10218116641044617, "learning_rate": 1.8247659537486811e-06, "loss": 0.5969, "step": 769 }, { "epoch": 0.6420679591411299, "grad_norm": 0.09405802190303802, "learning_rate": 1.8242576170700352e-06, "loss": 0.6493, "step": 770 }, { "epoch": 0.642901813633521, "grad_norm": 0.09709781408309937, "learning_rate": 1.8237486151620112e-06, "loss": 0.7054, "step": 771 }, { "epoch": 0.643735668125912, "grad_norm": 0.09584866464138031, "learning_rate": 1.823238948435406e-06, "loss": 0.6299, "step": 772 }, { "epoch": 0.6445695226183031, "grad_norm": 0.09731820970773697, "learning_rate": 1.8227286173015538e-06, "loss": 0.6633, "step": 773 }, { "epoch": 0.6454033771106942, "grad_norm": 0.09664048999547958, "learning_rate": 1.822217622172325e-06, "loss": 0.6075, "step": 774 }, { "epoch": 0.6462372316030852, "grad_norm": 0.0990557000041008, "learning_rate": 1.8217059634601259e-06, "loss": 0.6755, "step": 775 }, { "epoch": 0.6470710860954764, "grad_norm": 0.10071469098329544, "learning_rate": 1.8211936415778983e-06, "loss": 0.6197, "step": 776 }, { "epoch": 0.6479049405878674, "grad_norm": 0.10386556386947632, "learning_rate": 1.820680656939119e-06, "loss": 0.6195, "step": 777 }, { "epoch": 0.6487387950802584, "grad_norm": 0.09966234862804413, "learning_rate": 1.8201670099578001e-06, "loss": 0.6654, "step": 778 }, { "epoch": 0.6495726495726496, "grad_norm": 0.09882418066263199, "learning_rate": 1.819652701048488e-06, "loss": 0.6525, "step": 779 }, { "epoch": 0.6504065040650406, "grad_norm": 0.09504549950361252, "learning_rate": 1.8191377306262633e-06, "loss": 0.6326, "step": 780 }, { "epoch": 0.6512403585574317, "grad_norm": 0.09834848344326019, "learning_rate": 1.8186220991067404e-06, "loss": 0.6417, "step": 781 }, { "epoch": 0.6520742130498228, "grad_norm": 0.09583116322755814, "learning_rate": 1.8181058069060676e-06, "loss": 0.6644, "step": 782 }, { "epoch": 0.6529080675422139, "grad_norm": 0.09287475794553757, "learning_rate": 1.8175888544409264e-06, "loss": 0.6385, "step": 783 }, { "epoch": 0.653741922034605, "grad_norm": 0.10364864766597748, "learning_rate": 1.8170712421285305e-06, "loss": 0.6996, "step": 784 }, { "epoch": 0.6545757765269961, "grad_norm": 0.09863609820604324, "learning_rate": 1.816552970386627e-06, "loss": 0.6272, "step": 785 }, { "epoch": 0.6554096310193871, "grad_norm": 0.10557069629430771, "learning_rate": 1.8160340396334949e-06, "loss": 0.6318, "step": 786 }, { "epoch": 0.6562434855117782, "grad_norm": 0.10506842285394669, "learning_rate": 1.8155144502879443e-06, "loss": 0.6557, "step": 787 }, { "epoch": 0.6570773400041693, "grad_norm": 0.09928935766220093, "learning_rate": 1.8149942027693182e-06, "loss": 0.6832, "step": 788 }, { "epoch": 0.6579111944965603, "grad_norm": 0.09821849316358566, "learning_rate": 1.8144732974974902e-06, "loss": 0.6342, "step": 789 }, { "epoch": 0.6587450489889515, "grad_norm": 0.09966670721769333, "learning_rate": 1.8139517348928637e-06, "loss": 0.6321, "step": 790 }, { "epoch": 0.6595789034813425, "grad_norm": 0.10439924150705338, "learning_rate": 1.8134295153763745e-06, "loss": 0.6552, "step": 791 }, { "epoch": 0.6604127579737336, "grad_norm": 0.09848225861787796, "learning_rate": 1.8129066393694877e-06, "loss": 0.593, "step": 792 }, { "epoch": 0.6612466124661247, "grad_norm": 0.09793344885110855, "learning_rate": 1.8123831072941974e-06, "loss": 0.6406, "step": 793 }, { "epoch": 0.6620804669585157, "grad_norm": 0.0945219025015831, "learning_rate": 1.8118589195730285e-06, "loss": 0.6118, "step": 794 }, { "epoch": 0.6629143214509068, "grad_norm": 0.10230748355388641, "learning_rate": 1.8113340766290343e-06, "loss": 0.65, "step": 795 }, { "epoch": 0.6637481759432979, "grad_norm": 0.10141695290803909, "learning_rate": 1.8108085788857973e-06, "loss": 0.6109, "step": 796 }, { "epoch": 0.664582030435689, "grad_norm": 0.11011820286512375, "learning_rate": 1.8102824267674282e-06, "loss": 0.6531, "step": 797 }, { "epoch": 0.66541588492808, "grad_norm": 0.09687966108322144, "learning_rate": 1.8097556206985658e-06, "loss": 0.649, "step": 798 }, { "epoch": 0.6662497394204712, "grad_norm": 0.09653454273939133, "learning_rate": 1.8092281611043764e-06, "loss": 0.6487, "step": 799 }, { "epoch": 0.6670835939128622, "grad_norm": 0.10373052954673767, "learning_rate": 1.808700048410555e-06, "loss": 0.6221, "step": 800 }, { "epoch": 0.6670835939128622, "eval_loss": 0.6415942907333374, "eval_runtime": 321.6008, "eval_samples_per_second": 16.738, "eval_steps_per_second": 2.792, "step": 800 }, { "epoch": 0.6679174484052532, "grad_norm": 0.10485277324914932, "learning_rate": 1.8081712830433224e-06, "loss": 0.6176, "step": 801 }, { "epoch": 0.6687513028976444, "grad_norm": 0.10105489194393158, "learning_rate": 1.8076418654294266e-06, "loss": 0.6708, "step": 802 }, { "epoch": 0.6695851573900354, "grad_norm": 0.1096423864364624, "learning_rate": 1.8071117959961416e-06, "loss": 0.601, "step": 803 }, { "epoch": 0.6704190118824265, "grad_norm": 0.10762202739715576, "learning_rate": 1.8065810751712682e-06, "loss": 0.6567, "step": 804 }, { "epoch": 0.6712528663748176, "grad_norm": 0.1107725277543068, "learning_rate": 1.8060497033831324e-06, "loss": 0.5889, "step": 805 }, { "epoch": 0.6720867208672087, "grad_norm": 0.10468114167451859, "learning_rate": 1.8055176810605857e-06, "loss": 0.5847, "step": 806 }, { "epoch": 0.6729205753595997, "grad_norm": 0.11013887077569962, "learning_rate": 1.8049850086330048e-06, "loss": 0.6008, "step": 807 }, { "epoch": 0.6737544298519909, "grad_norm": 0.1043848916888237, "learning_rate": 1.8044516865302908e-06, "loss": 0.6368, "step": 808 }, { "epoch": 0.6745882843443819, "grad_norm": 0.09962385892868042, "learning_rate": 1.8039177151828691e-06, "loss": 0.5842, "step": 809 }, { "epoch": 0.6754221388367729, "grad_norm": 0.1098175197839737, "learning_rate": 1.8033830950216897e-06, "loss": 0.6397, "step": 810 }, { "epoch": 0.6762559933291641, "grad_norm": 0.10420270264148712, "learning_rate": 1.8028478264782252e-06, "loss": 0.6131, "step": 811 }, { "epoch": 0.6770898478215551, "grad_norm": 0.10396002978086472, "learning_rate": 1.8023119099844723e-06, "loss": 0.6499, "step": 812 }, { "epoch": 0.6779237023139462, "grad_norm": 0.09836006909608841, "learning_rate": 1.8017753459729504e-06, "loss": 0.5962, "step": 813 }, { "epoch": 0.6787575568063373, "grad_norm": 0.11005699634552002, "learning_rate": 1.8012381348767014e-06, "loss": 0.6202, "step": 814 }, { "epoch": 0.6795914112987284, "grad_norm": 0.10937905311584473, "learning_rate": 1.8007002771292896e-06, "loss": 0.6053, "step": 815 }, { "epoch": 0.6804252657911195, "grad_norm": 0.10817151516675949, "learning_rate": 1.800161773164801e-06, "loss": 0.6713, "step": 816 }, { "epoch": 0.6812591202835105, "grad_norm": 0.10142417997121811, "learning_rate": 1.7996226234178434e-06, "loss": 0.6166, "step": 817 }, { "epoch": 0.6820929747759016, "grad_norm": 0.10415297746658325, "learning_rate": 1.799082828323545e-06, "loss": 0.6912, "step": 818 }, { "epoch": 0.6829268292682927, "grad_norm": 0.09985463321208954, "learning_rate": 1.7985423883175565e-06, "loss": 0.6694, "step": 819 }, { "epoch": 0.6837606837606838, "grad_norm": 0.1025400459766388, "learning_rate": 1.7980013038360475e-06, "loss": 0.6464, "step": 820 }, { "epoch": 0.6845945382530748, "grad_norm": 0.09324987977743149, "learning_rate": 1.7974595753157082e-06, "loss": 0.6251, "step": 821 }, { "epoch": 0.685428392745466, "grad_norm": 0.1065581887960434, "learning_rate": 1.796917203193749e-06, "loss": 0.6427, "step": 822 }, { "epoch": 0.686262247237857, "grad_norm": 0.1113988533616066, "learning_rate": 1.7963741879078985e-06, "loss": 0.6652, "step": 823 }, { "epoch": 0.687096101730248, "grad_norm": 0.09690185636281967, "learning_rate": 1.7958305298964066e-06, "loss": 0.6294, "step": 824 }, { "epoch": 0.6879299562226392, "grad_norm": 0.10385391116142273, "learning_rate": 1.7952862295980397e-06, "loss": 0.611, "step": 825 }, { "epoch": 0.6887638107150302, "grad_norm": 0.10155371576547623, "learning_rate": 1.794741287452084e-06, "loss": 0.6268, "step": 826 }, { "epoch": 0.6895976652074213, "grad_norm": 0.10731323808431625, "learning_rate": 1.7941957038983425e-06, "loss": 0.6294, "step": 827 }, { "epoch": 0.6904315196998124, "grad_norm": 0.0989241898059845, "learning_rate": 1.7936494793771369e-06, "loss": 0.6739, "step": 828 }, { "epoch": 0.6912653741922035, "grad_norm": 0.10223259776830673, "learning_rate": 1.7931026143293059e-06, "loss": 0.5857, "step": 829 }, { "epoch": 0.6920992286845945, "grad_norm": 0.10547658056020737, "learning_rate": 1.792555109196205e-06, "loss": 0.6864, "step": 830 }, { "epoch": 0.6929330831769857, "grad_norm": 0.1027214452624321, "learning_rate": 1.7920069644197063e-06, "loss": 0.5857, "step": 831 }, { "epoch": 0.6937669376693767, "grad_norm": 0.10017300397157669, "learning_rate": 1.7914581804421983e-06, "loss": 0.6466, "step": 832 }, { "epoch": 0.6946007921617677, "grad_norm": 0.09962292015552521, "learning_rate": 1.7909087577065851e-06, "loss": 0.5917, "step": 833 }, { "epoch": 0.6954346466541589, "grad_norm": 0.10357961803674698, "learning_rate": 1.7903586966562868e-06, "loss": 0.6382, "step": 834 }, { "epoch": 0.6962685011465499, "grad_norm": 0.1033301129937172, "learning_rate": 1.789807997735238e-06, "loss": 0.624, "step": 835 }, { "epoch": 0.697102355638941, "grad_norm": 0.09935612231492996, "learning_rate": 1.789256661387889e-06, "loss": 0.6475, "step": 836 }, { "epoch": 0.6979362101313321, "grad_norm": 0.09738706797361374, "learning_rate": 1.7887046880592033e-06, "loss": 0.652, "step": 837 }, { "epoch": 0.6987700646237232, "grad_norm": 0.10389488190412521, "learning_rate": 1.7881520781946597e-06, "loss": 0.6755, "step": 838 }, { "epoch": 0.6996039191161142, "grad_norm": 0.09787558019161224, "learning_rate": 1.7875988322402501e-06, "loss": 0.618, "step": 839 }, { "epoch": 0.7004377736085053, "grad_norm": 0.09852594137191772, "learning_rate": 1.78704495064248e-06, "loss": 0.618, "step": 840 }, { "epoch": 0.7012716281008964, "grad_norm": 0.09819158911705017, "learning_rate": 1.7864904338483675e-06, "loss": 0.5994, "step": 841 }, { "epoch": 0.7021054825932874, "grad_norm": 0.10942944139242172, "learning_rate": 1.7859352823054437e-06, "loss": 0.6405, "step": 842 }, { "epoch": 0.7029393370856786, "grad_norm": 0.10699477791786194, "learning_rate": 1.7853794964617522e-06, "loss": 0.6444, "step": 843 }, { "epoch": 0.7037731915780696, "grad_norm": 0.10151588916778564, "learning_rate": 1.784823076765848e-06, "loss": 0.6233, "step": 844 }, { "epoch": 0.7046070460704607, "grad_norm": 0.10147272795438766, "learning_rate": 1.7842660236667974e-06, "loss": 0.6606, "step": 845 }, { "epoch": 0.7054409005628518, "grad_norm": 0.1031784638762474, "learning_rate": 1.7837083376141796e-06, "loss": 0.6332, "step": 846 }, { "epoch": 0.7062747550552428, "grad_norm": 0.10318465530872345, "learning_rate": 1.7831500190580823e-06, "loss": 0.5944, "step": 847 }, { "epoch": 0.707108609547634, "grad_norm": 0.10107962787151337, "learning_rate": 1.7825910684491054e-06, "loss": 0.686, "step": 848 }, { "epoch": 0.707942464040025, "grad_norm": 0.1008550301194191, "learning_rate": 1.7820314862383584e-06, "loss": 0.6356, "step": 849 }, { "epoch": 0.7087763185324161, "grad_norm": 0.1049947738647461, "learning_rate": 1.7814712728774598e-06, "loss": 0.6436, "step": 850 }, { "epoch": 0.7096101730248072, "grad_norm": 0.10559200495481491, "learning_rate": 1.7809104288185389e-06, "loss": 0.6522, "step": 851 }, { "epoch": 0.7104440275171983, "grad_norm": 0.09807837009429932, "learning_rate": 1.7803489545142325e-06, "loss": 0.6252, "step": 852 }, { "epoch": 0.7112778820095893, "grad_norm": 0.1072913408279419, "learning_rate": 1.7797868504176874e-06, "loss": 0.6483, "step": 853 }, { "epoch": 0.7121117365019805, "grad_norm": 0.09987325221300125, "learning_rate": 1.7792241169825579e-06, "loss": 0.5955, "step": 854 }, { "epoch": 0.7129455909943715, "grad_norm": 0.10160518437623978, "learning_rate": 1.778660754663006e-06, "loss": 0.6006, "step": 855 }, { "epoch": 0.7137794454867625, "grad_norm": 0.10053478926420212, "learning_rate": 1.7780967639137025e-06, "loss": 0.6583, "step": 856 }, { "epoch": 0.7146132999791537, "grad_norm": 0.10520876199007034, "learning_rate": 1.777532145189824e-06, "loss": 0.6061, "step": 857 }, { "epoch": 0.7154471544715447, "grad_norm": 0.10797824710607529, "learning_rate": 1.776966898947054e-06, "loss": 0.6764, "step": 858 }, { "epoch": 0.7162810089639358, "grad_norm": 0.10396280139684677, "learning_rate": 1.7764010256415837e-06, "loss": 0.6302, "step": 859 }, { "epoch": 0.7171148634563269, "grad_norm": 0.10683812946081161, "learning_rate": 1.7758345257301094e-06, "loss": 0.6618, "step": 860 }, { "epoch": 0.717948717948718, "grad_norm": 0.09601815789937973, "learning_rate": 1.7752673996698326e-06, "loss": 0.6167, "step": 861 }, { "epoch": 0.718782572441109, "grad_norm": 0.1079709529876709, "learning_rate": 1.774699647918462e-06, "loss": 0.6085, "step": 862 }, { "epoch": 0.7196164269335001, "grad_norm": 0.10923435539007187, "learning_rate": 1.774131270934209e-06, "loss": 0.6492, "step": 863 }, { "epoch": 0.7204502814258912, "grad_norm": 0.10494917631149292, "learning_rate": 1.7735622691757912e-06, "loss": 0.6281, "step": 864 }, { "epoch": 0.7212841359182822, "grad_norm": 0.10305362194776535, "learning_rate": 1.7729926431024301e-06, "loss": 0.6381, "step": 865 }, { "epoch": 0.7221179904106734, "grad_norm": 0.1005750373005867, "learning_rate": 1.7724223931738505e-06, "loss": 0.667, "step": 866 }, { "epoch": 0.7229518449030644, "grad_norm": 0.10202402621507645, "learning_rate": 1.7718515198502812e-06, "loss": 0.6685, "step": 867 }, { "epoch": 0.7237856993954555, "grad_norm": 0.10256760567426682, "learning_rate": 1.7712800235924546e-06, "loss": 0.6728, "step": 868 }, { "epoch": 0.7246195538878466, "grad_norm": 0.1020650714635849, "learning_rate": 1.7707079048616046e-06, "loss": 0.6144, "step": 869 }, { "epoch": 0.7254534083802376, "grad_norm": 0.10101509839296341, "learning_rate": 1.770135164119468e-06, "loss": 0.6244, "step": 870 }, { "epoch": 0.7262872628726287, "grad_norm": 0.09737613797187805, "learning_rate": 1.769561801828284e-06, "loss": 0.6507, "step": 871 }, { "epoch": 0.7271211173650198, "grad_norm": 0.09866712987422943, "learning_rate": 1.7689878184507937e-06, "loss": 0.5875, "step": 872 }, { "epoch": 0.7279549718574109, "grad_norm": 0.0963241383433342, "learning_rate": 1.7684132144502382e-06, "loss": 0.6237, "step": 873 }, { "epoch": 0.7287888263498019, "grad_norm": 0.09706535190343857, "learning_rate": 1.7678379902903603e-06, "loss": 0.5665, "step": 874 }, { "epoch": 0.7296226808421931, "grad_norm": 0.09960032999515533, "learning_rate": 1.7672621464354034e-06, "loss": 0.6288, "step": 875 }, { "epoch": 0.7304565353345841, "grad_norm": 0.10458897799253464, "learning_rate": 1.766685683350111e-06, "loss": 0.6148, "step": 876 }, { "epoch": 0.7312903898269751, "grad_norm": 0.10443190485239029, "learning_rate": 1.766108601499726e-06, "loss": 0.6248, "step": 877 }, { "epoch": 0.7321242443193663, "grad_norm": 0.10145988315343857, "learning_rate": 1.765530901349991e-06, "loss": 0.6117, "step": 878 }, { "epoch": 0.7329580988117573, "grad_norm": 0.10847010463476181, "learning_rate": 1.7649525833671474e-06, "loss": 0.7046, "step": 879 }, { "epoch": 0.7337919533041485, "grad_norm": 0.10503443330526352, "learning_rate": 1.7643736480179352e-06, "loss": 0.6176, "step": 880 }, { "epoch": 0.7346258077965395, "grad_norm": 0.10929647833108902, "learning_rate": 1.7637940957695934e-06, "loss": 0.6434, "step": 881 }, { "epoch": 0.7354596622889306, "grad_norm": 0.10153260827064514, "learning_rate": 1.7632139270898576e-06, "loss": 0.638, "step": 882 }, { "epoch": 0.7362935167813217, "grad_norm": 0.10493889451026917, "learning_rate": 1.7626331424469615e-06, "loss": 0.6517, "step": 883 }, { "epoch": 0.7371273712737128, "grad_norm": 0.09808988124132156, "learning_rate": 1.7620517423096368e-06, "loss": 0.6179, "step": 884 }, { "epoch": 0.7379612257661038, "grad_norm": 0.10479886084794998, "learning_rate": 1.7614697271471103e-06, "loss": 0.6419, "step": 885 }, { "epoch": 0.7387950802584949, "grad_norm": 0.09941962361335754, "learning_rate": 1.7608870974291065e-06, "loss": 0.6128, "step": 886 }, { "epoch": 0.739628934750886, "grad_norm": 0.10169458389282227, "learning_rate": 1.7603038536258453e-06, "loss": 0.609, "step": 887 }, { "epoch": 0.740462789243277, "grad_norm": 0.09726240485906601, "learning_rate": 1.7597199962080423e-06, "loss": 0.6371, "step": 888 }, { "epoch": 0.7412966437356682, "grad_norm": 0.10218486189842224, "learning_rate": 1.759135525646908e-06, "loss": 0.5768, "step": 889 }, { "epoch": 0.7421304982280592, "grad_norm": 0.10321173816919327, "learning_rate": 1.7585504424141483e-06, "loss": 0.6007, "step": 890 }, { "epoch": 0.7429643527204502, "grad_norm": 0.10717494040727615, "learning_rate": 1.7579647469819631e-06, "loss": 0.6353, "step": 891 }, { "epoch": 0.7437982072128414, "grad_norm": 0.1051764264702797, "learning_rate": 1.7573784398230474e-06, "loss": 0.6098, "step": 892 }, { "epoch": 0.7446320617052324, "grad_norm": 0.10321137309074402, "learning_rate": 1.7567915214105881e-06, "loss": 0.5986, "step": 893 }, { "epoch": 0.7454659161976235, "grad_norm": 0.10979589819908142, "learning_rate": 1.7562039922182671e-06, "loss": 0.6518, "step": 894 }, { "epoch": 0.7462997706900146, "grad_norm": 0.10153292864561081, "learning_rate": 1.7556158527202585e-06, "loss": 0.6358, "step": 895 }, { "epoch": 0.7471336251824057, "grad_norm": 0.10513276606798172, "learning_rate": 1.7550271033912287e-06, "loss": 0.6085, "step": 896 }, { "epoch": 0.7479674796747967, "grad_norm": 0.10275143384933472, "learning_rate": 1.7544377447063372e-06, "loss": 0.5748, "step": 897 }, { "epoch": 0.7488013341671879, "grad_norm": 0.10438092797994614, "learning_rate": 1.7538477771412339e-06, "loss": 0.5943, "step": 898 }, { "epoch": 0.7496351886595789, "grad_norm": 0.1010277271270752, "learning_rate": 1.7532572011720617e-06, "loss": 0.6112, "step": 899 }, { "epoch": 0.7504690431519699, "grad_norm": 0.10229716449975967, "learning_rate": 1.752666017275453e-06, "loss": 0.5861, "step": 900 }, { "epoch": 0.7513028976443611, "grad_norm": 0.10199355334043503, "learning_rate": 1.7520742259285323e-06, "loss": 0.6388, "step": 901 }, { "epoch": 0.7521367521367521, "grad_norm": 0.10268741846084595, "learning_rate": 1.7514818276089128e-06, "loss": 0.6875, "step": 902 }, { "epoch": 0.7529706066291432, "grad_norm": 0.0992840975522995, "learning_rate": 1.7508888227946992e-06, "loss": 0.6756, "step": 903 }, { "epoch": 0.7538044611215343, "grad_norm": 0.09615278989076614, "learning_rate": 1.7502952119644845e-06, "loss": 0.6474, "step": 904 }, { "epoch": 0.7546383156139254, "grad_norm": 0.10315293818712234, "learning_rate": 1.749700995597351e-06, "loss": 0.6127, "step": 905 }, { "epoch": 0.7554721701063164, "grad_norm": 0.10551444441080093, "learning_rate": 1.7491061741728702e-06, "loss": 0.6357, "step": 906 }, { "epoch": 0.7563060245987075, "grad_norm": 0.10280577838420868, "learning_rate": 1.748510748171101e-06, "loss": 0.6045, "step": 907 }, { "epoch": 0.7571398790910986, "grad_norm": 0.09516436606645584, "learning_rate": 1.7479147180725912e-06, "loss": 0.6368, "step": 908 }, { "epoch": 0.7579737335834896, "grad_norm": 0.10799692571163177, "learning_rate": 1.7473180843583762e-06, "loss": 0.6669, "step": 909 }, { "epoch": 0.7588075880758808, "grad_norm": 0.1064273938536644, "learning_rate": 1.7467208475099774e-06, "loss": 0.6481, "step": 910 }, { "epoch": 0.7596414425682718, "grad_norm": 0.1054486557841301, "learning_rate": 1.7461230080094043e-06, "loss": 0.6108, "step": 911 }, { "epoch": 0.760475297060663, "grad_norm": 0.10060249269008636, "learning_rate": 1.7455245663391516e-06, "loss": 0.5604, "step": 912 }, { "epoch": 0.761309151553054, "grad_norm": 0.1015784963965416, "learning_rate": 1.7449255229822008e-06, "loss": 0.6248, "step": 913 }, { "epoch": 0.762143006045445, "grad_norm": 0.10114108771085739, "learning_rate": 1.744325878422019e-06, "loss": 0.6683, "step": 914 }, { "epoch": 0.7629768605378362, "grad_norm": 0.10236384719610214, "learning_rate": 1.7437256331425576e-06, "loss": 0.657, "step": 915 }, { "epoch": 0.7638107150302272, "grad_norm": 0.10892149806022644, "learning_rate": 1.743124787628254e-06, "loss": 0.6434, "step": 916 }, { "epoch": 0.7646445695226183, "grad_norm": 0.10639214515686035, "learning_rate": 1.7425233423640298e-06, "loss": 0.5859, "step": 917 }, { "epoch": 0.7654784240150094, "grad_norm": 0.10279032588005066, "learning_rate": 1.7419212978352898e-06, "loss": 0.6779, "step": 918 }, { "epoch": 0.7663122785074005, "grad_norm": 0.11178586632013321, "learning_rate": 1.741318654527923e-06, "loss": 0.6827, "step": 919 }, { "epoch": 0.7671461329997915, "grad_norm": 0.1018877848982811, "learning_rate": 1.7407154129283019e-06, "loss": 0.6423, "step": 920 }, { "epoch": 0.7679799874921827, "grad_norm": 0.1052117571234703, "learning_rate": 1.7401115735232817e-06, "loss": 0.6871, "step": 921 }, { "epoch": 0.7688138419845737, "grad_norm": 0.10656667500734329, "learning_rate": 1.7395071368001997e-06, "loss": 0.6384, "step": 922 }, { "epoch": 0.7696476964769647, "grad_norm": 0.09973619878292084, "learning_rate": 1.738902103246876e-06, "loss": 0.5659, "step": 923 }, { "epoch": 0.7704815509693559, "grad_norm": 0.10072916746139526, "learning_rate": 1.7382964733516112e-06, "loss": 0.6315, "step": 924 }, { "epoch": 0.7713154054617469, "grad_norm": 0.11351827532052994, "learning_rate": 1.7376902476031882e-06, "loss": 0.7005, "step": 925 }, { "epoch": 0.772149259954138, "grad_norm": 0.1084694042801857, "learning_rate": 1.7370834264908711e-06, "loss": 0.6077, "step": 926 }, { "epoch": 0.7729831144465291, "grad_norm": 0.10465063899755478, "learning_rate": 1.7364760105044033e-06, "loss": 0.5937, "step": 927 }, { "epoch": 0.7738169689389202, "grad_norm": 0.09937559068202972, "learning_rate": 1.7358680001340092e-06, "loss": 0.5854, "step": 928 }, { "epoch": 0.7746508234313112, "grad_norm": 0.11517240107059479, "learning_rate": 1.7352593958703921e-06, "loss": 0.6738, "step": 929 }, { "epoch": 0.7754846779237023, "grad_norm": 0.10875809192657471, "learning_rate": 1.734650198204736e-06, "loss": 0.6487, "step": 930 }, { "epoch": 0.7763185324160934, "grad_norm": 0.10209451615810394, "learning_rate": 1.7340404076287021e-06, "loss": 0.6277, "step": 931 }, { "epoch": 0.7771523869084844, "grad_norm": 0.11190709471702576, "learning_rate": 1.7334300246344316e-06, "loss": 0.6393, "step": 932 }, { "epoch": 0.7779862414008756, "grad_norm": 0.10622856765985489, "learning_rate": 1.7328190497145427e-06, "loss": 0.6665, "step": 933 }, { "epoch": 0.7788200958932666, "grad_norm": 0.11038866639137268, "learning_rate": 1.7322074833621318e-06, "loss": 0.6092, "step": 934 }, { "epoch": 0.7796539503856577, "grad_norm": 0.1060342788696289, "learning_rate": 1.7315953260707735e-06, "loss": 0.6427, "step": 935 }, { "epoch": 0.7804878048780488, "grad_norm": 0.10829740762710571, "learning_rate": 1.7309825783345174e-06, "loss": 0.6851, "step": 936 }, { "epoch": 0.7813216593704398, "grad_norm": 0.1001635491847992, "learning_rate": 1.7303692406478908e-06, "loss": 0.5645, "step": 937 }, { "epoch": 0.7821555138628309, "grad_norm": 0.09842494130134583, "learning_rate": 1.7297553135058978e-06, "loss": 0.613, "step": 938 }, { "epoch": 0.782989368355222, "grad_norm": 0.10335814207792282, "learning_rate": 1.7291407974040167e-06, "loss": 0.6583, "step": 939 }, { "epoch": 0.7838232228476131, "grad_norm": 0.10193807631731033, "learning_rate": 1.728525692838202e-06, "loss": 0.6242, "step": 940 }, { "epoch": 0.7846570773400041, "grad_norm": 0.1064397543668747, "learning_rate": 1.7279100003048832e-06, "loss": 0.7043, "step": 941 }, { "epoch": 0.7854909318323953, "grad_norm": 0.09790313243865967, "learning_rate": 1.7272937203009642e-06, "loss": 0.5869, "step": 942 }, { "epoch": 0.7863247863247863, "grad_norm": 0.10425697267055511, "learning_rate": 1.7266768533238225e-06, "loss": 0.6282, "step": 943 }, { "epoch": 0.7871586408171775, "grad_norm": 0.10319948196411133, "learning_rate": 1.72605939987131e-06, "loss": 0.5899, "step": 944 }, { "epoch": 0.7879924953095685, "grad_norm": 0.10117416828870773, "learning_rate": 1.725441360441752e-06, "loss": 0.5734, "step": 945 }, { "epoch": 0.7888263498019595, "grad_norm": 0.10920300334692001, "learning_rate": 1.7248227355339458e-06, "loss": 0.612, "step": 946 }, { "epoch": 0.7896602042943507, "grad_norm": 0.0993722602725029, "learning_rate": 1.7242035256471623e-06, "loss": 0.5778, "step": 947 }, { "epoch": 0.7904940587867417, "grad_norm": 0.12078487873077393, "learning_rate": 1.723583731281144e-06, "loss": 0.7115, "step": 948 }, { "epoch": 0.7913279132791328, "grad_norm": 0.10817496478557587, "learning_rate": 1.7229633529361051e-06, "loss": 0.6699, "step": 949 }, { "epoch": 0.7921617677715239, "grad_norm": 0.10841819643974304, "learning_rate": 1.7223423911127313e-06, "loss": 0.6089, "step": 950 }, { "epoch": 0.792995622263915, "grad_norm": 0.10630793124437332, "learning_rate": 1.7217208463121788e-06, "loss": 0.6615, "step": 951 }, { "epoch": 0.793829476756306, "grad_norm": 0.1000799611210823, "learning_rate": 1.721098719036075e-06, "loss": 0.6231, "step": 952 }, { "epoch": 0.7946633312486971, "grad_norm": 0.11009353399276733, "learning_rate": 1.7204760097865167e-06, "loss": 0.6197, "step": 953 }, { "epoch": 0.7954971857410882, "grad_norm": 0.10809972882270813, "learning_rate": 1.7198527190660706e-06, "loss": 0.5975, "step": 954 }, { "epoch": 0.7963310402334792, "grad_norm": 0.10163717716932297, "learning_rate": 1.719228847377773e-06, "loss": 0.7125, "step": 955 }, { "epoch": 0.7971648947258704, "grad_norm": 0.1121022030711174, "learning_rate": 1.7186043952251286e-06, "loss": 0.6821, "step": 956 }, { "epoch": 0.7979987492182614, "grad_norm": 0.11300718039274216, "learning_rate": 1.7179793631121106e-06, "loss": 0.6137, "step": 957 }, { "epoch": 0.7988326037106525, "grad_norm": 0.10873489826917648, "learning_rate": 1.717353751543161e-06, "loss": 0.5621, "step": 958 }, { "epoch": 0.7996664582030436, "grad_norm": 0.11031791567802429, "learning_rate": 1.7167275610231887e-06, "loss": 0.6363, "step": 959 }, { "epoch": 0.8005003126954346, "grad_norm": 0.10201506316661835, "learning_rate": 1.7161007920575704e-06, "loss": 0.5696, "step": 960 }, { "epoch": 0.8013341671878257, "grad_norm": 0.09877403825521469, "learning_rate": 1.7154734451521486e-06, "loss": 0.6027, "step": 961 }, { "epoch": 0.8021680216802168, "grad_norm": 0.1055438295006752, "learning_rate": 1.7148455208132334e-06, "loss": 0.6204, "step": 962 }, { "epoch": 0.8030018761726079, "grad_norm": 0.11029175668954849, "learning_rate": 1.7142170195476005e-06, "loss": 0.5826, "step": 963 }, { "epoch": 0.8038357306649989, "grad_norm": 0.1105756014585495, "learning_rate": 1.7135879418624913e-06, "loss": 0.6526, "step": 964 }, { "epoch": 0.8046695851573901, "grad_norm": 0.1054423451423645, "learning_rate": 1.712958288265612e-06, "loss": 0.6254, "step": 965 }, { "epoch": 0.8055034396497811, "grad_norm": 0.10613211989402771, "learning_rate": 1.712328059265134e-06, "loss": 0.5974, "step": 966 }, { "epoch": 0.8063372941421721, "grad_norm": 0.09556613117456436, "learning_rate": 1.7116972553696932e-06, "loss": 0.6144, "step": 967 }, { "epoch": 0.8071711486345633, "grad_norm": 0.10485559701919556, "learning_rate": 1.711065877088389e-06, "loss": 0.5998, "step": 968 }, { "epoch": 0.8080050031269543, "grad_norm": 0.10885662585496902, "learning_rate": 1.7104339249307848e-06, "loss": 0.6298, "step": 969 }, { "epoch": 0.8088388576193454, "grad_norm": 0.10140252858400345, "learning_rate": 1.709801399406907e-06, "loss": 0.5747, "step": 970 }, { "epoch": 0.8096727121117365, "grad_norm": 0.09874554723501205, "learning_rate": 1.7091683010272446e-06, "loss": 0.5676, "step": 971 }, { "epoch": 0.8105065666041276, "grad_norm": 0.10777262598276138, "learning_rate": 1.7085346303027493e-06, "loss": 0.6326, "step": 972 }, { "epoch": 0.8113404210965186, "grad_norm": 0.09974364936351776, "learning_rate": 1.7079003877448344e-06, "loss": 0.5638, "step": 973 }, { "epoch": 0.8121742755889098, "grad_norm": 0.10661293566226959, "learning_rate": 1.7072655738653745e-06, "loss": 0.6641, "step": 974 }, { "epoch": 0.8130081300813008, "grad_norm": 0.10669861733913422, "learning_rate": 1.7066301891767061e-06, "loss": 0.6478, "step": 975 }, { "epoch": 0.813841984573692, "grad_norm": 0.10368253290653229, "learning_rate": 1.7059942341916256e-06, "loss": 0.6198, "step": 976 }, { "epoch": 0.814675839066083, "grad_norm": 0.10855797678232193, "learning_rate": 1.7053577094233897e-06, "loss": 0.6453, "step": 977 }, { "epoch": 0.815509693558474, "grad_norm": 0.11007635295391083, "learning_rate": 1.7047206153857156e-06, "loss": 0.5748, "step": 978 }, { "epoch": 0.8163435480508652, "grad_norm": 0.10489246994256973, "learning_rate": 1.7040829525927796e-06, "loss": 0.579, "step": 979 }, { "epoch": 0.8171774025432562, "grad_norm": 0.11224298179149628, "learning_rate": 1.7034447215592164e-06, "loss": 0.6484, "step": 980 }, { "epoch": 0.8180112570356473, "grad_norm": 0.10526615381240845, "learning_rate": 1.7028059228001204e-06, "loss": 0.6746, "step": 981 }, { "epoch": 0.8188451115280384, "grad_norm": 0.10187442600727081, "learning_rate": 1.7021665568310435e-06, "loss": 0.6462, "step": 982 }, { "epoch": 0.8196789660204294, "grad_norm": 0.10846269875764847, "learning_rate": 1.7015266241679952e-06, "loss": 0.6329, "step": 983 }, { "epoch": 0.8205128205128205, "grad_norm": 0.110533706843853, "learning_rate": 1.7008861253274429e-06, "loss": 0.658, "step": 984 }, { "epoch": 0.8213466750052116, "grad_norm": 0.10688811540603638, "learning_rate": 1.7002450608263107e-06, "loss": 0.5996, "step": 985 }, { "epoch": 0.8221805294976027, "grad_norm": 0.10414768755435944, "learning_rate": 1.6996034311819796e-06, "loss": 0.6333, "step": 986 }, { "epoch": 0.8230143839899937, "grad_norm": 0.0999814048409462, "learning_rate": 1.698961236912286e-06, "loss": 0.6609, "step": 987 }, { "epoch": 0.8238482384823849, "grad_norm": 0.10203441977500916, "learning_rate": 1.6983184785355222e-06, "loss": 0.6095, "step": 988 }, { "epoch": 0.8246820929747759, "grad_norm": 0.10571661591529846, "learning_rate": 1.6976751565704362e-06, "loss": 0.6125, "step": 989 }, { "epoch": 0.8255159474671669, "grad_norm": 0.10715372860431671, "learning_rate": 1.6970312715362304e-06, "loss": 0.689, "step": 990 }, { "epoch": 0.8263498019595581, "grad_norm": 0.10807687044143677, "learning_rate": 1.696386823952562e-06, "loss": 0.611, "step": 991 }, { "epoch": 0.8271836564519491, "grad_norm": 0.10625305026769638, "learning_rate": 1.6957418143395418e-06, "loss": 0.6616, "step": 992 }, { "epoch": 0.8280175109443402, "grad_norm": 0.10512302070856094, "learning_rate": 1.6950962432177348e-06, "loss": 0.6147, "step": 993 }, { "epoch": 0.8288513654367313, "grad_norm": 0.10524202138185501, "learning_rate": 1.6944501111081579e-06, "loss": 0.6094, "step": 994 }, { "epoch": 0.8296852199291224, "grad_norm": 0.10839590430259705, "learning_rate": 1.6938034185322828e-06, "loss": 0.5816, "step": 995 }, { "epoch": 0.8305190744215134, "grad_norm": 0.10930492728948593, "learning_rate": 1.6931561660120312e-06, "loss": 0.6621, "step": 996 }, { "epoch": 0.8313529289139046, "grad_norm": 0.10111601650714874, "learning_rate": 1.692508354069779e-06, "loss": 0.6083, "step": 997 }, { "epoch": 0.8321867834062956, "grad_norm": 0.11010827124118805, "learning_rate": 1.691859983228352e-06, "loss": 0.6689, "step": 998 }, { "epoch": 0.8330206378986866, "grad_norm": 0.10622293502092361, "learning_rate": 1.6912110540110272e-06, "loss": 0.6142, "step": 999 }, { "epoch": 0.8338544923910778, "grad_norm": 0.10322947055101395, "learning_rate": 1.6905615669415325e-06, "loss": 0.5797, "step": 1000 }, { "epoch": 0.8346883468834688, "grad_norm": 0.11376485228538513, "learning_rate": 1.689911522544047e-06, "loss": 0.6323, "step": 1001 }, { "epoch": 0.8355222013758599, "grad_norm": 0.10336299985647202, "learning_rate": 1.6892609213431981e-06, "loss": 0.5907, "step": 1002 }, { "epoch": 0.836356055868251, "grad_norm": 0.10549134016036987, "learning_rate": 1.6886097638640631e-06, "loss": 0.6483, "step": 1003 }, { "epoch": 0.837189910360642, "grad_norm": 0.10979744791984558, "learning_rate": 1.6879580506321687e-06, "loss": 0.5829, "step": 1004 }, { "epoch": 0.8380237648530331, "grad_norm": 0.10647737979888916, "learning_rate": 1.6873057821734894e-06, "loss": 0.5714, "step": 1005 }, { "epoch": 0.8388576193454242, "grad_norm": 0.11350049823522568, "learning_rate": 1.6866529590144485e-06, "loss": 0.6756, "step": 1006 }, { "epoch": 0.8396914738378153, "grad_norm": 0.10672125220298767, "learning_rate": 1.6859995816819167e-06, "loss": 0.6459, "step": 1007 }, { "epoch": 0.8405253283302064, "grad_norm": 0.11561840772628784, "learning_rate": 1.6853456507032112e-06, "loss": 0.6912, "step": 1008 }, { "epoch": 0.8413591828225975, "grad_norm": 0.11554259806871414, "learning_rate": 1.6846911666060973e-06, "loss": 0.6032, "step": 1009 }, { "epoch": 0.8421930373149885, "grad_norm": 0.10536365956068039, "learning_rate": 1.6840361299187857e-06, "loss": 0.6182, "step": 1010 }, { "epoch": 0.8430268918073797, "grad_norm": 0.1125800758600235, "learning_rate": 1.6833805411699344e-06, "loss": 0.6519, "step": 1011 }, { "epoch": 0.8438607462997707, "grad_norm": 0.10594160109758377, "learning_rate": 1.6827244008886453e-06, "loss": 0.6407, "step": 1012 }, { "epoch": 0.8446946007921617, "grad_norm": 0.10480530560016632, "learning_rate": 1.6820677096044667e-06, "loss": 0.6398, "step": 1013 }, { "epoch": 0.8455284552845529, "grad_norm": 0.10666308552026749, "learning_rate": 1.6814104678473905e-06, "loss": 0.612, "step": 1014 }, { "epoch": 0.8463623097769439, "grad_norm": 0.10556510835886002, "learning_rate": 1.6807526761478533e-06, "loss": 0.5729, "step": 1015 }, { "epoch": 0.847196164269335, "grad_norm": 0.11039448529481888, "learning_rate": 1.6800943350367368e-06, "loss": 0.6323, "step": 1016 }, { "epoch": 0.8480300187617261, "grad_norm": 0.10304141789674759, "learning_rate": 1.6794354450453638e-06, "loss": 0.6426, "step": 1017 }, { "epoch": 0.8488638732541172, "grad_norm": 0.10969959199428558, "learning_rate": 1.678776006705502e-06, "loss": 0.558, "step": 1018 }, { "epoch": 0.8496977277465082, "grad_norm": 0.10663289576768875, "learning_rate": 1.6781160205493605e-06, "loss": 0.593, "step": 1019 }, { "epoch": 0.8505315822388994, "grad_norm": 0.10291384905576706, "learning_rate": 1.6774554871095915e-06, "loss": 0.6281, "step": 1020 }, { "epoch": 0.8513654367312904, "grad_norm": 0.10907971113920212, "learning_rate": 1.6767944069192876e-06, "loss": 0.6158, "step": 1021 }, { "epoch": 0.8521992912236814, "grad_norm": 0.11564616858959198, "learning_rate": 1.6761327805119838e-06, "loss": 0.6662, "step": 1022 }, { "epoch": 0.8530331457160726, "grad_norm": 0.10590541362762451, "learning_rate": 1.6754706084216555e-06, "loss": 0.6323, "step": 1023 }, { "epoch": 0.8538670002084636, "grad_norm": 0.10117157548666, "learning_rate": 1.6748078911827187e-06, "loss": 0.6112, "step": 1024 }, { "epoch": 0.8547008547008547, "grad_norm": 0.11531368643045425, "learning_rate": 1.674144629330029e-06, "loss": 0.6413, "step": 1025 }, { "epoch": 0.8555347091932458, "grad_norm": 0.10621378570795059, "learning_rate": 1.673480823398882e-06, "loss": 0.6394, "step": 1026 }, { "epoch": 0.8563685636856369, "grad_norm": 0.11612808704376221, "learning_rate": 1.672816473925012e-06, "loss": 0.637, "step": 1027 }, { "epoch": 0.8572024181780279, "grad_norm": 0.10893040150403976, "learning_rate": 1.672151581444592e-06, "loss": 0.5923, "step": 1028 }, { "epoch": 0.858036272670419, "grad_norm": 0.10822536796331406, "learning_rate": 1.6714861464942333e-06, "loss": 0.6766, "step": 1029 }, { "epoch": 0.8588701271628101, "grad_norm": 0.10714786499738693, "learning_rate": 1.6708201696109856e-06, "loss": 0.5649, "step": 1030 }, { "epoch": 0.8597039816552011, "grad_norm": 0.10966484993696213, "learning_rate": 1.6701536513323349e-06, "loss": 0.6219, "step": 1031 }, { "epoch": 0.8605378361475923, "grad_norm": 0.1037144884467125, "learning_rate": 1.669486592196205e-06, "loss": 0.6352, "step": 1032 }, { "epoch": 0.8613716906399833, "grad_norm": 0.11299126595258713, "learning_rate": 1.668818992740956e-06, "loss": 0.6734, "step": 1033 }, { "epoch": 0.8622055451323744, "grad_norm": 0.11029834300279617, "learning_rate": 1.6681508535053834e-06, "loss": 0.6579, "step": 1034 }, { "epoch": 0.8630393996247655, "grad_norm": 0.10519642382860184, "learning_rate": 1.6674821750287197e-06, "loss": 0.6072, "step": 1035 }, { "epoch": 0.8638732541171565, "grad_norm": 0.11065202951431274, "learning_rate": 1.6668129578506313e-06, "loss": 0.6562, "step": 1036 }, { "epoch": 0.8647071086095476, "grad_norm": 0.11024871468544006, "learning_rate": 1.66614320251122e-06, "loss": 0.6, "step": 1037 }, { "epoch": 0.8655409631019387, "grad_norm": 0.10754359513521194, "learning_rate": 1.6654729095510219e-06, "loss": 0.6028, "step": 1038 }, { "epoch": 0.8663748175943298, "grad_norm": 0.11345981806516647, "learning_rate": 1.6648020795110069e-06, "loss": 0.7018, "step": 1039 }, { "epoch": 0.8672086720867209, "grad_norm": 0.10346484929323196, "learning_rate": 1.6641307129325783e-06, "loss": 0.6866, "step": 1040 }, { "epoch": 0.868042526579112, "grad_norm": 0.10640288889408112, "learning_rate": 1.6634588103575723e-06, "loss": 0.5851, "step": 1041 }, { "epoch": 0.868876381071503, "grad_norm": 0.11161711812019348, "learning_rate": 1.6627863723282584e-06, "loss": 0.6182, "step": 1042 }, { "epoch": 0.8697102355638942, "grad_norm": 0.10999694466590881, "learning_rate": 1.662113399387337e-06, "loss": 0.5887, "step": 1043 }, { "epoch": 0.8705440900562852, "grad_norm": 0.10471642017364502, "learning_rate": 1.6614398920779418e-06, "loss": 0.6854, "step": 1044 }, { "epoch": 0.8713779445486762, "grad_norm": 0.10635983943939209, "learning_rate": 1.660765850943636e-06, "loss": 0.5945, "step": 1045 }, { "epoch": 0.8722117990410674, "grad_norm": 0.10250985622406006, "learning_rate": 1.6600912765284153e-06, "loss": 0.6283, "step": 1046 }, { "epoch": 0.8730456535334584, "grad_norm": 0.10342993587255478, "learning_rate": 1.6594161693767046e-06, "loss": 0.5878, "step": 1047 }, { "epoch": 0.8738795080258495, "grad_norm": 0.10388782620429993, "learning_rate": 1.6587405300333593e-06, "loss": 0.6197, "step": 1048 }, { "epoch": 0.8747133625182406, "grad_norm": 0.10757216066122055, "learning_rate": 1.658064359043664e-06, "loss": 0.6643, "step": 1049 }, { "epoch": 0.8755472170106317, "grad_norm": 0.10185371339321136, "learning_rate": 1.657387656953333e-06, "loss": 0.6129, "step": 1050 }, { "epoch": 0.8763810715030227, "grad_norm": 0.10625848174095154, "learning_rate": 1.6567104243085081e-06, "loss": 0.6389, "step": 1051 }, { "epoch": 0.8772149259954138, "grad_norm": 0.1068265438079834, "learning_rate": 1.6560326616557605e-06, "loss": 0.6474, "step": 1052 }, { "epoch": 0.8780487804878049, "grad_norm": 0.10008926689624786, "learning_rate": 1.6553543695420888e-06, "loss": 0.592, "step": 1053 }, { "epoch": 0.8788826349801959, "grad_norm": 0.11250849813222885, "learning_rate": 1.6546755485149181e-06, "loss": 0.6612, "step": 1054 }, { "epoch": 0.8797164894725871, "grad_norm": 0.10780669748783112, "learning_rate": 1.6539961991221017e-06, "loss": 0.5717, "step": 1055 }, { "epoch": 0.8805503439649781, "grad_norm": 0.10522449761629105, "learning_rate": 1.6533163219119181e-06, "loss": 0.6377, "step": 1056 }, { "epoch": 0.8813841984573692, "grad_norm": 0.11719299852848053, "learning_rate": 1.6526359174330727e-06, "loss": 0.6148, "step": 1057 }, { "epoch": 0.8822180529497603, "grad_norm": 0.10418464988470078, "learning_rate": 1.6519549862346959e-06, "loss": 0.547, "step": 1058 }, { "epoch": 0.8830519074421513, "grad_norm": 0.1149667277932167, "learning_rate": 1.6512735288663433e-06, "loss": 0.5509, "step": 1059 }, { "epoch": 0.8838857619345424, "grad_norm": 0.11353281885385513, "learning_rate": 1.6505915458779954e-06, "loss": 0.6679, "step": 1060 }, { "epoch": 0.8847196164269335, "grad_norm": 0.10084139555692673, "learning_rate": 1.6499090378200564e-06, "loss": 0.6315, "step": 1061 }, { "epoch": 0.8855534709193246, "grad_norm": 0.10753504931926727, "learning_rate": 1.6492260052433551e-06, "loss": 0.6441, "step": 1062 }, { "epoch": 0.8863873254117156, "grad_norm": 0.10944227129220963, "learning_rate": 1.6485424486991427e-06, "loss": 0.6378, "step": 1063 }, { "epoch": 0.8872211799041068, "grad_norm": 0.10542403161525726, "learning_rate": 1.6478583687390937e-06, "loss": 0.5715, "step": 1064 }, { "epoch": 0.8880550343964978, "grad_norm": 0.10192226618528366, "learning_rate": 1.6471737659153054e-06, "loss": 0.6035, "step": 1065 }, { "epoch": 0.8888888888888888, "grad_norm": 0.11085714399814606, "learning_rate": 1.6464886407802958e-06, "loss": 0.6705, "step": 1066 }, { "epoch": 0.88972274338128, "grad_norm": 0.10521090775728226, "learning_rate": 1.645802993887006e-06, "loss": 0.6487, "step": 1067 }, { "epoch": 0.890556597873671, "grad_norm": 0.11182847619056702, "learning_rate": 1.645116825788798e-06, "loss": 0.5908, "step": 1068 }, { "epoch": 0.8913904523660621, "grad_norm": 0.10744032263755798, "learning_rate": 1.6444301370394533e-06, "loss": 0.6978, "step": 1069 }, { "epoch": 0.8922243068584532, "grad_norm": 0.11645323783159256, "learning_rate": 1.6437429281931742e-06, "loss": 0.6552, "step": 1070 }, { "epoch": 0.8930581613508443, "grad_norm": 0.10556191205978394, "learning_rate": 1.6430551998045833e-06, "loss": 0.6485, "step": 1071 }, { "epoch": 0.8938920158432354, "grad_norm": 0.11244357377290726, "learning_rate": 1.6423669524287216e-06, "loss": 0.6352, "step": 1072 }, { "epoch": 0.8947258703356264, "grad_norm": 0.11015337705612183, "learning_rate": 1.6416781866210494e-06, "loss": 0.5928, "step": 1073 }, { "epoch": 0.8955597248280175, "grad_norm": 0.11401670426130295, "learning_rate": 1.6409889029374457e-06, "loss": 0.591, "step": 1074 }, { "epoch": 0.8963935793204086, "grad_norm": 0.10178755223751068, "learning_rate": 1.6402991019342073e-06, "loss": 0.5772, "step": 1075 }, { "epoch": 0.8972274338127997, "grad_norm": 0.10722572356462479, "learning_rate": 1.6396087841680478e-06, "loss": 0.616, "step": 1076 }, { "epoch": 0.8980612883051907, "grad_norm": 0.10824041068553925, "learning_rate": 1.6389179501960987e-06, "loss": 0.6289, "step": 1077 }, { "epoch": 0.8988951427975819, "grad_norm": 0.10898349434137344, "learning_rate": 1.638226600575908e-06, "loss": 0.6197, "step": 1078 }, { "epoch": 0.8997289972899729, "grad_norm": 0.10828059166669846, "learning_rate": 1.6375347358654397e-06, "loss": 0.6335, "step": 1079 }, { "epoch": 0.900562851782364, "grad_norm": 0.11353094130754471, "learning_rate": 1.6368423566230728e-06, "loss": 0.6306, "step": 1080 }, { "epoch": 0.9013967062747551, "grad_norm": 0.1095949038863182, "learning_rate": 1.6361494634076033e-06, "loss": 0.6579, "step": 1081 }, { "epoch": 0.9022305607671461, "grad_norm": 0.10779423266649246, "learning_rate": 1.6354560567782406e-06, "loss": 0.6268, "step": 1082 }, { "epoch": 0.9030644152595372, "grad_norm": 0.10539865493774414, "learning_rate": 1.6347621372946088e-06, "loss": 0.6318, "step": 1083 }, { "epoch": 0.9038982697519283, "grad_norm": 0.11474636197090149, "learning_rate": 1.6340677055167458e-06, "loss": 0.6843, "step": 1084 }, { "epoch": 0.9047321242443194, "grad_norm": 0.11284147948026657, "learning_rate": 1.633372762005103e-06, "loss": 0.6167, "step": 1085 }, { "epoch": 0.9055659787367104, "grad_norm": 0.10842804610729218, "learning_rate": 1.632677307320545e-06, "loss": 0.5851, "step": 1086 }, { "epoch": 0.9063998332291016, "grad_norm": 0.10843487083911896, "learning_rate": 1.6319813420243495e-06, "loss": 0.6337, "step": 1087 }, { "epoch": 0.9072336877214926, "grad_norm": 0.10792160034179688, "learning_rate": 1.6312848666782048e-06, "loss": 0.6015, "step": 1088 }, { "epoch": 0.9080675422138836, "grad_norm": 0.10741087794303894, "learning_rate": 1.6305878818442122e-06, "loss": 0.6285, "step": 1089 }, { "epoch": 0.9089013967062748, "grad_norm": 0.10371655970811844, "learning_rate": 1.6298903880848834e-06, "loss": 0.5982, "step": 1090 }, { "epoch": 0.9097352511986658, "grad_norm": 0.10612435638904572, "learning_rate": 1.6291923859631415e-06, "loss": 0.5962, "step": 1091 }, { "epoch": 0.9105691056910569, "grad_norm": 0.10705637186765671, "learning_rate": 1.6284938760423188e-06, "loss": 0.5926, "step": 1092 }, { "epoch": 0.911402960183448, "grad_norm": 0.10797090083360672, "learning_rate": 1.627794858886159e-06, "loss": 0.632, "step": 1093 }, { "epoch": 0.9122368146758391, "grad_norm": 0.1132771223783493, "learning_rate": 1.6270953350588137e-06, "loss": 0.647, "step": 1094 }, { "epoch": 0.9130706691682301, "grad_norm": 0.10793612152338028, "learning_rate": 1.626395305124844e-06, "loss": 0.6367, "step": 1095 }, { "epoch": 0.9139045236606212, "grad_norm": 0.10706419497728348, "learning_rate": 1.6256947696492196e-06, "loss": 0.6472, "step": 1096 }, { "epoch": 0.9147383781530123, "grad_norm": 0.10842972993850708, "learning_rate": 1.6249937291973184e-06, "loss": 0.6185, "step": 1097 }, { "epoch": 0.9155722326454033, "grad_norm": 0.11361444741487503, "learning_rate": 1.6242921843349252e-06, "loss": 0.617, "step": 1098 }, { "epoch": 0.9164060871377945, "grad_norm": 0.11315753310918808, "learning_rate": 1.6235901356282322e-06, "loss": 0.6478, "step": 1099 }, { "epoch": 0.9172399416301855, "grad_norm": 0.12495766580104828, "learning_rate": 1.6228875836438385e-06, "loss": 0.6252, "step": 1100 }, { "epoch": 0.9180737961225766, "grad_norm": 0.11157601326704025, "learning_rate": 1.622184528948749e-06, "loss": 0.6214, "step": 1101 }, { "epoch": 0.9189076506149677, "grad_norm": 0.10694071650505066, "learning_rate": 1.6214809721103744e-06, "loss": 0.619, "step": 1102 }, { "epoch": 0.9197415051073587, "grad_norm": 0.1112818494439125, "learning_rate": 1.6207769136965307e-06, "loss": 0.6229, "step": 1103 }, { "epoch": 0.9205753595997499, "grad_norm": 0.10422814637422562, "learning_rate": 1.6200723542754389e-06, "loss": 0.5837, "step": 1104 }, { "epoch": 0.9214092140921409, "grad_norm": 0.11240324378013611, "learning_rate": 1.619367294415724e-06, "loss": 0.7071, "step": 1105 }, { "epoch": 0.922243068584532, "grad_norm": 0.10819468647241592, "learning_rate": 1.6186617346864151e-06, "loss": 0.5728, "step": 1106 }, { "epoch": 0.9230769230769231, "grad_norm": 0.1053207591176033, "learning_rate": 1.6179556756569448e-06, "loss": 0.5915, "step": 1107 }, { "epoch": 0.9239107775693142, "grad_norm": 0.1075674444437027, "learning_rate": 1.6172491178971482e-06, "loss": 0.5935, "step": 1108 }, { "epoch": 0.9247446320617052, "grad_norm": 0.11159633845090866, "learning_rate": 1.6165420619772635e-06, "loss": 0.6493, "step": 1109 }, { "epoch": 0.9255784865540964, "grad_norm": 0.11671009659767151, "learning_rate": 1.6158345084679307e-06, "loss": 0.6661, "step": 1110 }, { "epoch": 0.9264123410464874, "grad_norm": 0.11934785544872284, "learning_rate": 1.6151264579401917e-06, "loss": 0.6335, "step": 1111 }, { "epoch": 0.9272461955388784, "grad_norm": 0.11291314661502838, "learning_rate": 1.6144179109654887e-06, "loss": 0.6413, "step": 1112 }, { "epoch": 0.9280800500312696, "grad_norm": 0.10902924090623856, "learning_rate": 1.6137088681156654e-06, "loss": 0.6713, "step": 1113 }, { "epoch": 0.9289139045236606, "grad_norm": 0.10608460754156113, "learning_rate": 1.6129993299629651e-06, "loss": 0.6024, "step": 1114 }, { "epoch": 0.9297477590160517, "grad_norm": 0.10624940693378448, "learning_rate": 1.6122892970800317e-06, "loss": 0.6128, "step": 1115 }, { "epoch": 0.9305816135084428, "grad_norm": 0.11591742187738419, "learning_rate": 1.6115787700399071e-06, "loss": 0.6242, "step": 1116 }, { "epoch": 0.9314154680008339, "grad_norm": 0.11706274002790451, "learning_rate": 1.610867749416033e-06, "loss": 0.6356, "step": 1117 }, { "epoch": 0.9322493224932249, "grad_norm": 0.10509736090898514, "learning_rate": 1.6101562357822491e-06, "loss": 0.6337, "step": 1118 }, { "epoch": 0.933083176985616, "grad_norm": 0.10801702737808228, "learning_rate": 1.6094442297127935e-06, "loss": 0.615, "step": 1119 }, { "epoch": 0.9339170314780071, "grad_norm": 0.11627451330423355, "learning_rate": 1.6087317317823007e-06, "loss": 0.6059, "step": 1120 }, { "epoch": 0.9347508859703981, "grad_norm": 0.11316727101802826, "learning_rate": 1.6080187425658033e-06, "loss": 0.6092, "step": 1121 }, { "epoch": 0.9355847404627893, "grad_norm": 0.10899489372968674, "learning_rate": 1.6073052626387296e-06, "loss": 0.5787, "step": 1122 }, { "epoch": 0.9364185949551803, "grad_norm": 0.10977214574813843, "learning_rate": 1.606591292576904e-06, "loss": 0.6022, "step": 1123 }, { "epoch": 0.9372524494475714, "grad_norm": 0.10926050692796707, "learning_rate": 1.6058768329565469e-06, "loss": 0.6626, "step": 1124 }, { "epoch": 0.9380863039399625, "grad_norm": 0.10779014229774475, "learning_rate": 1.6051618843542736e-06, "loss": 0.6381, "step": 1125 }, { "epoch": 0.9389201584323535, "grad_norm": 0.11509402096271515, "learning_rate": 1.6044464473470939e-06, "loss": 0.5817, "step": 1126 }, { "epoch": 0.9397540129247446, "grad_norm": 0.10561665147542953, "learning_rate": 1.6037305225124121e-06, "loss": 0.5634, "step": 1127 }, { "epoch": 0.9405878674171357, "grad_norm": 0.1081278994679451, "learning_rate": 1.6030141104280253e-06, "loss": 0.6437, "step": 1128 }, { "epoch": 0.9414217219095268, "grad_norm": 0.10470031946897507, "learning_rate": 1.6022972116721256e-06, "loss": 0.5853, "step": 1129 }, { "epoch": 0.9422555764019178, "grad_norm": 0.1069476306438446, "learning_rate": 1.601579826823296e-06, "loss": 0.5653, "step": 1130 }, { "epoch": 0.943089430894309, "grad_norm": 0.1095786914229393, "learning_rate": 1.600861956460513e-06, "loss": 0.6191, "step": 1131 }, { "epoch": 0.9439232853867, "grad_norm": 0.10863108187913895, "learning_rate": 1.600143601163144e-06, "loss": 0.6166, "step": 1132 }, { "epoch": 0.944757139879091, "grad_norm": 0.110601507127285, "learning_rate": 1.5994247615109498e-06, "loss": 0.5716, "step": 1133 }, { "epoch": 0.9455909943714822, "grad_norm": 0.108455128967762, "learning_rate": 1.598705438084079e-06, "loss": 0.5888, "step": 1134 }, { "epoch": 0.9464248488638732, "grad_norm": 0.11156295984983444, "learning_rate": 1.5979856314630728e-06, "loss": 0.6723, "step": 1135 }, { "epoch": 0.9472587033562644, "grad_norm": 0.11098117381334305, "learning_rate": 1.5972653422288622e-06, "loss": 0.5671, "step": 1136 }, { "epoch": 0.9480925578486554, "grad_norm": 0.11285511404275894, "learning_rate": 1.5965445709627669e-06, "loss": 0.6437, "step": 1137 }, { "epoch": 0.9489264123410465, "grad_norm": 0.10554268956184387, "learning_rate": 1.5958233182464965e-06, "loss": 0.6289, "step": 1138 }, { "epoch": 0.9497602668334376, "grad_norm": 0.10124699026346207, "learning_rate": 1.5951015846621483e-06, "loss": 0.6068, "step": 1139 }, { "epoch": 0.9505941213258287, "grad_norm": 0.11128734052181244, "learning_rate": 1.5943793707922084e-06, "loss": 0.5907, "step": 1140 }, { "epoch": 0.9514279758182197, "grad_norm": 0.11676076054573059, "learning_rate": 1.5936566772195503e-06, "loss": 0.602, "step": 1141 }, { "epoch": 0.9522618303106108, "grad_norm": 0.11664719134569168, "learning_rate": 1.5929335045274343e-06, "loss": 0.6197, "step": 1142 }, { "epoch": 0.9530956848030019, "grad_norm": 0.1060841903090477, "learning_rate": 1.5922098532995083e-06, "loss": 0.553, "step": 1143 }, { "epoch": 0.9539295392953929, "grad_norm": 0.11080587655305862, "learning_rate": 1.591485724119805e-06, "loss": 0.6357, "step": 1144 }, { "epoch": 0.9547633937877841, "grad_norm": 0.10428017377853394, "learning_rate": 1.5907611175727442e-06, "loss": 0.5693, "step": 1145 }, { "epoch": 0.9555972482801751, "grad_norm": 0.10886859893798828, "learning_rate": 1.59003603424313e-06, "loss": 0.6142, "step": 1146 }, { "epoch": 0.9564311027725662, "grad_norm": 0.1091897264122963, "learning_rate": 1.5893104747161522e-06, "loss": 0.6234, "step": 1147 }, { "epoch": 0.9572649572649573, "grad_norm": 0.11705927550792694, "learning_rate": 1.5885844395773841e-06, "loss": 0.6322, "step": 1148 }, { "epoch": 0.9580988117573483, "grad_norm": 0.11114629358053207, "learning_rate": 1.5878579294127831e-06, "loss": 0.6233, "step": 1149 }, { "epoch": 0.9589326662497394, "grad_norm": 0.11006072908639908, "learning_rate": 1.5871309448086903e-06, "loss": 0.6271, "step": 1150 }, { "epoch": 0.9597665207421305, "grad_norm": 0.10859903693199158, "learning_rate": 1.5864034863518292e-06, "loss": 0.6101, "step": 1151 }, { "epoch": 0.9606003752345216, "grad_norm": 0.11398789286613464, "learning_rate": 1.585675554629306e-06, "loss": 0.6183, "step": 1152 }, { "epoch": 0.9614342297269126, "grad_norm": 0.10914972424507141, "learning_rate": 1.5849471502286087e-06, "loss": 0.6687, "step": 1153 }, { "epoch": 0.9622680842193038, "grad_norm": 0.11407602578401566, "learning_rate": 1.5842182737376074e-06, "loss": 0.6513, "step": 1154 }, { "epoch": 0.9631019387116948, "grad_norm": 0.11537064611911774, "learning_rate": 1.5834889257445525e-06, "loss": 0.6284, "step": 1155 }, { "epoch": 0.9639357932040858, "grad_norm": 0.10912200808525085, "learning_rate": 1.582759106838075e-06, "loss": 0.634, "step": 1156 }, { "epoch": 0.964769647696477, "grad_norm": 0.11783526092767715, "learning_rate": 1.5820288176071861e-06, "loss": 0.6435, "step": 1157 }, { "epoch": 0.965603502188868, "grad_norm": 0.11490530520677567, "learning_rate": 1.5812980586412765e-06, "loss": 0.5872, "step": 1158 }, { "epoch": 0.9664373566812591, "grad_norm": 0.10283337533473969, "learning_rate": 1.5805668305301167e-06, "loss": 0.565, "step": 1159 }, { "epoch": 0.9672712111736502, "grad_norm": 0.10747389495372772, "learning_rate": 1.5798351338638548e-06, "loss": 0.5878, "step": 1160 }, { "epoch": 0.9681050656660413, "grad_norm": 0.11634895950555801, "learning_rate": 1.5791029692330172e-06, "loss": 0.6547, "step": 1161 }, { "epoch": 0.9689389201584323, "grad_norm": 0.11201413720846176, "learning_rate": 1.5783703372285086e-06, "loss": 0.6039, "step": 1162 }, { "epoch": 0.9697727746508235, "grad_norm": 0.11117483675479889, "learning_rate": 1.5776372384416105e-06, "loss": 0.6447, "step": 1163 }, { "epoch": 0.9706066291432145, "grad_norm": 0.11274691671133041, "learning_rate": 1.5769036734639815e-06, "loss": 0.5847, "step": 1164 }, { "epoch": 0.9714404836356055, "grad_norm": 0.11471108347177505, "learning_rate": 1.5761696428876556e-06, "loss": 0.6523, "step": 1165 }, { "epoch": 0.9722743381279967, "grad_norm": 0.11089842766523361, "learning_rate": 1.5754351473050434e-06, "loss": 0.5949, "step": 1166 }, { "epoch": 0.9731081926203877, "grad_norm": 0.10404518991708755, "learning_rate": 1.5747001873089306e-06, "loss": 0.5496, "step": 1167 }, { "epoch": 0.9739420471127789, "grad_norm": 0.116578109562397, "learning_rate": 1.5739647634924773e-06, "loss": 0.6103, "step": 1168 }, { "epoch": 0.9747759016051699, "grad_norm": 0.1054786667227745, "learning_rate": 1.5732288764492184e-06, "loss": 0.623, "step": 1169 }, { "epoch": 0.975609756097561, "grad_norm": 0.11369701474905014, "learning_rate": 1.5724925267730624e-06, "loss": 0.567, "step": 1170 }, { "epoch": 0.9764436105899521, "grad_norm": 0.1198234111070633, "learning_rate": 1.5717557150582917e-06, "loss": 0.6188, "step": 1171 }, { "epoch": 0.9772774650823431, "grad_norm": 0.12100456655025482, "learning_rate": 1.5710184418995604e-06, "loss": 0.641, "step": 1172 }, { "epoch": 0.9781113195747342, "grad_norm": 0.10783829540014267, "learning_rate": 1.5702807078918965e-06, "loss": 0.5889, "step": 1173 }, { "epoch": 0.9789451740671253, "grad_norm": 0.11204763501882553, "learning_rate": 1.5695425136306987e-06, "loss": 0.6271, "step": 1174 }, { "epoch": 0.9797790285595164, "grad_norm": 0.11363273113965988, "learning_rate": 1.568803859711738e-06, "loss": 0.6099, "step": 1175 }, { "epoch": 0.9806128830519074, "grad_norm": 0.10527540743350983, "learning_rate": 1.5680647467311555e-06, "loss": 0.5782, "step": 1176 }, { "epoch": 0.9814467375442986, "grad_norm": 0.11106487363576889, "learning_rate": 1.5673251752854645e-06, "loss": 0.645, "step": 1177 }, { "epoch": 0.9822805920366896, "grad_norm": 0.11001728475093842, "learning_rate": 1.5665851459715457e-06, "loss": 0.611, "step": 1178 }, { "epoch": 0.9831144465290806, "grad_norm": 0.10988467186689377, "learning_rate": 1.5658446593866517e-06, "loss": 0.6229, "step": 1179 }, { "epoch": 0.9839483010214718, "grad_norm": 0.10910095274448395, "learning_rate": 1.565103716128403e-06, "loss": 0.5964, "step": 1180 }, { "epoch": 0.9847821555138628, "grad_norm": 0.11653497070074081, "learning_rate": 1.5643623167947891e-06, "loss": 0.6451, "step": 1181 }, { "epoch": 0.9856160100062539, "grad_norm": 0.11314839869737625, "learning_rate": 1.5636204619841667e-06, "loss": 0.6419, "step": 1182 }, { "epoch": 0.986449864498645, "grad_norm": 0.10827460885047913, "learning_rate": 1.5628781522952611e-06, "loss": 0.6129, "step": 1183 }, { "epoch": 0.9872837189910361, "grad_norm": 0.1053231805562973, "learning_rate": 1.5621353883271649e-06, "loss": 0.6455, "step": 1184 }, { "epoch": 0.9881175734834271, "grad_norm": 0.11166463792324066, "learning_rate": 1.561392170679336e-06, "loss": 0.594, "step": 1185 }, { "epoch": 0.9889514279758183, "grad_norm": 0.11850026249885559, "learning_rate": 1.5606484999516e-06, "loss": 0.575, "step": 1186 }, { "epoch": 0.9897852824682093, "grad_norm": 0.11536535620689392, "learning_rate": 1.5599043767441471e-06, "loss": 0.645, "step": 1187 }, { "epoch": 0.9906191369606003, "grad_norm": 0.1191059872508049, "learning_rate": 1.5591598016575328e-06, "loss": 0.6007, "step": 1188 }, { "epoch": 0.9914529914529915, "grad_norm": 0.1131712794303894, "learning_rate": 1.558414775292678e-06, "loss": 0.5926, "step": 1189 }, { "epoch": 0.9922868459453825, "grad_norm": 0.11502361297607422, "learning_rate": 1.5576692982508663e-06, "loss": 0.6223, "step": 1190 }, { "epoch": 0.9931207004377736, "grad_norm": 0.10861647129058838, "learning_rate": 1.5569233711337474e-06, "loss": 0.5948, "step": 1191 }, { "epoch": 0.9939545549301647, "grad_norm": 0.11109884828329086, "learning_rate": 1.5561769945433323e-06, "loss": 0.5778, "step": 1192 }, { "epoch": 0.9947884094225558, "grad_norm": 0.1124456375837326, "learning_rate": 1.5554301690819951e-06, "loss": 0.6732, "step": 1193 }, { "epoch": 0.9956222639149468, "grad_norm": 0.10982295870780945, "learning_rate": 1.5546828953524727e-06, "loss": 0.6221, "step": 1194 }, { "epoch": 0.9964561184073379, "grad_norm": 0.10954145342111588, "learning_rate": 1.553935173957863e-06, "loss": 0.6045, "step": 1195 }, { "epoch": 0.997289972899729, "grad_norm": 0.10329638421535492, "learning_rate": 1.5531870055016265e-06, "loss": 0.5827, "step": 1196 }, { "epoch": 0.99812382739212, "grad_norm": 0.1147105023264885, "learning_rate": 1.5524383905875828e-06, "loss": 0.6676, "step": 1197 }, { "epoch": 0.9989576818845112, "grad_norm": 0.10970206558704376, "learning_rate": 1.5516893298199133e-06, "loss": 0.6159, "step": 1198 }, { "epoch": 0.9997915363769022, "grad_norm": 0.10848627239465714, "learning_rate": 1.5509398238031585e-06, "loss": 0.6106, "step": 1199 } ], "logging_steps": 1, "max_steps": 3597, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1199, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.45611918040105e+18, "train_batch_size": 6, "trial_name": null, "trial_params": null }