| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.4995408631772268, |
| "eval_steps": 136, |
| "global_step": 272, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0018365472910927456, |
| "grad_norm": 0.49180246368465724, |
| "learning_rate": 0.0, |
| "loss": 1.3574, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0018365472910927456, |
| "eval_loss": 2.4259753227233887, |
| "eval_runtime": 39.8169, |
| "eval_samples_per_second": 5.149, |
| "eval_steps_per_second": 0.452, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0036730945821854912, |
| "grad_norm": 0.3994587341086747, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.444, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005509641873278237, |
| "grad_norm": 0.39017597145072364, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.5149, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0073461891643709825, |
| "grad_norm": 0.47444439480391987, |
| "learning_rate": 7.5e-07, |
| "loss": 1.5855, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.009182736455463728, |
| "grad_norm": 0.794434599507836, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.6531, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.011019283746556474, |
| "grad_norm": 0.7108153394196348, |
| "learning_rate": 1.25e-06, |
| "loss": 1.7881, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.012855831037649219, |
| "grad_norm": 0.5204076611421804, |
| "learning_rate": 1.5e-06, |
| "loss": 1.5592, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.014692378328741965, |
| "grad_norm": 0.7569351115766059, |
| "learning_rate": 1.75e-06, |
| "loss": 1.5735, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01652892561983471, |
| "grad_norm": 0.5678098307469198, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.5934, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.018365472910927456, |
| "grad_norm": 0.40098167184732914, |
| "learning_rate": 2.25e-06, |
| "loss": 1.688, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.020202020202020204, |
| "grad_norm": 0.4268491869708594, |
| "learning_rate": 2.5e-06, |
| "loss": 1.6286, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02203856749311295, |
| "grad_norm": 0.5835872767276206, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.679, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.023875114784205693, |
| "grad_norm": 0.41861495783037117, |
| "learning_rate": 3e-06, |
| "loss": 1.7093, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.025711662075298437, |
| "grad_norm": 0.3375113108661923, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.7431, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.027548209366391185, |
| "grad_norm": 0.48885555509704504, |
| "learning_rate": 3.5e-06, |
| "loss": 1.4873, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02938475665748393, |
| "grad_norm": 0.4483288005954393, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.6619, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.031221303948576674, |
| "grad_norm": 0.4299802045225574, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.6997, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03305785123966942, |
| "grad_norm": 0.5824485228901973, |
| "learning_rate": 4.25e-06, |
| "loss": 1.691, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03489439853076217, |
| "grad_norm": 0.32860558551239133, |
| "learning_rate": 4.5e-06, |
| "loss": 1.5414, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.03673094582185491, |
| "grad_norm": 1.7514560949718712, |
| "learning_rate": 4.75e-06, |
| "loss": 1.522, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03856749311294766, |
| "grad_norm": 0.3844467204577219, |
| "learning_rate": 5e-06, |
| "loss": 1.6948, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04040404040404041, |
| "grad_norm": 0.329345071029554, |
| "learning_rate": 5.2500000000000006e-06, |
| "loss": 1.509, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04224058769513315, |
| "grad_norm": 0.28268786055882505, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 1.5563, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0440771349862259, |
| "grad_norm": 0.3422617324754064, |
| "learning_rate": 5.75e-06, |
| "loss": 1.5615, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04591368227731864, |
| "grad_norm": 0.33860326348440456, |
| "learning_rate": 6e-06, |
| "loss": 1.7008, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.047750229568411386, |
| "grad_norm": 0.2789219845798442, |
| "learning_rate": 6.25e-06, |
| "loss": 1.5667, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.049586776859504134, |
| "grad_norm": 0.37047693270617005, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 1.5557, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.051423324150596875, |
| "grad_norm": 0.3368470842339362, |
| "learning_rate": 6.750000000000001e-06, |
| "loss": 1.6045, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05325987144168962, |
| "grad_norm": 0.30344837893854465, |
| "learning_rate": 7e-06, |
| "loss": 1.6766, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05509641873278237, |
| "grad_norm": 0.6359004570618099, |
| "learning_rate": 7.25e-06, |
| "loss": 1.6499, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05693296602387511, |
| "grad_norm": 0.3258591226646554, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 1.6565, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.05876951331496786, |
| "grad_norm": 0.3697834149605013, |
| "learning_rate": 7.75e-06, |
| "loss": 1.4883, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06060606060606061, |
| "grad_norm": 0.265668853003883, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.8289, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06244260789715335, |
| "grad_norm": 0.39987679350668315, |
| "learning_rate": 8.25e-06, |
| "loss": 1.6633, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0642791551882461, |
| "grad_norm": 0.39636333420956993, |
| "learning_rate": 8.5e-06, |
| "loss": 1.2298, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06611570247933884, |
| "grad_norm": 0.3464362348603123, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 1.485, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06795224977043159, |
| "grad_norm": 0.31616445355046285, |
| "learning_rate": 9e-06, |
| "loss": 1.5553, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06978879706152434, |
| "grad_norm": 0.4441888412468937, |
| "learning_rate": 9.250000000000001e-06, |
| "loss": 1.5967, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07162534435261708, |
| "grad_norm": 0.3547318554597848, |
| "learning_rate": 9.5e-06, |
| "loss": 1.5006, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.07346189164370982, |
| "grad_norm": 0.27557088472944596, |
| "learning_rate": 9.75e-06, |
| "loss": 1.5673, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07529843893480258, |
| "grad_norm": 0.2956312497066916, |
| "learning_rate": 1e-05, |
| "loss": 1.4936, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07713498622589532, |
| "grad_norm": 0.2322611308466898, |
| "learning_rate": 9.999994591993822e-06, |
| "loss": 1.4759, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07897153351698806, |
| "grad_norm": 0.22039029040435731, |
| "learning_rate": 9.999978367986988e-06, |
| "loss": 1.6625, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08080808080808081, |
| "grad_norm": 0.3446344059315139, |
| "learning_rate": 9.999951328014591e-06, |
| "loss": 1.6309, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08264462809917356, |
| "grad_norm": 0.23666900781940328, |
| "learning_rate": 9.999913472135126e-06, |
| "loss": 1.5347, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0844811753902663, |
| "grad_norm": 0.2840328883351318, |
| "learning_rate": 9.999864800430482e-06, |
| "loss": 1.5115, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08631772268135904, |
| "grad_norm": 0.2407651293731456, |
| "learning_rate": 9.999805313005946e-06, |
| "loss": 1.4199, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0881542699724518, |
| "grad_norm": 0.24925631476018442, |
| "learning_rate": 9.999735009990202e-06, |
| "loss": 1.7008, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08999081726354453, |
| "grad_norm": 0.2502728573485842, |
| "learning_rate": 9.99965389153533e-06, |
| "loss": 1.6181, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.09182736455463728, |
| "grad_norm": 0.23272569963887466, |
| "learning_rate": 9.999561957816803e-06, |
| "loss": 1.4668, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09366391184573003, |
| "grad_norm": 0.23492521900860228, |
| "learning_rate": 9.999459209033495e-06, |
| "loss": 1.6884, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09550045913682277, |
| "grad_norm": 0.24231529936066795, |
| "learning_rate": 9.999345645407671e-06, |
| "loss": 1.3811, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.09733700642791551, |
| "grad_norm": 0.23749137313908725, |
| "learning_rate": 9.999221267184993e-06, |
| "loss": 1.6599, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.09917355371900827, |
| "grad_norm": 0.2631749052017168, |
| "learning_rate": 9.999086074634516e-06, |
| "loss": 1.5321, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "grad_norm": 0.265780364913351, |
| "learning_rate": 9.998940068048688e-06, |
| "loss": 1.7954, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10284664830119375, |
| "grad_norm": 0.23373755925772408, |
| "learning_rate": 9.998783247743353e-06, |
| "loss": 1.6503, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1046831955922865, |
| "grad_norm": 0.26345499971500486, |
| "learning_rate": 9.998615614057743e-06, |
| "loss": 1.6842, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.10651974288337925, |
| "grad_norm": 0.22858688890828285, |
| "learning_rate": 9.998437167354485e-06, |
| "loss": 1.6053, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.10835629017447199, |
| "grad_norm": 0.22732836541277007, |
| "learning_rate": 9.998247908019594e-06, |
| "loss": 1.5357, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.11019283746556474, |
| "grad_norm": 0.21575052353545032, |
| "learning_rate": 9.998047836462476e-06, |
| "loss": 1.5875, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11202938475665748, |
| "grad_norm": 0.3308350003540223, |
| "learning_rate": 9.997836953115927e-06, |
| "loss": 1.9333, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.11386593204775022, |
| "grad_norm": 0.24160639887541288, |
| "learning_rate": 9.99761525843613e-06, |
| "loss": 1.6121, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.11570247933884298, |
| "grad_norm": 0.23537591666009566, |
| "learning_rate": 9.997382752902658e-06, |
| "loss": 1.6341, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.11753902662993572, |
| "grad_norm": 0.30577270648255184, |
| "learning_rate": 9.997139437018463e-06, |
| "loss": 1.4635, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.11937557392102846, |
| "grad_norm": 0.24971896873950858, |
| "learning_rate": 9.996885311309892e-06, |
| "loss": 1.8186, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12121212121212122, |
| "grad_norm": 0.25847791225683553, |
| "learning_rate": 9.996620376326667e-06, |
| "loss": 1.491, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.12304866850321396, |
| "grad_norm": 0.24031460889715148, |
| "learning_rate": 9.996344632641895e-06, |
| "loss": 1.6563, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1248852157943067, |
| "grad_norm": 0.2550524831797301, |
| "learning_rate": 9.996058080852067e-06, |
| "loss": 1.6427, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.12672176308539945, |
| "grad_norm": 0.22467108780123965, |
| "learning_rate": 9.995760721577053e-06, |
| "loss": 1.4595, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1285583103764922, |
| "grad_norm": 0.282181674918973, |
| "learning_rate": 9.995452555460098e-06, |
| "loss": 1.6837, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.13039485766758493, |
| "grad_norm": 0.5727450301366364, |
| "learning_rate": 9.995133583167833e-06, |
| "loss": 1.6003, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1322314049586777, |
| "grad_norm": 0.22744917091853156, |
| "learning_rate": 9.994803805390257e-06, |
| "loss": 1.3544, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.13406795224977044, |
| "grad_norm": 0.22395196659772004, |
| "learning_rate": 9.994463222840748e-06, |
| "loss": 1.6198, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.13590449954086317, |
| "grad_norm": 0.2098292344869194, |
| "learning_rate": 9.994111836256049e-06, |
| "loss": 1.5059, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.13774104683195593, |
| "grad_norm": 0.22329332454983955, |
| "learning_rate": 9.993749646396286e-06, |
| "loss": 1.4666, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.13957759412304868, |
| "grad_norm": 1.131446258847722, |
| "learning_rate": 9.993376654044948e-06, |
| "loss": 1.5164, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1414141414141414, |
| "grad_norm": 0.22716717526603358, |
| "learning_rate": 9.992992860008893e-06, |
| "loss": 1.4978, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.14325068870523416, |
| "grad_norm": 0.24179796579866508, |
| "learning_rate": 9.992598265118344e-06, |
| "loss": 1.3147, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.14508723599632692, |
| "grad_norm": 0.2127244611718112, |
| "learning_rate": 9.99219287022689e-06, |
| "loss": 1.4498, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.14692378328741965, |
| "grad_norm": 0.27213431233903435, |
| "learning_rate": 9.991776676211483e-06, |
| "loss": 1.8822, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1487603305785124, |
| "grad_norm": 0.22988463490869612, |
| "learning_rate": 9.991349683972435e-06, |
| "loss": 1.4995, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.15059687786960516, |
| "grad_norm": 0.23481878477688978, |
| "learning_rate": 9.990911894433415e-06, |
| "loss": 1.5132, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.15243342516069788, |
| "grad_norm": 0.37026228973170716, |
| "learning_rate": 9.990463308541452e-06, |
| "loss": 1.5846, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.15426997245179064, |
| "grad_norm": 0.34328184021255104, |
| "learning_rate": 9.990003927266928e-06, |
| "loss": 1.5882, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1561065197428834, |
| "grad_norm": 0.2239251502463014, |
| "learning_rate": 9.989533751603578e-06, |
| "loss": 1.5294, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.15794306703397612, |
| "grad_norm": 0.2531093496713428, |
| "learning_rate": 9.989052782568484e-06, |
| "loss": 1.4136, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.15977961432506887, |
| "grad_norm": 0.22960377831673873, |
| "learning_rate": 9.988561021202083e-06, |
| "loss": 1.5179, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.16161616161616163, |
| "grad_norm": 0.2353475082843009, |
| "learning_rate": 9.988058468568154e-06, |
| "loss": 1.5986, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.16345270890725436, |
| "grad_norm": 0.2095228695568511, |
| "learning_rate": 9.987545125753818e-06, |
| "loss": 1.7077, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1652892561983471, |
| "grad_norm": 0.25954954331361235, |
| "learning_rate": 9.987020993869543e-06, |
| "loss": 1.5786, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.16712580348943984, |
| "grad_norm": 0.3475458310303135, |
| "learning_rate": 9.986486074049131e-06, |
| "loss": 1.6145, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1689623507805326, |
| "grad_norm": 0.26936951415786353, |
| "learning_rate": 9.98594036744972e-06, |
| "loss": 1.7652, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.17079889807162535, |
| "grad_norm": 0.21373420050407635, |
| "learning_rate": 9.985383875251783e-06, |
| "loss": 1.6871, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.17263544536271808, |
| "grad_norm": 0.23205711833790327, |
| "learning_rate": 9.98481659865913e-06, |
| "loss": 1.4672, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.17447199265381083, |
| "grad_norm": 0.26381334544086116, |
| "learning_rate": 9.98423853889889e-06, |
| "loss": 1.6741, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1763085399449036, |
| "grad_norm": 0.24396664978240085, |
| "learning_rate": 9.983649697221528e-06, |
| "loss": 1.5546, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1781450872359963, |
| "grad_norm": 0.2533801841511916, |
| "learning_rate": 9.983050074900824e-06, |
| "loss": 1.6902, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.17998163452708907, |
| "grad_norm": 0.2115479895852038, |
| "learning_rate": 9.982439673233885e-06, |
| "loss": 1.6094, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 0.22659674725493387, |
| "learning_rate": 9.98181849354113e-06, |
| "loss": 1.3953, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.18365472910927455, |
| "grad_norm": 0.4346020173346875, |
| "learning_rate": 9.981186537166301e-06, |
| "loss": 1.4842, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1854912764003673, |
| "grad_norm": 0.22752027491511684, |
| "learning_rate": 9.980543805476447e-06, |
| "loss": 1.6125, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.18732782369146006, |
| "grad_norm": 0.2178158645117292, |
| "learning_rate": 9.979890299861923e-06, |
| "loss": 1.3843, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1891643709825528, |
| "grad_norm": 0.2629457576326313, |
| "learning_rate": 9.979226021736396e-06, |
| "loss": 1.5034, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.19100091827364554, |
| "grad_norm": 0.2638758563035785, |
| "learning_rate": 9.978550972536834e-06, |
| "loss": 1.2882, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1928374655647383, |
| "grad_norm": 0.25957055432676646, |
| "learning_rate": 9.977865153723508e-06, |
| "loss": 1.5669, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.19467401285583102, |
| "grad_norm": 0.2107272760638706, |
| "learning_rate": 9.977168566779976e-06, |
| "loss": 1.4316, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.19651056014692378, |
| "grad_norm": 0.23996444946668347, |
| "learning_rate": 9.976461213213104e-06, |
| "loss": 1.5958, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.19834710743801653, |
| "grad_norm": 0.3866853138626082, |
| "learning_rate": 9.975743094553037e-06, |
| "loss": 1.6301, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.20018365472910926, |
| "grad_norm": 0.2659575860373734, |
| "learning_rate": 9.975014212353212e-06, |
| "loss": 1.6435, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "grad_norm": 0.25698703101518205, |
| "learning_rate": 9.974274568190349e-06, |
| "loss": 1.4947, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.20385674931129477, |
| "grad_norm": 0.23394729883248447, |
| "learning_rate": 9.973524163664447e-06, |
| "loss": 1.5486, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2056932966023875, |
| "grad_norm": 0.2682338169486529, |
| "learning_rate": 9.972763000398784e-06, |
| "loss": 1.562, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.20752984389348025, |
| "grad_norm": 0.22594240276292787, |
| "learning_rate": 9.971991080039912e-06, |
| "loss": 1.6665, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.209366391184573, |
| "grad_norm": 0.2716099274981758, |
| "learning_rate": 9.971208404257647e-06, |
| "loss": 1.6069, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.21120293847566574, |
| "grad_norm": 0.215642600558555, |
| "learning_rate": 9.970414974745077e-06, |
| "loss": 1.426, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2130394857667585, |
| "grad_norm": 0.26192594390837787, |
| "learning_rate": 9.96961079321855e-06, |
| "loss": 1.4929, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.21487603305785125, |
| "grad_norm": 0.24076680870279565, |
| "learning_rate": 9.968795861417676e-06, |
| "loss": 1.5116, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.21671258034894397, |
| "grad_norm": 0.20785778608018918, |
| "learning_rate": 9.967970181105315e-06, |
| "loss": 1.4824, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.21854912764003673, |
| "grad_norm": 0.2474137833375562, |
| "learning_rate": 9.967133754067581e-06, |
| "loss": 1.6394, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.22038567493112948, |
| "grad_norm": 0.2415950163286452, |
| "learning_rate": 9.966286582113838e-06, |
| "loss": 1.4747, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.25397048864143756, |
| "learning_rate": 9.965428667076687e-06, |
| "loss": 1.626, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.22405876951331496, |
| "grad_norm": 0.27597092429735764, |
| "learning_rate": 9.964560010811972e-06, |
| "loss": 1.4853, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.22589531680440772, |
| "grad_norm": 0.2773282618369793, |
| "learning_rate": 9.963680615198774e-06, |
| "loss": 1.2673, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.22773186409550045, |
| "grad_norm": 0.2863982446521439, |
| "learning_rate": 9.962790482139402e-06, |
| "loss": 1.4531, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2295684113865932, |
| "grad_norm": 0.24940341962059176, |
| "learning_rate": 9.961889613559396e-06, |
| "loss": 1.5392, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.23140495867768596, |
| "grad_norm": 0.2692207944659178, |
| "learning_rate": 9.960978011407516e-06, |
| "loss": 1.3452, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.23324150596877868, |
| "grad_norm": 0.2394265357414062, |
| "learning_rate": 9.960055677655743e-06, |
| "loss": 1.4267, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.23507805325987144, |
| "grad_norm": 0.2396211231483559, |
| "learning_rate": 9.95912261429927e-06, |
| "loss": 1.669, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2369146005509642, |
| "grad_norm": 0.22480647337089882, |
| "learning_rate": 9.958178823356503e-06, |
| "loss": 1.3276, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.23875114784205692, |
| "grad_norm": 0.2322223276315898, |
| "learning_rate": 9.957224306869053e-06, |
| "loss": 1.6457, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.24058769513314968, |
| "grad_norm": 0.28675358511507826, |
| "learning_rate": 9.956259066901733e-06, |
| "loss": 1.4844, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.24242424242424243, |
| "grad_norm": 0.23506904729263256, |
| "learning_rate": 9.955283105542551e-06, |
| "loss": 1.5401, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.24426078971533516, |
| "grad_norm": 0.25409979083106216, |
| "learning_rate": 9.954296424902709e-06, |
| "loss": 1.5796, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2460973370064279, |
| "grad_norm": 0.27537727017693314, |
| "learning_rate": 9.953299027116598e-06, |
| "loss": 1.7015, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.24793388429752067, |
| "grad_norm": 0.255527421712665, |
| "learning_rate": 9.95229091434179e-06, |
| "loss": 1.6943, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2497704315886134, |
| "grad_norm": 0.2758931212786201, |
| "learning_rate": 9.95127208875904e-06, |
| "loss": 1.6373, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2497704315886134, |
| "eval_loss": 2.332951307296753, |
| "eval_runtime": 38.5842, |
| "eval_samples_per_second": 5.313, |
| "eval_steps_per_second": 0.467, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2516069788797062, |
| "grad_norm": 0.4849948087122718, |
| "learning_rate": 9.950242552572272e-06, |
| "loss": 1.5843, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2534435261707989, |
| "grad_norm": 0.27082023040786063, |
| "learning_rate": 9.949202308008581e-06, |
| "loss": 1.466, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.25528007346189163, |
| "grad_norm": 0.21412319589684112, |
| "learning_rate": 9.948151357318228e-06, |
| "loss": 1.5575, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2571166207529844, |
| "grad_norm": 0.2630300060870272, |
| "learning_rate": 9.94708970277463e-06, |
| "loss": 1.8459, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.25895316804407714, |
| "grad_norm": 0.29584228360860926, |
| "learning_rate": 9.946017346674362e-06, |
| "loss": 1.4765, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.26078971533516987, |
| "grad_norm": 0.20420849217608372, |
| "learning_rate": 9.944934291337146e-06, |
| "loss": 1.3963, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.26262626262626265, |
| "grad_norm": 0.2383087119690396, |
| "learning_rate": 9.943840539105853e-06, |
| "loss": 1.6948, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2644628099173554, |
| "grad_norm": 0.22707287878559146, |
| "learning_rate": 9.942736092346487e-06, |
| "loss": 1.5627, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2662993572084481, |
| "grad_norm": 0.2209791613669926, |
| "learning_rate": 9.941620953448195e-06, |
| "loss": 1.6011, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2681359044995409, |
| "grad_norm": 0.23620122895922513, |
| "learning_rate": 9.940495124823241e-06, |
| "loss": 1.4172, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2699724517906336, |
| "grad_norm": 0.4575809432111337, |
| "learning_rate": 9.939358608907026e-06, |
| "loss": 1.8045, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.27180899908172634, |
| "grad_norm": 0.25576228816691954, |
| "learning_rate": 9.938211408158063e-06, |
| "loss": 1.5559, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2736455463728191, |
| "grad_norm": 0.32262207355286887, |
| "learning_rate": 9.937053525057977e-06, |
| "loss": 1.6491, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.27548209366391185, |
| "grad_norm": 0.29602023759033497, |
| "learning_rate": 9.935884962111506e-06, |
| "loss": 1.6518, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2773186409550046, |
| "grad_norm": 0.24182886602369053, |
| "learning_rate": 9.934705721846487e-06, |
| "loss": 1.5457, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.27915518824609736, |
| "grad_norm": 0.2247571699952089, |
| "learning_rate": 9.933515806813856e-06, |
| "loss": 1.481, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2809917355371901, |
| "grad_norm": 0.24890915769392072, |
| "learning_rate": 9.932315219587641e-06, |
| "loss": 1.618, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.2828282828282828, |
| "grad_norm": 0.2597194625874549, |
| "learning_rate": 9.931103962764955e-06, |
| "loss": 1.5653, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.2846648301193756, |
| "grad_norm": 0.23725030845640346, |
| "learning_rate": 9.92988203896599e-06, |
| "loss": 1.4677, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2865013774104683, |
| "grad_norm": 0.20619150412894088, |
| "learning_rate": 9.928649450834015e-06, |
| "loss": 1.6779, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.28833792470156105, |
| "grad_norm": 0.23919303859135752, |
| "learning_rate": 9.927406201035368e-06, |
| "loss": 1.4417, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.29017447199265384, |
| "grad_norm": 0.2244668514529571, |
| "learning_rate": 9.926152292259452e-06, |
| "loss": 1.7272, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.29201101928374656, |
| "grad_norm": 0.26954841337527125, |
| "learning_rate": 9.924887727218724e-06, |
| "loss": 1.6068, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.2938475665748393, |
| "grad_norm": 0.32951953455251004, |
| "learning_rate": 9.923612508648693e-06, |
| "loss": 1.406, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2956841138659321, |
| "grad_norm": 0.2698529792514822, |
| "learning_rate": 9.922326639307918e-06, |
| "loss": 1.683, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2975206611570248, |
| "grad_norm": 0.22420186125700636, |
| "learning_rate": 9.921030121977992e-06, |
| "loss": 1.5398, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.29935720844811753, |
| "grad_norm": 0.24335042870850987, |
| "learning_rate": 9.919722959463545e-06, |
| "loss": 1.5752, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3011937557392103, |
| "grad_norm": 0.272993009874352, |
| "learning_rate": 9.918405154592234e-06, |
| "loss": 1.6791, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 0.36447396005561716, |
| "learning_rate": 9.917076710214739e-06, |
| "loss": 1.8303, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.30486685032139577, |
| "grad_norm": 0.266743073761148, |
| "learning_rate": 9.915737629204754e-06, |
| "loss": 1.652, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.30670339761248855, |
| "grad_norm": 0.22394219745904015, |
| "learning_rate": 9.914387914458983e-06, |
| "loss": 1.5615, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3085399449035813, |
| "grad_norm": 0.23075889412968112, |
| "learning_rate": 9.91302756889713e-06, |
| "loss": 1.6447, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.310376492194674, |
| "grad_norm": 0.2441975693564735, |
| "learning_rate": 9.911656595461899e-06, |
| "loss": 1.5981, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3122130394857668, |
| "grad_norm": 0.21377545765414097, |
| "learning_rate": 9.910274997118982e-06, |
| "loss": 1.6066, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3140495867768595, |
| "grad_norm": 0.21518096806340806, |
| "learning_rate": 9.908882776857057e-06, |
| "loss": 1.199, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.31588613406795224, |
| "grad_norm": 0.22989533865542952, |
| "learning_rate": 9.907479937687779e-06, |
| "loss": 1.6019, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.317722681359045, |
| "grad_norm": 0.24187157970795195, |
| "learning_rate": 9.906066482645774e-06, |
| "loss": 1.5453, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.31955922865013775, |
| "grad_norm": 0.28880156295741904, |
| "learning_rate": 9.904642414788627e-06, |
| "loss": 1.6518, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3213957759412305, |
| "grad_norm": 0.3064989299701211, |
| "learning_rate": 9.903207737196892e-06, |
| "loss": 1.5614, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.32323232323232326, |
| "grad_norm": 0.5501951504862674, |
| "learning_rate": 9.90176245297406e-06, |
| "loss": 1.3988, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.325068870523416, |
| "grad_norm": 0.23086867624049656, |
| "learning_rate": 9.900306565246579e-06, |
| "loss": 1.4688, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3269054178145087, |
| "grad_norm": 0.24109267951104685, |
| "learning_rate": 9.898840077163824e-06, |
| "loss": 1.342, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3287419651056015, |
| "grad_norm": 0.221519141054226, |
| "learning_rate": 9.89736299189811e-06, |
| "loss": 1.6468, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.3305785123966942, |
| "grad_norm": 0.2134481965910651, |
| "learning_rate": 9.89587531264467e-06, |
| "loss": 1.4762, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.33241505968778695, |
| "grad_norm": 0.30273218459666634, |
| "learning_rate": 9.894377042621654e-06, |
| "loss": 1.2952, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3342516069788797, |
| "grad_norm": 0.2643249012553608, |
| "learning_rate": 9.892868185070125e-06, |
| "loss": 1.5922, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.33608815426997246, |
| "grad_norm": 0.3017798778461224, |
| "learning_rate": 9.891348743254046e-06, |
| "loss": 1.3327, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3379247015610652, |
| "grad_norm": 0.198099058736144, |
| "learning_rate": 9.889818720460281e-06, |
| "loss": 1.4528, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3397612488521579, |
| "grad_norm": 0.2342720457991802, |
| "learning_rate": 9.888278119998573e-06, |
| "loss": 1.396, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3415977961432507, |
| "grad_norm": 0.30360601805904236, |
| "learning_rate": 9.886726945201556e-06, |
| "loss": 1.5982, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3434343434343434, |
| "grad_norm": 0.2890684354336481, |
| "learning_rate": 9.885165199424738e-06, |
| "loss": 1.3935, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.34527089072543615, |
| "grad_norm": 0.2574557316071565, |
| "learning_rate": 9.883592886046486e-06, |
| "loss": 1.449, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.34710743801652894, |
| "grad_norm": 0.23195985985184547, |
| "learning_rate": 9.882010008468038e-06, |
| "loss": 1.4909, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.34894398530762166, |
| "grad_norm": 0.2539011848151687, |
| "learning_rate": 9.880416570113472e-06, |
| "loss": 1.5123, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3507805325987144, |
| "grad_norm": 0.26954455617431194, |
| "learning_rate": 9.878812574429722e-06, |
| "loss": 1.4445, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3526170798898072, |
| "grad_norm": 0.23141695311529595, |
| "learning_rate": 9.877198024886553e-06, |
| "loss": 1.9045, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3544536271808999, |
| "grad_norm": 0.2750943021523168, |
| "learning_rate": 9.875572924976568e-06, |
| "loss": 1.5868, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3562901744719926, |
| "grad_norm": 0.23421966078059409, |
| "learning_rate": 9.873937278215181e-06, |
| "loss": 1.7534, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3581267217630854, |
| "grad_norm": 0.24409550991932608, |
| "learning_rate": 9.87229108814063e-06, |
| "loss": 1.4753, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.35996326905417814, |
| "grad_norm": 0.24998063066925763, |
| "learning_rate": 9.870634358313956e-06, |
| "loss": 1.6574, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.36179981634527086, |
| "grad_norm": 0.2630317204109116, |
| "learning_rate": 9.868967092319003e-06, |
| "loss": 1.6794, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.21473616516429722, |
| "learning_rate": 9.867289293762403e-06, |
| "loss": 1.7626, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.3654729109274564, |
| "grad_norm": 0.3020374891635284, |
| "learning_rate": 9.865600966273576e-06, |
| "loss": 1.5234, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.3673094582185491, |
| "grad_norm": 0.24122137539024896, |
| "learning_rate": 9.863902113504713e-06, |
| "loss": 1.5807, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3691460055096419, |
| "grad_norm": 0.2240345557985464, |
| "learning_rate": 9.86219273913078e-06, |
| "loss": 1.4635, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.3709825528007346, |
| "grad_norm": 0.2561505890958098, |
| "learning_rate": 9.860472846849498e-06, |
| "loss": 1.5781, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.37281910009182734, |
| "grad_norm": 0.2629334369467521, |
| "learning_rate": 9.858742440381343e-06, |
| "loss": 1.3698, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3746556473829201, |
| "grad_norm": 0.2643820816427679, |
| "learning_rate": 9.857001523469534e-06, |
| "loss": 1.6831, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.37649219467401285, |
| "grad_norm": 0.27568924761133784, |
| "learning_rate": 9.855250099880026e-06, |
| "loss": 1.5586, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3783287419651056, |
| "grad_norm": 0.23562831853645116, |
| "learning_rate": 9.853488173401504e-06, |
| "loss": 1.4529, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.38016528925619836, |
| "grad_norm": 0.24270051421435648, |
| "learning_rate": 9.851715747845372e-06, |
| "loss": 1.5449, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3820018365472911, |
| "grad_norm": 0.3141794855298513, |
| "learning_rate": 9.849932827045746e-06, |
| "loss": 1.6011, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3838383838383838, |
| "grad_norm": 0.2347451989106533, |
| "learning_rate": 9.848139414859441e-06, |
| "loss": 1.7994, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.3856749311294766, |
| "grad_norm": 0.22998314428761488, |
| "learning_rate": 9.846335515165974e-06, |
| "loss": 1.5024, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3875114784205693, |
| "grad_norm": 0.2914601119925799, |
| "learning_rate": 9.844521131867546e-06, |
| "loss": 1.6718, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.38934802571166205, |
| "grad_norm": 0.27307242607178106, |
| "learning_rate": 9.842696268889032e-06, |
| "loss": 1.6705, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.39118457300275483, |
| "grad_norm": 0.26989044058601575, |
| "learning_rate": 9.840860930177984e-06, |
| "loss": 1.5762, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.39302112029384756, |
| "grad_norm": 0.23544376195311703, |
| "learning_rate": 9.839015119704607e-06, |
| "loss": 1.4928, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.3948576675849403, |
| "grad_norm": 0.40380733954206144, |
| "learning_rate": 9.837158841461767e-06, |
| "loss": 1.5587, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.39669421487603307, |
| "grad_norm": 0.2416750029017525, |
| "learning_rate": 9.835292099464965e-06, |
| "loss": 1.4265, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3985307621671258, |
| "grad_norm": 0.20943162169896912, |
| "learning_rate": 9.833414897752346e-06, |
| "loss": 1.5836, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4003673094582185, |
| "grad_norm": 0.24319317049956413, |
| "learning_rate": 9.831527240384677e-06, |
| "loss": 1.6154, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4022038567493113, |
| "grad_norm": 0.2375469014345421, |
| "learning_rate": 9.829629131445342e-06, |
| "loss": 1.5652, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 0.27739989413008637, |
| "learning_rate": 9.827720575040335e-06, |
| "loss": 1.5439, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.40587695133149676, |
| "grad_norm": 0.24247147004672653, |
| "learning_rate": 9.825801575298248e-06, |
| "loss": 1.4613, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.40771349862258954, |
| "grad_norm": 0.29413816874548104, |
| "learning_rate": 9.82387213637027e-06, |
| "loss": 1.4005, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.40955004591368227, |
| "grad_norm": 0.21318983016138546, |
| "learning_rate": 9.821932262430164e-06, |
| "loss": 1.4747, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.411386593204775, |
| "grad_norm": 0.24655890709082884, |
| "learning_rate": 9.819981957674273e-06, |
| "loss": 1.6599, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4132231404958678, |
| "grad_norm": 0.23420376177876423, |
| "learning_rate": 9.818021226321502e-06, |
| "loss": 1.5039, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4150596877869605, |
| "grad_norm": 0.23379966460981552, |
| "learning_rate": 9.816050072613306e-06, |
| "loss": 1.775, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.41689623507805323, |
| "grad_norm": 0.30210714633151925, |
| "learning_rate": 9.814068500813692e-06, |
| "loss": 1.5692, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.418732782369146, |
| "grad_norm": 0.24124025669836516, |
| "learning_rate": 9.812076515209201e-06, |
| "loss": 1.5682, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.42056932966023874, |
| "grad_norm": 0.40694547415364124, |
| "learning_rate": 9.8100741201089e-06, |
| "loss": 1.3886, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.42240587695133147, |
| "grad_norm": 0.5017797555556736, |
| "learning_rate": 9.808061319844376e-06, |
| "loss": 1.5096, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.42424242424242425, |
| "grad_norm": 0.2395927998641603, |
| "learning_rate": 9.806038118769724e-06, |
| "loss": 1.6511, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.426078971533517, |
| "grad_norm": 0.26346897129489716, |
| "learning_rate": 9.804004521261537e-06, |
| "loss": 1.4852, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4279155188246097, |
| "grad_norm": 0.21927054884906805, |
| "learning_rate": 9.801960531718898e-06, |
| "loss": 1.3796, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4297520661157025, |
| "grad_norm": 0.21216613988150154, |
| "learning_rate": 9.79990615456337e-06, |
| "loss": 1.3236, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.4315886134067952, |
| "grad_norm": 0.2439837140444034, |
| "learning_rate": 9.797841394238987e-06, |
| "loss": 1.4752, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.43342516069788795, |
| "grad_norm": 0.20264650772176668, |
| "learning_rate": 9.795766255212242e-06, |
| "loss": 1.4633, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.43526170798898073, |
| "grad_norm": 0.2214142026219714, |
| "learning_rate": 9.793680741972084e-06, |
| "loss": 1.524, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.43709825528007346, |
| "grad_norm": 0.32400463641987054, |
| "learning_rate": 9.791584859029901e-06, |
| "loss": 1.7717, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4389348025711662, |
| "grad_norm": 0.25353704234527624, |
| "learning_rate": 9.789478610919508e-06, |
| "loss": 1.4503, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.44077134986225897, |
| "grad_norm": 0.28666088570706005, |
| "learning_rate": 9.787362002197147e-06, |
| "loss": 1.5374, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4426078971533517, |
| "grad_norm": 0.2575712788223055, |
| "learning_rate": 9.785235037441473e-06, |
| "loss": 1.3976, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.235205219080783, |
| "learning_rate": 9.783097721253543e-06, |
| "loss": 1.6037, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4462809917355372, |
| "grad_norm": 0.23780080068966725, |
| "learning_rate": 9.780950058256802e-06, |
| "loss": 1.679, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.44811753902662993, |
| "grad_norm": 0.3404150158565561, |
| "learning_rate": 9.778792053097079e-06, |
| "loss": 1.3573, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.44995408631772266, |
| "grad_norm": 0.21082516759926104, |
| "learning_rate": 9.77662371044258e-06, |
| "loss": 1.4932, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.45179063360881544, |
| "grad_norm": 0.24031601796602936, |
| "learning_rate": 9.774445034983864e-06, |
| "loss": 1.7397, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.45362718089990817, |
| "grad_norm": 0.2207009467162113, |
| "learning_rate": 9.77225603143385e-06, |
| "loss": 1.585, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.4554637281910009, |
| "grad_norm": 0.2551403718915401, |
| "learning_rate": 9.770056704527797e-06, |
| "loss": 1.4924, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.4573002754820937, |
| "grad_norm": 0.23525171894308203, |
| "learning_rate": 9.767847059023292e-06, |
| "loss": 1.5658, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.4591368227731864, |
| "grad_norm": 0.2340311514550259, |
| "learning_rate": 9.765627099700248e-06, |
| "loss": 1.7361, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.46097337006427913, |
| "grad_norm": 0.3425610884720096, |
| "learning_rate": 9.763396831360884e-06, |
| "loss": 1.4723, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4628099173553719, |
| "grad_norm": 0.232643381531555, |
| "learning_rate": 9.761156258829723e-06, |
| "loss": 1.5543, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.46464646464646464, |
| "grad_norm": 0.3601600310692427, |
| "learning_rate": 9.75890538695358e-06, |
| "loss": 1.4195, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.46648301193755737, |
| "grad_norm": 0.2625087171937245, |
| "learning_rate": 9.756644220601541e-06, |
| "loss": 1.7209, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.46831955922865015, |
| "grad_norm": 0.36091521146763844, |
| "learning_rate": 9.75437276466497e-06, |
| "loss": 1.4435, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4701561065197429, |
| "grad_norm": 0.22970766162190284, |
| "learning_rate": 9.752091024057485e-06, |
| "loss": 1.3894, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4719926538108356, |
| "grad_norm": 0.2708039374301605, |
| "learning_rate": 9.749799003714954e-06, |
| "loss": 1.4717, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4738292011019284, |
| "grad_norm": 0.22940138762480203, |
| "learning_rate": 9.747496708595482e-06, |
| "loss": 1.532, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4756657483930211, |
| "grad_norm": 0.27196638223072084, |
| "learning_rate": 9.745184143679398e-06, |
| "loss": 1.4425, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.47750229568411384, |
| "grad_norm": 0.24883578158794614, |
| "learning_rate": 9.742861313969246e-06, |
| "loss": 1.4882, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4793388429752066, |
| "grad_norm": 0.24541193855388124, |
| "learning_rate": 9.74052822448978e-06, |
| "loss": 1.4851, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.48117539026629935, |
| "grad_norm": 0.249310294245501, |
| "learning_rate": 9.738184880287946e-06, |
| "loss": 1.6463, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4830119375573921, |
| "grad_norm": 0.25707806827316415, |
| "learning_rate": 9.735831286432869e-06, |
| "loss": 1.6118, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.48484848484848486, |
| "grad_norm": 0.22547510302041615, |
| "learning_rate": 9.733467448015849e-06, |
| "loss": 1.3343, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.4866850321395776, |
| "grad_norm": 0.21415297945638267, |
| "learning_rate": 9.731093370150349e-06, |
| "loss": 1.8938, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4885215794306703, |
| "grad_norm": 0.25553884577553115, |
| "learning_rate": 9.728709057971979e-06, |
| "loss": 1.6681, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4903581267217631, |
| "grad_norm": 0.23479399192488148, |
| "learning_rate": 9.72631451663849e-06, |
| "loss": 1.3781, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.4921946740128558, |
| "grad_norm": 0.2549932602973557, |
| "learning_rate": 9.723909751329759e-06, |
| "loss": 1.444, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.49403122130394855, |
| "grad_norm": 0.24071196589287586, |
| "learning_rate": 9.721494767247779e-06, |
| "loss": 1.5817, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.49586776859504134, |
| "grad_norm": 0.23040782897953827, |
| "learning_rate": 9.719069569616653e-06, |
| "loss": 1.2528, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.49770431588613406, |
| "grad_norm": 0.2371564859647457, |
| "learning_rate": 9.71663416368257e-06, |
| "loss": 1.7053, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4995408631772268, |
| "grad_norm": 0.2489215879444709, |
| "learning_rate": 9.71418855471381e-06, |
| "loss": 1.4637, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4995408631772268, |
| "eval_loss": 2.313441038131714, |
| "eval_runtime": 39.8034, |
| "eval_samples_per_second": 5.15, |
| "eval_steps_per_second": 0.452, |
| "step": 272 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2176, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 272, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 235355418132480.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|