| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998824773768951, | |
| "eval_steps": 500, | |
| "global_step": 2127, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004700904924197908, | |
| "grad_norm": 4.338921070098877, | |
| "learning_rate": 9.984328475160634e-05, | |
| "loss": 10.2575, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.009401809848395816, | |
| "grad_norm": 7.385047435760498, | |
| "learning_rate": 9.968656950321266e-05, | |
| "loss": 8.7185, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.014102714772593724, | |
| "grad_norm": 8.822163581848145, | |
| "learning_rate": 9.9529854254819e-05, | |
| "loss": 6.0204, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.018803619696791632, | |
| "grad_norm": 5.8417487144470215, | |
| "learning_rate": 9.937313900642533e-05, | |
| "loss": 3.0597, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02350452462098954, | |
| "grad_norm": 1.655334711074829, | |
| "learning_rate": 9.921642375803167e-05, | |
| "loss": 1.1494, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.028205429545187448, | |
| "grad_norm": 1.3080294132232666, | |
| "learning_rate": 9.905970850963799e-05, | |
| "loss": 0.6757, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03290633446938536, | |
| "grad_norm": 3.848752975463867, | |
| "learning_rate": 9.890299326124433e-05, | |
| "loss": 0.6138, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.037607239393583264, | |
| "grad_norm": 1.6154791116714478, | |
| "learning_rate": 9.874627801285066e-05, | |
| "loss": 0.5528, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.042308144317781175, | |
| "grad_norm": 2.7457191944122314, | |
| "learning_rate": 9.858956276445698e-05, | |
| "loss": 0.5178, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04700904924197908, | |
| "grad_norm": 1.5741740465164185, | |
| "learning_rate": 9.843284751606332e-05, | |
| "loss": 0.4926, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05170995416617699, | |
| "grad_norm": 1.9329739809036255, | |
| "learning_rate": 9.827613226766966e-05, | |
| "loss": 0.5021, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.056410859090374896, | |
| "grad_norm": 2.349841594696045, | |
| "learning_rate": 9.811941701927598e-05, | |
| "loss": 0.4663, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06111176401457281, | |
| "grad_norm": 1.533464789390564, | |
| "learning_rate": 9.796270177088231e-05, | |
| "loss": 0.4732, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06581266893877072, | |
| "grad_norm": 3.456256866455078, | |
| "learning_rate": 9.780598652248865e-05, | |
| "loss": 0.4624, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07051357386296862, | |
| "grad_norm": 2.1807849407196045, | |
| "learning_rate": 9.764927127409498e-05, | |
| "loss": 0.4604, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07521447878716653, | |
| "grad_norm": 2.223006010055542, | |
| "learning_rate": 9.74925560257013e-05, | |
| "loss": 0.4526, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07991538371136443, | |
| "grad_norm": 1.5491483211517334, | |
| "learning_rate": 9.733584077730764e-05, | |
| "loss": 0.4454, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.08461628863556235, | |
| "grad_norm": 0.9801793694496155, | |
| "learning_rate": 9.717912552891398e-05, | |
| "loss": 0.4565, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.08931719355976026, | |
| "grad_norm": 1.4270861148834229, | |
| "learning_rate": 9.70224102805203e-05, | |
| "loss": 0.4499, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.09401809848395816, | |
| "grad_norm": 1.1395397186279297, | |
| "learning_rate": 9.686569503212663e-05, | |
| "loss": 0.4452, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09871900340815606, | |
| "grad_norm": 4.200931072235107, | |
| "learning_rate": 9.670897978373297e-05, | |
| "loss": 0.4351, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.10341990833235398, | |
| "grad_norm": 0.9431071281433105, | |
| "learning_rate": 9.655226453533929e-05, | |
| "loss": 0.4445, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.10812081325655189, | |
| "grad_norm": 1.4455124139785767, | |
| "learning_rate": 9.639554928694561e-05, | |
| "loss": 0.4287, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.11282171818074979, | |
| "grad_norm": 0.823145866394043, | |
| "learning_rate": 9.623883403855195e-05, | |
| "loss": 0.4267, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1175226231049477, | |
| "grad_norm": 1.3632913827896118, | |
| "learning_rate": 9.608211879015829e-05, | |
| "loss": 0.4237, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.12222352802914561, | |
| "grad_norm": 2.7535300254821777, | |
| "learning_rate": 9.592540354176461e-05, | |
| "loss": 0.4515, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.12692443295334352, | |
| "grad_norm": 2.66774320602417, | |
| "learning_rate": 9.576868829337094e-05, | |
| "loss": 0.433, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.13162533787754144, | |
| "grad_norm": 1.775460958480835, | |
| "learning_rate": 9.561197304497728e-05, | |
| "loss": 0.4218, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.13632624280173933, | |
| "grad_norm": 0.793785810470581, | |
| "learning_rate": 9.545525779658361e-05, | |
| "loss": 0.4276, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.14102714772593725, | |
| "grad_norm": 3.0376343727111816, | |
| "learning_rate": 9.529854254818994e-05, | |
| "loss": 0.4304, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.14572805265013516, | |
| "grad_norm": 1.9679210186004639, | |
| "learning_rate": 9.514182729979627e-05, | |
| "loss": 0.4298, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.15042895757433306, | |
| "grad_norm": 1.2378153800964355, | |
| "learning_rate": 9.498511205140261e-05, | |
| "loss": 0.4092, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.15512986249853097, | |
| "grad_norm": 1.2181652784347534, | |
| "learning_rate": 9.482839680300893e-05, | |
| "loss": 0.4088, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.15983076742272886, | |
| "grad_norm": 1.3195668458938599, | |
| "learning_rate": 9.467168155461526e-05, | |
| "loss": 0.4027, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.16453167234692678, | |
| "grad_norm": 1.101954460144043, | |
| "learning_rate": 9.45149663062216e-05, | |
| "loss": 0.4299, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1692325772711247, | |
| "grad_norm": 3.099776029586792, | |
| "learning_rate": 9.435825105782794e-05, | |
| "loss": 0.4082, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1739334821953226, | |
| "grad_norm": 0.898208737373352, | |
| "learning_rate": 9.420153580943426e-05, | |
| "loss": 0.4148, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1786343871195205, | |
| "grad_norm": 1.2357901334762573, | |
| "learning_rate": 9.404482056104059e-05, | |
| "loss": 0.4085, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.18333529204371843, | |
| "grad_norm": 1.5340213775634766, | |
| "learning_rate": 9.388810531264693e-05, | |
| "loss": 0.4186, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.18803619696791632, | |
| "grad_norm": 0.8089154958724976, | |
| "learning_rate": 9.373139006425325e-05, | |
| "loss": 0.4114, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.19273710189211424, | |
| "grad_norm": 1.6185780763626099, | |
| "learning_rate": 9.357467481585959e-05, | |
| "loss": 0.411, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.19743800681631213, | |
| "grad_norm": 0.8345431089401245, | |
| "learning_rate": 9.341795956746592e-05, | |
| "loss": 0.41, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.20213891174051005, | |
| "grad_norm": 2.1437103748321533, | |
| "learning_rate": 9.326124431907224e-05, | |
| "loss": 0.4085, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.20683981666470797, | |
| "grad_norm": 6.498870372772217, | |
| "learning_rate": 9.310452907067858e-05, | |
| "loss": 0.4038, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.21154072158890586, | |
| "grad_norm": 1.2205309867858887, | |
| "learning_rate": 9.294781382228491e-05, | |
| "loss": 0.4008, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.21624162651310377, | |
| "grad_norm": 1.253982424736023, | |
| "learning_rate": 9.279109857389125e-05, | |
| "loss": 0.3977, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2209425314373017, | |
| "grad_norm": 3.27243971824646, | |
| "learning_rate": 9.263438332549757e-05, | |
| "loss": 0.4178, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.22564343636149958, | |
| "grad_norm": 2.4955010414123535, | |
| "learning_rate": 9.247766807710391e-05, | |
| "loss": 0.3986, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2303443412856975, | |
| "grad_norm": 1.344426155090332, | |
| "learning_rate": 9.232095282871024e-05, | |
| "loss": 0.3956, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2350452462098954, | |
| "grad_norm": 1.3640114068984985, | |
| "learning_rate": 9.216423758031657e-05, | |
| "loss": 0.3976, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2397461511340933, | |
| "grad_norm": 0.8424949645996094, | |
| "learning_rate": 9.20075223319229e-05, | |
| "loss": 0.3913, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.24444705605829123, | |
| "grad_norm": 1.0781666040420532, | |
| "learning_rate": 9.185080708352924e-05, | |
| "loss": 0.4075, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.24914796098248912, | |
| "grad_norm": 0.9005725979804993, | |
| "learning_rate": 9.169409183513556e-05, | |
| "loss": 0.4197, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.25384886590668704, | |
| "grad_norm": 1.195514440536499, | |
| "learning_rate": 9.15373765867419e-05, | |
| "loss": 0.4066, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.25854977083088493, | |
| "grad_norm": 1.9088181257247925, | |
| "learning_rate": 9.138066133834823e-05, | |
| "loss": 0.4091, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2632506757550829, | |
| "grad_norm": 0.9328345060348511, | |
| "learning_rate": 9.122394608995457e-05, | |
| "loss": 0.3988, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.26795158067928077, | |
| "grad_norm": 2.4227659702301025, | |
| "learning_rate": 9.106723084156089e-05, | |
| "loss": 0.391, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.27265248560347866, | |
| "grad_norm": 1.7708094120025635, | |
| "learning_rate": 9.091051559316722e-05, | |
| "loss": 0.3982, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2773533905276766, | |
| "grad_norm": 1.006110668182373, | |
| "learning_rate": 9.075380034477356e-05, | |
| "loss": 0.394, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2820542954518745, | |
| "grad_norm": 0.9397707581520081, | |
| "learning_rate": 9.059708509637988e-05, | |
| "loss": 0.3898, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2867552003760724, | |
| "grad_norm": 1.3863813877105713, | |
| "learning_rate": 9.044036984798622e-05, | |
| "loss": 0.3898, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.29145610530027033, | |
| "grad_norm": 1.7617021799087524, | |
| "learning_rate": 9.028365459959255e-05, | |
| "loss": 0.3957, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2961570102244682, | |
| "grad_norm": 1.261723518371582, | |
| "learning_rate": 9.012693935119887e-05, | |
| "loss": 0.3964, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3008579151486661, | |
| "grad_norm": 0.7548590302467346, | |
| "learning_rate": 8.997022410280521e-05, | |
| "loss": 0.3995, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.305558820072864, | |
| "grad_norm": 1.4837889671325684, | |
| "learning_rate": 8.981350885441154e-05, | |
| "loss": 0.3729, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.31025972499706195, | |
| "grad_norm": 1.868475317955017, | |
| "learning_rate": 8.965679360601788e-05, | |
| "loss": 0.3701, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.31496062992125984, | |
| "grad_norm": 0.9629729390144348, | |
| "learning_rate": 8.95000783576242e-05, | |
| "loss": 0.399, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.31966153484545773, | |
| "grad_norm": 1.5730236768722534, | |
| "learning_rate": 8.934336310923054e-05, | |
| "loss": 0.3854, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3243624397696557, | |
| "grad_norm": 1.4589276313781738, | |
| "learning_rate": 8.918664786083686e-05, | |
| "loss": 0.3879, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.32906334469385357, | |
| "grad_norm": 4.129764080047607, | |
| "learning_rate": 8.90299326124432e-05, | |
| "loss": 0.3868, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.33376424961805146, | |
| "grad_norm": 1.2538539171218872, | |
| "learning_rate": 8.887321736404952e-05, | |
| "loss": 0.4014, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3384651545422494, | |
| "grad_norm": 2.5101168155670166, | |
| "learning_rate": 8.871650211565585e-05, | |
| "loss": 0.3855, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.3431660594664473, | |
| "grad_norm": 2.11893630027771, | |
| "learning_rate": 8.855978686726219e-05, | |
| "loss": 0.3847, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3478669643906452, | |
| "grad_norm": 2.154590129852295, | |
| "learning_rate": 8.840307161886851e-05, | |
| "loss": 0.3726, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.35256786931484313, | |
| "grad_norm": 0.9320923686027527, | |
| "learning_rate": 8.824635637047485e-05, | |
| "loss": 0.3876, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.357268774239041, | |
| "grad_norm": 1.1086455583572388, | |
| "learning_rate": 8.808964112208118e-05, | |
| "loss": 0.3813, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3619696791632389, | |
| "grad_norm": 1.0036139488220215, | |
| "learning_rate": 8.793292587368752e-05, | |
| "loss": 0.3802, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.36667058408743686, | |
| "grad_norm": 1.2812787294387817, | |
| "learning_rate": 8.777621062529384e-05, | |
| "loss": 0.383, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.37137148901163475, | |
| "grad_norm": 1.9546316862106323, | |
| "learning_rate": 8.761949537690017e-05, | |
| "loss": 0.3775, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.37607239393583264, | |
| "grad_norm": 0.9139639139175415, | |
| "learning_rate": 8.746278012850651e-05, | |
| "loss": 0.3725, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.38077329886003053, | |
| "grad_norm": 1.282999038696289, | |
| "learning_rate": 8.730606488011283e-05, | |
| "loss": 0.382, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3854742037842285, | |
| "grad_norm": 1.0331751108169556, | |
| "learning_rate": 8.714934963171917e-05, | |
| "loss": 0.3765, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.39017510870842637, | |
| "grad_norm": 3.0494372844696045, | |
| "learning_rate": 8.69926343833255e-05, | |
| "loss": 0.3773, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.39487601363262426, | |
| "grad_norm": 1.0024981498718262, | |
| "learning_rate": 8.683591913493183e-05, | |
| "loss": 0.3889, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3995769185568222, | |
| "grad_norm": 1.606170892715454, | |
| "learning_rate": 8.667920388653816e-05, | |
| "loss": 0.3585, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4042778234810201, | |
| "grad_norm": 1.0149178504943848, | |
| "learning_rate": 8.65224886381445e-05, | |
| "loss": 0.3893, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.408978728405218, | |
| "grad_norm": 1.744429588317871, | |
| "learning_rate": 8.636577338975083e-05, | |
| "loss": 0.3788, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.41367963332941593, | |
| "grad_norm": 2.3995683193206787, | |
| "learning_rate": 8.620905814135715e-05, | |
| "loss": 0.3594, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4183805382536138, | |
| "grad_norm": 2.227412700653076, | |
| "learning_rate": 8.605234289296349e-05, | |
| "loss": 0.3853, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4230814431778117, | |
| "grad_norm": 1.6415226459503174, | |
| "learning_rate": 8.589562764456982e-05, | |
| "loss": 0.3835, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.42778234810200966, | |
| "grad_norm": 1.140038251876831, | |
| "learning_rate": 8.573891239617615e-05, | |
| "loss": 0.3742, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.43248325302620755, | |
| "grad_norm": 2.5346789360046387, | |
| "learning_rate": 8.558219714778248e-05, | |
| "loss": 0.3886, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.43718415795040544, | |
| "grad_norm": 1.060520887374878, | |
| "learning_rate": 8.542548189938882e-05, | |
| "loss": 0.3852, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.4418850628746034, | |
| "grad_norm": 2.0687763690948486, | |
| "learning_rate": 8.526876665099514e-05, | |
| "loss": 0.3782, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.4465859677988013, | |
| "grad_norm": 3.9409232139587402, | |
| "learning_rate": 8.511205140260148e-05, | |
| "loss": 0.3907, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.45128687272299917, | |
| "grad_norm": 2.1936776638031006, | |
| "learning_rate": 8.495533615420781e-05, | |
| "loss": 0.3758, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.4559877776471971, | |
| "grad_norm": 0.9638668298721313, | |
| "learning_rate": 8.479862090581415e-05, | |
| "loss": 0.3812, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.460688682571395, | |
| "grad_norm": 1.0724684000015259, | |
| "learning_rate": 8.464190565742047e-05, | |
| "loss": 0.3649, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.4653895874955929, | |
| "grad_norm": 1.2330756187438965, | |
| "learning_rate": 8.44851904090268e-05, | |
| "loss": 0.3878, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4700904924197908, | |
| "grad_norm": 1.3528395891189575, | |
| "learning_rate": 8.432847516063314e-05, | |
| "loss": 0.3795, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.47479139734398873, | |
| "grad_norm": 1.3239601850509644, | |
| "learning_rate": 8.417175991223946e-05, | |
| "loss": 0.3881, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.4794923022681866, | |
| "grad_norm": 0.8255568146705627, | |
| "learning_rate": 8.40150446638458e-05, | |
| "loss": 0.3712, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.4841932071923845, | |
| "grad_norm": 1.9237899780273438, | |
| "learning_rate": 8.385832941545213e-05, | |
| "loss": 0.3652, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.48889411211658246, | |
| "grad_norm": 1.6184757947921753, | |
| "learning_rate": 8.370161416705847e-05, | |
| "loss": 0.3646, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.49359501704078035, | |
| "grad_norm": 1.17612886428833, | |
| "learning_rate": 8.354489891866479e-05, | |
| "loss": 0.3751, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.49829592196497824, | |
| "grad_norm": 1.3280194997787476, | |
| "learning_rate": 8.338818367027113e-05, | |
| "loss": 0.3873, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5029968268891761, | |
| "grad_norm": 1.4673656225204468, | |
| "learning_rate": 8.323146842187746e-05, | |
| "loss": 0.3651, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5076977318133741, | |
| "grad_norm": 1.5769989490509033, | |
| "learning_rate": 8.307475317348378e-05, | |
| "loss": 0.3751, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.512398636737572, | |
| "grad_norm": 3.041260242462158, | |
| "learning_rate": 8.291803792509012e-05, | |
| "loss": 0.3754, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5170995416617699, | |
| "grad_norm": 0.922623336315155, | |
| "learning_rate": 8.276132267669645e-05, | |
| "loss": 0.3722, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5218004465859678, | |
| "grad_norm": 0.8527415990829468, | |
| "learning_rate": 8.260460742830278e-05, | |
| "loss": 0.3816, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5265013515101657, | |
| "grad_norm": 1.1660065650939941, | |
| "learning_rate": 8.244789217990911e-05, | |
| "loss": 0.3844, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5312022564343636, | |
| "grad_norm": 2.7265124320983887, | |
| "learning_rate": 8.229117693151545e-05, | |
| "loss": 0.3579, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5359031613585615, | |
| "grad_norm": 0.9204573035240173, | |
| "learning_rate": 8.213446168312177e-05, | |
| "loss": 0.3848, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5406040662827595, | |
| "grad_norm": 1.0934752225875854, | |
| "learning_rate": 8.197774643472809e-05, | |
| "loss": 0.3652, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5453049712069573, | |
| "grad_norm": 0.9822810888290405, | |
| "learning_rate": 8.182103118633443e-05, | |
| "loss": 0.3784, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.5500058761311553, | |
| "grad_norm": 1.0165129899978638, | |
| "learning_rate": 8.166431593794076e-05, | |
| "loss": 0.3728, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.5547067810553532, | |
| "grad_norm": 1.2013338804244995, | |
| "learning_rate": 8.15076006895471e-05, | |
| "loss": 0.3552, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.559407685979551, | |
| "grad_norm": 1.170253872871399, | |
| "learning_rate": 8.135088544115342e-05, | |
| "loss": 0.3725, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.564108590903749, | |
| "grad_norm": 0.8883313536643982, | |
| "learning_rate": 8.119417019275976e-05, | |
| "loss": 0.3504, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5688094958279469, | |
| "grad_norm": 2.107287883758545, | |
| "learning_rate": 8.103745494436609e-05, | |
| "loss": 0.3586, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.5735104007521448, | |
| "grad_norm": 1.9556785821914673, | |
| "learning_rate": 8.088073969597241e-05, | |
| "loss": 0.3596, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.5782113056763427, | |
| "grad_norm": 1.024415373802185, | |
| "learning_rate": 8.072402444757875e-05, | |
| "loss": 0.3642, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5829122106005407, | |
| "grad_norm": 3.524789333343506, | |
| "learning_rate": 8.056730919918508e-05, | |
| "loss": 0.3695, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5876131155247385, | |
| "grad_norm": 1.3371467590332031, | |
| "learning_rate": 8.04105939507914e-05, | |
| "loss": 0.3736, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5923140204489364, | |
| "grad_norm": 1.91169011592865, | |
| "learning_rate": 8.025387870239774e-05, | |
| "loss": 0.3653, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.9362313747406006, | |
| "learning_rate": 8.009716345400408e-05, | |
| "loss": 0.3813, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6017158302973322, | |
| "grad_norm": 2.3379788398742676, | |
| "learning_rate": 7.994044820561041e-05, | |
| "loss": 0.3606, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6064167352215302, | |
| "grad_norm": 1.9221323728561401, | |
| "learning_rate": 7.978373295721674e-05, | |
| "loss": 0.3706, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.611117640145728, | |
| "grad_norm": 2.1025121212005615, | |
| "learning_rate": 7.962701770882307e-05, | |
| "loss": 0.3585, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.615818545069926, | |
| "grad_norm": 1.7160892486572266, | |
| "learning_rate": 7.94703024604294e-05, | |
| "loss": 0.3849, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6205194499941239, | |
| "grad_norm": 1.290818691253662, | |
| "learning_rate": 7.931358721203573e-05, | |
| "loss": 0.377, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6252203549183217, | |
| "grad_norm": 0.9813281297683716, | |
| "learning_rate": 7.915687196364206e-05, | |
| "loss": 0.365, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6299212598425197, | |
| "grad_norm": 0.9623616933822632, | |
| "learning_rate": 7.90001567152484e-05, | |
| "loss": 0.3496, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6346221647667176, | |
| "grad_norm": 1.3078798055648804, | |
| "learning_rate": 7.884344146685473e-05, | |
| "loss": 0.3643, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6393230696909155, | |
| "grad_norm": 0.9925209879875183, | |
| "learning_rate": 7.868672621846106e-05, | |
| "loss": 0.3608, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.6440239746151134, | |
| "grad_norm": 1.515331745147705, | |
| "learning_rate": 7.853001097006739e-05, | |
| "loss": 0.388, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.6487248795393113, | |
| "grad_norm": 2.109877586364746, | |
| "learning_rate": 7.837329572167373e-05, | |
| "loss": 0.3605, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.6534257844635092, | |
| "grad_norm": 1.320446491241455, | |
| "learning_rate": 7.821658047328005e-05, | |
| "loss": 0.3729, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.6581266893877071, | |
| "grad_norm": 1.2624431848526, | |
| "learning_rate": 7.805986522488639e-05, | |
| "loss": 0.3637, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6628275943119051, | |
| "grad_norm": 1.3101025819778442, | |
| "learning_rate": 7.790314997649272e-05, | |
| "loss": 0.3738, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.6675284992361029, | |
| "grad_norm": 1.3762035369873047, | |
| "learning_rate": 7.774643472809904e-05, | |
| "loss": 0.364, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.6722294041603009, | |
| "grad_norm": 1.7407734394073486, | |
| "learning_rate": 7.758971947970538e-05, | |
| "loss": 0.3405, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.6769303090844988, | |
| "grad_norm": 1.209085464477539, | |
| "learning_rate": 7.743300423131171e-05, | |
| "loss": 0.3621, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.6816312140086966, | |
| "grad_norm": 2.3257131576538086, | |
| "learning_rate": 7.727628898291805e-05, | |
| "loss": 0.3524, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.6863321189328946, | |
| "grad_norm": 1.5234692096710205, | |
| "learning_rate": 7.711957373452437e-05, | |
| "loss": 0.3556, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.6910330238570925, | |
| "grad_norm": 2.3267507553100586, | |
| "learning_rate": 7.696285848613071e-05, | |
| "loss": 0.3589, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.6957339287812904, | |
| "grad_norm": 1.8741614818572998, | |
| "learning_rate": 7.680614323773704e-05, | |
| "loss": 0.3651, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7004348337054883, | |
| "grad_norm": 2.3814918994903564, | |
| "learning_rate": 7.664942798934336e-05, | |
| "loss": 0.3568, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7051357386296863, | |
| "grad_norm": 3.497832775115967, | |
| "learning_rate": 7.64927127409497e-05, | |
| "loss": 0.3723, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7098366435538841, | |
| "grad_norm": 2.1036434173583984, | |
| "learning_rate": 7.633599749255604e-05, | |
| "loss": 0.3708, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.714537548478082, | |
| "grad_norm": 2.3688840866088867, | |
| "learning_rate": 7.617928224416236e-05, | |
| "loss": 0.3673, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.71923845340228, | |
| "grad_norm": 1.6410322189331055, | |
| "learning_rate": 7.60225669957687e-05, | |
| "loss": 0.3727, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.7239393583264778, | |
| "grad_norm": 1.0293692350387573, | |
| "learning_rate": 7.586585174737503e-05, | |
| "loss": 0.3751, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7286402632506758, | |
| "grad_norm": 1.794756293296814, | |
| "learning_rate": 7.570913649898136e-05, | |
| "loss": 0.3612, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.7333411681748737, | |
| "grad_norm": 2.0058727264404297, | |
| "learning_rate": 7.555242125058769e-05, | |
| "loss": 0.3734, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7380420730990715, | |
| "grad_norm": 1.8430676460266113, | |
| "learning_rate": 7.539570600219402e-05, | |
| "loss": 0.3533, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7427429780232695, | |
| "grad_norm": 1.2136273384094238, | |
| "learning_rate": 7.523899075380036e-05, | |
| "loss": 0.3398, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.7474438829474674, | |
| "grad_norm": 0.9999972581863403, | |
| "learning_rate": 7.508227550540668e-05, | |
| "loss": 0.3322, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.7521447878716653, | |
| "grad_norm": 1.4663593769073486, | |
| "learning_rate": 7.4925560257013e-05, | |
| "loss": 0.3588, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7568456927958632, | |
| "grad_norm": 0.8715286254882812, | |
| "learning_rate": 7.476884500861934e-05, | |
| "loss": 0.3566, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.7615465977200611, | |
| "grad_norm": 1.7180976867675781, | |
| "learning_rate": 7.461212976022567e-05, | |
| "loss": 0.3544, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.766247502644259, | |
| "grad_norm": 2.3892838954925537, | |
| "learning_rate": 7.4455414511832e-05, | |
| "loss": 0.3537, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.770948407568457, | |
| "grad_norm": 1.2725646495819092, | |
| "learning_rate": 7.429869926343833e-05, | |
| "loss": 0.3575, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.7756493124926548, | |
| "grad_norm": 1.1861917972564697, | |
| "learning_rate": 7.414198401504467e-05, | |
| "loss": 0.3588, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.7803502174168527, | |
| "grad_norm": 1.6121189594268799, | |
| "learning_rate": 7.398526876665099e-05, | |
| "loss": 0.3554, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.7850511223410507, | |
| "grad_norm": 1.183447003364563, | |
| "learning_rate": 7.382855351825732e-05, | |
| "loss": 0.3399, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.7897520272652485, | |
| "grad_norm": 0.9614543318748474, | |
| "learning_rate": 7.367183826986366e-05, | |
| "loss": 0.3505, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.7944529321894465, | |
| "grad_norm": 2.463824510574341, | |
| "learning_rate": 7.351512302147e-05, | |
| "loss": 0.3535, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.7991538371136444, | |
| "grad_norm": 1.1036416292190552, | |
| "learning_rate": 7.335840777307632e-05, | |
| "loss": 0.3492, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8038547420378422, | |
| "grad_norm": 1.4613536596298218, | |
| "learning_rate": 7.320169252468265e-05, | |
| "loss": 0.351, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8085556469620402, | |
| "grad_norm": 1.2614613771438599, | |
| "learning_rate": 7.304497727628899e-05, | |
| "loss": 0.3505, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.8132565518862381, | |
| "grad_norm": 1.1639400720596313, | |
| "learning_rate": 7.288826202789531e-05, | |
| "loss": 0.3657, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.817957456810436, | |
| "grad_norm": 1.6838788986206055, | |
| "learning_rate": 7.273154677950165e-05, | |
| "loss": 0.3503, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.8226583617346339, | |
| "grad_norm": 1.0945839881896973, | |
| "learning_rate": 7.257483153110798e-05, | |
| "loss": 0.3551, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8273592666588319, | |
| "grad_norm": 1.1545921564102173, | |
| "learning_rate": 7.241811628271432e-05, | |
| "loss": 0.3488, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.8320601715830297, | |
| "grad_norm": 0.9361588954925537, | |
| "learning_rate": 7.226140103432064e-05, | |
| "loss": 0.3454, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.8367610765072276, | |
| "grad_norm": 0.959794819355011, | |
| "learning_rate": 7.210468578592697e-05, | |
| "loss": 0.3571, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8414619814314256, | |
| "grad_norm": 1.1152044534683228, | |
| "learning_rate": 7.194797053753331e-05, | |
| "loss": 0.3713, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8461628863556234, | |
| "grad_norm": 1.7348166704177856, | |
| "learning_rate": 7.179125528913963e-05, | |
| "loss": 0.3407, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8508637912798214, | |
| "grad_norm": 2.3188705444335938, | |
| "learning_rate": 7.163454004074597e-05, | |
| "loss": 0.3489, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.8555646962040193, | |
| "grad_norm": 1.529905915260315, | |
| "learning_rate": 7.14778247923523e-05, | |
| "loss": 0.3385, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.8602656011282171, | |
| "grad_norm": 1.2101613283157349, | |
| "learning_rate": 7.132110954395862e-05, | |
| "loss": 0.355, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.8649665060524151, | |
| "grad_norm": 1.4338616132736206, | |
| "learning_rate": 7.116439429556496e-05, | |
| "loss": 0.3534, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.869667410976613, | |
| "grad_norm": 1.2460179328918457, | |
| "learning_rate": 7.10076790471713e-05, | |
| "loss": 0.3537, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.8743683159008109, | |
| "grad_norm": 1.6369551420211792, | |
| "learning_rate": 7.085096379877763e-05, | |
| "loss": 0.3514, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.8790692208250088, | |
| "grad_norm": 2.6983251571655273, | |
| "learning_rate": 7.069424855038395e-05, | |
| "loss": 0.348, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.8837701257492068, | |
| "grad_norm": 1.2817703485488892, | |
| "learning_rate": 7.053753330199029e-05, | |
| "loss": 0.3505, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.8884710306734046, | |
| "grad_norm": 1.0121359825134277, | |
| "learning_rate": 7.038081805359662e-05, | |
| "loss": 0.344, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.8931719355976026, | |
| "grad_norm": 0.9417720437049866, | |
| "learning_rate": 7.022410280520295e-05, | |
| "loss": 0.3462, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.8978728405218005, | |
| "grad_norm": 1.0144025087356567, | |
| "learning_rate": 7.006738755680928e-05, | |
| "loss": 0.3416, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.9025737454459983, | |
| "grad_norm": 2.533620834350586, | |
| "learning_rate": 6.991067230841562e-05, | |
| "loss": 0.3543, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.9072746503701963, | |
| "grad_norm": 1.5756601095199585, | |
| "learning_rate": 6.975395706002194e-05, | |
| "loss": 0.368, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.9119755552943942, | |
| "grad_norm": 1.2900676727294922, | |
| "learning_rate": 6.959724181162828e-05, | |
| "loss": 0.3368, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.9166764602185921, | |
| "grad_norm": 1.2235045433044434, | |
| "learning_rate": 6.944052656323461e-05, | |
| "loss": 0.3428, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.92137736514279, | |
| "grad_norm": 1.0844625234603882, | |
| "learning_rate": 6.928381131484095e-05, | |
| "loss": 0.365, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.9260782700669878, | |
| "grad_norm": 2.585019588470459, | |
| "learning_rate": 6.912709606644727e-05, | |
| "loss": 0.3394, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.9307791749911858, | |
| "grad_norm": 1.402716040611267, | |
| "learning_rate": 6.89703808180536e-05, | |
| "loss": 0.3513, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.9354800799153837, | |
| "grad_norm": 1.6604111194610596, | |
| "learning_rate": 6.881366556965994e-05, | |
| "loss": 0.3514, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.9401809848395816, | |
| "grad_norm": 1.0885512828826904, | |
| "learning_rate": 6.865695032126626e-05, | |
| "loss": 0.3504, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9448818897637795, | |
| "grad_norm": 3.016390562057495, | |
| "learning_rate": 6.85002350728726e-05, | |
| "loss": 0.3607, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9495827946879775, | |
| "grad_norm": 1.5509752035140991, | |
| "learning_rate": 6.834351982447893e-05, | |
| "loss": 0.3373, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.9542836996121753, | |
| "grad_norm": 1.8190217018127441, | |
| "learning_rate": 6.818680457608527e-05, | |
| "loss": 0.3412, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.9589846045363732, | |
| "grad_norm": 2.373183250427246, | |
| "learning_rate": 6.803008932769159e-05, | |
| "loss": 0.3455, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.9636855094605712, | |
| "grad_norm": 1.4944651126861572, | |
| "learning_rate": 6.787337407929793e-05, | |
| "loss": 0.3467, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.968386414384769, | |
| "grad_norm": 2.2049689292907715, | |
| "learning_rate": 6.771665883090425e-05, | |
| "loss": 0.3552, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.973087319308967, | |
| "grad_norm": 1.044244647026062, | |
| "learning_rate": 6.755994358251058e-05, | |
| "loss": 0.3475, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.9777882242331649, | |
| "grad_norm": 1.0703078508377075, | |
| "learning_rate": 6.74032283341169e-05, | |
| "loss": 0.3424, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.9824891291573628, | |
| "grad_norm": 1.2599198818206787, | |
| "learning_rate": 6.724651308572324e-05, | |
| "loss": 0.3513, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.9871900340815607, | |
| "grad_norm": 0.9976306557655334, | |
| "learning_rate": 6.708979783732958e-05, | |
| "loss": 0.3499, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.9918909390057586, | |
| "grad_norm": 1.1676925420761108, | |
| "learning_rate": 6.69330825889359e-05, | |
| "loss": 0.3362, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.9965918439299565, | |
| "grad_norm": 1.5144907236099243, | |
| "learning_rate": 6.677636734054223e-05, | |
| "loss": 0.3323, | |
| "step": 2120 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 6381, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.7331697868700385e+19, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |