| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.105875, |
| "eval_steps": 500, |
| "global_step": 2800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00125, |
| "grad_norm": 0.32223036885261536, |
| "learning_rate": 9.890999999999999e-06, |
| "loss": 2.9122833251953124, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 0.3097129762172699, |
| "learning_rate": 2.0881000000000002e-05, |
| "loss": 2.881389617919922, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00375, |
| "grad_norm": 0.30452761054039, |
| "learning_rate": 3.1871e-05, |
| "loss": 2.8967803955078124, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 0.2955208420753479, |
| "learning_rate": 4.2861e-05, |
| "loss": 2.8681930541992187, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00625, |
| "grad_norm": 0.303114652633667, |
| "learning_rate": 5.3850999999999997e-05, |
| "loss": 2.8751144409179688, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 0.299868106842041, |
| "learning_rate": 6.4841e-05, |
| "loss": 2.8749458312988283, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00875, |
| "grad_norm": 0.31019559502601624, |
| "learning_rate": 7.5831e-05, |
| "loss": 2.860850524902344, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.30683860182762146, |
| "learning_rate": 8.6821e-05, |
| "loss": 2.8604888916015625, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01125, |
| "grad_norm": 0.30718618631362915, |
| "learning_rate": 9.7811e-05, |
| "loss": 2.8761545181274415, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 0.32211124897003174, |
| "learning_rate": 0.000108801, |
| "loss": 2.8468536376953124, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01375, |
| "grad_norm": 0.31109386682510376, |
| "learning_rate": 0.000119791, |
| "loss": 2.8445552825927733, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 0.3102831542491913, |
| "learning_rate": 0.000130781, |
| "loss": 2.864554214477539, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01625, |
| "grad_norm": 0.3220812976360321, |
| "learning_rate": 0.000141771, |
| "loss": 2.8806329727172852, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0175, |
| "grad_norm": 0.30876684188842773, |
| "learning_rate": 0.00015276099999999998, |
| "loss": 2.8452987670898438, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01875, |
| "grad_norm": 0.31868258118629456, |
| "learning_rate": 0.000163751, |
| "loss": 2.8517589569091797, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.3087399899959564, |
| "learning_rate": 0.000174741, |
| "loss": 2.8347522735595705, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.02125, |
| "grad_norm": 0.3106062710285187, |
| "learning_rate": 0.000185731, |
| "loss": 2.85534553527832, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0225, |
| "grad_norm": 0.32315531373023987, |
| "learning_rate": 0.00019672100000000002, |
| "loss": 2.858936309814453, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.02375, |
| "grad_norm": 0.3293415307998657, |
| "learning_rate": 0.000207711, |
| "loss": 2.8992713928222655, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.3309278190135956, |
| "learning_rate": 0.000218701, |
| "loss": 2.863359832763672, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02625, |
| "grad_norm": 0.3089866638183594, |
| "learning_rate": 0.0002197992779574687, |
| "loss": 2.8769275665283205, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0275, |
| "grad_norm": 0.32934558391571045, |
| "learning_rate": 0.0002197967820201583, |
| "loss": 2.8595829010009766, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.02875, |
| "grad_norm": 0.3154727518558502, |
| "learning_rate": 0.00021979250331444358, |
| "loss": 2.8655704498291015, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.31462976336479187, |
| "learning_rate": 0.0002197864419097345, |
| "loss": 2.8554920196533202, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03125, |
| "grad_norm": 0.3277083933353424, |
| "learning_rate": 0.00021977859790436047, |
| "loss": 2.896647262573242, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0325, |
| "grad_norm": 0.3230266571044922, |
| "learning_rate": 0.00021976897142556858, |
| "loss": 2.8914859771728514, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.03375, |
| "grad_norm": 0.3097373843193054, |
| "learning_rate": 0.00021975756262952153, |
| "loss": 2.867509460449219, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 0.31526896357536316, |
| "learning_rate": 0.00021974437170129525, |
| "loss": 2.861627388000488, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.03625, |
| "grad_norm": 0.3264971077442169, |
| "learning_rate": 0.0002197293988548756, |
| "loss": 2.8434619903564453, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0375, |
| "grad_norm": 0.31985947489738464, |
| "learning_rate": 0.00021971264433315533, |
| "loss": 2.858683776855469, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03875, |
| "grad_norm": 0.3007575571537018, |
| "learning_rate": 0.00021969410840792965, |
| "loss": 2.856831359863281, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.32181960344314575, |
| "learning_rate": 0.00021967379137989224, |
| "loss": 2.8669090270996094, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.04125, |
| "grad_norm": 0.3142366111278534, |
| "learning_rate": 0.00021965169357863014, |
| "loss": 2.864155578613281, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0425, |
| "grad_norm": 0.31411442160606384, |
| "learning_rate": 0.00021962781536261853, |
| "loss": 2.8719043731689453, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.04375, |
| "grad_norm": 0.3069416880607605, |
| "learning_rate": 0.00021960215711921467, |
| "loss": 2.8788784027099608, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 0.3287704586982727, |
| "learning_rate": 0.00021957471926465198, |
| "loss": 2.8686893463134764, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.04625, |
| "grad_norm": 0.32815802097320557, |
| "learning_rate": 0.00021954550224403304, |
| "loss": 2.872859573364258, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0475, |
| "grad_norm": 0.3123241066932678, |
| "learning_rate": 0.0002195145065313224, |
| "loss": 2.861919975280762, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.04875, |
| "grad_norm": 0.3143039643764496, |
| "learning_rate": 0.0002194817326293389, |
| "loss": 2.8754358291625977, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.32305410504341125, |
| "learning_rate": 0.00021944718106974763, |
| "loss": 2.830820083618164, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05125, |
| "grad_norm": 0.3187738060951233, |
| "learning_rate": 0.00021941085241305118, |
| "loss": 2.8469779968261717, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0525, |
| "grad_norm": 0.3240358829498291, |
| "learning_rate": 0.00021937274724858052, |
| "loss": 2.872676467895508, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.05375, |
| "grad_norm": 0.3307654857635498, |
| "learning_rate": 0.00021933286619448556, |
| "loss": 2.868929862976074, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 0.31867194175720215, |
| "learning_rate": 0.00021929120989772503, |
| "loss": 2.837067794799805, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05625, |
| "grad_norm": 0.3109733760356903, |
| "learning_rate": 0.00021924777903405596, |
| "loss": 2.8356159210205076, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0575, |
| "grad_norm": 0.33047595620155334, |
| "learning_rate": 0.00021920257430802295, |
| "loss": 2.859963226318359, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.05875, |
| "grad_norm": 0.3140341341495514, |
| "learning_rate": 0.00021915559645294634, |
| "loss": 2.864061737060547, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.30880865454673767, |
| "learning_rate": 0.0002191068462309107, |
| "loss": 2.8523515701293944, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.06125, |
| "grad_norm": 0.3137487769126892, |
| "learning_rate": 0.00021905632443275225, |
| "loss": 2.8639093399047852, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 0.340537965297699, |
| "learning_rate": 0.00021900403187804607, |
| "loss": 2.8927494049072267, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06375, |
| "grad_norm": 0.31051260232925415, |
| "learning_rate": 0.00021894996941509282, |
| "loss": 2.840711212158203, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 0.3152431547641754, |
| "learning_rate": 0.00021889413792090502, |
| "loss": 2.862700653076172, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.06625, |
| "grad_norm": 0.3119368553161621, |
| "learning_rate": 0.00021883653830119274, |
| "loss": 2.8526124954223633, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0675, |
| "grad_norm": 0.31616318225860596, |
| "learning_rate": 0.00021877717149034896, |
| "loss": 2.855159378051758, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.06875, |
| "grad_norm": 0.30254286527633667, |
| "learning_rate": 0.00021871603845143443, |
| "loss": 2.854717254638672, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.3120061159133911, |
| "learning_rate": 0.000218653140176162, |
| "loss": 2.850946044921875, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.07125, |
| "grad_norm": 0.30754292011260986, |
| "learning_rate": 0.00021858847768488048, |
| "loss": 2.8386112213134767, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0725, |
| "grad_norm": 0.3003309667110443, |
| "learning_rate": 0.0002185220520265583, |
| "loss": 2.858784294128418, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.07375, |
| "grad_norm": 0.31817367672920227, |
| "learning_rate": 0.00021845386427876622, |
| "loss": 2.8400810241699217, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.31158024072647095, |
| "learning_rate": 0.00021838391554766004, |
| "loss": 2.8315425872802735, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07625, |
| "grad_norm": 0.31356877088546753, |
| "learning_rate": 0.00021831220696796264, |
| "loss": 2.85643310546875, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0775, |
| "grad_norm": 0.3057396411895752, |
| "learning_rate": 0.00021823873970294543, |
| "loss": 2.8644752502441406, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.07875, |
| "grad_norm": 0.30540961027145386, |
| "learning_rate": 0.00021816351494440965, |
| "loss": 2.840130615234375, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.3201405107975006, |
| "learning_rate": 0.00021808653391266697, |
| "loss": 2.81726016998291, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.08125, |
| "grad_norm": 0.31356149911880493, |
| "learning_rate": 0.0002180077978565196, |
| "loss": 2.841321563720703, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0825, |
| "grad_norm": 0.3322322368621826, |
| "learning_rate": 0.00021792730805324023, |
| "loss": 2.833037185668945, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.08375, |
| "grad_norm": 0.3101900517940521, |
| "learning_rate": 0.0002178450658085511, |
| "loss": 2.8306228637695314, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 0.31162750720977783, |
| "learning_rate": 0.00021776107245660307, |
| "loss": 2.849654769897461, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.08625, |
| "grad_norm": 0.3168909251689911, |
| "learning_rate": 0.00021767532935995366, |
| "loss": 2.882074737548828, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0875, |
| "grad_norm": 0.2994805574417114, |
| "learning_rate": 0.00021758783790954515, |
| "loss": 2.834335517883301, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.08875, |
| "grad_norm": 0.3097037672996521, |
| "learning_rate": 0.0002174985995246821, |
| "loss": 2.8143672943115234, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.32182541489601135, |
| "learning_rate": 0.00021740761565300799, |
| "loss": 2.845683288574219, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.09125, |
| "grad_norm": 0.32514718174934387, |
| "learning_rate": 0.00021731488777048213, |
| "loss": 2.8221324920654296, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0925, |
| "grad_norm": 0.3028743267059326, |
| "learning_rate": 0.0002172204173813555, |
| "loss": 2.8356349945068358, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.09375, |
| "grad_norm": 0.3133573830127716, |
| "learning_rate": 0.0002171242060181463, |
| "loss": 2.838234710693359, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 0.3107962906360626, |
| "learning_rate": 0.00021702625524161527, |
| "loss": 2.8331020355224608, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.09625, |
| "grad_norm": 0.3266642987728119, |
| "learning_rate": 0.00021692656664074023, |
| "loss": 2.847811698913574, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.0975, |
| "grad_norm": 0.3073740303516388, |
| "learning_rate": 0.00021682514183269034, |
| "loss": 2.8351299285888674, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.09875, |
| "grad_norm": 0.3130224645137787, |
| "learning_rate": 0.00021672198246279985, |
| "loss": 2.7890214920043945, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.3218679130077362, |
| "learning_rate": 0.00021661709020454157, |
| "loss": 2.8209762573242188, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10125, |
| "grad_norm": 0.2967888414859772, |
| "learning_rate": 0.00021651046675949938, |
| "loss": 2.819289207458496, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.1025, |
| "grad_norm": 0.32564085721969604, |
| "learning_rate": 0.000216402113857341, |
| "loss": 2.8148468017578123, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.10375, |
| "grad_norm": 0.30720430612564087, |
| "learning_rate": 0.00021629203325578962, |
| "loss": 2.832720947265625, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 0.31553003191947937, |
| "learning_rate": 0.00021618022674059568, |
| "loss": 2.8313037872314455, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.10625, |
| "grad_norm": 0.2927679121494293, |
| "learning_rate": 0.0002160666961255076, |
| "loss": 2.822229766845703, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.1075, |
| "grad_norm": 0.3168841302394867, |
| "learning_rate": 0.00021595144325224264, |
| "loss": 2.8234331130981447, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.10875, |
| "grad_norm": 0.3195788860321045, |
| "learning_rate": 0.0002158344699904568, |
| "loss": 2.8171760559082033, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.30483055114746094, |
| "learning_rate": 0.00021571577823771462, |
| "loss": 2.82617244720459, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.11125, |
| "grad_norm": 0.31678906083106995, |
| "learning_rate": 0.00021559536991945833, |
| "loss": 2.8162193298339844, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.1125, |
| "grad_norm": 0.30715152621269226, |
| "learning_rate": 0.00021547324698897665, |
| "loss": 2.8252620697021484, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.11375, |
| "grad_norm": 0.303281307220459, |
| "learning_rate": 0.00021534941142737314, |
| "loss": 2.8220481872558594, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 0.3042793571949005, |
| "learning_rate": 0.00021522386524353395, |
| "loss": 2.825517272949219, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.11625, |
| "grad_norm": 0.328135222196579, |
| "learning_rate": 0.00021509661047409534, |
| "loss": 2.806531524658203, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1175, |
| "grad_norm": 0.30471575260162354, |
| "learning_rate": 0.00021496764918341058, |
| "loss": 2.8206180572509765, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.11875, |
| "grad_norm": 0.3096025884151459, |
| "learning_rate": 0.0002148369834635165, |
| "loss": 2.8001310348510744, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.30915719270706177, |
| "learning_rate": 0.0002147046154340995, |
| "loss": 2.838936996459961, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.12125, |
| "grad_norm": 0.30633190274238586, |
| "learning_rate": 0.00021457054724246125, |
| "loss": 2.8280914306640623, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1225, |
| "grad_norm": 0.3169943392276764, |
| "learning_rate": 0.00021443478106348375, |
| "loss": 2.8208492279052733, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.12375, |
| "grad_norm": 0.31402623653411865, |
| "learning_rate": 0.00021429731909959417, |
| "loss": 2.803514099121094, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.31064271926879883, |
| "learning_rate": 0.00021415816358072898, |
| "loss": 2.828254508972168, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.12625, |
| "grad_norm": 0.3190893530845642, |
| "learning_rate": 0.00021401731676429792, |
| "loss": 2.814365196228027, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.1275, |
| "grad_norm": 0.3164026141166687, |
| "learning_rate": 0.00021387478093514724, |
| "loss": 2.803851509094238, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.12875, |
| "grad_norm": 0.3159414529800415, |
| "learning_rate": 0.00021373055840552275, |
| "loss": 2.8509082794189453, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.3224294185638428, |
| "learning_rate": 0.00021358465151503225, |
| "loss": 2.789044952392578, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.13125, |
| "grad_norm": 0.31033849716186523, |
| "learning_rate": 0.00021343706263060765, |
| "loss": 2.8226268768310545, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1325, |
| "grad_norm": 0.3086431622505188, |
| "learning_rate": 0.00021328779414646635, |
| "loss": 2.8077007293701173, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.13375, |
| "grad_norm": 0.3155769109725952, |
| "learning_rate": 0.00021313684848407282, |
| "loss": 2.8190916061401365, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 0.3062079846858978, |
| "learning_rate": 0.0002129842280920988, |
| "loss": 2.8035049438476562, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.13625, |
| "grad_norm": 0.3113609552383423, |
| "learning_rate": 0.000212829935446384, |
| "loss": 2.808064842224121, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.1375, |
| "grad_norm": 0.3248916566371918, |
| "learning_rate": 0.0002126739730498958, |
| "loss": 2.8036418914794923, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.13875, |
| "grad_norm": 0.314177930355072, |
| "learning_rate": 0.00021251634343268845, |
| "loss": 2.8073291778564453, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.31667032837867737, |
| "learning_rate": 0.00021235704915186242, |
| "loss": 2.8247406005859377, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.14125, |
| "grad_norm": 0.32587730884552, |
| "learning_rate": 0.0002121960927915225, |
| "loss": 2.81424560546875, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1425, |
| "grad_norm": 0.3099067509174347, |
| "learning_rate": 0.00021203347696273621, |
| "loss": 2.833042526245117, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.14375, |
| "grad_norm": 0.3176534175872803, |
| "learning_rate": 0.0002118692043034913, |
| "loss": 2.8056007385253907, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 0.32910725474357605, |
| "learning_rate": 0.00021170327747865292, |
| "loss": 2.791951370239258, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.14625, |
| "grad_norm": 0.31169673800468445, |
| "learning_rate": 0.00021153569917992042, |
| "loss": 2.809808540344238, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.1475, |
| "grad_norm": 0.31293970346450806, |
| "learning_rate": 0.00021136647212578378, |
| "loss": 2.7925342559814452, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.14875, |
| "grad_norm": 0.3170998990535736, |
| "learning_rate": 0.00021119559906147942, |
| "loss": 2.809326934814453, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.30116304755210876, |
| "learning_rate": 0.00021102308275894555, |
| "loss": 2.7981502532958986, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.15125, |
| "grad_norm": 0.30669230222702026, |
| "learning_rate": 0.0002108489260167775, |
| "loss": 2.7857837677001953, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.1525, |
| "grad_norm": 0.30800774693489075, |
| "learning_rate": 0.00021067313166018209, |
| "loss": 2.806937408447266, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.15375, |
| "grad_norm": 0.3087230622768402, |
| "learning_rate": 0.00021049570254093184, |
| "loss": 2.8145347595214845, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 0.30576276779174805, |
| "learning_rate": 0.00021031664153731874, |
| "loss": 2.806387710571289, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.15625, |
| "grad_norm": 0.3263702392578125, |
| "learning_rate": 0.00021013595155410756, |
| "loss": 2.773836135864258, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.1575, |
| "grad_norm": 0.3177431523799896, |
| "learning_rate": 0.00020995363552248867, |
| "loss": 2.7588844299316406, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.15875, |
| "grad_norm": 0.30336225032806396, |
| "learning_rate": 0.00020976969640003064, |
| "loss": 2.8113712310791015, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.32169830799102783, |
| "learning_rate": 0.000209584137170632, |
| "loss": 2.788315773010254, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.16125, |
| "grad_norm": 0.30413737893104553, |
| "learning_rate": 0.00020939696084447314, |
| "loss": 2.7458065032958983, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1625, |
| "grad_norm": 0.3089154064655304, |
| "learning_rate": 0.00020920817045796727, |
| "loss": 2.7877056121826174, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.16375, |
| "grad_norm": 0.30705851316452026, |
| "learning_rate": 0.00020901776907371116, |
| "loss": 2.773893356323242, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 0.3133976459503174, |
| "learning_rate": 0.00020882575978043566, |
| "loss": 2.784181594848633, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.16625, |
| "grad_norm": 0.31440430879592896, |
| "learning_rate": 0.00020863214569295533, |
| "loss": 2.8143083572387697, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.1675, |
| "grad_norm": 0.29583054780960083, |
| "learning_rate": 0.00020843692995211805, |
| "loss": 2.7985980987548826, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.16875, |
| "grad_norm": 0.3040190637111664, |
| "learning_rate": 0.0002082401157247541, |
| "loss": 2.774214744567871, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.30737006664276123, |
| "learning_rate": 0.00020804170620362475, |
| "loss": 2.803047943115234, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.17125, |
| "grad_norm": 0.30594661831855774, |
| "learning_rate": 0.0002078417046073704, |
| "loss": 2.7990367889404295, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.1725, |
| "grad_norm": 0.3074641823768616, |
| "learning_rate": 0.00020764011418045845, |
| "loss": 2.770071792602539, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.17375, |
| "grad_norm": 0.304598331451416, |
| "learning_rate": 0.00020743693819313063, |
| "loss": 2.7667999267578125, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.32464832067489624, |
| "learning_rate": 0.00020723217994135003, |
| "loss": 2.8097129821777345, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.17625, |
| "grad_norm": 0.3164089620113373, |
| "learning_rate": 0.00020702584274674742, |
| "loss": 2.7955820083618166, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.1775, |
| "grad_norm": 0.310068279504776, |
| "learning_rate": 0.00020681792995656763, |
| "loss": 2.7704933166503904, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.17875, |
| "grad_norm": 0.3000030219554901, |
| "learning_rate": 0.00020660844494361513, |
| "loss": 2.8106201171875, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.3196377456188202, |
| "learning_rate": 0.00020639739110619917, |
| "loss": 2.7796897888183594, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.18125, |
| "grad_norm": 0.3006730079650879, |
| "learning_rate": 0.000206184771868079, |
| "loss": 2.791950225830078, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.1825, |
| "grad_norm": 0.3123355805873871, |
| "learning_rate": 0.000205970590678408, |
| "loss": 2.7847476959228517, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.18375, |
| "grad_norm": 0.31853538751602173, |
| "learning_rate": 0.00020575485101167782, |
| "loss": 2.7865251541137694, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 0.30936041474342346, |
| "learning_rate": 0.0002055375563676622, |
| "loss": 2.7906095504760744, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.18625, |
| "grad_norm": 0.30842670798301697, |
| "learning_rate": 0.0002053187102713599, |
| "loss": 2.7754417419433595, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.3201216757297516, |
| "learning_rate": 0.00020509831627293766, |
| "loss": 2.796547698974609, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.18875, |
| "grad_norm": 0.3134450316429138, |
| "learning_rate": 0.00020487637794767275, |
| "loss": 2.7649627685546876, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.3114699721336365, |
| "learning_rate": 0.00020465289889589467, |
| "loss": 2.8279897689819338, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.19125, |
| "grad_norm": 0.3171784281730652, |
| "learning_rate": 0.00020442788274292704, |
| "loss": 2.776567840576172, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.1925, |
| "grad_norm": 0.30708587169647217, |
| "learning_rate": 0.00020420133313902856, |
| "loss": 2.786650466918945, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.19375, |
| "grad_norm": 0.3005415201187134, |
| "learning_rate": 0.00020397325375933387, |
| "loss": 2.7795650482177736, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 0.30447477102279663, |
| "learning_rate": 0.0002037436483037941, |
| "loss": 2.7910282135009767, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.19625, |
| "grad_norm": 0.308108389377594, |
| "learning_rate": 0.0002035125204971165, |
| "loss": 2.7864933013916016, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.1975, |
| "grad_norm": 0.31156831979751587, |
| "learning_rate": 0.00020327987408870436, |
| "loss": 2.77624397277832, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.19875, |
| "grad_norm": 0.30407053232192993, |
| "learning_rate": 0.00020304571285259602, |
| "loss": 2.786225509643555, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.30873724818229675, |
| "learning_rate": 0.0002028100405874036, |
| "loss": 2.7831089019775392, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.20125, |
| "grad_norm": 0.305469274520874, |
| "learning_rate": 0.00020257286111625156, |
| "loss": 2.770510673522949, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.2025, |
| "grad_norm": 0.3133813440799713, |
| "learning_rate": 0.00020233417828671444, |
| "loss": 2.7937782287597654, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.20375, |
| "grad_norm": 0.3113247752189636, |
| "learning_rate": 0.00020209399597075463, |
| "loss": 2.811221694946289, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.205, |
| "grad_norm": 0.29653653502464294, |
| "learning_rate": 0.00020185231806465958, |
| "loss": 2.736056900024414, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.20625, |
| "grad_norm": 0.296674519777298, |
| "learning_rate": 0.00020160914848897833, |
| "loss": 2.773727035522461, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2075, |
| "grad_norm": 0.3117091953754425, |
| "learning_rate": 0.00020136449118845828, |
| "loss": 2.7696605682373048, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.20875, |
| "grad_norm": 0.3065008819103241, |
| "learning_rate": 0.00020111835013198088, |
| "loss": 2.7859319686889648, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.30614563822746277, |
| "learning_rate": 0.00020087072931249746, |
| "loss": 2.761496734619141, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.21125, |
| "grad_norm": 0.3214632272720337, |
| "learning_rate": 0.0002006216327469644, |
| "loss": 2.795328140258789, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.2125, |
| "grad_norm": 0.3141666054725647, |
| "learning_rate": 0.00020037106447627772, |
| "loss": 2.7613990783691404, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.21375, |
| "grad_norm": 0.32107681035995483, |
| "learning_rate": 0.00020011902856520807, |
| "loss": 2.7515789031982423, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.215, |
| "grad_norm": 0.3231985867023468, |
| "learning_rate": 0.00019986552910233424, |
| "loss": 2.7852977752685546, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.21625, |
| "grad_norm": 0.3149876892566681, |
| "learning_rate": 0.00019961057019997707, |
| "loss": 2.754520225524902, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.2175, |
| "grad_norm": 0.31885862350463867, |
| "learning_rate": 0.00019935415599413287, |
| "loss": 2.7804901123046877, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.21875, |
| "grad_norm": 0.30009323358535767, |
| "learning_rate": 0.0001990962906444061, |
| "loss": 2.766156005859375, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.31249675154685974, |
| "learning_rate": 0.00019883697833394186, |
| "loss": 2.779193878173828, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.22125, |
| "grad_norm": 0.30822932720184326, |
| "learning_rate": 0.0001985762232693584, |
| "loss": 2.7579469680786133, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.2225, |
| "grad_norm": 0.3053094446659088, |
| "learning_rate": 0.00019831402968067843, |
| "loss": 2.76893310546875, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.22375, |
| "grad_norm": 0.31457704305648804, |
| "learning_rate": 0.00019805040182126077, |
| "loss": 2.781879425048828, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.30379778146743774, |
| "learning_rate": 0.00019778534396773127, |
| "loss": 2.783489799499512, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.22625, |
| "grad_norm": 0.31210359930992126, |
| "learning_rate": 0.0001975188604199134, |
| "loss": 2.7574298858642576, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.2275, |
| "grad_norm": 0.3024740219116211, |
| "learning_rate": 0.00019725095550075862, |
| "loss": 2.7888748168945314, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.22875, |
| "grad_norm": 0.3073548376560211, |
| "learning_rate": 0.0001969816335562761, |
| "loss": 2.7340553283691404, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.31958791613578796, |
| "learning_rate": 0.00019671089895546232, |
| "loss": 2.804524230957031, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.23125, |
| "grad_norm": 0.3051760196685791, |
| "learning_rate": 0.00019643875609023017, |
| "loss": 2.775598907470703, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2325, |
| "grad_norm": 0.3086925148963928, |
| "learning_rate": 0.0001961652093753377, |
| "loss": 2.7774431228637697, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.23375, |
| "grad_norm": 0.3144133388996124, |
| "learning_rate": 0.00019589026324831643, |
| "loss": 2.7702011108398437, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.235, |
| "grad_norm": 0.3036665916442871, |
| "learning_rate": 0.00019561392216939954, |
| "loss": 2.7927045822143555, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.23625, |
| "grad_norm": 0.30784451961517334, |
| "learning_rate": 0.00019533619062144934, |
| "loss": 2.741124725341797, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.2375, |
| "grad_norm": 0.29786407947540283, |
| "learning_rate": 0.00019505707310988463, |
| "loss": 2.748614501953125, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.23875, |
| "grad_norm": 0.30479830503463745, |
| "learning_rate": 0.00019477657416260764, |
| "loss": 2.7626161575317383, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.30530789494514465, |
| "learning_rate": 0.0001944946983299305, |
| "loss": 2.7705900192260744, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.24125, |
| "grad_norm": 0.30881696939468384, |
| "learning_rate": 0.00019421145018450145, |
| "loss": 2.7753509521484374, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.2425, |
| "grad_norm": 0.30990368127822876, |
| "learning_rate": 0.00019392683432123065, |
| "loss": 2.7618339538574217, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.24375, |
| "grad_norm": 0.30068239569664, |
| "learning_rate": 0.00019364085535721574, |
| "loss": 2.751456451416016, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.000875, |
| "grad_norm": 0.32766178250312805, |
| "learning_rate": 0.00019335351793166682, |
| "loss": 2.9953849792480467, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.002125, |
| "grad_norm": 0.31653207540512085, |
| "learning_rate": 0.00019306482670583127, |
| "loss": 2.7172924041748048, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.003375, |
| "grad_norm": 0.30188634991645813, |
| "learning_rate": 0.000192774786362918, |
| "loss": 2.718875503540039, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.004625, |
| "grad_norm": 0.3092830777168274, |
| "learning_rate": 0.00019248340160802165, |
| "loss": 2.6953250885009767, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.005875, |
| "grad_norm": 0.3100144863128662, |
| "learning_rate": 0.00019219067716804626, |
| "loss": 2.7128387451171876, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.007125, |
| "grad_norm": 0.32156386971473694, |
| "learning_rate": 0.00019189661779162834, |
| "loss": 2.7038270950317385, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.008375, |
| "grad_norm": 0.3106272518634796, |
| "learning_rate": 0.00019160122824906018, |
| "loss": 2.7032100677490236, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.009625, |
| "grad_norm": 0.3121194541454315, |
| "learning_rate": 0.00019130451333221226, |
| "loss": 2.6769741058349608, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.010875, |
| "grad_norm": 0.31094688177108765, |
| "learning_rate": 0.0001910064778544555, |
| "loss": 2.6934465408325194, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.012125, |
| "grad_norm": 0.3150351941585541, |
| "learning_rate": 0.00019070712665058325, |
| "loss": 2.674116325378418, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.013375, |
| "grad_norm": 0.3132378160953522, |
| "learning_rate": 0.00019040646457673294, |
| "loss": 2.667017936706543, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.014625, |
| "grad_norm": 0.30859819054603577, |
| "learning_rate": 0.000190104496510307, |
| "loss": 2.6529170989990236, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.015875, |
| "grad_norm": 0.3140536844730377, |
| "learning_rate": 0.00018980122734989425, |
| "loss": 2.649005889892578, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.017125, |
| "grad_norm": 0.3163485825061798, |
| "learning_rate": 0.00018949666201518978, |
| "loss": 2.658115005493164, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.018375, |
| "grad_norm": 0.3046296536922455, |
| "learning_rate": 0.00018919080544691573, |
| "loss": 2.637746238708496, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.019625, |
| "grad_norm": 0.30639058351516724, |
| "learning_rate": 0.00018888366260674078, |
| "loss": 2.6267181396484376, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.020875, |
| "grad_norm": 0.3216869831085205, |
| "learning_rate": 0.00018857523847719992, |
| "loss": 2.6571407318115234, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.022125, |
| "grad_norm": 0.32431310415267944, |
| "learning_rate": 0.0001882655380616133, |
| "loss": 2.6225955963134764, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.023375, |
| "grad_norm": 0.3109528720378876, |
| "learning_rate": 0.0001879545663840053, |
| "loss": 2.633950042724609, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.024625, |
| "grad_norm": 0.32065126299858093, |
| "learning_rate": 0.00018764232848902314, |
| "loss": 2.602225494384766, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.025875, |
| "grad_norm": 0.32300078868865967, |
| "learning_rate": 0.00018732882944185462, |
| "loss": 2.615239715576172, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.027125, |
| "grad_norm": 0.3188120424747467, |
| "learning_rate": 0.00018701407432814644, |
| "loss": 2.594603157043457, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.028375, |
| "grad_norm": 0.3217035233974457, |
| "learning_rate": 0.00018669806825392132, |
| "loss": 2.601702117919922, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.029625, |
| "grad_norm": 0.322839617729187, |
| "learning_rate": 0.00018638081634549534, |
| "loss": 2.597119903564453, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.030875, |
| "grad_norm": 0.33341312408447266, |
| "learning_rate": 0.00018606232374939488, |
| "loss": 2.604803466796875, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.032125, |
| "grad_norm": 0.32422640919685364, |
| "learning_rate": 0.00018574259563227289, |
| "loss": 2.622762107849121, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.033375, |
| "grad_norm": 0.3312685787677765, |
| "learning_rate": 0.00018542163718082523, |
| "loss": 2.623911666870117, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.034625, |
| "grad_norm": 0.3332018256187439, |
| "learning_rate": 0.0001850994536017065, |
| "loss": 2.5997699737548827, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.035875, |
| "grad_norm": 0.32356560230255127, |
| "learning_rate": 0.00018477605012144564, |
| "loss": 2.59320182800293, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.037125, |
| "grad_norm": 0.30938515067100525, |
| "learning_rate": 0.00018445143198636093, |
| "loss": 2.5783287048339845, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.038375, |
| "grad_norm": 0.33119791746139526, |
| "learning_rate": 0.0001841256044624752, |
| "loss": 2.6023700714111326, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.039625, |
| "grad_norm": 0.32936912775039673, |
| "learning_rate": 0.00018379857283543015, |
| "loss": 2.595666694641113, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.040875, |
| "grad_norm": 0.34784626960754395, |
| "learning_rate": 0.00018347034241040066, |
| "loss": 2.6071990966796874, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.042125, |
| "grad_norm": 0.3317442238330841, |
| "learning_rate": 0.00018314091851200881, |
| "loss": 2.5899078369140627, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.043375, |
| "grad_norm": 0.3433104157447815, |
| "learning_rate": 0.0001828103064842375, |
| "loss": 2.6167388916015626, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.044625, |
| "grad_norm": 0.3177641034126282, |
| "learning_rate": 0.00018247851169034358, |
| "loss": 2.5915859222412108, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.045875, |
| "grad_norm": 0.33989644050598145, |
| "learning_rate": 0.00018214553951277114, |
| "loss": 2.5995319366455076, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.047125, |
| "grad_norm": 0.3309226930141449, |
| "learning_rate": 0.00018181139535306383, |
| "loss": 2.5778053283691404, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.048375, |
| "grad_norm": 0.33091750741004944, |
| "learning_rate": 0.00018147608463177768, |
| "loss": 2.6125743865966795, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.049625, |
| "grad_norm": 0.32603928446769714, |
| "learning_rate": 0.00018113961278839268, |
| "loss": 2.5618928909301757, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.050875, |
| "grad_norm": 0.3253335654735565, |
| "learning_rate": 0.00018080198528122495, |
| "loss": 2.592588424682617, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.052125, |
| "grad_norm": 0.3284412622451782, |
| "learning_rate": 0.000180463207587338, |
| "loss": 2.568330764770508, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.053375, |
| "grad_norm": 0.32107362151145935, |
| "learning_rate": 0.00018012328520245385, |
| "loss": 2.5809921264648437, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.054625, |
| "grad_norm": 0.3348993957042694, |
| "learning_rate": 0.000179782223640864, |
| "loss": 2.5713642120361326, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.055875, |
| "grad_norm": 0.3235042095184326, |
| "learning_rate": 0.00017944002843533986, |
| "loss": 2.608296203613281, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.057125, |
| "grad_norm": 0.33322450518608093, |
| "learning_rate": 0.00017909670513704306, |
| "loss": 2.587118911743164, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.058375, |
| "grad_norm": 0.32530325651168823, |
| "learning_rate": 0.00017875225931543543, |
| "loss": 2.5887866973876954, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.059625, |
| "grad_norm": 0.3360804319381714, |
| "learning_rate": 0.00017840669655818856, |
| "loss": 2.598593902587891, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.060875, |
| "grad_norm": 0.3203558921813965, |
| "learning_rate": 0.00017806002247109317, |
| "loss": 2.5644474029541016, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.062125, |
| "grad_norm": 0.34525611996650696, |
| "learning_rate": 0.00017771224267796828, |
| "loss": 2.5811479568481444, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.063375, |
| "grad_norm": 0.33284473419189453, |
| "learning_rate": 0.00017736336282056986, |
| "loss": 2.5817935943603514, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.064625, |
| "grad_norm": 0.34238749742507935, |
| "learning_rate": 0.00017701338855849938, |
| "loss": 2.570195770263672, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.065875, |
| "grad_norm": 0.330721378326416, |
| "learning_rate": 0.0001766623255691119, |
| "loss": 2.5676502227783202, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.067125, |
| "grad_norm": 0.33618465065956116, |
| "learning_rate": 0.00017631017954742415, |
| "loss": 2.581513595581055, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.068375, |
| "grad_norm": 0.3335385322570801, |
| "learning_rate": 0.00017595695620602192, |
| "loss": 2.6056888580322264, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.069625, |
| "grad_norm": 0.3303595781326294, |
| "learning_rate": 0.00017560266127496753, |
| "loss": 2.5539363861083983, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.070875, |
| "grad_norm": 0.32198089361190796, |
| "learning_rate": 0.00017524730050170697, |
| "loss": 2.569991683959961, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.072125, |
| "grad_norm": 0.3235105872154236, |
| "learning_rate": 0.0001748908796509764, |
| "loss": 2.5943014144897463, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.073375, |
| "grad_norm": 0.3448514938354492, |
| "learning_rate": 0.00017453340450470885, |
| "loss": 2.5967823028564454, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.074625, |
| "grad_norm": 0.32868677377700806, |
| "learning_rate": 0.00017417488086194028, |
| "loss": 2.5600149154663088, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.075875, |
| "grad_norm": 0.3214341104030609, |
| "learning_rate": 0.00017381531453871567, |
| "loss": 2.5800102233886717, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.077125, |
| "grad_norm": 0.33103859424591064, |
| "learning_rate": 0.00017345471136799454, |
| "loss": 2.568808364868164, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.078375, |
| "grad_norm": 0.31372782588005066, |
| "learning_rate": 0.00017309307719955632, |
| "loss": 2.554553413391113, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.079625, |
| "grad_norm": 0.3474419116973877, |
| "learning_rate": 0.00017273041789990558, |
| "loss": 2.540375900268555, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.080875, |
| "grad_norm": 0.3302421569824219, |
| "learning_rate": 0.0001723667393521767, |
| "loss": 2.5536571502685548, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.082125, |
| "grad_norm": 0.3372521996498108, |
| "learning_rate": 0.00017200204745603854, |
| "loss": 2.5786903381347654, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.083375, |
| "grad_norm": 0.34310850501060486, |
| "learning_rate": 0.00017163634812759882, |
| "loss": 2.56533203125, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.084625, |
| "grad_norm": 0.3463296890258789, |
| "learning_rate": 0.00017126964729930784, |
| "loss": 2.5742265701293947, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.085875, |
| "grad_norm": 0.3372081220149994, |
| "learning_rate": 0.00017090195091986254, |
| "loss": 2.5609130859375, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.087125, |
| "grad_norm": 0.33471760153770447, |
| "learning_rate": 0.00017053326495410998, |
| "loss": 2.570426177978516, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.088375, |
| "grad_norm": 0.3420524299144745, |
| "learning_rate": 0.0001701635953829503, |
| "loss": 2.5492122650146483, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.089625, |
| "grad_norm": 0.33050400018692017, |
| "learning_rate": 0.0001697929482032401, |
| "loss": 2.5594730377197266, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.090875, |
| "grad_norm": 0.33682385087013245, |
| "learning_rate": 0.00016942132942769476, |
| "loss": 2.560088348388672, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.092125, |
| "grad_norm": 0.34267619252204895, |
| "learning_rate": 0.00016904874508479127, |
| "loss": 2.5474054336547853, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.093375, |
| "grad_norm": 0.33607542514801025, |
| "learning_rate": 0.00016867520121867006, |
| "loss": 2.5770172119140624, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.094625, |
| "grad_norm": 0.3332061171531677, |
| "learning_rate": 0.0001683007038890373, |
| "loss": 2.5588443756103514, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.095875, |
| "grad_norm": 0.34043437242507935, |
| "learning_rate": 0.00016792525917106642, |
| "loss": 2.5765233993530274, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.097125, |
| "grad_norm": 0.3437064290046692, |
| "learning_rate": 0.00016754887315529948, |
| "loss": 2.598227691650391, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.098375, |
| "grad_norm": 0.3502216935157776, |
| "learning_rate": 0.0001671715519475486, |
| "loss": 2.5620880126953125, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.099625, |
| "grad_norm": 0.32694822549819946, |
| "learning_rate": 0.00016679330166879665, |
| "loss": 2.5393630981445314, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.100875, |
| "grad_norm": 0.3365384042263031, |
| "learning_rate": 0.00016641412845509818, |
| "loss": 2.5454193115234376, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.102125, |
| "grad_norm": 0.3421364426612854, |
| "learning_rate": 0.00016603403845747984, |
| "loss": 2.5687324523925783, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.103375, |
| "grad_norm": 0.32685622572898865, |
| "learning_rate": 0.0001656530378418403, |
| "loss": 2.564802551269531, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.104625, |
| "grad_norm": 0.32674023509025574, |
| "learning_rate": 0.0001652711327888507, |
| "loss": 2.5603107452392577, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.105875, |
| "grad_norm": 0.3370579481124878, |
| "learning_rate": 0.00016488832949385402, |
| "loss": 2.537816619873047, |
| "step": 2800 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 8000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.7508190343633306e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|