| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.19085225403093123, | |
| "eval_steps": 500, | |
| "global_step": 580, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0016452780519907865, | |
| "grad_norm": 1.5217232704162598, | |
| "learning_rate": 0.00012, | |
| "loss": 4.5224, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.003290556103981573, | |
| "grad_norm": 0.574784517288208, | |
| "learning_rate": 0.0001998022412656559, | |
| "loss": 3.2397, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.004935834155972359, | |
| "grad_norm": 0.5069302320480347, | |
| "learning_rate": 0.0001994726433750824, | |
| "loss": 2.7151, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.006581112207963146, | |
| "grad_norm": 0.5865616202354431, | |
| "learning_rate": 0.00019914304548450891, | |
| "loss": 2.3507, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008226390259953932, | |
| "grad_norm": 0.5909593105316162, | |
| "learning_rate": 0.0001988134475939354, | |
| "loss": 2.3338, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.009871668311944718, | |
| "grad_norm": 0.4913259744644165, | |
| "learning_rate": 0.0001984838497033619, | |
| "loss": 2.3544, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.011516946363935505, | |
| "grad_norm": 0.4331064522266388, | |
| "learning_rate": 0.00019815425181278842, | |
| "loss": 2.2672, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.013162224415926292, | |
| "grad_norm": 0.42509177327156067, | |
| "learning_rate": 0.0001978246539222149, | |
| "loss": 2.2764, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.014807502467917079, | |
| "grad_norm": 0.415414035320282, | |
| "learning_rate": 0.0001974950560316414, | |
| "loss": 2.1035, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.016452780519907863, | |
| "grad_norm": 0.5336166024208069, | |
| "learning_rate": 0.0001971654581410679, | |
| "loss": 2.2284, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01809805857189865, | |
| "grad_norm": 0.3719511926174164, | |
| "learning_rate": 0.0001968358602504944, | |
| "loss": 2.1656, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.019743336623889437, | |
| "grad_norm": 0.49993693828582764, | |
| "learning_rate": 0.00019650626235992092, | |
| "loss": 2.0299, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.021388614675880224, | |
| "grad_norm": 0.5019952654838562, | |
| "learning_rate": 0.0001961766644693474, | |
| "loss": 2.0951, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02303389272787101, | |
| "grad_norm": 0.4793168306350708, | |
| "learning_rate": 0.0001958470665787739, | |
| "loss": 2.1505, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.024679170779861797, | |
| "grad_norm": 0.47024038434028625, | |
| "learning_rate": 0.0001955174686882004, | |
| "loss": 2.054, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.026324448831852584, | |
| "grad_norm": 0.5920702815055847, | |
| "learning_rate": 0.0001951878707976269, | |
| "loss": 2.1802, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02796972688384337, | |
| "grad_norm": 0.4485560655593872, | |
| "learning_rate": 0.0001948582729070534, | |
| "loss": 2.1609, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.029615004935834157, | |
| "grad_norm": 0.41952958703041077, | |
| "learning_rate": 0.0001945286750164799, | |
| "loss": 2.0584, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03126028298782494, | |
| "grad_norm": 0.40542706847190857, | |
| "learning_rate": 0.00019419907712590643, | |
| "loss": 2.22, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.03290556103981573, | |
| "grad_norm": 0.438912570476532, | |
| "learning_rate": 0.0001938694792353329, | |
| "loss": 2.1478, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.034550839091806514, | |
| "grad_norm": 0.46580272912979126, | |
| "learning_rate": 0.0001935398813447594, | |
| "loss": 2.1227, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.0361961171437973, | |
| "grad_norm": 0.5012261271476746, | |
| "learning_rate": 0.0001932102834541859, | |
| "loss": 2.1274, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03784139519578809, | |
| "grad_norm": 0.5175459384918213, | |
| "learning_rate": 0.0001928806855636124, | |
| "loss": 2.0726, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.039486673247778874, | |
| "grad_norm": 0.5441685318946838, | |
| "learning_rate": 0.00019255108767303892, | |
| "loss": 2.1269, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04113195129976966, | |
| "grad_norm": 0.4177902042865753, | |
| "learning_rate": 0.0001922214897824654, | |
| "loss": 2.164, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.04277722935176045, | |
| "grad_norm": 0.44649720191955566, | |
| "learning_rate": 0.0001918918918918919, | |
| "loss": 2.0883, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.044422507403751234, | |
| "grad_norm": 0.46839994192123413, | |
| "learning_rate": 0.0001915622940013184, | |
| "loss": 2.0448, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.04606778545574202, | |
| "grad_norm": 0.4343637228012085, | |
| "learning_rate": 0.0001912326961107449, | |
| "loss": 2.0282, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04771306350773281, | |
| "grad_norm": 0.4241706132888794, | |
| "learning_rate": 0.00019090309822017141, | |
| "loss": 2.0895, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.049358341559723594, | |
| "grad_norm": 0.44053712487220764, | |
| "learning_rate": 0.0001905735003295979, | |
| "loss": 2.0925, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05100361961171438, | |
| "grad_norm": 0.39026254415512085, | |
| "learning_rate": 0.0001902439024390244, | |
| "loss": 2.1519, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.05264889766370517, | |
| "grad_norm": 0.455168217420578, | |
| "learning_rate": 0.0001899143045484509, | |
| "loss": 2.1623, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.054294175715695954, | |
| "grad_norm": 0.4873504042625427, | |
| "learning_rate": 0.0001895847066578774, | |
| "loss": 1.9314, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.05593945376768674, | |
| "grad_norm": 0.5435200929641724, | |
| "learning_rate": 0.0001892551087673039, | |
| "loss": 2.0674, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05758473181967753, | |
| "grad_norm": 0.44813185930252075, | |
| "learning_rate": 0.0001889255108767304, | |
| "loss": 2.0042, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.059230009871668314, | |
| "grad_norm": 0.5993271470069885, | |
| "learning_rate": 0.0001885959129861569, | |
| "loss": 2.0942, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0608752879236591, | |
| "grad_norm": 0.5044869780540466, | |
| "learning_rate": 0.0001882663150955834, | |
| "loss": 2.118, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.06252056597564988, | |
| "grad_norm": 0.4713231325149536, | |
| "learning_rate": 0.00018793671720500988, | |
| "loss": 2.1232, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06416584402764067, | |
| "grad_norm": 0.5351199507713318, | |
| "learning_rate": 0.0001876071193144364, | |
| "loss": 2.0956, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.06581112207963145, | |
| "grad_norm": 0.380096971988678, | |
| "learning_rate": 0.0001872775214238629, | |
| "loss": 2.0466, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06745640013162224, | |
| "grad_norm": 0.4392818510532379, | |
| "learning_rate": 0.0001869479235332894, | |
| "loss": 2.0503, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.06910167818361303, | |
| "grad_norm": 0.49540552496910095, | |
| "learning_rate": 0.00018661832564271588, | |
| "loss": 2.0771, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07074695623560381, | |
| "grad_norm": 0.5129232406616211, | |
| "learning_rate": 0.00018628872775214238, | |
| "loss": 1.9724, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.0723922342875946, | |
| "grad_norm": 0.4697638750076294, | |
| "learning_rate": 0.0001859591298615689, | |
| "loss": 2.03, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.07403751233958539, | |
| "grad_norm": 0.4250948131084442, | |
| "learning_rate": 0.0001856295319709954, | |
| "loss": 2.0235, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.07568279039157617, | |
| "grad_norm": 0.5197622776031494, | |
| "learning_rate": 0.0001852999340804219, | |
| "loss": 1.991, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07732806844356696, | |
| "grad_norm": 0.45986393094062805, | |
| "learning_rate": 0.0001849703361898484, | |
| "loss": 2.0191, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.07897334649555775, | |
| "grad_norm": 0.5618834495544434, | |
| "learning_rate": 0.00018464073829927487, | |
| "loss": 1.9859, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08061862454754853, | |
| "grad_norm": 0.5400542616844177, | |
| "learning_rate": 0.0001843111404087014, | |
| "loss": 1.9633, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.08226390259953932, | |
| "grad_norm": 0.5259667038917542, | |
| "learning_rate": 0.0001839815425181279, | |
| "loss": 1.8768, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08390918065153011, | |
| "grad_norm": 0.5015618801116943, | |
| "learning_rate": 0.0001836519446275544, | |
| "loss": 2.0329, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.0855544587035209, | |
| "grad_norm": 0.4835856854915619, | |
| "learning_rate": 0.0001833223467369809, | |
| "loss": 1.9815, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.08719973675551168, | |
| "grad_norm": 0.4211411774158478, | |
| "learning_rate": 0.0001829927488464074, | |
| "loss": 1.9899, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.08884501480750247, | |
| "grad_norm": 0.4507792294025421, | |
| "learning_rate": 0.0001826631509558339, | |
| "loss": 1.9865, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.09049029285949325, | |
| "grad_norm": 0.5402964353561401, | |
| "learning_rate": 0.00018233355306526038, | |
| "loss": 1.902, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.09213557091148404, | |
| "grad_norm": 0.4574088454246521, | |
| "learning_rate": 0.0001820039551746869, | |
| "loss": 1.9727, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.09378084896347483, | |
| "grad_norm": 0.4615534842014313, | |
| "learning_rate": 0.0001816743572841134, | |
| "loss": 2.0582, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.09542612701546561, | |
| "grad_norm": 0.5126486420631409, | |
| "learning_rate": 0.0001813447593935399, | |
| "loss": 2.0274, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.0970714050674564, | |
| "grad_norm": 0.6757667660713196, | |
| "learning_rate": 0.00018101516150296638, | |
| "loss": 1.879, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.09871668311944719, | |
| "grad_norm": 0.49488508701324463, | |
| "learning_rate": 0.00018068556361239288, | |
| "loss": 1.9348, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.10036196117143797, | |
| "grad_norm": 0.5860428810119629, | |
| "learning_rate": 0.0001803559657218194, | |
| "loss": 1.9276, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.10200723922342876, | |
| "grad_norm": 0.5148414373397827, | |
| "learning_rate": 0.0001800263678312459, | |
| "loss": 2.0563, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.10365251727541955, | |
| "grad_norm": 0.5046892762184143, | |
| "learning_rate": 0.00017969676994067238, | |
| "loss": 2.0009, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.10529779532741033, | |
| "grad_norm": 0.4465779960155487, | |
| "learning_rate": 0.0001793671720500989, | |
| "loss": 1.9842, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.10694307337940112, | |
| "grad_norm": 0.4488319158554077, | |
| "learning_rate": 0.00017903757415952537, | |
| "loss": 1.9881, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.10858835143139191, | |
| "grad_norm": 0.46680882573127747, | |
| "learning_rate": 0.0001787079762689519, | |
| "loss": 2.0471, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1102336294833827, | |
| "grad_norm": 0.5483986139297485, | |
| "learning_rate": 0.00017837837837837839, | |
| "loss": 2.0581, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.11187890753537348, | |
| "grad_norm": 0.4938408434391022, | |
| "learning_rate": 0.00017804878048780488, | |
| "loss": 1.9712, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.11352418558736427, | |
| "grad_norm": 0.4176371991634369, | |
| "learning_rate": 0.0001777191825972314, | |
| "loss": 1.9019, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.11516946363935505, | |
| "grad_norm": 0.45936137437820435, | |
| "learning_rate": 0.0001773895847066579, | |
| "loss": 1.8773, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.11681474169134584, | |
| "grad_norm": 0.5166374444961548, | |
| "learning_rate": 0.0001770599868160844, | |
| "loss": 2.0059, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.11846001974333663, | |
| "grad_norm": 0.5485665202140808, | |
| "learning_rate": 0.00017673038892551088, | |
| "loss": 1.9634, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.12010529779532741, | |
| "grad_norm": 0.44683098793029785, | |
| "learning_rate": 0.00017640079103493737, | |
| "loss": 1.9239, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.1217505758473182, | |
| "grad_norm": 0.4426558315753937, | |
| "learning_rate": 0.0001760711931443639, | |
| "loss": 1.9467, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.12339585389930899, | |
| "grad_norm": 0.45059794187545776, | |
| "learning_rate": 0.0001757415952537904, | |
| "loss": 1.9099, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.12504113195129976, | |
| "grad_norm": 0.47326767444610596, | |
| "learning_rate": 0.00017541199736321688, | |
| "loss": 1.9521, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.12668641000329056, | |
| "grad_norm": 0.4886496961116791, | |
| "learning_rate": 0.00017508239947264337, | |
| "loss": 1.9336, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.12833168805528133, | |
| "grad_norm": 0.4394533336162567, | |
| "learning_rate": 0.00017475280158206987, | |
| "loss": 1.8583, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.12997696610727213, | |
| "grad_norm": 0.5217518210411072, | |
| "learning_rate": 0.0001744232036914964, | |
| "loss": 1.9916, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.1316222441592629, | |
| "grad_norm": 0.44888633489608765, | |
| "learning_rate": 0.00017409360580092288, | |
| "loss": 2.0022, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1332675222112537, | |
| "grad_norm": 0.5385366678237915, | |
| "learning_rate": 0.0001737640079103494, | |
| "loss": 1.9492, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.13491280026324448, | |
| "grad_norm": 0.4314708113670349, | |
| "learning_rate": 0.00017343441001977587, | |
| "loss": 2.0088, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.13655807831523528, | |
| "grad_norm": 0.4006335735321045, | |
| "learning_rate": 0.00017310481212920236, | |
| "loss": 1.9524, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.13820335636722605, | |
| "grad_norm": 0.5291544198989868, | |
| "learning_rate": 0.00017277521423862888, | |
| "loss": 1.8593, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.13984863441921686, | |
| "grad_norm": 0.47129592299461365, | |
| "learning_rate": 0.00017244561634805538, | |
| "loss": 1.9542, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.14149391247120763, | |
| "grad_norm": 0.449595183134079, | |
| "learning_rate": 0.0001721160184574819, | |
| "loss": 1.9294, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.14313919052319843, | |
| "grad_norm": 0.4410437047481537, | |
| "learning_rate": 0.0001717864205669084, | |
| "loss": 1.9209, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.1447844685751892, | |
| "grad_norm": 0.5655415654182434, | |
| "learning_rate": 0.00017145682267633488, | |
| "loss": 1.9486, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.14642974662718, | |
| "grad_norm": 0.5219452381134033, | |
| "learning_rate": 0.00017112722478576138, | |
| "loss": 1.8922, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.14807502467917077, | |
| "grad_norm": 0.49918806552886963, | |
| "learning_rate": 0.00017079762689518787, | |
| "loss": 1.92, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.14972030273116158, | |
| "grad_norm": 0.4410875141620636, | |
| "learning_rate": 0.0001704680290046144, | |
| "loss": 1.9363, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.15136558078315235, | |
| "grad_norm": 0.4709133207798004, | |
| "learning_rate": 0.00017013843111404089, | |
| "loss": 1.8803, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.15301085883514315, | |
| "grad_norm": 0.447390079498291, | |
| "learning_rate": 0.00016980883322346738, | |
| "loss": 2.0388, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.15465613688713392, | |
| "grad_norm": 0.4439023435115814, | |
| "learning_rate": 0.00016947923533289387, | |
| "loss": 1.9112, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.15630141493912472, | |
| "grad_norm": 0.5134996175765991, | |
| "learning_rate": 0.00016914963744232037, | |
| "loss": 1.9556, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.1579466929911155, | |
| "grad_norm": 0.5412283539772034, | |
| "learning_rate": 0.0001688200395517469, | |
| "loss": 1.899, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1595919710431063, | |
| "grad_norm": 0.46328097581863403, | |
| "learning_rate": 0.00016849044166117338, | |
| "loss": 1.8476, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.16123724909509707, | |
| "grad_norm": 0.43716996908187866, | |
| "learning_rate": 0.00016816084377059987, | |
| "loss": 1.9059, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.16288252714708787, | |
| "grad_norm": 0.4769724905490875, | |
| "learning_rate": 0.00016783124588002637, | |
| "loss": 1.9186, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.16452780519907864, | |
| "grad_norm": 0.5047943592071533, | |
| "learning_rate": 0.00016750164798945286, | |
| "loss": 1.9161, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.16617308325106944, | |
| "grad_norm": 0.4556055963039398, | |
| "learning_rate": 0.00016717205009887938, | |
| "loss": 1.8834, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.16781836130306022, | |
| "grad_norm": 0.4692705571651459, | |
| "learning_rate": 0.00016684245220830588, | |
| "loss": 1.9091, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.16946363935505102, | |
| "grad_norm": 0.43482905626296997, | |
| "learning_rate": 0.00016651285431773237, | |
| "loss": 1.9405, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.1711089174070418, | |
| "grad_norm": 0.5708907246589661, | |
| "learning_rate": 0.0001661832564271589, | |
| "loss": 1.9027, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.1727541954590326, | |
| "grad_norm": 0.49181491136550903, | |
| "learning_rate": 0.00016585365853658536, | |
| "loss": 1.8526, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.17439947351102336, | |
| "grad_norm": 0.5000940561294556, | |
| "learning_rate": 0.00016552406064601188, | |
| "loss": 1.8588, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.17604475156301416, | |
| "grad_norm": 0.45289117097854614, | |
| "learning_rate": 0.00016519446275543837, | |
| "loss": 2.0358, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.17769002961500494, | |
| "grad_norm": 0.5227617621421814, | |
| "learning_rate": 0.00016486486486486486, | |
| "loss": 1.8512, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.17933530766699574, | |
| "grad_norm": 0.512492299079895, | |
| "learning_rate": 0.00016453526697429138, | |
| "loss": 1.9014, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.1809805857189865, | |
| "grad_norm": 0.4832890033721924, | |
| "learning_rate": 0.00016420566908371785, | |
| "loss": 1.9765, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.18262586377097728, | |
| "grad_norm": 0.4797350764274597, | |
| "learning_rate": 0.00016387607119314437, | |
| "loss": 1.8907, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.18427114182296808, | |
| "grad_norm": 0.48889973759651184, | |
| "learning_rate": 0.00016354647330257086, | |
| "loss": 1.9369, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.18591641987495885, | |
| "grad_norm": 0.44188860058784485, | |
| "learning_rate": 0.00016321687541199739, | |
| "loss": 1.8547, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.18756169792694966, | |
| "grad_norm": 0.5300605893135071, | |
| "learning_rate": 0.00016288727752142388, | |
| "loss": 1.8603, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.18920697597894043, | |
| "grad_norm": 0.5411728620529175, | |
| "learning_rate": 0.00016255767963085037, | |
| "loss": 1.8067, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.19085225403093123, | |
| "grad_norm": 0.5871673226356506, | |
| "learning_rate": 0.00016222808174027687, | |
| "loss": 1.8875, | |
| "step": 580 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3039, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1842159022647296e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |