{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997185477061638,
  "eval_steps": 500,
  "global_step": 1776,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005629045876723895,
      "grad_norm": 0.8329170942306519,
      "learning_rate": 1.1235955056179775e-06,
      "loss": 0.8354,
      "step": 1
    },
    {
      "epoch": 0.0028145229383619475,
      "grad_norm": 1.3410223722457886,
      "learning_rate": 5.617977528089888e-06,
      "loss": 1.725,
      "step": 5
    },
    {
      "epoch": 0.005629045876723895,
      "grad_norm": 3.013509511947632,
      "learning_rate": 1.1235955056179776e-05,
      "loss": 1.6574,
      "step": 10
    },
    {
      "epoch": 0.008443568815085843,
      "grad_norm": 2.5674707889556885,
      "learning_rate": 1.6853932584269665e-05,
      "loss": 1.2345,
      "step": 15
    },
    {
      "epoch": 0.01125809175344779,
      "grad_norm": 2.4031896591186523,
      "learning_rate": 2.2471910112359552e-05,
      "loss": 1.5218,
      "step": 20
    },
    {
      "epoch": 0.014072614691809739,
      "grad_norm": 1.1180998086929321,
      "learning_rate": 2.8089887640449443e-05,
      "loss": 1.2468,
      "step": 25
    },
    {
      "epoch": 0.016887137630171686,
      "grad_norm": 3.0892934799194336,
      "learning_rate": 3.370786516853933e-05,
      "loss": 1.2287,
      "step": 30
    },
    {
      "epoch": 0.019701660568533633,
      "grad_norm": 1.6239148378372192,
      "learning_rate": 3.9325842696629214e-05,
      "loss": 1.1948,
      "step": 35
    },
    {
      "epoch": 0.02251618350689558,
      "grad_norm": 1.0267276763916016,
      "learning_rate": 4.4943820224719104e-05,
      "loss": 1.0779,
      "step": 40
    },
    {
      "epoch": 0.02533070644525753,
      "grad_norm": 0.9497658610343933,
      "learning_rate": 5.0561797752808995e-05,
      "loss": 0.5863,
      "step": 45
    },
    {
      "epoch": 0.028145229383619477,
      "grad_norm": 1.321252703666687,
      "learning_rate": 5.6179775280898885e-05,
      "loss": 0.9368,
      "step": 50
    },
    {
      "epoch": 0.030959752321981424,
      "grad_norm": 1.583485722541809,
      "learning_rate": 6.179775280898876e-05,
      "loss": 0.7446,
      "step": 55
    },
    {
      "epoch": 0.03377427526034337,
      "grad_norm": 1.6845484972000122,
      "learning_rate": 6.741573033707866e-05,
      "loss": 0.6362,
      "step": 60
    },
    {
      "epoch": 0.03658879819870532,
      "grad_norm": 1.6922656297683716,
      "learning_rate": 7.303370786516854e-05,
      "loss": 0.6806,
      "step": 65
    },
    {
      "epoch": 0.039403321137067265,
      "grad_norm": 1.3192064762115479,
      "learning_rate": 7.865168539325843e-05,
      "loss": 0.6754,
      "step": 70
    },
    {
      "epoch": 0.042217844075429216,
      "grad_norm": 1.6908262968063354,
      "learning_rate": 8.426966292134831e-05,
      "loss": 0.7982,
      "step": 75
    },
    {
      "epoch": 0.04503236701379116,
      "grad_norm": 1.8348135948181152,
      "learning_rate": 8.988764044943821e-05,
      "loss": 0.5052,
      "step": 80
    },
    {
      "epoch": 0.04784688995215311,
      "grad_norm": 1.3342186212539673,
      "learning_rate": 9.550561797752809e-05,
      "loss": 0.8893,
      "step": 85
    },
    {
      "epoch": 0.05066141289051506,
      "grad_norm": 1.4185314178466797,
      "learning_rate": 0.00010112359550561799,
      "loss": 0.8553,
      "step": 90
    },
    {
      "epoch": 0.053475935828877004,
      "grad_norm": 2.145559310913086,
      "learning_rate": 0.00010674157303370786,
      "loss": 0.4912,
      "step": 95
    },
    {
      "epoch": 0.056290458767238954,
      "grad_norm": 0.9930405020713806,
      "learning_rate": 0.00011235955056179777,
      "loss": 0.5233,
      "step": 100
    },
    {
      "epoch": 0.0591049817056009,
      "grad_norm": 1.733180046081543,
      "learning_rate": 0.00011797752808988764,
      "loss": 0.8077,
      "step": 105
    },
    {
      "epoch": 0.06191950464396285,
      "grad_norm": 2.0119330883026123,
      "learning_rate": 0.00012359550561797752,
      "loss": 0.3477,
      "step": 110
    },
    {
      "epoch": 0.0647340275823248,
      "grad_norm": 0.9684231281280518,
      "learning_rate": 0.00012921348314606744,
      "loss": 0.6151,
      "step": 115
    },
    {
      "epoch": 0.06754855052068674,
      "grad_norm": 0.8802971839904785,
      "learning_rate": 0.00013483146067415732,
      "loss": 0.7356,
      "step": 120
    },
    {
      "epoch": 0.07036307345904869,
      "grad_norm": 1.386469841003418,
      "learning_rate": 0.0001404494382022472,
      "loss": 0.6414,
      "step": 125
    },
    {
      "epoch": 0.07317759639741064,
      "grad_norm": 1.002143144607544,
      "learning_rate": 0.0001460674157303371,
      "loss": 0.5492,
      "step": 130
    },
    {
      "epoch": 0.07599211933577259,
      "grad_norm": 1.686691164970398,
      "learning_rate": 0.00015168539325842697,
      "loss": 0.5085,
      "step": 135
    },
    {
      "epoch": 0.07880664227413453,
      "grad_norm": 1.0069544315338135,
      "learning_rate": 0.00015730337078651685,
      "loss": 0.7826,
      "step": 140
    },
    {
      "epoch": 0.08162116521249649,
      "grad_norm": 1.2447969913482666,
      "learning_rate": 0.00016292134831460674,
      "loss": 0.648,
      "step": 145
    },
    {
      "epoch": 0.08443568815085843,
      "grad_norm": 1.3154926300048828,
      "learning_rate": 0.00016853932584269662,
      "loss": 0.5664,
      "step": 150
    },
    {
      "epoch": 0.08725021108922038,
      "grad_norm": 0.6880718469619751,
      "learning_rate": 0.00017415730337078653,
      "loss": 0.7669,
      "step": 155
    },
    {
      "epoch": 0.09006473402758232,
      "grad_norm": 2.1054015159606934,
      "learning_rate": 0.00017977528089887642,
      "loss": 0.2233,
      "step": 160
    },
    {
      "epoch": 0.09287925696594428,
      "grad_norm": 2.41121506690979,
      "learning_rate": 0.0001853932584269663,
      "loss": 0.496,
      "step": 165
    },
    {
      "epoch": 0.09569377990430622,
      "grad_norm": 1.5123809576034546,
      "learning_rate": 0.00019101123595505618,
      "loss": 0.834,
      "step": 170
    },
    {
      "epoch": 0.09850830284266816,
      "grad_norm": 0.9215847849845886,
      "learning_rate": 0.00019662921348314607,
      "loss": 0.5,
      "step": 175
    },
    {
      "epoch": 0.10132282578103012,
      "grad_norm": 1.3919601440429688,
      "learning_rate": 0.00019999922700687455,
      "loss": 0.4441,
      "step": 180
    },
    {
      "epoch": 0.10413734871939206,
      "grad_norm": 0.9048366546630859,
      "learning_rate": 0.00019999053097145492,
      "loss": 0.6331,
      "step": 185
    },
    {
      "epoch": 0.10695187165775401,
      "grad_norm": 0.9864197969436646,
      "learning_rate": 0.000199972173502251,
      "loss": 0.7634,
      "step": 190
    },
    {
      "epoch": 0.10976639459611595,
      "grad_norm": 1.0270127058029175,
      "learning_rate": 0.00019994415637302547,
      "loss": 0.5543,
      "step": 195
    },
    {
      "epoch": 0.11258091753447791,
      "grad_norm": 1.1281390190124512,
      "learning_rate": 0.00019990648229089103,
      "loss": 0.7303,
      "step": 200
    },
    {
      "epoch": 0.11539544047283985,
      "grad_norm": 0.7723137140274048,
      "learning_rate": 0.0001998591548960489,
      "loss": 0.5619,
      "step": 205
    },
    {
      "epoch": 0.1182099634112018,
      "grad_norm": 0.5689103603363037,
      "learning_rate": 0.00019980217876143698,
      "loss": 0.6684,
      "step": 210
    },
    {
      "epoch": 0.12102448634956375,
      "grad_norm": 2.17149019241333,
      "learning_rate": 0.0001997355593922881,
      "loss": 0.6625,
      "step": 215
    },
    {
      "epoch": 0.1238390092879257,
      "grad_norm": 1.15530526638031,
      "learning_rate": 0.000199659303225598,
      "loss": 0.5027,
      "step": 220
    },
    {
      "epoch": 0.12665353222628764,
      "grad_norm": 0.933122456073761,
      "learning_rate": 0.00019957341762950344,
      "loss": 0.4687,
      "step": 225
    },
    {
      "epoch": 0.1294680551646496,
      "grad_norm": 0.9144341945648193,
      "learning_rate": 0.0001994779109025702,
      "loss": 0.855,
      "step": 230
    },
    {
      "epoch": 0.13228257810301153,
      "grad_norm": 0.7820045351982117,
      "learning_rate": 0.00019937279227299131,
      "loss": 0.5393,
      "step": 235
    },
    {
      "epoch": 0.13509710104137349,
      "grad_norm": 1.2164809703826904,
      "learning_rate": 0.00019925807189769533,
      "loss": 0.8503,
      "step": 240
    },
    {
      "epoch": 0.13791162397973544,
      "grad_norm": 0.8118152022361755,
      "learning_rate": 0.0001991337608613649,
      "loss": 0.6439,
      "step": 245
    },
    {
      "epoch": 0.14072614691809737,
      "grad_norm": 1.0747621059417725,
      "learning_rate": 0.00019899987117536587,
      "loss": 0.5872,
      "step": 250
    },
    {
      "epoch": 0.14354066985645933,
      "grad_norm": 2.370304822921753,
      "learning_rate": 0.00019885641577658666,
      "loss": 0.5854,
      "step": 255
    },
    {
      "epoch": 0.1463551927948213,
      "grad_norm": 0.8851784467697144,
      "learning_rate": 0.00019870340852618803,
      "loss": 0.6521,
      "step": 260
    },
    {
      "epoch": 0.14916971573318322,
      "grad_norm": 1.0250051021575928,
      "learning_rate": 0.00019854086420826418,
      "loss": 0.7139,
      "step": 265
    },
    {
      "epoch": 0.15198423867154517,
      "grad_norm": 0.45537930727005005,
      "learning_rate": 0.00019836879852841387,
      "loss": 0.5634,
      "step": 270
    },
    {
      "epoch": 0.15479876160990713,
      "grad_norm": 0.8726896047592163,
      "learning_rate": 0.0001981872281122231,
      "loss": 0.4579,
      "step": 275
    },
    {
      "epoch": 0.15761328454826906,
      "grad_norm": 1.0558394193649292,
      "learning_rate": 0.0001979961705036587,
      "loss": 0.7129,
      "step": 280
    },
    {
      "epoch": 0.16042780748663102,
      "grad_norm": 0.9016023874282837,
      "learning_rate": 0.000197795644163373,
      "loss": 0.67,
      "step": 285
    },
    {
      "epoch": 0.16324233042499298,
      "grad_norm": 1.2795695066452026,
      "learning_rate": 0.00019758566846692029,
      "loss": 0.6221,
      "step": 290
    },
    {
      "epoch": 0.1660568533633549,
      "grad_norm": 0.9220066666603088,
      "learning_rate": 0.00019736626370288457,
      "loss": 0.6101,
      "step": 295
    },
    {
      "epoch": 0.16887137630171686,
      "grad_norm": 0.8552654385566711,
      "learning_rate": 0.00019713745107091923,
      "loss": 0.5985,
      "step": 300
    },
    {
      "epoch": 0.1716858992400788,
      "grad_norm": 0.9308118224143982,
      "learning_rate": 0.0001968992526796987,
      "loss": 0.7646,
      "step": 305
    },
    {
      "epoch": 0.17450042217844075,
      "grad_norm": 1.162402868270874,
      "learning_rate": 0.00019665169154478213,
      "loss": 0.8352,
      "step": 310
    },
    {
      "epoch": 0.1773149451168027,
      "grad_norm": 0.7268826961517334,
      "learning_rate": 0.00019639479158638972,
      "loss": 0.5086,
      "step": 315
    },
    {
      "epoch": 0.18012946805516464,
      "grad_norm": 0.9068782329559326,
      "learning_rate": 0.00019612857762709124,
      "loss": 0.7053,
      "step": 320
    },
    {
      "epoch": 0.1829439909935266,
      "grad_norm": 1.0525766611099243,
      "learning_rate": 0.0001958530753894078,
      "loss": 0.8175,
      "step": 325
    },
    {
      "epoch": 0.18575851393188855,
      "grad_norm": 0.8571164608001709,
      "learning_rate": 0.0001955683114933263,
      "loss": 0.6137,
      "step": 330
    },
    {
      "epoch": 0.18857303687025048,
      "grad_norm": 1.1700630187988281,
      "learning_rate": 0.00019527431345372738,
      "loss": 0.4982,
      "step": 335
    },
    {
      "epoch": 0.19138755980861244,
      "grad_norm": 0.9724971055984497,
      "learning_rate": 0.00019497110967772692,
      "loss": 0.603,
      "step": 340
    },
    {
      "epoch": 0.1942020827469744,
      "grad_norm": 1.852655053138733,
      "learning_rate": 0.000194658729461931,
      "loss": 0.5137,
      "step": 345
    },
    {
      "epoch": 0.19701660568533633,
      "grad_norm": 1.018691062927246,
      "learning_rate": 0.00019433720298960537,
      "loss": 0.7321,
      "step": 350
    },
    {
      "epoch": 0.19983112862369828,
      "grad_norm": 1.1386247873306274,
      "learning_rate": 0.00019400656132775908,
      "loss": 0.6923,
      "step": 355
    },
    {
      "epoch": 0.20264565156206024,
      "grad_norm": 0.7724803686141968,
      "learning_rate": 0.0001936668364241424,
      "loss": 0.4945,
      "step": 360
    },
    {
      "epoch": 0.20546017450042217,
      "grad_norm": 0.49107155203819275,
      "learning_rate": 0.00019331806110416027,
      "loss": 0.6003,
      "step": 365
    },
    {
      "epoch": 0.20827469743878413,
      "grad_norm": 0.6642640829086304,
      "learning_rate": 0.00019296026906770027,
      "loss": 0.2862,
      "step": 370
    },
    {
      "epoch": 0.2110892203771461,
      "grad_norm": 1.6255823373794556,
      "learning_rate": 0.0001925934948858767,
      "loss": 0.6002,
      "step": 375
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 1.1237847805023193,
      "learning_rate": 0.00019221777399768998,
      "loss": 0.6876,
      "step": 380
    },
    {
      "epoch": 0.21671826625386997,
      "grad_norm": 1.4545127153396606,
      "learning_rate": 0.00019183314270660248,
      "loss": 0.648,
      "step": 385
    },
    {
      "epoch": 0.2195327891922319,
      "grad_norm": 1.7229652404785156,
      "learning_rate": 0.00019143963817703087,
      "loss": 0.4523,
      "step": 390
    },
    {
      "epoch": 0.22234731213059386,
      "grad_norm": 1.0194050073623657,
      "learning_rate": 0.00019103729843075498,
      "loss": 0.5688,
      "step": 395
    },
    {
      "epoch": 0.22516183506895582,
      "grad_norm": 1.1462711095809937,
      "learning_rate": 0.0001906261623432441,
      "loss": 0.608,
      "step": 400
    },
    {
      "epoch": 0.22797635800731775,
      "grad_norm": 0.7898936867713928,
      "learning_rate": 0.00019020626963990074,
      "loss": 0.5458,
      "step": 405
    },
    {
      "epoch": 0.2307908809456797,
      "grad_norm": 3.30757474899292,
      "learning_rate": 0.00018977766089222208,
      "loss": 0.491,
      "step": 410
    },
    {
      "epoch": 0.23360540388404166,
      "grad_norm": 1.2833960056304932,
      "learning_rate": 0.00018934037751387997,
      "loss": 0.5706,
      "step": 415
    },
    {
      "epoch": 0.2364199268224036,
      "grad_norm": 1.2525941133499146,
      "learning_rate": 0.00018889446175671926,
      "loss": 0.7208,
      "step": 420
    },
    {
      "epoch": 0.23923444976076555,
      "grad_norm": 0.9608438014984131,
      "learning_rate": 0.00018843995670667543,
      "loss": 0.5394,
      "step": 425
    },
    {
      "epoch": 0.2420489726991275,
      "grad_norm": 0.8090662360191345,
      "learning_rate": 0.00018797690627961132,
      "loss": 0.4402,
      "step": 430
    },
    {
      "epoch": 0.24486349563748944,
      "grad_norm": 0.8819811344146729,
      "learning_rate": 0.00018750535521707396,
      "loss": 0.4728,
      "step": 435
    },
    {
      "epoch": 0.2476780185758514,
      "grad_norm": 0.9342750906944275,
      "learning_rate": 0.0001870253490819713,
      "loss": 0.5013,
      "step": 440
    },
    {
      "epoch": 0.25049254151421335,
      "grad_norm": 0.7526805400848389,
      "learning_rate": 0.00018653693425417002,
      "loss": 0.4606,
      "step": 445
    },
    {
      "epoch": 0.2533070644525753,
      "grad_norm": 1.2073928117752075,
      "learning_rate": 0.00018604015792601396,
      "loss": 0.7602,
      "step": 450
    },
    {
      "epoch": 0.2561215873909372,
      "grad_norm": 0.48888304829597473,
      "learning_rate": 0.00018553506809776424,
      "loss": 0.8642,
      "step": 455
    },
    {
      "epoch": 0.2589361103292992,
      "grad_norm": 1.424832820892334,
      "learning_rate": 0.00018502171357296144,
      "loss": 0.7515,
      "step": 460
    },
    {
      "epoch": 0.2617506332676611,
      "grad_norm": 0.4335147738456726,
      "learning_rate": 0.00018450014395370983,
      "loss": 0.6242,
      "step": 465
    },
    {
      "epoch": 0.26456515620602306,
      "grad_norm": 1.4052842855453491,
      "learning_rate": 0.00018397040963588488,
      "loss": 0.405,
      "step": 470
    },
    {
      "epoch": 0.26737967914438504,
      "grad_norm": 0.8757149577140808,
      "learning_rate": 0.0001834325618042636,
      "loss": 0.6849,
      "step": 475
    },
    {
      "epoch": 0.27019420208274697,
      "grad_norm": 2.241838216781616,
      "learning_rate": 0.00018288665242757903,
      "loss": 0.5653,
      "step": 480
    },
    {
      "epoch": 0.2730087250211089,
      "grad_norm": 1.2316606044769287,
      "learning_rate": 0.00018233273425349885,
      "loss": 0.3658,
      "step": 485
    },
    {
      "epoch": 0.2758232479594709,
      "grad_norm": 1.174371361732483,
      "learning_rate": 0.0001817708608035286,
      "loss": 0.6299,
      "step": 490
    },
    {
      "epoch": 0.2786377708978328,
      "grad_norm": 0.7687806487083435,
      "learning_rate": 0.00018120108636784034,
      "loss": 0.4896,
      "step": 495
    },
    {
      "epoch": 0.28145229383619474,
      "grad_norm": 1.2060645818710327,
      "learning_rate": 0.00018062346600002699,
      "loss": 0.6195,
      "step": 500
    },
    {
      "epoch": 0.28426681677455673,
      "grad_norm": 0.9059650897979736,
      "learning_rate": 0.0001800380555117827,
      "loss": 0.7028,
      "step": 505
    },
    {
      "epoch": 0.28708133971291866,
      "grad_norm": 1.1394221782684326,
      "learning_rate": 0.00017944491146751026,
      "loss": 0.5942,
      "step": 510
    },
    {
      "epoch": 0.2898958626512806,
      "grad_norm": 0.8699471354484558,
      "learning_rate": 0.0001788440911788556,
      "loss": 0.336,
      "step": 515
    },
    {
      "epoch": 0.2927103855896426,
      "grad_norm": 1.5057145357131958,
      "learning_rate": 0.0001782356526991702,
      "loss": 0.5421,
      "step": 520
    },
    {
      "epoch": 0.2955249085280045,
      "grad_norm": 1.7533260583877563,
      "learning_rate": 0.00017761965481790162,
      "loss": 0.5161,
      "step": 525
    },
    {
      "epoch": 0.29833943146636643,
      "grad_norm": 0.7779251933097839,
      "learning_rate": 0.00017699615705491325,
      "loss": 0.7673,
      "step": 530
    },
    {
      "epoch": 0.3011539544047284,
      "grad_norm": 1.0537793636322021,
      "learning_rate": 0.00017636521965473323,
      "loss": 0.5704,
      "step": 535
    },
    {
      "epoch": 0.30396847734309035,
      "grad_norm": 1.4870874881744385,
      "learning_rate": 0.00017572690358073326,
      "loss": 0.5846,
      "step": 540
    },
    {
      "epoch": 0.3067830002814523,
      "grad_norm": 0.5922363996505737,
      "learning_rate": 0.00017508127050923835,
      "loss": 0.7014,
      "step": 545
    },
    {
      "epoch": 0.30959752321981426,
      "grad_norm": 0.8268614411354065,
      "learning_rate": 0.00017442838282356727,
      "loss": 0.7332,
      "step": 550
    },
    {
      "epoch": 0.3124120461581762,
      "grad_norm": 0.6626672148704529,
      "learning_rate": 0.00017376830360800498,
      "loss": 0.6285,
      "step": 555
    },
    {
      "epoch": 0.3152265690965381,
      "grad_norm": 0.7634074091911316,
      "learning_rate": 0.00017310109664170703,
      "loss": 0.5794,
      "step": 560
    },
    {
      "epoch": 0.3180410920349001,
      "grad_norm": 0.6927458047866821,
      "learning_rate": 0.00017242682639253718,
      "loss": 0.3854,
      "step": 565
    },
    {
      "epoch": 0.32085561497326204,
      "grad_norm": 0.7036980390548706,
      "learning_rate": 0.00017174555801083814,
      "loss": 0.4077,
      "step": 570
    },
    {
      "epoch": 0.32367013791162397,
      "grad_norm": 0.5988078117370605,
      "learning_rate": 0.00017105735732313667,
      "loss": 0.4527,
      "step": 575
    },
    {
      "epoch": 0.32648466084998595,
      "grad_norm": 1.1813197135925293,
      "learning_rate": 0.00017036229082578307,
      "loss": 0.8273,
      "step": 580
    },
    {
      "epoch": 0.3292991837883479,
      "grad_norm": 0.7539934515953064,
      "learning_rate": 0.00016966042567852615,
      "loss": 0.6084,
      "step": 585
    },
    {
      "epoch": 0.3321137067267098,
      "grad_norm": 1.3654534816741943,
      "learning_rate": 0.00016895182969802386,
      "loss": 0.8049,
      "step": 590
    },
    {
      "epoch": 0.3349282296650718,
      "grad_norm": 1.3293468952178955,
      "learning_rate": 0.00016823657135129087,
      "loss": 0.5769,
      "step": 595
    },
    {
      "epoch": 0.3377427526034337,
      "grad_norm": 0.7219970226287842,
      "learning_rate": 0.00016751471974908288,
      "loss": 0.6883,
      "step": 600
    },
    {
      "epoch": 0.34055727554179566,
      "grad_norm": 0.3910175561904907,
      "learning_rate": 0.00016678634463921884,
      "loss": 0.4406,
      "step": 605
    },
    {
      "epoch": 0.3433717984801576,
      "grad_norm": 0.6810513138771057,
      "learning_rate": 0.00016605151639984187,
      "loss": 0.532,
      "step": 610
    },
    {
      "epoch": 0.34618632141851957,
      "grad_norm": 1.6218785047531128,
      "learning_rate": 0.00016531030603261884,
      "loss": 0.8189,
      "step": 615
    },
    {
      "epoch": 0.3490008443568815,
      "grad_norm": 1.138554334640503,
      "learning_rate": 0.00016456278515588024,
      "loss": 0.5942,
      "step": 620
    },
    {
      "epoch": 0.35181536729524343,
      "grad_norm": 0.9244788289070129,
      "learning_rate": 0.00016380902599769982,
      "loss": 0.7253,
      "step": 625
    },
    {
      "epoch": 0.3546298902336054,
      "grad_norm": 1.2546799182891846,
      "learning_rate": 0.00016304910138891597,
      "loss": 0.6356,
      "step": 630
    },
    {
      "epoch": 0.35744441317196735,
      "grad_norm": 0.9498732089996338,
      "learning_rate": 0.00016228308475609433,
      "loss": 0.6773,
      "step": 635
    },
    {
      "epoch": 0.3602589361103293,
      "grad_norm": 0.605629026889801,
      "learning_rate": 0.00016151105011443314,
      "loss": 0.5681,
      "step": 640
    },
    {
      "epoch": 0.36307345904869126,
      "grad_norm": 0.8649378418922424,
      "learning_rate": 0.00016073307206061177,
      "loss": 0.657,
      "step": 645
    },
    {
      "epoch": 0.3658879819870532,
      "grad_norm": 0.9192129969596863,
      "learning_rate": 0.00015994922576558263,
      "loss": 0.501,
      "step": 650
    },
    {
      "epoch": 0.3687025049254151,
      "grad_norm": 0.9517626166343689,
      "learning_rate": 0.00015915958696730814,
      "loss": 0.4914,
      "step": 655
    },
    {
      "epoch": 0.3715170278637771,
      "grad_norm": 0.7139463424682617,
      "learning_rate": 0.0001583642319634426,
      "loss": 0.6748,
      "step": 660
    },
    {
      "epoch": 0.37433155080213903,
      "grad_norm": 0.7017699480056763,
      "learning_rate": 0.00015756323760396002,
      "loss": 0.4724,
      "step": 665
    },
    {
      "epoch": 0.37714607374050096,
      "grad_norm": 0.69412761926651,
      "learning_rate": 0.00015675668128372854,
      "loss": 0.3082,
      "step": 670
    },
    {
      "epoch": 0.37996059667886295,
      "grad_norm": 0.9961652755737305,
      "learning_rate": 0.00015594464093503246,
      "loss": 0.5198,
      "step": 675
    },
    {
      "epoch": 0.3827751196172249,
      "grad_norm": 0.8897234797477722,
      "learning_rate": 0.00015512719502004197,
      "loss": 0.6707,
      "step": 680
    },
    {
      "epoch": 0.3855896425555868,
      "grad_norm": 1.5628478527069092,
      "learning_rate": 0.0001543044225232319,
      "loss": 0.7418,
      "step": 685
    },
    {
      "epoch": 0.3884041654939488,
      "grad_norm": 0.98553866147995,
      "learning_rate": 0.00015347640294375005,
      "loss": 0.6553,
      "step": 690
    },
    {
      "epoch": 0.3912186884323107,
      "grad_norm": 1.6320457458496094,
      "learning_rate": 0.0001526432162877356,
      "loss": 0.7794,
      "step": 695
    },
    {
      "epoch": 0.39403321137067265,
      "grad_norm": 0.7922711372375488,
      "learning_rate": 0.0001518049430605887,
      "loss": 0.4737,
      "step": 700
    },
    {
      "epoch": 0.39684773430903464,
      "grad_norm": 2.2716257572174072,
      "learning_rate": 0.00015096166425919175,
      "loss": 0.6215,
      "step": 705
    },
    {
      "epoch": 0.39966225724739657,
      "grad_norm": 0.6248972415924072,
      "learning_rate": 0.0001501134613640832,
      "loss": 0.431,
      "step": 710
    },
    {
      "epoch": 0.4024767801857585,
      "grad_norm": 0.7897845506668091,
      "learning_rate": 0.00014926041633158454,
      "loss": 0.6622,
      "step": 715
    },
    {
      "epoch": 0.4052913031241205,
      "grad_norm": 0.35586240887641907,
      "learning_rate": 0.0001484026115858815,
      "loss": 0.2137,
      "step": 720
    },
    {
      "epoch": 0.4081058260624824,
      "grad_norm": 1.1606733798980713,
      "learning_rate": 0.00014754013001105998,
      "loss": 0.5948,
      "step": 725
    },
    {
      "epoch": 0.41092034900084434,
      "grad_norm": 1.2706083059310913,
      "learning_rate": 0.00014667305494309727,
      "loss": 0.9048,
      "step": 730
    },
    {
      "epoch": 0.41373487193920633,
      "grad_norm": 0.4990655779838562,
      "learning_rate": 0.00014580147016181005,
      "loss": 0.6926,
      "step": 735
    },
    {
      "epoch": 0.41654939487756826,
      "grad_norm": 0.9936563968658447,
      "learning_rate": 0.00014492545988275933,
      "loss": 0.8315,
      "step": 740
    },
    {
      "epoch": 0.4193639178159302,
      "grad_norm": 0.9066222906112671,
      "learning_rate": 0.0001440451087491129,
      "loss": 0.4171,
      "step": 745
    },
    {
      "epoch": 0.4221784407542922,
      "grad_norm": 1.0959652662277222,
      "learning_rate": 0.00014316050182346733,
      "loss": 0.586,
      "step": 750
    },
    {
      "epoch": 0.4249929636926541,
      "grad_norm": 0.726209819316864,
      "learning_rate": 0.0001422717245796285,
      "loss": 0.485,
      "step": 755
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 1.6239256858825684,
      "learning_rate": 0.00014137886289435295,
      "loss": 0.4708,
      "step": 760
    },
    {
      "epoch": 0.430622009569378,
      "grad_norm": 1.3807324171066284,
      "learning_rate": 0.00014048200303905034,
      "loss": 0.4916,
      "step": 765
    },
    {
      "epoch": 0.43343653250773995,
      "grad_norm": 0.9176377654075623,
      "learning_rate": 0.00013958123167144733,
      "loss": 0.5487,
      "step": 770
    },
    {
      "epoch": 0.4362510554461019,
      "grad_norm": 1.0780225992202759,
      "learning_rate": 0.0001386766358272146,
      "loss": 0.6175,
      "step": 775
    },
    {
      "epoch": 0.4390655783844638,
      "grad_norm": 2.6700491905212402,
      "learning_rate": 0.00013776830291155703,
      "loss": 0.7033,
      "step": 780
    },
    {
      "epoch": 0.4418801013228258,
      "grad_norm": 0.9618711471557617,
      "learning_rate": 0.00013685632069076846,
      "loss": 0.6698,
      "step": 785
    },
    {
      "epoch": 0.4446946242611877,
      "grad_norm": 1.3910859823226929,
      "learning_rate": 0.00013594077728375128,
      "loss": 0.8375,
      "step": 790
    },
    {
      "epoch": 0.44750914719954965,
      "grad_norm": 0.755133867263794,
      "learning_rate": 0.00013502176115350213,
      "loss": 0.3719,
      "step": 795
    },
    {
      "epoch": 0.45032367013791164,
      "grad_norm": 0.5240243077278137,
      "learning_rate": 0.00013409936109856424,
      "loss": 0.3539,
      "step": 800
    },
    {
      "epoch": 0.45313819307627357,
      "grad_norm": 1.1372967958450317,
      "learning_rate": 0.00013317366624444744,
      "loss": 0.7051,
      "step": 805
    },
    {
      "epoch": 0.4559527160146355,
      "grad_norm": 1.4525517225265503,
      "learning_rate": 0.00013224476603501662,
      "loss": 0.6504,
      "step": 810
    },
    {
      "epoch": 0.4587672389529975,
      "grad_norm": 0.658799946308136,
      "learning_rate": 0.00013131275022384918,
      "loss": 0.4961,
      "step": 815
    },
    {
      "epoch": 0.4615817618913594,
      "grad_norm": 0.8785876631736755,
      "learning_rate": 0.00013037770886556294,
      "loss": 0.4614,
      "step": 820
    },
    {
      "epoch": 0.46439628482972134,
      "grad_norm": 1.465197205543518,
      "learning_rate": 0.0001294397323071145,
      "loss": 0.4032,
      "step": 825
    },
    {
      "epoch": 0.4672108077680833,
      "grad_norm": 1.070615291595459,
      "learning_rate": 0.00012849891117906978,
      "loss": 0.9079,
      "step": 830
    },
    {
      "epoch": 0.47002533070644525,
      "grad_norm": 0.5417959690093994,
      "learning_rate": 0.00012755533638684704,
      "loss": 0.33,
      "step": 835
    },
    {
      "epoch": 0.4728398536448072,
      "grad_norm": 0.9827715754508972,
      "learning_rate": 0.00012660909910193303,
      "loss": 0.5105,
      "step": 840
    },
    {
      "epoch": 0.47565437658316917,
      "grad_norm": 0.5091266632080078,
      "learning_rate": 0.0001256602907530739,
      "loss": 0.2981,
      "step": 845
    },
    {
      "epoch": 0.4784688995215311,
      "grad_norm": 0.7054405212402344,
      "learning_rate": 0.000124709003017441,
      "loss": 0.3981,
      "step": 850
    },
    {
      "epoch": 0.48128342245989303,
      "grad_norm": 0.7525174617767334,
      "learning_rate": 0.00012375532781177257,
      "loss": 0.6399,
      "step": 855
    },
    {
      "epoch": 0.484097945398255,
      "grad_norm": 1.1248599290847778,
      "learning_rate": 0.0001227993572834926,
      "loss": 0.624,
      "step": 860
    },
    {
      "epoch": 0.48691246833661694,
      "grad_norm": 0.6768050193786621,
      "learning_rate": 0.00012184118380180716,
      "loss": 0.6998,
      "step": 865
    },
    {
      "epoch": 0.4897269912749789,
      "grad_norm": 0.8580127954483032,
      "learning_rate": 0.0001208808999487793,
      "loss": 0.6036,
      "step": 870
    },
    {
      "epoch": 0.49254151421334086,
      "grad_norm": 1.0916856527328491,
      "learning_rate": 0.0001199185985103836,
      "loss": 0.6118,
      "step": 875
    },
    {
      "epoch": 0.4953560371517028,
      "grad_norm": 0.9223160147666931,
      "learning_rate": 0.00011895437246754074,
      "loss": 0.5047,
      "step": 880
    },
    {
      "epoch": 0.4981705600900647,
      "grad_norm": 2.015151262283325,
      "learning_rate": 0.00011798831498713334,
      "loss": 0.4731,
      "step": 885
    },
    {
      "epoch": 0.5009850830284267,
      "grad_norm": 1.1331337690353394,
      "learning_rate": 0.00011702051941300396,
      "loss": 0.5136,
      "step": 890
    },
    {
      "epoch": 0.5037996059667886,
      "grad_norm": 1.0576162338256836,
      "learning_rate": 0.00011605107925693582,
      "loss": 0.4992,
      "step": 895
    },
    {
      "epoch": 0.5066141289051506,
      "grad_norm": 0.8101834654808044,
      "learning_rate": 0.00011508008818961731,
      "loss": 0.4718,
      "step": 900
    },
    {
      "epoch": 0.5094286518435125,
      "grad_norm": 0.922261655330658,
      "learning_rate": 0.00011410764003159147,
      "loss": 0.3622,
      "step": 905
    },
    {
      "epoch": 0.5122431747818744,
      "grad_norm": 0.6725795865058899,
      "learning_rate": 0.00011313382874419031,
      "loss": 0.4803,
      "step": 910
    },
    {
      "epoch": 0.5150576977202365,
      "grad_norm": 1.7679307460784912,
      "learning_rate": 0.00011215874842045631,
      "loss": 0.4316,
      "step": 915
    },
    {
      "epoch": 0.5178722206585984,
      "grad_norm": 0.7769971489906311,
      "learning_rate": 0.00011118249327605055,
      "loss": 0.3043,
      "step": 920
    },
    {
      "epoch": 0.5206867435969603,
      "grad_norm": 1.3879740238189697,
      "learning_rate": 0.00011020515764014942,
      "loss": 0.4857,
      "step": 925
    },
    {
      "epoch": 0.5235012665353223,
      "grad_norm": 0.9753875136375427,
      "learning_rate": 0.00010922683594633021,
      "loss": 0.5712,
      "step": 930
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 1.2720959186553955,
      "learning_rate": 0.00010824762272344651,
      "loss": 0.509,
      "step": 935
    },
    {
      "epoch": 0.5291303124120461,
      "grad_norm": 0.8127233386039734,
      "learning_rate": 0.00010726761258649461,
      "loss": 0.57,
      "step": 940
    },
    {
      "epoch": 0.5319448353504082,
      "grad_norm": 1.2257153987884521,
      "learning_rate": 0.00010628690022747132,
      "loss": 0.5516,
      "step": 945
    },
    {
      "epoch": 0.5347593582887701,
      "grad_norm": 0.7144739627838135,
      "learning_rate": 0.00010530558040622472,
      "loss": 0.6066,
      "step": 950
    },
    {
      "epoch": 0.537573881227132,
      "grad_norm": 0.7518823146820068,
      "learning_rate": 0.00010432374794129791,
      "loss": 0.6092,
      "step": 955
    },
    {
      "epoch": 0.5403884041654939,
      "grad_norm": 0.9559532403945923,
      "learning_rate": 0.00010334149770076747,
      "loss": 0.5019,
      "step": 960
    },
    {
      "epoch": 0.5432029271038559,
      "grad_norm": 1.0719863176345825,
      "learning_rate": 0.00010235892459307688,
      "loss": 0.4403,
      "step": 965
    },
    {
      "epoch": 0.5460174500422178,
      "grad_norm": 0.7807120084762573,
      "learning_rate": 0.00010137612355786618,
      "loss": 0.5249,
      "step": 970
    },
    {
      "epoch": 0.5488319729805798,
      "grad_norm": 1.4584418535232544,
      "learning_rate": 0.00010039318955679857,
      "loss": 0.475,
      "step": 975
    },
    {
      "epoch": 0.5516464959189418,
      "grad_norm": 0.7352472543716431,
      "learning_rate": 9.941021756438488e-05,
      "loss": 0.64,
      "step": 980
    },
    {
      "epoch": 0.5544610188573037,
      "grad_norm": 1.0791490077972412,
      "learning_rate": 9.842730255880678e-05,
      "loss": 0.5755,
      "step": 985
    },
    {
      "epoch": 0.5572755417956656,
      "grad_norm": 0.9303123354911804,
      "learning_rate": 9.744453951273968e-05,
      "loss": 0.6485,
      "step": 990
    },
    {
      "epoch": 0.5600900647340276,
      "grad_norm": 0.6764497756958008,
      "learning_rate": 9.646202338417613e-05,
      "loss": 0.4572,
      "step": 995
    },
    {
      "epoch": 0.5629045876723895,
      "grad_norm": 0.9425994157791138,
      "learning_rate": 9.547984910725064e-05,
      "loss": 0.733,
      "step": 1000
    },
    {
      "epoch": 0.5657191106107515,
      "grad_norm": 0.9432514905929565,
      "learning_rate": 9.449811158306684e-05,
      "loss": 0.4557,
      "step": 1005
    },
    {
      "epoch": 0.5685336335491135,
      "grad_norm": 0.999247133731842,
      "learning_rate": 9.35169056705278e-05,
      "loss": 0.6724,
      "step": 1010
    },
    {
      "epoch": 0.5713481564874754,
      "grad_norm": 1.0435142517089844,
      "learning_rate": 9.253632617717038e-05,
      "loss": 0.5394,
      "step": 1015
    },
    {
      "epoch": 0.5741626794258373,
      "grad_norm": 1.8879817724227905,
      "learning_rate": 9.155646785000467e-05,
      "loss": 0.5066,
      "step": 1020
    },
    {
      "epoch": 0.5769772023641992,
      "grad_norm": 0.7503070831298828,
      "learning_rate": 9.057742536635913e-05,
      "loss": 0.6197,
      "step": 1025
    },
    {
      "epoch": 0.5797917253025612,
      "grad_norm": 0.9366083741188049,
      "learning_rate": 8.959929332473262e-05,
      "loss": 0.5373,
      "step": 1030
    },
    {
      "epoch": 0.5826062482409231,
      "grad_norm": 0.5995301008224487,
      "learning_rate": 8.86221662356539e-05,
      "loss": 0.4054,
      "step": 1035
    },
    {
      "epoch": 0.5854207711792851,
      "grad_norm": 0.5817670226097107,
      "learning_rate": 8.764613851254968e-05,
      "loss": 0.5262,
      "step": 1040
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 0.5300823450088501,
      "learning_rate": 8.667130446262214e-05,
      "loss": 0.678,
      "step": 1045
    },
    {
      "epoch": 0.591049817056009,
      "grad_norm": 0.9397202134132385,
      "learning_rate": 8.569775827773656e-05,
      "loss": 0.6118,
      "step": 1050
    },
    {
      "epoch": 0.5938643399943709,
      "grad_norm": 0.8151072859764099,
      "learning_rate": 8.472559402532021e-05,
      "loss": 0.5034,
      "step": 1055
    },
    {
      "epoch": 0.5966788629327329,
      "grad_norm": 0.7164279818534851,
      "learning_rate": 8.375490563927328e-05,
      "loss": 0.2909,
      "step": 1060
    },
    {
      "epoch": 0.5994933858710948,
      "grad_norm": 0.7771260738372803,
      "learning_rate": 8.278578691089249e-05,
      "loss": 0.5102,
      "step": 1065
    },
    {
      "epoch": 0.6023079088094568,
      "grad_norm": 0.5859299302101135,
      "learning_rate": 8.181833147980894e-05,
      "loss": 0.3361,
      "step": 1070
    },
    {
      "epoch": 0.6051224317478188,
      "grad_norm": 1.3162269592285156,
      "learning_rate": 8.085263282493998e-05,
      "loss": 0.5598,
      "step": 1075
    },
    {
      "epoch": 0.6079369546861807,
      "grad_norm": 1.9372239112854004,
      "learning_rate": 7.98887842554572e-05,
      "loss": 0.5755,
      "step": 1080
    },
    {
      "epoch": 0.6107514776245426,
      "grad_norm": 1.0530599355697632,
      "learning_rate": 7.892687890177044e-05,
      "loss": 0.6622,
      "step": 1085
    },
    {
      "epoch": 0.6135660005629046,
      "grad_norm": 0.8324519991874695,
      "learning_rate": 7.796700970652932e-05,
      "loss": 0.4768,
      "step": 1090
    },
    {
      "epoch": 0.6163805235012665,
      "grad_norm": 1.280251145362854,
      "learning_rate": 7.700926941564262e-05,
      "loss": 0.3465,
      "step": 1095
    },
    {
      "epoch": 0.6191950464396285,
      "grad_norm": 1.4136282205581665,
      "learning_rate": 7.605375056931712e-05,
      "loss": 0.4388,
      "step": 1100
    },
    {
      "epoch": 0.6220095693779905,
      "grad_norm": 0.9467165470123291,
      "learning_rate": 7.510054549311573e-05,
      "loss": 0.4631,
      "step": 1105
    },
    {
      "epoch": 0.6248240923163524,
      "grad_norm": 0.4942507743835449,
      "learning_rate": 7.41497462890369e-05,
      "loss": 0.5125,
      "step": 1110
    },
    {
      "epoch": 0.6276386152547143,
      "grad_norm": 0.5631272196769714,
      "learning_rate": 7.320144482661533e-05,
      "loss": 0.5205,
      "step": 1115
    },
    {
      "epoch": 0.6304531381930762,
      "grad_norm": 1.3057935237884521,
      "learning_rate": 7.225573273404513e-05,
      "loss": 0.7294,
      "step": 1120
    },
    {
      "epoch": 0.6332676611314382,
      "grad_norm": 0.9623622298240662,
      "learning_rate": 7.131270138932655e-05,
      "loss": 0.5844,
      "step": 1125
    },
    {
      "epoch": 0.6360821840698002,
      "grad_norm": 1.0017409324645996,
      "learning_rate": 7.037244191143661e-05,
      "loss": 0.3777,
      "step": 1130
    },
    {
      "epoch": 0.6388967070081621,
      "grad_norm": 0.9390413165092468,
      "learning_rate": 6.943504515152491e-05,
      "loss": 0.6049,
      "step": 1135
    },
    {
      "epoch": 0.6417112299465241,
      "grad_norm": 0.5731076598167419,
      "learning_rate": 6.850060168413518e-05,
      "loss": 0.4316,
      "step": 1140
    },
    {
      "epoch": 0.644525752884886,
      "grad_norm": 0.42738592624664307,
      "learning_rate": 6.756920179845383e-05,
      "loss": 0.4463,
      "step": 1145
    },
    {
      "epoch": 0.6473402758232479,
      "grad_norm": 1.3607089519500732,
      "learning_rate": 6.66409354895857e-05,
      "loss": 0.5199,
      "step": 1150
    },
    {
      "epoch": 0.6501547987616099,
      "grad_norm": 0.9413872361183167,
      "learning_rate": 6.57158924498586e-05,
      "loss": 0.37,
      "step": 1155
    },
    {
      "epoch": 0.6529693216999719,
      "grad_norm": 0.8918866515159607,
      "learning_rate": 6.479416206015679e-05,
      "loss": 0.595,
      "step": 1160
    },
    {
      "epoch": 0.6557838446383338,
      "grad_norm": 1.254056453704834,
      "learning_rate": 6.387583338128471e-05,
      "loss": 0.7506,
      "step": 1165
    },
    {
      "epoch": 0.6585983675766958,
      "grad_norm": 0.7493451833724976,
      "learning_rate": 6.296099514536167e-05,
      "loss": 0.4956,
      "step": 1170
    },
    {
      "epoch": 0.6614128905150577,
      "grad_norm": 0.6410611271858215,
      "learning_rate": 6.20497357472482e-05,
      "loss": 0.5979,
      "step": 1175
    },
    {
      "epoch": 0.6642274134534196,
      "grad_norm": 0.6682597994804382,
      "learning_rate": 6.114214323600504e-05,
      "loss": 0.4579,
      "step": 1180
    },
    {
      "epoch": 0.6670419363917816,
      "grad_norm": 1.117984652519226,
      "learning_rate": 6.023830530638559e-05,
      "loss": 0.4828,
      "step": 1185
    },
    {
      "epoch": 0.6698564593301436,
      "grad_norm": 0.8667401075363159,
      "learning_rate": 5.9338309290362324e-05,
      "loss": 0.6108,
      "step": 1190
    },
    {
      "epoch": 0.6726709822685055,
      "grad_norm": 0.5072731971740723,
      "learning_rate": 5.844224214868881e-05,
      "loss": 0.4543,
      "step": 1195
    },
    {
      "epoch": 0.6754855052068675,
      "grad_norm": 1.5477889776229858,
      "learning_rate": 5.7550190462496946e-05,
      "loss": 0.4118,
      "step": 1200
    },
    {
      "epoch": 0.6783000281452294,
      "grad_norm": 1.0128791332244873,
      "learning_rate": 5.66622404249314e-05,
      "loss": 0.5121,
      "step": 1205
    },
    {
      "epoch": 0.6811145510835913,
      "grad_norm": 0.8333756327629089,
      "learning_rate": 5.577847783282122e-05,
      "loss": 0.3744,
      "step": 1210
    },
    {
      "epoch": 0.6839290740219532,
      "grad_norm": 0.6459574103355408,
      "learning_rate": 5.48989880783898e-05,
      "loss": 0.5707,
      "step": 1215
    },
    {
      "epoch": 0.6867435969603152,
      "grad_norm": 1.1848655939102173,
      "learning_rate": 5.4023856141004236e-05,
      "loss": 0.6197,
      "step": 1220
    },
    {
      "epoch": 0.6895581198986772,
      "grad_norm": 1.0240199565887451,
      "learning_rate": 5.3153166578963965e-05,
      "loss": 0.544,
      "step": 1225
    },
    {
      "epoch": 0.6923726428370391,
      "grad_norm": 0.6922760605812073,
      "learning_rate": 5.228700352133071e-05,
      "loss": 0.4769,
      "step": 1230
    },
    {
      "epoch": 0.6951871657754011,
      "grad_norm": 0.7377022504806519,
      "learning_rate": 5.142545065979955e-05,
      "loss": 0.5942,
      "step": 1235
    },
    {
      "epoch": 0.698001688713763,
      "grad_norm": 0.8629385828971863,
      "learning_rate": 5.05685912406123e-05,
      "loss": 0.7816,
      "step": 1240
    },
    {
      "epoch": 0.7008162116521249,
      "grad_norm": 0.9569929242134094,
      "learning_rate": 4.971650805651406e-05,
      "loss": 0.6106,
      "step": 1245
    },
    {
      "epoch": 0.7036307345904869,
      "grad_norm": 0.49353519082069397,
      "learning_rate": 4.886928343875341e-05,
      "loss": 0.4153,
      "step": 1250
    },
    {
      "epoch": 0.7064452575288489,
      "grad_norm": 0.9434248805046082,
      "learning_rate": 4.8026999249127315e-05,
      "loss": 0.4643,
      "step": 1255
    },
    {
      "epoch": 0.7092597804672108,
      "grad_norm": 0.5096941590309143,
      "learning_rate": 4.71897368720714e-05,
      "loss": 0.9337,
      "step": 1260
    },
    {
      "epoch": 0.7120743034055728,
      "grad_norm": 0.9566155672073364,
      "learning_rate": 4.6357577206796096e-05,
      "loss": 0.7062,
      "step": 1265
    },
    {
      "epoch": 0.7148888263439347,
      "grad_norm": 0.6990448832511902,
      "learning_rate": 4.553060065947013e-05,
      "loss": 0.4033,
      "step": 1270
    },
    {
      "epoch": 0.7177033492822966,
      "grad_norm": 0.9153978228569031,
      "learning_rate": 4.4708887135451396e-05,
      "loss": 0.5575,
      "step": 1275
    },
    {
      "epoch": 0.7205178722206586,
      "grad_norm": 1.2567604780197144,
      "learning_rate": 4.3892516031565954e-05,
      "loss": 0.45,
      "step": 1280
    },
    {
      "epoch": 0.7233323951590206,
      "grad_norm": 1.2241476774215698,
      "learning_rate": 4.3081566228436686e-05,
      "loss": 0.7022,
      "step": 1285
    },
    {
      "epoch": 0.7261469180973825,
      "grad_norm": 1.0247058868408203,
      "learning_rate": 4.227611608286147e-05,
      "loss": 0.5987,
      "step": 1290
    },
    {
      "epoch": 0.7289614410357445,
      "grad_norm": 0.9229673147201538,
      "learning_rate": 4.147624342024209e-05,
      "loss": 0.5189,
      "step": 1295
    },
    {
      "epoch": 0.7317759639741064,
      "grad_norm": 0.6648756861686707,
      "learning_rate": 4.0682025527064486e-05,
      "loss": 0.4758,
      "step": 1300
    },
    {
      "epoch": 0.7345904869124683,
      "grad_norm": 0.40781381726264954,
      "learning_rate": 3.9893539143431044e-05,
      "loss": 0.451,
      "step": 1305
    },
    {
      "epoch": 0.7374050098508302,
      "grad_norm": 0.7853880524635315,
      "learning_rate": 3.911086045564575e-05,
      "loss": 0.6077,
      "step": 1310
    },
    {
      "epoch": 0.7402195327891923,
      "grad_norm": 0.7318276166915894,
      "learning_rate": 3.83340650888527e-05,
      "loss": 0.4472,
      "step": 1315
    },
    {
      "epoch": 0.7430340557275542,
      "grad_norm": 1.5267870426177979,
      "learning_rate": 3.756322809972905e-05,
      "loss": 0.8982,
      "step": 1320
    },
    {
      "epoch": 0.7458485786659161,
      "grad_norm": 0.5616324543952942,
      "learning_rate": 3.679842396923271e-05,
      "loss": 0.5635,
      "step": 1325
    },
    {
      "epoch": 0.7486631016042781,
      "grad_norm": 0.7878595590591431,
      "learning_rate": 3.6039726595405755e-05,
      "loss": 0.5973,
      "step": 1330
    },
    {
      "epoch": 0.75147762454264,
      "grad_norm": 0.3910659849643707,
      "learning_rate": 3.528720928623414e-05,
      "loss": 0.462,
      "step": 1335
    },
    {
      "epoch": 0.7542921474810019,
      "grad_norm": 0.7616758346557617,
      "learning_rate": 3.4540944752564406e-05,
      "loss": 0.4222,
      "step": 1340
    },
    {
      "epoch": 0.757106670419364,
      "grad_norm": 1.3086707592010498,
      "learning_rate": 3.380100510107814e-05,
      "loss": 0.6267,
      "step": 1345
    },
    {
      "epoch": 0.7599211933577259,
      "grad_norm": 0.7841249108314514,
      "learning_rate": 3.3067461827324755e-05,
      "loss": 0.4136,
      "step": 1350
    },
    {
      "epoch": 0.7627357162960878,
      "grad_norm": 0.47329601645469666,
      "learning_rate": 3.2340385808813315e-05,
      "loss": 0.485,
      "step": 1355
    },
    {
      "epoch": 0.7655502392344498,
      "grad_norm": 0.719576895236969,
      "learning_rate": 3.161984729816415e-05,
      "loss": 0.4311,
      "step": 1360
    },
    {
      "epoch": 0.7683647621728117,
      "grad_norm": 1.036814570426941,
      "learning_rate": 3.090591591632082e-05,
      "loss": 0.4341,
      "step": 1365
    },
    {
      "epoch": 0.7711792851111736,
      "grad_norm": 1.327804684638977,
      "learning_rate": 3.0198660645822985e-05,
      "loss": 0.4251,
      "step": 1370
    },
    {
      "epoch": 0.7739938080495357,
      "grad_norm": 0.880226731300354,
      "learning_rate": 2.9498149824141196e-05,
      "loss": 0.3617,
      "step": 1375
    },
    {
      "epoch": 0.7768083309878976,
      "grad_norm": 0.7395775318145752,
      "learning_rate": 2.880445113707384e-05,
      "loss": 0.6637,
      "step": 1380
    },
    {
      "epoch": 0.7796228539262595,
      "grad_norm": 1.0488626956939697,
      "learning_rate": 2.8117631612207084e-05,
      "loss": 0.4081,
      "step": 1385
    },
    {
      "epoch": 0.7824373768646214,
      "grad_norm": 0.8878235220909119,
      "learning_rate": 2.743775761243843e-05,
      "loss": 0.5031,
      "step": 1390
    },
    {
      "epoch": 0.7852518998029834,
      "grad_norm": 0.6816940903663635,
      "learning_rate": 2.6764894829564613e-05,
      "loss": 0.4063,
      "step": 1395
    },
    {
      "epoch": 0.7880664227413453,
      "grad_norm": 0.4741825759410858,
      "learning_rate": 2.6099108277934103e-05,
      "loss": 0.3073,
      "step": 1400
    },
    {
      "epoch": 0.7908809456797072,
      "grad_norm": 0.8777612447738647,
      "learning_rate": 2.5440462288165146e-05,
      "loss": 0.6016,
      "step": 1405
    },
    {
      "epoch": 0.7936954686180693,
      "grad_norm": 0.6074641346931458,
      "learning_rate": 2.4789020500930095e-05,
      "loss": 0.5282,
      "step": 1410
    },
    {
      "epoch": 0.7965099915564312,
      "grad_norm": 0.8305478692054749,
      "learning_rate": 2.414484586080612e-05,
      "loss": 0.6746,
      "step": 1415
    },
    {
      "epoch": 0.7993245144947931,
      "grad_norm": 1.8334358930587769,
      "learning_rate": 2.3508000610193258e-05,
      "loss": 0.3076,
      "step": 1420
    },
    {
      "epoch": 0.8021390374331551,
      "grad_norm": 0.8415083885192871,
      "learning_rate": 2.287854628330043e-05,
      "loss": 0.4482,
      "step": 1425
    },
    {
      "epoch": 0.804953560371517,
      "grad_norm": 1.1256593465805054,
      "learning_rate": 2.2256543700199685e-05,
      "loss": 0.4828,
      "step": 1430
    },
    {
      "epoch": 0.8077680833098789,
      "grad_norm": 0.6625217795372009,
      "learning_rate": 2.164205296094961e-05,
      "loss": 0.2526,
      "step": 1435
    },
    {
      "epoch": 0.810582606248241,
      "grad_norm": 1.2664328813552856,
      "learning_rate": 2.1035133439788236e-05,
      "loss": 0.5837,
      "step": 1440
    },
    {
      "epoch": 0.8133971291866029,
      "grad_norm": 0.6024655699729919,
      "learning_rate": 2.0435843779396156e-05,
      "loss": 0.5919,
      "step": 1445
    },
    {
      "epoch": 0.8162116521249648,
      "grad_norm": 0.660294234752655,
      "learning_rate": 1.9844241885230163e-05,
      "loss": 0.4167,
      "step": 1450
    },
    {
      "epoch": 0.8190261750633268,
      "grad_norm": 0.7786351442337036,
      "learning_rate": 1.9260384919928266e-05,
      "loss": 0.5695,
      "step": 1455
    },
    {
      "epoch": 0.8218406980016887,
      "grad_norm": 0.7518659830093384,
      "learning_rate": 1.8684329297786453e-05,
      "loss": 0.7747,
      "step": 1460
    },
    {
      "epoch": 0.8246552209400506,
      "grad_norm": 0.7769446969032288,
      "learning_rate": 1.8116130679307708e-05,
      "loss": 0.6442,
      "step": 1465
    },
    {
      "epoch": 0.8274697438784127,
      "grad_norm": 0.49770888686180115,
      "learning_rate": 1.7555843965823992e-05,
      "loss": 0.4515,
      "step": 1470
    },
    {
      "epoch": 0.8302842668167746,
      "grad_norm": 0.9085186719894409,
      "learning_rate": 1.7003523294191294e-05,
      "loss": 0.419,
      "step": 1475
    },
    {
      "epoch": 0.8330987897551365,
      "grad_norm": 1.536907434463501,
      "learning_rate": 1.6459222031558974e-05,
      "loss": 0.5198,
      "step": 1480
    },
    {
      "epoch": 0.8359133126934984,
      "grad_norm": 0.6355032920837402,
      "learning_rate": 1.5922992770213064e-05,
      "loss": 0.524,
      "step": 1485
    },
    {
      "epoch": 0.8387278356318604,
      "grad_norm": 0.7401185631752014,
      "learning_rate": 1.5394887322494732e-05,
      "loss": 0.362,
      "step": 1490
    },
    {
      "epoch": 0.8415423585702223,
      "grad_norm": 0.7638711333274841,
      "learning_rate": 1.4874956715793886e-05,
      "loss": 0.363,
      "step": 1495
    },
    {
      "epoch": 0.8443568815085843,
      "grad_norm": 1.4240124225616455,
      "learning_rate": 1.4363251187618854e-05,
      "loss": 0.7827,
      "step": 1500
    },
    {
      "epoch": 0.8471714044469463,
      "grad_norm": 0.8892093896865845,
      "learning_rate": 1.3859820180742156e-05,
      "loss": 0.3835,
      "step": 1505
    },
    {
      "epoch": 0.8499859273853082,
      "grad_norm": 0.8796232342720032,
      "learning_rate": 1.3364712338423214e-05,
      "loss": 0.6749,
      "step": 1510
    },
    {
      "epoch": 0.8528004503236701,
      "grad_norm": 0.7944719195365906,
      "learning_rate": 1.287797549970826e-05,
      "loss": 0.5354,
      "step": 1515
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 0.6994941830635071,
      "learning_rate": 1.2399656694807971e-05,
      "loss": 0.5961,
      "step": 1520
    },
    {
      "epoch": 0.858429496200394,
      "grad_norm": 0.8955370187759399,
      "learning_rate": 1.1929802140553258e-05,
      "loss": 0.2639,
      "step": 1525
    },
    {
      "epoch": 0.861244019138756,
      "grad_norm": 0.9208924174308777,
      "learning_rate": 1.1468457235929597e-05,
      "loss": 0.5161,
      "step": 1530
    },
    {
      "epoch": 0.864058542077118,
      "grad_norm": 1.6513868570327759,
      "learning_rate": 1.1015666557690452e-05,
      "loss": 0.3097,
      "step": 1535
    },
    {
      "epoch": 0.8668730650154799,
      "grad_norm": 0.9617025256156921,
      "learning_rate": 1.0571473856050107e-05,
      "loss": 0.5886,
      "step": 1540
    },
    {
      "epoch": 0.8696875879538418,
      "grad_norm": 1.428849458694458,
      "learning_rate": 1.0135922050456347e-05,
      "loss": 0.5518,
      "step": 1545
    },
    {
      "epoch": 0.8725021108922038,
      "grad_norm": 0.9242206811904907,
      "learning_rate": 9.709053225443487e-06,
      "loss": 0.6922,
      "step": 1550
    },
    {
      "epoch": 0.8753166338305657,
      "grad_norm": 0.9717757701873779,
      "learning_rate": 9.29090862656593e-06,
      "loss": 0.3653,
      "step": 1555
    },
    {
      "epoch": 0.8781311567689276,
      "grad_norm": 0.48359620571136475,
      "learning_rate": 8.881528656412963e-06,
      "loss": 0.2516,
      "step": 1560
    },
    {
      "epoch": 0.8809456797072897,
      "grad_norm": 1.1804348230361938,
      "learning_rate": 8.480952870704873e-06,
      "loss": 0.3171,
      "step": 1565
    },
    {
      "epoch": 0.8837602026456516,
      "grad_norm": 0.8391767740249634,
      "learning_rate": 8.08921997447094e-06,
      "loss": 0.4603,
      "step": 1570
    },
    {
      "epoch": 0.8865747255840135,
      "grad_norm": 0.37848615646362305,
      "learning_rate": 7.706367818309624e-06,
      "loss": 0.3601,
      "step": 1575
    },
    {
      "epoch": 0.8893892485223754,
      "grad_norm": 0.328762412071228,
      "learning_rate": 7.332433394731331e-06,
      "loss": 0.4663,
      "step": 1580
    },
    {
      "epoch": 0.8922037714607374,
      "grad_norm": 1.1124379634857178,
      "learning_rate": 6.967452834584009e-06,
      "loss": 0.7712,
      "step": 1585
    },
    {
      "epoch": 0.8950182943990993,
      "grad_norm": 0.946675181388855,
      "learning_rate": 6.611461403562147e-06,
      "loss": 0.5769,
      "step": 1590
    },
    {
      "epoch": 0.8978328173374613,
      "grad_norm": 1.3897684812545776,
      "learning_rate": 6.264493498799185e-06,
      "loss": 0.4381,
      "step": 1595
    },
    {
      "epoch": 0.9006473402758233,
      "grad_norm": 0.5598315596580505,
      "learning_rate": 5.92658264554401e-06,
      "loss": 0.4289,
      "step": 1600
    },
    {
      "epoch": 0.9034618632141852,
      "grad_norm": 0.6429581046104431,
      "learning_rate": 5.597761493921627e-06,
      "loss": 0.7875,
      "step": 1605
    },
    {
      "epoch": 0.9062763861525471,
      "grad_norm": 0.8496813178062439,
      "learning_rate": 5.278061815778313e-06,
      "loss": 0.4269,
      "step": 1610
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.9900699257850647,
      "learning_rate": 4.967514501611881e-06,
      "loss": 0.2818,
      "step": 1615
    },
    {
      "epoch": 0.911905432029271,
      "grad_norm": 0.8361634016036987,
      "learning_rate": 4.666149557586697e-06,
      "loss": 0.4749,
      "step": 1620
    },
    {
      "epoch": 0.914719954967633,
      "grad_norm": 0.5384243130683899,
      "learning_rate": 4.3739961026345586e-06,
      "loss": 0.5043,
      "step": 1625
    },
    {
      "epoch": 0.917534477905995,
      "grad_norm": 0.8283894658088684,
      "learning_rate": 4.091082365641085e-06,
      "loss": 0.7093,
      "step": 1630
    },
    {
      "epoch": 0.9203490008443569,
      "grad_norm": 0.5737767219543457,
      "learning_rate": 3.817435682718096e-06,
      "loss": 0.4823,
      "step": 1635
    },
    {
      "epoch": 0.9231635237827188,
      "grad_norm": 0.4942363500595093,
      "learning_rate": 3.5530824945623542e-06,
      "loss": 0.6501,
      "step": 1640
    },
    {
      "epoch": 0.9259780467210807,
      "grad_norm": 0.9362422823905945,
      "learning_rate": 3.298048343900717e-06,
      "loss": 0.2883,
      "step": 1645
    },
    {
      "epoch": 0.9287925696594427,
      "grad_norm": 0.9241194725036621,
      "learning_rate": 3.0523578730221713e-06,
      "loss": 0.6112,
      "step": 1650
    },
    {
      "epoch": 0.9316070925978047,
      "grad_norm": 1.3847359418869019,
      "learning_rate": 2.8160348213967848e-06,
      "loss": 0.5209,
      "step": 1655
    },
    {
      "epoch": 0.9344216155361666,
      "grad_norm": 1.0031473636627197,
      "learning_rate": 2.589102023381895e-06,
      "loss": 0.3663,
      "step": 1660
    },
    {
      "epoch": 0.9372361384745286,
      "grad_norm": 0.616346538066864,
      "learning_rate": 2.3715814060157772e-06,
      "loss": 0.4261,
      "step": 1665
    },
    {
      "epoch": 0.9400506614128905,
      "grad_norm": 0.8073914647102356,
      "learning_rate": 2.1634939868990235e-06,
      "loss": 0.3541,
      "step": 1670
    },
    {
      "epoch": 0.9428651843512524,
      "grad_norm": 0.64414381980896,
      "learning_rate": 1.9648598721637045e-06,
      "loss": 0.5996,
      "step": 1675
    },
    {
      "epoch": 0.9456797072896144,
      "grad_norm": 0.7799990177154541,
      "learning_rate": 1.7756982545306443e-06,
      "loss": 0.4095,
      "step": 1680
    },
    {
      "epoch": 0.9484942302279764,
      "grad_norm": 0.6466760039329529,
      "learning_rate": 1.596027411454981e-06,
      "loss": 0.3666,
      "step": 1685
    },
    {
      "epoch": 0.9513087531663383,
| "grad_norm": 0.728364884853363, | |
| "learning_rate": 1.4258647033601024e-06, | |
| "loss": 0.6216, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.9541232761047003, | |
| "grad_norm": 0.9482190012931824, | |
| "learning_rate": 1.265226571960254e-06, | |
| "loss": 0.5574, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.9569377990430622, | |
| "grad_norm": 0.9594746232032776, | |
| "learning_rate": 1.1141285386718437e-06, | |
| "loss": 0.6466, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.9597523219814241, | |
| "grad_norm": 1.3607548475265503, | |
| "learning_rate": 9.72585203113774e-07, | |
| "loss": 0.5807, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.9625668449197861, | |
| "grad_norm": 2.761012554168701, | |
| "learning_rate": 8.406102416967043e-07, | |
| "loss": 0.4588, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.9653813678581481, | |
| "grad_norm": 0.7540304660797119, | |
| "learning_rate": 7.182164063015973e-07, | |
| "loss": 0.5414, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.96819589079651, | |
| "grad_norm": 1.1836090087890625, | |
| "learning_rate": 6.054155230476699e-07, | |
| "loss": 0.4274, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.971010413734872, | |
| "grad_norm": 0.6484697461128235, | |
| "learning_rate": 5.022184911495864e-07, | |
| "loss": 0.4756, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.9738249366732339, | |
| "grad_norm": 0.72726970911026, | |
| "learning_rate": 4.0863528186445564e-07, | |
| "loss": 0.5541, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.9766394596115958, | |
| "grad_norm": 0.7867249846458435, | |
| "learning_rate": 3.246749375282909e-07, | |
| "loss": 0.4513, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.9794539825499577, | |
| "grad_norm": 0.7862501740455627, | |
| "learning_rate": 2.50345570682331e-07, | |
| "loss": 0.4277, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.9822685054883197, | |
| "grad_norm": 0.9948648810386658, | |
| "learning_rate": 1.856543632892116e-07, | |
| "loss": 0.5665, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.9850830284266817, | |
| "grad_norm": 0.7089385390281677, | |
| "learning_rate": 1.3060756603897605e-07, | |
| "loss": 0.4658, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9878975513650436, | |
| "grad_norm": 0.5250968933105469, | |
| "learning_rate": 8.521049774512513e-08, | |
| "loss": 0.4507, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.9907120743034056, | |
| "grad_norm": 0.6175426244735718, | |
| "learning_rate": 4.946754483071692e-08, | |
| "loss": 0.3314, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9935265972417675, | |
| "grad_norm": 0.6678758263587952, | |
| "learning_rate": 2.3382160904483753e-08, | |
| "loss": 0.3931, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.9963411201801294, | |
| "grad_norm": 0.7605099081993103, | |
| "learning_rate": 6.95686642719906e-09, | |
| "loss": 0.3326, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9991556431184914, | |
| "grad_norm": 0.5569049119949341, | |
| "learning_rate": 1.932484680944313e-10, | |
| "loss": 0.4035, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.9997185477061638, | |
| "step": 1776, | |
| "total_flos": 3.866808531592151e+17, | |
| "train_loss": 0.5711438672759713, | |
| "train_runtime": 4148.7499, | |
| "train_samples_per_second": 3.425, | |
| "train_steps_per_second": 0.428 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1776, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.866808531592151e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |