{
  "best_global_step": 10000,
  "best_metric": 0.6085147261619568,
  "best_model_checkpoint": "/workspace/rails-finetune/adapters-qwen3-8b/checkpoint-10000",
  "epoch": 1.7793594306049823,
  "eval_steps": 500,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0017793594306049821,
      "grad_norm": 1.1857829093933105,
      "learning_rate": 9.000000000000001e-07,
      "loss": 1.7382530212402343,
      "step": 10
    },
    {
      "epoch": 0.0035587188612099642,
      "grad_norm": 1.1971900463104248,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 1.7001752853393555,
      "step": 20
    },
    {
      "epoch": 0.005338078291814947,
      "grad_norm": 1.2064718008041382,
      "learning_rate": 2.9e-06,
      "loss": 1.7585294723510743,
      "step": 30
    },
    {
      "epoch": 0.0071174377224199285,
      "grad_norm": 1.1466728448867798,
      "learning_rate": 3.900000000000001e-06,
      "loss": 1.6992141723632812,
      "step": 40
    },
    {
      "epoch": 0.008896797153024912,
      "grad_norm": 1.0449944734573364,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 1.7329919815063477,
      "step": 50
    },
    {
      "epoch": 0.010676156583629894,
      "grad_norm": 1.25763738155365,
      "learning_rate": 5.9e-06,
      "loss": 1.5657649040222168,
      "step": 60
    },
    {
      "epoch": 0.012455516014234875,
      "grad_norm": 0.9314267039299011,
      "learning_rate": 6.9e-06,
      "loss": 1.5626873970031738,
      "step": 70
    },
    {
      "epoch": 0.014234875444839857,
      "grad_norm": 0.8502155542373657,
      "learning_rate": 7.9e-06,
      "loss": 1.3948446273803712,
      "step": 80
    },
    {
      "epoch": 0.01601423487544484,
      "grad_norm": 0.505824625492096,
      "learning_rate": 8.900000000000001e-06,
      "loss": 1.318073558807373,
      "step": 90
    },
    {
      "epoch": 0.017793594306049824,
      "grad_norm": 0.4828319251537323,
      "learning_rate": 9.9e-06,
      "loss": 1.2034348487854003,
      "step": 100
    },
    {
      "epoch": 0.019572953736654804,
      "grad_norm": 0.4351405203342438,
      "learning_rate": 1.0900000000000002e-05,
      "loss": 1.2164586067199707,
      "step": 110
    },
    {
      "epoch": 0.021352313167259787,
      "grad_norm": 0.3060351014137268,
      "learning_rate": 1.1900000000000001e-05,
      "loss": 1.1403413772583009,
      "step": 120
    },
    {
      "epoch": 0.023131672597864767,
      "grad_norm": 0.30274662375450134,
      "learning_rate": 1.2900000000000002e-05,
      "loss": 1.0329069137573241,
      "step": 130
    },
    {
      "epoch": 0.02491103202846975,
      "grad_norm": 0.27452540397644043,
      "learning_rate": 1.39e-05,
      "loss": 1.0137288093566894,
      "step": 140
    },
    {
      "epoch": 0.026690391459074734,
      "grad_norm": 0.22874200344085693,
      "learning_rate": 1.4900000000000001e-05,
      "loss": 0.9552822113037109,
      "step": 150
    },
    {
      "epoch": 0.028469750889679714,
      "grad_norm": 0.20503273606300354,
      "learning_rate": 1.5900000000000004e-05,
      "loss": 0.9580234527587891,
      "step": 160
    },
    {
      "epoch": 0.030249110320284697,
      "grad_norm": 0.20734967291355133,
      "learning_rate": 1.69e-05,
      "loss": 0.9269493103027344,
      "step": 170
    },
    {
      "epoch": 0.03202846975088968,
      "grad_norm": 0.2046293467283249,
      "learning_rate": 1.79e-05,
      "loss": 0.898008155822754,
      "step": 180
    },
    {
      "epoch": 0.033807829181494664,
      "grad_norm": 0.20902672410011292,
      "learning_rate": 1.8900000000000002e-05,
      "loss": 0.8822259902954102,
      "step": 190
    },
    {
      "epoch": 0.03558718861209965,
      "grad_norm": 0.265747606754303,
      "learning_rate": 1.9900000000000003e-05,
      "loss": 0.9367627143859864,
      "step": 200
    },
    {
      "epoch": 0.037366548042704624,
      "grad_norm": 0.21941998600959778,
      "learning_rate": 1.9999967204339314e-05,
      "loss": 0.8784577369689941,
      "step": 210
    },
    {
      "epoch": 0.03914590747330961,
      "grad_norm": 0.23806284368038177,
      "learning_rate": 1.999985383689953e-05,
      "loss": 0.8498974800109863,
      "step": 220
    },
    {
      "epoch": 0.04092526690391459,
      "grad_norm": 0.2539118826389313,
      "learning_rate": 1.9999659493713742e-05,
      "loss": 0.8517349243164063,
      "step": 230
    },
    {
      "epoch": 0.042704626334519574,
      "grad_norm": 0.279153436422348,
      "learning_rate": 1.9999384176355685e-05,
      "loss": 0.8303974151611329,
      "step": 240
    },
    {
      "epoch": 0.04448398576512456,
      "grad_norm": 0.2661009132862091,
      "learning_rate": 1.9999027887054793e-05,
      "loss": 0.8362269401550293,
      "step": 250
    },
    {
      "epoch": 0.046263345195729534,
      "grad_norm": 0.2643781006336212,
      "learning_rate": 1.9998590628696186e-05,
      "loss": 0.7995978832244873,
      "step": 260
    },
    {
      "epoch": 0.04804270462633452,
      "grad_norm": 0.22854603826999664,
      "learning_rate": 1.9998072404820648e-05,
      "loss": 0.8313385009765625,
      "step": 270
    },
    {
      "epoch": 0.0498220640569395,
      "grad_norm": 0.25225695967674255,
      "learning_rate": 1.99974732196246e-05,
      "loss": 0.8484455108642578,
      "step": 280
    },
    {
      "epoch": 0.051601423487544484,
      "grad_norm": 0.2538894712924957,
      "learning_rate": 1.999679307796006e-05,
      "loss": 0.8153322219848633,
      "step": 290
    },
    {
      "epoch": 0.05338078291814947,
      "grad_norm": 0.257697194814682,
      "learning_rate": 1.9996031985334604e-05,
      "loss": 0.8145216941833496,
      "step": 300
    },
    {
      "epoch": 0.05516014234875445,
      "grad_norm": 0.2465839385986328,
      "learning_rate": 1.9995189947911323e-05,
      "loss": 0.8290293693542481,
      "step": 310
    },
    {
      "epoch": 0.05693950177935943,
      "grad_norm": 0.2330036759376526,
      "learning_rate": 1.9994266972508786e-05,
      "loss": 0.8094453811645508,
      "step": 320
    },
    {
      "epoch": 0.05871886120996441,
      "grad_norm": 0.24966399371623993,
      "learning_rate": 1.9993263066600956e-05,
      "loss": 0.7964043140411377,
      "step": 330
    },
    {
      "epoch": 0.060498220640569395,
      "grad_norm": 0.2663300335407257,
      "learning_rate": 1.9992178238317156e-05,
      "loss": 0.7674037456512451,
      "step": 340
    },
    {
      "epoch": 0.06227758007117438,
      "grad_norm": 0.29889997839927673,
      "learning_rate": 1.9991012496442e-05,
      "loss": 0.7757882118225098,
      "step": 350
    },
    {
      "epoch": 0.06405693950177936,
      "grad_norm": 0.2283693104982376,
      "learning_rate": 1.9989765850415303e-05,
      "loss": 0.7695906162261963,
      "step": 360
    },
    {
      "epoch": 0.06583629893238434,
      "grad_norm": 0.2109132707118988,
      "learning_rate": 1.9988438310332015e-05,
      "loss": 0.7833895683288574,
      "step": 370
    },
    {
      "epoch": 0.06761565836298933,
      "grad_norm": 0.2546916604042053,
      "learning_rate": 1.998702988694216e-05,
      "loss": 0.8128045082092286,
      "step": 380
    },
    {
      "epoch": 0.0693950177935943,
      "grad_norm": 0.2531549632549286,
      "learning_rate": 1.998554059165071e-05,
      "loss": 0.7902643203735351,
      "step": 390
    },
    {
      "epoch": 0.0711743772241993,
      "grad_norm": 0.21756362915039062,
      "learning_rate": 1.9983970436517523e-05,
      "loss": 0.7454084873199462,
      "step": 400
    },
    {
      "epoch": 0.07295373665480427,
      "grad_norm": 0.2294779121875763,
      "learning_rate": 1.9982319434257236e-05,
      "loss": 0.7487451553344726,
      "step": 410
    },
    {
      "epoch": 0.07473309608540925,
      "grad_norm": 0.26356714963912964,
      "learning_rate": 1.9980587598239155e-05,
      "loss": 0.748570442199707,
      "step": 420
    },
    {
      "epoch": 0.07651245551601424,
      "grad_norm": 0.22382904589176178,
      "learning_rate": 1.9978774942487155e-05,
      "loss": 0.7261887073516846,
      "step": 430
    },
    {
      "epoch": 0.07829181494661921,
      "grad_norm": 0.22544927895069122,
      "learning_rate": 1.997688148167957e-05,
      "loss": 0.7761906623840332,
      "step": 440
    },
    {
      "epoch": 0.0800711743772242,
      "grad_norm": 0.2679840922355652,
      "learning_rate": 1.997490723114906e-05,
      "loss": 0.7872249603271484,
      "step": 450
    },
    {
      "epoch": 0.08185053380782918,
      "grad_norm": 0.20914621651172638,
      "learning_rate": 1.9972852206882504e-05,
      "loss": 0.7876029014587402,
      "step": 460
    },
    {
      "epoch": 0.08362989323843416,
      "grad_norm": 0.36782026290893555,
      "learning_rate": 1.9970716425520854e-05,
      "loss": 0.7812703132629395,
      "step": 470
    },
    {
      "epoch": 0.08540925266903915,
      "grad_norm": 0.2317405343055725,
      "learning_rate": 1.9968499904359017e-05,
      "loss": 0.7694793701171875,
      "step": 480
    },
    {
      "epoch": 0.08718861209964412,
      "grad_norm": 0.23587286472320557,
      "learning_rate": 1.9966202661345694e-05,
      "loss": 0.7593471050262451,
      "step": 490
    },
    {
      "epoch": 0.08896797153024912,
      "grad_norm": 0.3179067075252533,
      "learning_rate": 1.9963824715083255e-05,
      "loss": 0.804302978515625,
      "step": 500
    },
    {
      "epoch": 0.08896797153024912,
      "eval_loss": 0.7566477656364441,
      "eval_runtime": 406.3608,
      "eval_samples_per_second": 12.376,
      "eval_steps_per_second": 6.189,
      "step": 500
    },
    {
      "epoch": 0.09074733096085409,
      "grad_norm": 0.2583659291267395,
      "learning_rate": 1.9961366084827584e-05,
      "loss": 0.7231699466705322,
      "step": 510
    },
    {
      "epoch": 0.09252669039145907,
      "grad_norm": 0.3117372393608093,
      "learning_rate": 1.9958826790487905e-05,
      "loss": 0.7830834865570069,
      "step": 520
    },
    {
      "epoch": 0.09430604982206406,
      "grad_norm": 0.26633507013320923,
      "learning_rate": 1.995620685262665e-05,
      "loss": 0.7565378189086914,
      "step": 530
    },
    {
      "epoch": 0.09608540925266904,
      "grad_norm": 0.2491220384836197,
      "learning_rate": 1.9953506292459275e-05,
      "loss": 0.770139503479004,
      "step": 540
    },
    {
      "epoch": 0.09786476868327403,
      "grad_norm": 0.33897528052330017,
      "learning_rate": 1.9950725131854082e-05,
      "loss": 0.8139609336853028,
      "step": 550
    },
    {
      "epoch": 0.099644128113879,
      "grad_norm": 0.27655312418937683,
      "learning_rate": 1.9947863393332053e-05,
      "loss": 0.7632327556610108,
      "step": 560
    },
    {
      "epoch": 0.10142348754448399,
      "grad_norm": 0.2421133816242218,
      "learning_rate": 1.994492110006667e-05,
      "loss": 0.7782410621643067,
      "step": 570
    },
    {
      "epoch": 0.10320284697508897,
      "grad_norm": 0.26968705654144287,
      "learning_rate": 1.994189827588372e-05,
      "loss": 0.7438684940338135,
      "step": 580
    },
    {
      "epoch": 0.10498220640569395,
      "grad_norm": 0.28697794675827026,
      "learning_rate": 1.993879494526111e-05,
      "loss": 0.7320491313934326,
      "step": 590
    },
    {
      "epoch": 0.10676156583629894,
      "grad_norm": 0.327286034822464,
      "learning_rate": 1.9935611133328657e-05,
      "loss": 0.7248147010803223,
      "step": 600
    },
    {
      "epoch": 0.10854092526690391,
      "grad_norm": 0.3679325580596924,
      "learning_rate": 1.9932346865867885e-05,
      "loss": 0.7461765766143799,
      "step": 610
    },
    {
      "epoch": 0.1103202846975089,
      "grad_norm": 0.3118029832839966,
      "learning_rate": 1.992900216931184e-05,
      "loss": 0.764728593826294,
      "step": 620
    },
    {
      "epoch": 0.11209964412811388,
      "grad_norm": 0.2683190107345581,
      "learning_rate": 1.992557707074484e-05,
      "loss": 0.7256179809570312,
      "step": 630
    },
    {
      "epoch": 0.11387900355871886,
      "grad_norm": 0.32056325674057007,
      "learning_rate": 1.9922071597902286e-05,
      "loss": 0.7553198337554932,
      "step": 640
    },
    {
      "epoch": 0.11565836298932385,
      "grad_norm": 0.4004197418689728,
      "learning_rate": 1.9918485779170417e-05,
      "loss": 0.7440505027770996,
      "step": 650
    },
    {
      "epoch": 0.11743772241992882,
      "grad_norm": 0.3497128486633301,
      "learning_rate": 1.9914819643586096e-05,
      "loss": 0.7672629833221436,
      "step": 660
    },
    {
      "epoch": 0.11921708185053381,
      "grad_norm": 0.3187069594860077,
      "learning_rate": 1.9911073220836562e-05,
      "loss": 0.7420164585113526,
      "step": 670
    },
    {
      "epoch": 0.12099644128113879,
      "grad_norm": 0.29900017380714417,
      "learning_rate": 1.9907246541259194e-05,
      "loss": 0.7458691120147705,
      "step": 680
    },
    {
      "epoch": 0.12277580071174377,
      "grad_norm": 0.298501193523407,
      "learning_rate": 1.9903339635841274e-05,
      "loss": 0.7346842765808106,
      "step": 690
    },
    {
      "epoch": 0.12455516014234876,
      "grad_norm": 0.38077300786972046,
      "learning_rate": 1.9899352536219713e-05,
      "loss": 0.7980701446533203,
      "step": 700
    },
    {
      "epoch": 0.12633451957295375,
      "grad_norm": 0.2950842082500458,
      "learning_rate": 1.9895285274680826e-05,
      "loss": 0.7282920837402344,
      "step": 710
    },
    {
      "epoch": 0.12811387900355872,
      "grad_norm": 0.2746928632259369,
      "learning_rate": 1.989113788416005e-05,
      "loss": 0.7119527816772461,
      "step": 720
    },
    {
      "epoch": 0.1298932384341637,
      "grad_norm": 0.30003389716148376,
      "learning_rate": 1.9886910398241673e-05,
      "loss": 0.7120148181915283,
      "step": 730
    },
    {
      "epoch": 0.13167259786476868,
      "grad_norm": 0.3238595128059387,
      "learning_rate": 1.9882602851158584e-05,
      "loss": 0.7219894886016845,
      "step": 740
    },
    {
      "epoch": 0.13345195729537365,
      "grad_norm": 0.4421483278274536,
      "learning_rate": 1.9878215277791977e-05,
      "loss": 0.6878085613250733,
      "step": 750
    },
    {
      "epoch": 0.13523131672597866,
      "grad_norm": 0.3009251058101654,
      "learning_rate": 1.9873747713671073e-05,
      "loss": 0.777537488937378,
      "step": 760
    },
    {
      "epoch": 0.13701067615658363,
      "grad_norm": 0.3649790287017822,
      "learning_rate": 1.9869200194972828e-05,
      "loss": 0.7608931541442872,
      "step": 770
    },
    {
      "epoch": 0.1387900355871886,
      "grad_norm": 0.38479843735694885,
      "learning_rate": 1.986457275852166e-05,
      "loss": 0.7644641876220704,
      "step": 780
    },
    {
      "epoch": 0.14056939501779359,
      "grad_norm": 0.3267346918582916,
      "learning_rate": 1.9859865441789126e-05,
      "loss": 0.7437977313995361,
      "step": 790
    },
    {
      "epoch": 0.1423487544483986,
      "grad_norm": 0.3238297402858734,
      "learning_rate": 1.985507828289363e-05,
      "loss": 0.7596290111541748,
      "step": 800
    },
    {
      "epoch": 0.14412811387900357,
      "grad_norm": 0.4067242443561554,
      "learning_rate": 1.985021132060012e-05,
      "loss": 0.723486328125,
      "step": 810
    },
    {
      "epoch": 0.14590747330960854,
      "grad_norm": 0.3294743299484253,
      "learning_rate": 1.9845264594319755e-05,
      "loss": 0.8035991668701172,
      "step": 820
    },
    {
      "epoch": 0.14768683274021352,
      "grad_norm": 0.2861204445362091,
      "learning_rate": 1.9840238144109613e-05,
      "loss": 0.7118996620178223,
      "step": 830
    },
    {
      "epoch": 0.1494661921708185,
      "grad_norm": 0.3281143605709076,
      "learning_rate": 1.9835132010672334e-05,
      "loss": 0.7610855102539062,
      "step": 840
    },
    {
      "epoch": 0.1512455516014235,
      "grad_norm": 0.3176390826702118,
      "learning_rate": 1.982994623535583e-05,
      "loss": 0.7489484310150146,
      "step": 850
    },
    {
      "epoch": 0.15302491103202848,
      "grad_norm": 0.34338897466659546,
      "learning_rate": 1.9824680860152914e-05,
      "loss": 0.7180755615234375,
      "step": 860
    },
    {
      "epoch": 0.15480427046263345,
      "grad_norm": 0.41952571272850037,
      "learning_rate": 1.9819335927700975e-05,
      "loss": 0.6884951591491699,
      "step": 870
    },
    {
      "epoch": 0.15658362989323843,
      "grad_norm": 0.29155367612838745,
      "learning_rate": 1.9813911481281637e-05,
      "loss": 0.683270263671875,
      "step": 880
    },
    {
      "epoch": 0.1583629893238434,
      "grad_norm": 0.3815101981163025,
      "learning_rate": 1.98084075648204e-05,
      "loss": 0.714734411239624,
      "step": 890
    },
    {
      "epoch": 0.1601423487544484,
      "grad_norm": 0.364044189453125,
      "learning_rate": 1.980282422288629e-05,
      "loss": 0.7451518535614013,
      "step": 900
    },
    {
      "epoch": 0.1619217081850534,
      "grad_norm": 0.4181061387062073,
      "learning_rate": 1.9797161500691496e-05,
      "loss": 0.7484359741210938,
      "step": 910
    },
    {
      "epoch": 0.16370106761565836,
      "grad_norm": 0.3803650140762329,
      "learning_rate": 1.9791419444091006e-05,
      "loss": 0.7413453578948974,
      "step": 920
    },
    {
      "epoch": 0.16548042704626334,
      "grad_norm": 0.3450170159339905,
      "learning_rate": 1.9785598099582225e-05,
      "loss": 0.732274341583252,
      "step": 930
    },
    {
      "epoch": 0.16725978647686832,
      "grad_norm": 0.31141409277915955,
      "learning_rate": 1.9779697514304624e-05,
      "loss": 0.768674898147583,
      "step": 940
    },
    {
      "epoch": 0.16903914590747332,
      "grad_norm": 0.33961221575737,
      "learning_rate": 1.977371773603932e-05,
      "loss": 0.7539153099060059,
      "step": 950
    },
    {
      "epoch": 0.1708185053380783,
      "grad_norm": 0.3468526303768158,
      "learning_rate": 1.9767658813208725e-05,
      "loss": 0.6995216369628906,
      "step": 960
    },
    {
      "epoch": 0.17259786476868327,
      "grad_norm": 0.3776821196079254,
      "learning_rate": 1.976152079487614e-05,
      "loss": 0.7344133853912354,
      "step": 970
    },
    {
      "epoch": 0.17437722419928825,
      "grad_norm": 0.43669673800468445,
      "learning_rate": 1.9755303730745344e-05,
      "loss": 0.7037209510803223,
      "step": 980
    },
    {
      "epoch": 0.17615658362989323,
      "grad_norm": 0.43197065591812134,
      "learning_rate": 1.9749007671160223e-05,
      "loss": 0.7632620334625244,
      "step": 990
    },
    {
      "epoch": 0.17793594306049823,
      "grad_norm": 0.36352500319480896,
      "learning_rate": 1.9742632667104332e-05,
      "loss": 0.7493629455566406,
      "step": 1000
    },
    {
      "epoch": 0.17793594306049823,
      "eval_loss": 0.7127183079719543,
      "eval_runtime": 407.5587,
      "eval_samples_per_second": 12.339,
      "eval_steps_per_second": 6.171,
      "step": 1000
    },
    {
      "epoch": 0.1797153024911032,
      "grad_norm": 0.4177298843860626,
      "learning_rate": 1.9736178770200492e-05,
      "loss": 0.7822850227355957,
      "step": 1010
    },
    {
      "epoch": 0.18149466192170818,
      "grad_norm": 0.4624420404434204,
      "learning_rate": 1.972964603271038e-05,
      "loss": 0.7450732707977294,
      "step": 1020
    },
    {
      "epoch": 0.18327402135231316,
      "grad_norm": 0.35002920031547546,
      "learning_rate": 1.97230345075341e-05,
      "loss": 0.7509373188018799,
      "step": 1030
    },
    {
      "epoch": 0.18505338078291814,
      "grad_norm": 0.4136241674423218,
      "learning_rate": 1.9716344248209754e-05,
      "loss": 0.6727419853210449,
      "step": 1040
    },
    {
      "epoch": 0.18683274021352314,
      "grad_norm": 0.41807156801223755,
      "learning_rate": 1.9709575308913004e-05,
      "loss": 0.7258425712585449,
      "step": 1050
    },
    {
      "epoch": 0.18861209964412812,
      "grad_norm": 0.4383244514465332,
      "learning_rate": 1.9702727444456645e-05,
      "loss": 0.7470430374145508,
      "step": 1060
    },
    {
      "epoch": 0.1903914590747331,
      "grad_norm": 0.44305190443992615,
      "learning_rate": 1.969580161029015e-05,
      "loss": 0.6925637722015381,
      "step": 1070
    },
    {
      "epoch": 0.19217081850533807,
      "grad_norm": 0.45914319157600403,
      "learning_rate": 1.9688796962499228e-05,
      "loss": 0.7320804595947266,
      "step": 1080
    },
    {
      "epoch": 0.19395017793594305,
      "grad_norm": 0.37220245599746704,
      "learning_rate": 1.9681713857805367e-05,
      "loss": 0.6863605499267578,
      "step": 1090
    },
    {
      "epoch": 0.19572953736654805,
      "grad_norm": 0.473320871591568,
      "learning_rate": 1.9674552353565374e-05,
      "loss": 0.7069521427154541,
      "step": 1100
    },
    {
      "epoch": 0.19750889679715303,
      "grad_norm": 0.34435564279556274,
      "learning_rate": 1.9667312507770905e-05,
      "loss": 0.7488323211669922,
      "step": 1110
    },
    {
      "epoch": 0.199288256227758,
      "grad_norm": 0.4495692551136017,
      "learning_rate": 1.9659994379048015e-05,
      "loss": 0.7306941032409668,
      "step": 1120
    },
    {
      "epoch": 0.20106761565836298,
      "grad_norm": 0.37990689277648926,
      "learning_rate": 1.9652598026656666e-05,
      "loss": 0.6781065464019775,
      "step": 1130
    },
    {
      "epoch": 0.20284697508896798,
      "grad_norm": 0.374970942735672,
      "learning_rate": 1.9645123510490242e-05,
      "loss": 0.732900333404541,
      "step": 1140
    },
    {
      "epoch": 0.20462633451957296,
      "grad_norm": 0.42331087589263916,
      "learning_rate": 1.963757089107508e-05,
      "loss": 0.7150296211242676,
      "step": 1150
    },
    {
      "epoch": 0.20640569395017794,
      "grad_norm": 0.39357990026474,
      "learning_rate": 1.962994022956998e-05,
      "loss": 0.70610032081604,
      "step": 1160
    },
    {
      "epoch": 0.20818505338078291,
      "grad_norm": 0.3616408407688141,
      "learning_rate": 1.9622231587765688e-05,
      "loss": 0.6808771610260009,
      "step": 1170
    },
    {
      "epoch": 0.2099644128113879,
      "grad_norm": 0.4579455554485321,
      "learning_rate": 1.9614445028084424e-05,
      "loss": 0.7024923801422119,
      "step": 1180
    },
    {
      "epoch": 0.2117437722419929,
      "grad_norm": 0.47354868054389954,
      "learning_rate": 1.9606580613579352e-05,
      "loss": 0.7523046493530273,
      "step": 1190
    },
    {
      "epoch": 0.21352313167259787,
      "grad_norm": 0.3599710166454315,
      "learning_rate": 1.9598638407934096e-05,
      "loss": 0.7396236419677734,
      "step": 1200
    },
    {
      "epoch": 0.21530249110320285,
      "grad_norm": 0.37311187386512756,
      "learning_rate": 1.959061847546219e-05,
      "loss": 0.6676182746887207,
      "step": 1210
    },
    {
      "epoch": 0.21708185053380782,
      "grad_norm": 0.4571494460105896,
      "learning_rate": 1.9582520881106585e-05,
      "loss": 0.7176971435546875,
      "step": 1220
    },
    {
      "epoch": 0.2188612099644128,
      "grad_norm": 0.39556118845939636,
      "learning_rate": 1.9574345690439113e-05,
      "loss": 0.6899125576019287,
      "step": 1230
    },
    {
      "epoch": 0.2206405693950178,
      "grad_norm": 0.4152670204639435,
      "learning_rate": 1.9566092969659964e-05,
      "loss": 0.7056239128112793,
      "step": 1240
    },
    {
      "epoch": 0.22241992882562278,
      "grad_norm": 0.5533052086830139,
      "learning_rate": 1.9557762785597133e-05,
      "loss": 0.7325549602508545,
      "step": 1250
    },
    {
      "epoch": 0.22419928825622776,
      "grad_norm": 0.457738995552063,
      "learning_rate": 1.9549355205705895e-05,
      "loss": 0.7097938060760498,
      "step": 1260
    },
    {
      "epoch": 0.22597864768683273,
      "grad_norm": 0.4812074303627014,
      "learning_rate": 1.9540870298068247e-05,
      "loss": 0.6999053478240966,
      "step": 1270
    },
    {
      "epoch": 0.2277580071174377,
      "grad_norm": 0.42034661769866943,
      "learning_rate": 1.9532308131392365e-05,
      "loss": 0.7124747276306153,
      "step": 1280
    },
    {
      "epoch": 0.22953736654804271,
      "grad_norm": 0.44799792766571045,
      "learning_rate": 1.9523668775012053e-05,
      "loss": 0.7096034049987793,
      "step": 1290
    },
    {
      "epoch": 0.2313167259786477,
      "grad_norm": 0.4658997654914856,
      "learning_rate": 1.9514952298886157e-05,
      "loss": 0.7080921649932861,
      "step": 1300
    },
    {
      "epoch": 0.23309608540925267,
      "grad_norm": 0.5269479751586914,
      "learning_rate": 1.9506158773598035e-05,
      "loss": 0.6962251663208008,
      "step": 1310
    },
    {
      "epoch": 0.23487544483985764,
      "grad_norm": 0.45521607995033264,
      "learning_rate": 1.9497288270354944e-05,
      "loss": 0.7323726177215576,
      "step": 1320
    },
    {
      "epoch": 0.23665480427046262,
      "grad_norm": 0.4322509169578552,
      "learning_rate": 1.9488340860987504e-05,
      "loss": 0.7227589607238769,
      "step": 1330
    },
    {
      "epoch": 0.23843416370106763,
      "grad_norm": 0.47730758786201477,
      "learning_rate": 1.9479316617949084e-05,
      "loss": 0.702051305770874,
      "step": 1340
    },
    {
      "epoch": 0.2402135231316726,
      "grad_norm": 0.42763814330101013,
      "learning_rate": 1.9470215614315232e-05,
      "loss": 0.7380130767822266,
      "step": 1350
    },
    {
      "epoch": 0.24199288256227758,
      "grad_norm": 0.4630064070224762,
      "learning_rate": 1.9461037923783087e-05,
      "loss": 0.7470481395721436,
      "step": 1360
    },
    {
      "epoch": 0.24377224199288255,
      "grad_norm": 0.367767870426178,
      "learning_rate": 1.9451783620670767e-05,
      "loss": 0.7009376049041748,
      "step": 1370
    },
    {
      "epoch": 0.24555160142348753,
      "grad_norm": 0.38925161957740784,
      "learning_rate": 1.9442452779916775e-05,
      "loss": 0.7043869972229004,
      "step": 1380
    },
    {
      "epoch": 0.24733096085409254,
      "grad_norm": 0.37959718704223633,
      "learning_rate": 1.943304547707939e-05,
      "loss": 0.7024062633514404,
      "step": 1390
    },
    {
      "epoch": 0.2491103202846975,
      "grad_norm": 0.3639119267463684,
      "learning_rate": 1.9423561788336073e-05,
      "loss": 0.7025011539459228,
      "step": 1400
    },
    {
      "epoch": 0.2508896797153025,
      "grad_norm": 0.5123258233070374,
      "learning_rate": 1.9414001790482815e-05,
      "loss": 0.7173181533813476,
      "step": 1410
    },
    {
      "epoch": 0.2526690391459075,
      "grad_norm": 0.5145444273948669,
      "learning_rate": 1.940436556093355e-05,
      "loss": 0.6975203514099121,
      "step": 1420
    },
    {
      "epoch": 0.25444839857651247,
      "grad_norm": 0.41073641180992126,
      "learning_rate": 1.93946531777195e-05,
      "loss": 0.7116940021514893,
      "step": 1430
    },
    {
      "epoch": 0.25622775800711745,
      "grad_norm": 0.5241482853889465,
      "learning_rate": 1.9384864719488562e-05,
      "loss": 0.7322525024414063,
      "step": 1440
    },
    {
      "epoch": 0.2580071174377224,
      "grad_norm": 0.41522547602653503,
      "learning_rate": 1.9375000265504673e-05,
      "loss": 0.7099958419799804,
      "step": 1450
    },
    {
      "epoch": 0.2597864768683274,
      "grad_norm": 0.4306512773036957,
      "learning_rate": 1.9365059895647146e-05,
      "loss": 0.6963861465454102,
      "step": 1460
    },
    {
      "epoch": 0.2615658362989324,
      "grad_norm": 0.4515567421913147,
      "learning_rate": 1.935504369041004e-05,
      "loss": 0.7317886829376221,
      "step": 1470
    },
    {
      "epoch": 0.26334519572953735,
      "grad_norm": 0.46133843064308167,
      "learning_rate": 1.9344951730901523e-05,
      "loss": 0.6936720371246338,
      "step": 1480
    },
    {
      "epoch": 0.26512455516014233,
      "grad_norm": 0.4332071840763092,
      "learning_rate": 1.933478409884317e-05,
      "loss": 0.7092292308807373,
      "step": 1490
    },
    {
      "epoch": 0.2669039145907473,
      "grad_norm": 0.44672346115112305,
      "learning_rate": 1.9324540876569356e-05,
      "loss": 0.7138745784759521,
      "step": 1500
    },
    {
      "epoch": 0.2669039145907473,
      "eval_loss": 0.6928849816322327,
      "eval_runtime": 409.2608,
      "eval_samples_per_second": 12.288,
      "eval_steps_per_second": 6.145,
      "step": 1500
    },
    {
      "epoch": 0.26868327402135234,
      "grad_norm": 0.4207611680030823,
      "learning_rate": 1.9314222147026538e-05,
      "loss": 0.682267141342163,
      "step": 1510
    },
    {
      "epoch": 0.2704626334519573,
      "grad_norm": 0.4715620279312134,
      "learning_rate": 1.9303827993772627e-05,
      "loss": 0.6928095340728759,
      "step": 1520
    },
    {
      "epoch": 0.2722419928825623,
      "grad_norm": 0.5061793327331543,
      "learning_rate": 1.9293358500976284e-05,
      "loss": 0.7393241405487061,
      "step": 1530
    },
    {
      "epoch": 0.27402135231316727,
      "grad_norm": 0.46995067596435547,
      "learning_rate": 1.9282813753416247e-05,
      "loss": 0.7240311622619628,
      "step": 1540
    },
    {
      "epoch": 0.27580071174377224,
      "grad_norm": 0.35846948623657227,
      "learning_rate": 1.927219383648064e-05,
      "loss": 0.6961266994476318,
      "step": 1550
    },
    {
      "epoch": 0.2775800711743772,
      "grad_norm": 0.4667574465274811,
      "learning_rate": 1.9261498836166297e-05,
      "loss": 0.7295796871185303,
      "step": 1560
    },
    {
      "epoch": 0.2793594306049822,
      "grad_norm": 0.5029377341270447,
      "learning_rate": 1.9250728839078043e-05,
      "loss": 0.7046424865722656,
      "step": 1570
    },
    {
      "epoch": 0.28113879003558717,
      "grad_norm": 0.46402570605278015,
      "learning_rate": 1.9239883932428002e-05,
      "loss": 0.7341272830963135,
      "step": 1580
    },
    {
      "epoch": 0.28291814946619215,
      "grad_norm": 0.45273569226264954,
      "learning_rate": 1.9228964204034906e-05,
      "loss": 0.7231084823608398,
      "step": 1590
    },
    {
      "epoch": 0.2846975088967972,
      "grad_norm": 0.44853758811950684,
      "learning_rate": 1.9217969742323358e-05,
      "loss": 0.7228631019592285,
      "step": 1600
    },
    {
      "epoch": 0.28647686832740216,
      "grad_norm": 0.4073372483253479,
      "learning_rate": 1.9206900636323138e-05,
      "loss": 0.7120136737823486,
      "step": 1610
    },
    {
      "epoch": 0.28825622775800713,
      "grad_norm": 0.4837400019168854,
      "learning_rate": 1.9195756975668463e-05,
      "loss": 0.7246061325073242,
      "step": 1620
    },
    {
      "epoch": 0.2900355871886121,
      "grad_norm": 0.48374027013778687,
      "learning_rate": 1.918453885059728e-05,
      "loss": 0.6975275039672851,
      "step": 1630
    },
    {
      "epoch": 0.2918149466192171,
      "grad_norm": 0.40771543979644775,
      "learning_rate": 1.9173246351950515e-05,
      "loss": 0.7376579284667969,
      "step": 1640
    },
    {
      "epoch": 0.29359430604982206,
      "grad_norm": 0.47605007886886597,
      "learning_rate": 1.916187957117136e-05,
      "loss": 0.7509649753570556,
      "step": 1650
    },
    {
      "epoch": 0.29537366548042704,
      "grad_norm": 0.4246625006198883,
      "learning_rate": 1.9150438600304514e-05,
      "loss": 0.7086214065551758,
      "step": 1660
    },
    {
      "epoch": 0.297153024911032,
      "grad_norm": 0.4986574053764343,
      "learning_rate": 1.9138923531995448e-05,
      "loss": 0.6780657768249512,
      "step": 1670
    },
    {
      "epoch": 0.298932384341637,
      "grad_norm": 0.5241477489471436,
      "learning_rate": 1.912733445948965e-05,
      "loss": 0.7178135395050049,
      "step": 1680
    },
    {
      "epoch": 0.30071174377224197,
      "grad_norm": 0.48822927474975586,
      "learning_rate": 1.9115671476631865e-05,
      "loss": 0.6914261817932129,
      "step": 1690
    },
    {
      "epoch": 0.302491103202847,
      "grad_norm": 0.4603191018104553,
      "learning_rate": 1.910393467786535e-05,
      "loss": 0.7189798355102539,
      "step": 1700
    },
    {
      "epoch": 0.304270462633452,
      "grad_norm": 0.5957316160202026,
      "learning_rate": 1.90921241582311e-05,
      "loss": 0.6929316520690918,
      "step": 1710
    },
    {
      "epoch": 0.30604982206405695,
      "grad_norm": 0.4563639163970947,
      "learning_rate": 1.9080240013367075e-05,
      "loss": 0.6874090194702148,
      "step": 1720
    },
    {
      "epoch": 0.30782918149466193,
      "grad_norm": 0.4933296740055084,
      "learning_rate": 1.9068282339507433e-05,
      "loss": 0.7047487258911133,
      "step": 1730
    },
    {
      "epoch": 0.3096085409252669,
      "grad_norm": 0.46199893951416016,
      "learning_rate": 1.9056251233481747e-05,
      "loss": 0.6585260391235351,
      "step": 1740
    },
    {
      "epoch": 0.3113879003558719,
      "grad_norm": 0.4224775433540344,
      "learning_rate": 1.904414679271421e-05,
      "loss": 0.7062453269958496,
      "step": 1750
    },
    {
      "epoch": 0.31316725978647686,
      "grad_norm": 0.5592194199562073,
      "learning_rate": 1.9031969115222876e-05,
      "loss": 0.7372538089752197,
      "step": 1760
    },
    {
      "epoch": 0.31494661921708184,
      "grad_norm": 0.447518527507782,
      "learning_rate": 1.9019718299618836e-05,
      "loss": 0.6815076351165772,
      "step": 1770
    },
    {
      "epoch": 0.3167259786476868,
      "grad_norm": 0.4872969686985016,
      "learning_rate": 1.9007394445105433e-05,
      "loss": 0.6925329685211181,
      "step": 1780
    },
    {
      "epoch": 0.3185053380782918,
      "grad_norm": 0.5796515941619873,
      "learning_rate": 1.8994997651477457e-05,
      "loss": 0.6911862850189209,
      "step": 1790
    },
    {
      "epoch": 0.3202846975088968,
      "grad_norm": 0.45499077439308167,
      "learning_rate": 1.8982528019120335e-05,
      "loss": 0.689606761932373,
      "step": 1800
    },
    {
      "epoch": 0.3220640569395018,
      "grad_norm": 0.5108110308647156,
      "learning_rate": 1.8969985649009325e-05,
      "loss": 0.698862886428833,
      "step": 1810
    },
    {
      "epoch": 0.3238434163701068,
      "grad_norm": 0.5635261535644531,
      "learning_rate": 1.8957370642708682e-05,
      "loss": 0.7093265533447266,
      "step": 1820
    },
    {
      "epoch": 0.32562277580071175,
      "grad_norm": 0.5035433769226074,
      "learning_rate": 1.8944683102370862e-05,
      "loss": 0.6829179763793946,
      "step": 1830
    },
    {
      "epoch": 0.3274021352313167,
      "grad_norm": 0.5030480623245239,
      "learning_rate": 1.8931923130735667e-05,
      "loss": 0.6931506633758545,
      "step": 1840
    },
    {
      "epoch": 0.3291814946619217,
      "grad_norm": 0.42200684547424316,
      "learning_rate": 1.891909083112943e-05,
      "loss": 0.7182873725891114,
      "step": 1850
    },
    {
      "epoch": 0.3309608540925267,
      "grad_norm": 0.5390200614929199,
      "learning_rate": 1.8906186307464168e-05,
      "loss": 0.7040542602539063,
      "step": 1860
    },
    {
      "epoch": 0.33274021352313166,
      "grad_norm": 0.4840771555900574,
      "learning_rate": 1.889320966423676e-05,
      "loss": 0.71949143409729,
      "step": 1870
    },
    {
      "epoch": 0.33451957295373663,
      "grad_norm": 0.46363523602485657,
      "learning_rate": 1.8880161006528075e-05,
      "loss": 0.7095215797424317,
      "step": 1880
    },
    {
      "epoch": 0.33629893238434166,
      "grad_norm": 0.5143831372261047,
      "learning_rate": 1.8867040440002137e-05,
      "loss": 0.6844244003295898,
      "step": 1890
    },
    {
      "epoch": 0.33807829181494664,
      "grad_norm": 0.42344361543655396,
      "learning_rate": 1.8853848070905264e-05,
      "loss": 0.7266733169555664,
      "step": 1900
    },
    {
      "epoch": 0.3398576512455516,
      "grad_norm": 0.5283271074295044,
      "learning_rate": 1.884058400606521e-05,
      "loss": 0.6759650707244873,
      "step": 1910
    },
    {
      "epoch": 0.3416370106761566,
      "grad_norm": 0.6041154861450195,
      "learning_rate": 1.88272483528903e-05,
      "loss": 0.7138604164123535,
      "step": 1920
    },
    {
      "epoch": 0.34341637010676157,
      "grad_norm": 0.4607132077217102,
      "learning_rate": 1.8813841219368562e-05,
      "loss": 0.7108243465423584,
      "step": 1930
    },
    {
      "epoch": 0.34519572953736655,
      "grad_norm": 0.4770022928714752,
      "learning_rate": 1.880036271406684e-05,
      "loss": 0.700477123260498,
      "step": 1940
    },
    {
      "epoch": 0.3469750889679715,
      "grad_norm": 0.5347304940223694,
      "learning_rate": 1.8786812946129934e-05,
      "loss": 0.7176999092102051,
      "step": 1950
    },
    {
      "epoch": 0.3487544483985765,
      "grad_norm": 0.4638780355453491,
      "learning_rate": 1.8773192025279712e-05,
      "loss": 0.6859623908996582,
      "step": 1960
    },
    {
      "epoch": 0.3505338078291815,
      "grad_norm": 0.4621883034706116,
      "learning_rate": 1.87595000618142e-05,
      "loss": 0.6809545516967773,
      "step": 1970
    },
    {
      "epoch": 0.35231316725978645,
      "grad_norm": 0.4492979943752289,
      "learning_rate": 1.8745737166606716e-05,
      "loss": 0.7484791278839111,
      "step": 1980
    },
    {
      "epoch": 0.3540925266903915,
      "grad_norm": 0.5058820843696594,
      "learning_rate": 1.873190345110496e-05,
      "loss": 0.6860589027404785,
      "step": 1990
    },
    {
      "epoch": 0.35587188612099646,
      "grad_norm": 0.467847615480423,
      "learning_rate": 1.8717999027330114e-05,
      "loss": 0.6946381092071533,
      "step": 2000
    },
    {
      "epoch": 0.35587188612099646,
      "eval_loss": 0.6786053776741028,
      "eval_runtime": 408.2053,
      "eval_samples_per_second": 12.32,
      "eval_steps_per_second": 6.161,
      "step": 2000
    },
    {
      "epoch": 0.35765124555160144,
      "grad_norm": 0.4924924373626709,
      "learning_rate": 1.870402400787593e-05,
      "loss": 0.7328177452087402,
      "step": 2010
    },
    {
      "epoch": 0.3594306049822064,
      "grad_norm": 0.529029369354248,
      "learning_rate": 1.8689978505907828e-05,
      "loss": 0.6788232803344727,
      "step": 2020
    },
    {
      "epoch": 0.3612099644128114,
      "grad_norm": 0.5123940110206604,
      "learning_rate": 1.8675862635161968e-05,
      "loss": 0.6493151664733887,
      "step": 2030
    },
    {
      "epoch": 0.36298932384341637,
      "grad_norm": 0.4411087930202484,
      "learning_rate": 1.866167650994434e-05,
      "loss": 0.6652609825134277,
      "step": 2040
    },
    {
      "epoch": 0.36476868327402134,
      "grad_norm": 0.5237643122673035,
      "learning_rate": 1.8647420245129822e-05,
      "loss": 0.6707428932189942,
      "step": 2050
    },
    {
      "epoch": 0.3665480427046263,
      "grad_norm": 0.5926516056060791,
      "learning_rate": 1.863309395616128e-05,
      "loss": 0.7144505023956299,
      "step": 2060
    },
    {
      "epoch": 0.3683274021352313,
      "grad_norm": 0.5331065058708191,
      "learning_rate": 1.86186977590486e-05,
      "loss": 0.6472876071929932,
      "step": 2070
    },
    {
      "epoch": 0.3701067615658363,
      "grad_norm": 0.6008846163749695,
      "learning_rate": 1.860423177036776e-05,
      "loss": 0.6628296852111817,
      "step": 2080
    },
    {
      "epoch": 0.3718861209964413,
      "grad_norm": 0.49397945404052734,
      "learning_rate": 1.85896961072599e-05,
      "loss": 0.7032846927642822,
      "step": 2090
    },
    {
      "epoch": 0.3736654804270463,
      "grad_norm": 0.5322824716567993,
      "learning_rate": 1.8575090887430354e-05,
      "loss": 0.6903145790100098,
      "step": 2100
    },
    {
      "epoch": 0.37544483985765126,
      "grad_norm": 0.45118311047554016,
      "learning_rate": 1.8560416229147718e-05,
      "loss": 0.7186954975128174,
      "step": 2110
    },
    {
      "epoch": 0.37722419928825623,
      "grad_norm": 0.47973230481147766,
      "learning_rate": 1.8545672251242855e-05,
      "loss": 0.677478837966919,
      "step": 2120
    },
    {
      "epoch": 0.3790035587188612,
      "grad_norm": 0.5959491729736328,
      "learning_rate": 1.8530859073107973e-05,
      "loss": 0.7285795211791992,
      "step": 2130
    },
    {
      "epoch": 0.3807829181494662,
      "grad_norm": 0.5627938508987427,
      "learning_rate": 1.851597681469565e-05,
      "loss": 0.6971286773681641,
      "step": 2140
    },
    {
      "epoch": 0.38256227758007116,
      "grad_norm": 0.5175175666809082,
      "learning_rate": 1.850102559651784e-05,
      "loss": 0.7243900775909424,
      "step": 2150
    },
    {
      "epoch": 0.38434163701067614,
      "grad_norm": 0.5537509918212891,
      "learning_rate": 1.848600553964491e-05,
      "loss": 0.6653794765472412,
      "step": 2160
    },
    {
      "epoch": 0.3861209964412811,
      "grad_norm": 0.6098501086235046,
      "learning_rate": 1.847091676570468e-05,
      "loss": 0.6803691864013672,
      "step": 2170
    },
    {
      "epoch": 0.3879003558718861,
      "grad_norm": 0.5026776790618896,
      "learning_rate": 1.8455759396881402e-05,
      "loss": 0.7138524055480957,
      "step": 2180
    },
    {
      "epoch": 0.3896797153024911,
      "grad_norm": 0.5244454741477966,
      "learning_rate": 1.8440533555914795e-05,
      "loss": 0.7204444885253907,
      "step": 2190
    },
    {
      "epoch": 0.3914590747330961,
      "grad_norm": 0.5315225124359131,
      "learning_rate": 1.842523936609905e-05,
      "loss": 0.7181321144104004,
      "step": 2200
    },
    {
      "epoch": 0.3932384341637011,
      "grad_norm": 0.5039063096046448,
      "learning_rate": 1.8409876951281814e-05,
      "loss": 0.7282841205596924,
      "step": 2210
    },
    {
      "epoch": 0.39501779359430605,
      "grad_norm": 0.47619393467903137,
      "learning_rate": 1.8394446435863206e-05,
      "loss": 0.7016836643218994,
      "step": 2220
    },
    {
      "epoch": 0.39679715302491103,
      "grad_norm": 0.5733043551445007,
      "learning_rate": 1.8378947944794806e-05,
      "loss": 0.6978562355041504,
      "step": 2230
    },
    {
      "epoch": 0.398576512455516,
      "grad_norm": 0.5004534125328064,
      "learning_rate": 1.8363381603578628e-05,
      "loss": 0.6713067531585694,
      "step": 2240
    },
    {
      "epoch": 0.400355871886121,
      "grad_norm": 0.6110662817955017,
      "learning_rate": 1.8347747538266133e-05,
      "loss": 0.6862231254577636,
      "step": 2250
    },
    {
      "epoch": 0.40213523131672596,
      "grad_norm": 0.5308374166488647,
      "learning_rate": 1.8332045875457174e-05,
      "loss": 0.6602601528167724,
      "step": 2260
    },
    {
      "epoch": 0.40391459074733094,
      "grad_norm": 0.6257572174072266,
      "learning_rate": 1.8316276742299e-05,
      "loss": 0.6580804347991943,
      "step": 2270
    },
    {
      "epoch": 0.40569395017793597,
      "grad_norm": 0.6403529047966003,
      "learning_rate": 1.830044026648521e-05,
      "loss": 0.6985883712768555,
      "step": 2280
    },
    {
      "epoch": 0.40747330960854095,
      "grad_norm": 0.5609093308448792,
      "learning_rate": 1.828453657625472e-05,
      "loss": 0.7306273937225342,
      "step": 2290
    },
    {
      "epoch": 0.4092526690391459,
      "grad_norm": 0.5089350342750549,
      "learning_rate": 1.8268565800390733e-05,
      "loss": 0.6552363395690918,
      "step": 2300
    },
    {
      "epoch": 0.4110320284697509,
      "grad_norm": 0.5988994240760803,
      "learning_rate": 1.8252528068219683e-05,
      "loss": 0.6836632251739502,
      "step": 2310
    },
    {
      "epoch": 0.4128113879003559,
      "grad_norm": 0.558757483959198,
      "learning_rate": 1.8236423509610207e-05,
      "loss": 0.6945361137390137,
      "step": 2320
    },
    {
      "epoch": 0.41459074733096085,
      "grad_norm": 0.5903414487838745,
      "learning_rate": 1.8220252254972077e-05,
      "loss": 0.6987195014953613,
      "step": 2330
    },
    {
      "epoch": 0.41637010676156583,
      "grad_norm": 0.5187668800354004,
      "learning_rate": 1.8204014435255136e-05,
      "loss": 0.7381620407104492,
      "step": 2340
    },
    {
      "epoch": 0.4181494661921708,
      "grad_norm": 0.5883176922798157,
      "learning_rate": 1.8187710181948274e-05,
      "loss": 0.6803225994110107,
      "step": 2350
    },
    {
      "epoch": 0.4199288256227758,
      "grad_norm": 0.5465121269226074,
      "learning_rate": 1.817133962707833e-05,
      "loss": 0.6955878257751464,
      "step": 2360
    },
    {
      "epoch": 0.42170818505338076,
      "grad_norm": 0.5035154223442078,
      "learning_rate": 1.815490290320902e-05,
      "loss": 0.6446046829223633,
      "step": 2370
    },
    {
      "epoch": 0.4234875444839858,
      "grad_norm": 0.561638593673706,
      "learning_rate": 1.8138400143439892e-05,
      "loss": 0.6994437217712403,
      "step": 2380
    },
    {
      "epoch": 0.42526690391459077,
      "grad_norm": 0.547978937625885,
      "learning_rate": 1.812183148140523e-05,
      "loss": 0.6748724937438965,
      "step": 2390
    },
    {
      "epoch": 0.42704626334519574,
      "grad_norm": 0.7230744957923889,
      "learning_rate": 1.8105197051272974e-05,
      "loss": 0.6960064888000488,
      "step": 2400
    },
    {
      "epoch": 0.4288256227758007,
      "grad_norm": 0.6158634424209595,
      "learning_rate": 1.8088496987743623e-05,
      "loss": 0.6599089622497558,
      "step": 2410
    },
    {
      "epoch": 0.4306049822064057,
      "grad_norm": 0.5255537629127502,
      "learning_rate": 1.807173142604917e-05,
      "loss": 0.7354787349700928,
      "step": 2420
    },
    {
      "epoch": 0.43238434163701067,
      "grad_norm": 0.5891067385673523,
      "learning_rate": 1.8054900501951988e-05,
      "loss": 0.6725128650665283,
      "step": 2430
    },
    {
      "epoch": 0.43416370106761565,
      "grad_norm": 0.6183168888092041,
      "learning_rate": 1.8038004351743726e-05,
      "loss": 0.6672306537628174,
      "step": 2440
    },
    {
      "epoch": 0.4359430604982206,
      "grad_norm": 0.5241208672523499,
      "learning_rate": 1.8021043112244222e-05,
      "loss": 0.6435032367706299,
      "step": 2450
    },
    {
      "epoch": 0.4377224199288256,
      "grad_norm": 0.47406890988349915,
      "learning_rate": 1.8004016920800392e-05,
      "loss": 0.7117865085601807,
      "step": 2460
    },
    {
      "epoch": 0.4395017793594306,
      "grad_norm": 0.45818889141082764,
      "learning_rate": 1.7986925915285098e-05,
      "loss": 0.7240960597991943,
      "step": 2470
    },
    {
      "epoch": 0.4412811387900356,
      "grad_norm": 0.5167953372001648,
      "learning_rate": 1.796977023409606e-05,
      "loss": 0.6828316211700439,
      "step": 2480
    },
    {
      "epoch": 0.4430604982206406,
      "grad_norm": 0.48911231756210327,
      "learning_rate": 1.795255001615472e-05,
      "loss": 0.6907057285308837,
      "step": 2490
    },
    {
      "epoch": 0.44483985765124556,
      "grad_norm": 0.4384164810180664,
      "learning_rate": 1.7935265400905107e-05,
      "loss": 0.6647283554077148,
      "step": 2500
    },
    {
      "epoch": 0.44483985765124556,
      "eval_loss": 0.667682409286499,
      "eval_runtime": 408.4893,
      "eval_samples_per_second": 12.311,
      "eval_steps_per_second": 6.157,
      "step": 2500
    },
    {
      "epoch": 0.44661921708185054,
      "grad_norm": 0.5545853972434998,
      "learning_rate": 1.7917916528312735e-05,
      "loss": 0.6821264743804931,
      "step": 2510
    },
    {
      "epoch": 0.4483985765124555,
      "grad_norm": 0.5834780335426331,
      "learning_rate": 1.7900503538863446e-05,
      "loss": 0.6818623542785645,
      "step": 2520
    },
    {
      "epoch": 0.4501779359430605,
      "grad_norm": 0.6038497090339661,
      "learning_rate": 1.7883026573562278e-05,
      "loss": 0.6842663764953614,
      "step": 2530
    },
    {
      "epoch": 0.45195729537366547,
      "grad_norm": 0.5673312544822693,
      "learning_rate": 1.7865485773932336e-05,
      "loss": 0.6788026809692382,
      "step": 2540
    },
    {
      "epoch": 0.45373665480427045,
      "grad_norm": 0.6115579605102539,
      "learning_rate": 1.7847881282013623e-05,
      "loss": 0.724776029586792,
      "step": 2550
    },
    {
      "epoch": 0.4555160142348754,
      "grad_norm": 0.6251904964447021,
      "learning_rate": 1.7830213240361916e-05,
      "loss": 0.6907171249389649,
      "step": 2560
    },
    {
      "epoch": 0.45729537366548045,
      "grad_norm": 0.5565618872642517,
      "learning_rate": 1.7812481792047587e-05,
      "loss": 0.6535940647125245,
      "step": 2570
    },
    {
      "epoch": 0.45907473309608543,
      "grad_norm": 0.5508270263671875,
      "learning_rate": 1.7794687080654462e-05,
      "loss": 0.6779204845428467,
      "step": 2580
    },
    {
      "epoch": 0.4608540925266904,
      "grad_norm": 0.6121755242347717,
      "learning_rate": 1.777682925027865e-05,
      "loss": 0.6833740234375,
      "step": 2590
    },
    {
      "epoch": 0.4626334519572954,
      "grad_norm": 0.5710318684577942,
      "learning_rate": 1.7758908445527376e-05,
      "loss": 0.6820470333099365,
      "step": 2600
    },
    {
      "epoch": 0.46441281138790036,
      "grad_norm": 0.624721884727478,
      "learning_rate": 1.774092481151782e-05,
      "loss": 0.7131676197052002,
      "step": 2610
    },
    {
      "epoch": 0.46619217081850534,
      "grad_norm": 0.5539907813072205,
      "learning_rate": 1.7722878493875922e-05,
      "loss": 0.6834908962249756,
      "step": 2620
    },
    {
      "epoch": 0.4679715302491103,
      "grad_norm": 0.7775362730026245,
      "learning_rate": 1.7704769638735225e-05,
      "loss": 0.7060842990875245,
      "step": 2630
    },
    {
      "epoch": 0.4697508896797153,
      "grad_norm": 0.5198112726211548,
      "learning_rate": 1.7686598392735678e-05,
      "loss": 0.6912449836730957,
      "step": 2640
    },
    {
      "epoch": 0.47153024911032027,
      "grad_norm": 0.5231760144233704,
      "learning_rate": 1.766836490302245e-05,
      "loss": 0.7053235054016114,
      "step": 2650
    },
    {
      "epoch": 0.47330960854092524,
      "grad_norm": 0.7802064418792725,
      "learning_rate": 1.7650069317244743e-05,
      "loss": 0.6698042392730713,
      "step": 2660
    },
    {
      "epoch": 0.4750889679715303,
      "grad_norm": 0.5244280099868774,
      "learning_rate": 1.763171178355459e-05,
      "loss": 0.6572854042053222,
      "step": 2670
    },
    {
      "epoch": 0.47686832740213525,
      "grad_norm": 0.6323163509368896,
      "learning_rate": 1.761329245060567e-05,
      "loss": 0.6909306526184082,
      "step": 2680
    },
    {
      "epoch": 0.4786476868327402,
      "grad_norm": 0.636985182762146,
      "learning_rate": 1.7594811467552076e-05,
      "loss": 0.7432591915130615,
      "step": 2690
    },
    {
      "epoch": 0.4804270462633452,
      "grad_norm": 0.5935755372047424,
      "learning_rate": 1.7576268984047146e-05,
      "loss": 0.6589434146881104,
      "step": 2700
    },
    {
      "epoch": 0.4822064056939502,
      "grad_norm": 0.5581539869308472,
      "learning_rate": 1.7557665150242205e-05,
      "loss": 0.6235795974731445,
      "step": 2710
    },
    {
      "epoch": 0.48398576512455516,
      "grad_norm": 0.5456061363220215,
      "learning_rate": 1.7539000116785402e-05,
      "loss": 0.6965986251831054,
      "step": 2720
    },
    {
      "epoch": 0.48576512455516013,
      "grad_norm": 0.6723548769950867,
      "learning_rate": 1.752027403482043e-05,
      "loss": 0.6792643070220947,
      "step": 2730
    },
    {
      "epoch": 0.4875444839857651,
      "grad_norm": 0.5704858899116516,
      "learning_rate": 1.7501487055985364e-05,
      "loss": 0.6697192192077637,
      "step": 2740
    },
    {
      "epoch": 0.4893238434163701,
      "grad_norm": 0.559579074382782,
      "learning_rate": 1.7482639332411386e-05,
      "loss": 0.6839393615722656,
      "step": 2750
    },
    {
      "epoch": 0.49110320284697506,
      "grad_norm": 0.6328978538513184,
      "learning_rate": 1.7463731016721574e-05,
      "loss": 0.6757444858551025,
      "step": 2760
    },
    {
      "epoch": 0.4928825622775801,
      "grad_norm": 0.5843029022216797,
      "learning_rate": 1.744476226202966e-05,
      "loss": 0.6616747379302979,
      "step": 2770
    },
    {
      "epoch": 0.49466192170818507,
      "grad_norm": 0.593262791633606,
      "learning_rate": 1.7425733221938802e-05,
      "loss": 0.6773699283599853,
      "step": 2780
    },
    {
      "epoch": 0.49644128113879005,
      "grad_norm": 0.4597082734107971,
      "learning_rate": 1.740664405054032e-05,
      "loss": 0.6922731876373291,
      "step": 2790
    },
    {
      "epoch": 0.498220640569395,
      "grad_norm": 0.5115553140640259,
      "learning_rate": 1.7387494902412462e-05,
      "loss": 0.6650360107421875,
      "step": 2800
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5256341695785522,
      "learning_rate": 1.7368285932619152e-05,
      "loss": 0.6744340896606446,
      "step": 2810
    },
    {
      "epoch": 0.501779359430605,
      "grad_norm": 0.5476647019386292,
      "learning_rate": 1.734901729670873e-05,
      "loss": 0.6778338432312012,
      "step": 2820
    },
    {
      "epoch": 0.50355871886121,
      "grad_norm": 0.5267303586006165,
      "learning_rate": 1.7329689150712692e-05,
      "loss": 0.6595091342926025,
      "step": 2830
    },
    {
      "epoch": 0.505338078291815,
      "grad_norm": 0.49511632323265076,
      "learning_rate": 1.7310301651144427e-05,
      "loss": 0.6996967315673828,
      "step": 2840
    },
    {
      "epoch": 0.5071174377224199,
      "grad_norm": 0.5894142985343933,
      "learning_rate": 1.729085495499796e-05,
      "loss": 0.7025831699371338,
      "step": 2850
    },
    {
      "epoch": 0.5088967971530249,
      "grad_norm": 0.5248763561248779,
      "learning_rate": 1.727134921974666e-05,
      "loss": 0.6990334510803222,
      "step": 2860
    },
    {
      "epoch": 0.5106761565836299,
      "grad_norm": 0.6369486451148987,
      "learning_rate": 1.7251784603341984e-05,
      "loss": 0.6775143146514893,
      "step": 2870
    },
    {
      "epoch": 0.5124555160142349,
      "grad_norm": 0.7855329513549805,
      "learning_rate": 1.723216126421219e-05,
      "loss": 0.6654418468475342,
      "step": 2880
    },
    {
      "epoch": 0.5142348754448398,
      "grad_norm": 0.5383808016777039,
      "learning_rate": 1.7212479361261047e-05,
      "loss": 0.7224104404449463,
      "step": 2890
    },
    {
      "epoch": 0.5160142348754448,
      "grad_norm": 0.5875473618507385,
      "learning_rate": 1.7192739053866568e-05,
      "loss": 0.6993866920471191,
      "step": 2900
    },
    {
      "epoch": 0.5177935943060499,
      "grad_norm": 0.5973880290985107,
      "learning_rate": 1.7172940501879702e-05,
      "loss": 0.6883405685424805,
      "step": 2910
    },
    {
      "epoch": 0.5195729537366548,
      "grad_norm": 0.7815128564834595,
      "learning_rate": 1.715308386562304e-05,
      "loss": 0.6863636493682861,
      "step": 2920
    },
    {
      "epoch": 0.5213523131672598,
      "grad_norm": 0.5661780834197998,
      "learning_rate": 1.7133169305889526e-05,
      "loss": 0.6976628303527832,
      "step": 2930
    },
    {
      "epoch": 0.5231316725978647,
      "grad_norm": 0.6116606593132019,
      "learning_rate": 1.7113196983941152e-05,
      "loss": 0.7353427410125732,
      "step": 2940
    },
    {
      "epoch": 0.5249110320284698,
      "grad_norm": 0.5908172726631165,
      "learning_rate": 1.709316706150765e-05,
      "loss": 0.7118365287780761,
      "step": 2950
    },
    {
      "epoch": 0.5266903914590747,
      "grad_norm": 0.5671530365943909,
      "learning_rate": 1.707307970078518e-05,
      "loss": 0.6777832508087158,
      "step": 2960
    },
    {
      "epoch": 0.5284697508896797,
      "grad_norm": 0.635502815246582,
      "learning_rate": 1.7052935064435023e-05,
      "loss": 0.6588366985321045,
      "step": 2970
    },
    {
      "epoch": 0.5302491103202847,
      "grad_norm": 0.674394428730011,
      "learning_rate": 1.7032733315582254e-05,
      "loss": 0.6987817764282227,
      "step": 2980
    },
    {
      "epoch": 0.5320284697508897,
      "grad_norm": 0.6007933020591736,
      "learning_rate": 1.7012474617814433e-05,
      "loss": 0.6640087604522705,
      "step": 2990
    },
    {
      "epoch": 0.5338078291814946,
      "grad_norm": 0.5124489665031433,
      "learning_rate": 1.6992159135180283e-05,
      "loss": 0.642765474319458,
      "step": 3000
    },
    {
      "epoch": 0.5338078291814946,
      "eval_loss": 0.6583885550498962,
      "eval_runtime": 409.5482,
      "eval_samples_per_second": 12.279,
      "eval_steps_per_second": 6.141,
      "step": 3000
    },
    {
      "epoch": 0.5355871886120996,
      "grad_norm": 0.6412521004676819,
      "learning_rate": 1.6971787032188336e-05,
      "loss": 0.6574789047241211,
      "step": 3010
    },
    {
      "epoch": 0.5373665480427047,
      "grad_norm": 0.5690603852272034,
      "learning_rate": 1.6951358473805633e-05,
      "loss": 0.7070611476898193,
      "step": 3020
    },
    {
      "epoch": 0.5391459074733096,
      "grad_norm": 0.5345459580421448,
      "learning_rate": 1.6930873625456362e-05,
      "loss": 0.6888082027435303,
      "step": 3030
    },
    {
      "epoch": 0.5409252669039146,
      "grad_norm": 0.67616868019104,
      "learning_rate": 1.6910332653020536e-05,
      "loss": 0.6946770668029785,
      "step": 3040
    },
    {
      "epoch": 0.5427046263345195,
      "grad_norm": 0.6721035838127136,
      "learning_rate": 1.6889735722832643e-05,
      "loss": 0.6662865161895752,
      "step": 3050
    },
    {
      "epoch": 0.5444839857651246,
      "grad_norm": 0.6241431832313538,
      "learning_rate": 1.6869083001680304e-05,
      "loss": 0.6311937808990479,
      "step": 3060
    },
    {
      "epoch": 0.5462633451957295,
      "grad_norm": 0.6454595923423767,
      "learning_rate": 1.6848374656802912e-05,
      "loss": 0.6600630283355713,
      "step": 3070
    },
    {
      "epoch": 0.5480427046263345,
      "grad_norm": 0.5625023245811462,
      "learning_rate": 1.6827610855890278e-05,
      "loss": 0.7191053867340088,
      "step": 3080
    },
    {
      "epoch": 0.5498220640569395,
      "grad_norm": 0.6803082823753357,
      "learning_rate": 1.6806791767081296e-05,
      "loss": 0.6553170680999756,
      "step": 3090
    },
    {
      "epoch": 0.5516014234875445,
      "grad_norm": 0.6465727686882019,
      "learning_rate": 1.6785917558962552e-05,
      "loss": 0.6733019351959229,
      "step": 3100
    },
    {
      "epoch": 0.5533807829181495,
      "grad_norm": 0.5595579743385315,
      "learning_rate": 1.6764988400566973e-05,
      "loss": 0.6531811714172363,
      "step": 3110
    },
    {
      "epoch": 0.5551601423487544,
      "grad_norm": 0.6368974447250366,
      "learning_rate": 1.6744004461372455e-05,
      "loss": 0.6765477657318115,
      "step": 3120
    },
    {
      "epoch": 0.5569395017793595,
      "grad_norm": 0.5315082669258118,
      "learning_rate": 1.67229659113005e-05,
      "loss": 0.6876490116119385,
      "step": 3130
    },
    {
      "epoch": 0.5587188612099644,
      "grad_norm": 0.6011325716972351,
      "learning_rate": 1.6701872920714822e-05,
      "loss": 0.671757698059082,
      "step": 3140
    },
    {
      "epoch": 0.5604982206405694,
      "grad_norm": 0.5254577994346619,
      "learning_rate": 1.6680725660419987e-05,
      "loss": 0.6709373950958252,
      "step": 3150
    },
    {
      "epoch": 0.5622775800711743,
      "grad_norm": 0.5257652401924133,
      "learning_rate": 1.6659524301660014e-05,
      "loss": 0.6314177513122559,
      "step": 3160
    },
    {
      "epoch": 0.5640569395017794,
      "grad_norm": 0.577984094619751,
      "learning_rate": 1.6638269016116995e-05,
      "loss": 0.6948809623718262,
      "step": 3170
    },
    {
      "epoch": 0.5658362989323843,
      "grad_norm": 0.5697906613349915,
      "learning_rate": 1.6616959975909706e-05,
      "loss": 0.6561762809753418,
      "step": 3180
    },
    {
      "epoch": 0.5676156583629893,
      "grad_norm": 0.5052287578582764,
      "learning_rate": 1.6595597353592216e-05,
      "loss": 0.6577863693237305,
      "step": 3190
    },
    {
      "epoch": 0.5693950177935944,
      "grad_norm": 0.5690486431121826,
      "learning_rate": 1.6574181322152477e-05,
      "loss": 0.6753673553466797,
      "step": 3200
    },
    {
      "epoch": 0.5711743772241993,
      "grad_norm": 0.6763755679130554,
      "learning_rate": 1.6552712055010935e-05,
      "loss": 0.6881022453308105,
      "step": 3210
    },
    {
      "epoch": 0.5729537366548043,
      "grad_norm": 0.7030187249183655,
      "learning_rate": 1.6531189726019127e-05,
      "loss": 0.6971624851226806,
      "step": 3220
    },
    {
      "epoch": 0.5747330960854092,
      "grad_norm": 0.5229400992393494,
      "learning_rate": 1.6509614509458263e-05,
      "loss": 0.6652966976165772,
      "step": 3230
    },
    {
      "epoch": 0.5765124555160143,
      "grad_norm": 0.5338436961174011,
      "learning_rate": 1.6487986580037822e-05,
      "loss": 0.6946625709533691,
      "step": 3240
    },
    {
      "epoch": 0.5782918149466192,
      "grad_norm": 0.5740528702735901,
      "learning_rate": 1.646630611289414e-05,
      "loss": 0.6570149421691894,
      "step": 3250
    },
    {
      "epoch": 0.5800711743772242,
      "grad_norm": 0.5782309174537659,
      "learning_rate": 1.6444573283588977e-05,
      "loss": 0.657336950302124,
      "step": 3260
    },
    {
      "epoch": 0.5818505338078291,
      "grad_norm": 0.5372493863105774,
      "learning_rate": 1.6422788268108112e-05,
      "loss": 0.7015529632568359,
      "step": 3270
    },
    {
      "epoch": 0.5836298932384342,
      "grad_norm": 0.734725832939148,
      "learning_rate": 1.6400951242859915e-05,
      "loss": 0.7008133888244629,
      "step": 3280
    },
    {
      "epoch": 0.5854092526690391,
      "grad_norm": 0.6092361211776733,
      "learning_rate": 1.6379062384673914e-05,
      "loss": 0.6785021305084229,
      "step": 3290
    },
    {
      "epoch": 0.5871886120996441,
      "grad_norm": 0.5179940462112427,
      "learning_rate": 1.6357121870799357e-05,
      "loss": 0.6846163749694825,
      "step": 3300
    },
    {
      "epoch": 0.5889679715302492,
      "grad_norm": 0.6822018027305603,
      "learning_rate": 1.633512987890379e-05,
      "loss": 0.6979034423828125,
      "step": 3310
    },
    {
      "epoch": 0.5907473309608541,
      "grad_norm": 0.6708216667175293,
      "learning_rate": 1.631308658707161e-05,
      "loss": 0.616365623474121,
      "step": 3320
    },
    {
      "epoch": 0.5925266903914591,
      "grad_norm": 0.5766979455947876,
      "learning_rate": 1.6290992173802628e-05,
      "loss": 0.6930478096008301,
      "step": 3330
    },
    {
      "epoch": 0.594306049822064,
      "grad_norm": 0.5902812480926514,
| "learning_rate": 1.6268846818010615e-05, |
| "loss": 0.6487626552581787, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.5960854092526691, |
| "grad_norm": 0.45683762431144714, |
| "learning_rate": 1.6246650699021866e-05, |
| "loss": 0.670246696472168, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.597864768683274, |
| "grad_norm": 0.4878489077091217, |
| "learning_rate": 1.6224403996573743e-05, |
| "loss": 0.6784106254577636, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.599644128113879, |
| "grad_norm": 0.6697226166725159, |
| "learning_rate": 1.62021068908132e-05, |
| "loss": 0.7080551147460937, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.6014234875444839, |
| "grad_norm": 0.6667253971099854, |
| "learning_rate": 1.6179759562295356e-05, |
| "loss": 0.6681561470031738, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.603202846975089, |
| "grad_norm": 0.560409426689148, |
| "learning_rate": 1.6157362191982025e-05, |
| "loss": 0.7013211727142334, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.604982206405694, |
| "grad_norm": 0.6660729050636292, |
| "learning_rate": 1.6134914961240224e-05, |
| "loss": 0.6561143398284912, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.6067615658362989, |
| "grad_norm": 0.5322085618972778, |
| "learning_rate": 1.6112418051840745e-05, |
| "loss": 0.6231056213378906, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.608540925266904, |
| "grad_norm": 0.6110619902610779, |
| "learning_rate": 1.6089871645956644e-05, |
| "loss": 0.696910810470581, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.6103202846975089, |
| "grad_norm": 0.7193135619163513, |
| "learning_rate": 1.6067275926161792e-05, |
| "loss": 0.6709835052490234, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.6120996441281139, |
| "grad_norm": 0.5821454524993896, |
| "learning_rate": 1.6044631075429406e-05, |
| "loss": 0.6750143527984619, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.6138790035587188, |
| "grad_norm": 0.6266823410987854, |
| "learning_rate": 1.6021937277130516e-05, |
| "loss": 0.6660190105438233, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.6156583629893239, |
| "grad_norm": 0.6361811757087708, |
| "learning_rate": 1.5999194715032543e-05, |
| "loss": 0.6241551399230957, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.6174377224199288, |
| "grad_norm": 0.5690405964851379, |
| "learning_rate": 1.5976403573297767e-05, |
| "loss": 0.6768513679504394, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.6192170818505338, |
| "grad_norm": 0.6438109278678894, |
| "learning_rate": 1.595356403648186e-05, |
| "loss": 0.6852968215942383, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.6209964412811388, |
| "grad_norm": 0.5949437022209167, |
| "learning_rate": 1.5930676289532373e-05, |
| "loss": 0.6737981319427491, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.6227758007117438, |
| "grad_norm": 0.6436291933059692, |
| "learning_rate": 1.590774051778726e-05, |
| "loss": 0.6780194282531739, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6227758007117438, |
| "eval_loss": 0.6505803465843201, |
| "eval_runtime": 412.7337, |
| "eval_samples_per_second": 12.185, |
| "eval_steps_per_second": 6.094, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6245551601423488, |
| "grad_norm": 0.5366589426994324, |
| "learning_rate": 1.588475690697335e-05, |
| "loss": 0.6628384590148926, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.6263345195729537, |
| "grad_norm": 0.5512357950210571, |
| "learning_rate": 1.5861725643204876e-05, |
| "loss": 0.6656132221221924, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.6281138790035588, |
| "grad_norm": 0.5239390134811401, |
| "learning_rate": 1.5838646912981937e-05, |
| "loss": 0.6585372447967529, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.6298932384341637, |
| "grad_norm": 0.611584484577179, |
| "learning_rate": 1.5815520903188998e-05, |
| "loss": 0.6700050354003906, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.6316725978647687, |
| "grad_norm": 0.5967355966567993, |
| "learning_rate": 1.5792347801093393e-05, |
| "loss": 0.6763735294342041, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.6334519572953736, |
| "grad_norm": 0.5486634373664856, |
| "learning_rate": 1.576912779434379e-05, |
| "loss": 0.6555093765258789, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.6352313167259787, |
| "grad_norm": 0.6155397891998291, |
| "learning_rate": 1.5745861070968667e-05, |
| "loss": 0.660044813156128, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.6370106761565836, |
| "grad_norm": 0.6666173338890076, |
| "learning_rate": 1.5722547819374807e-05, |
| "loss": 0.6919036388397217, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.6387900355871886, |
| "grad_norm": 0.628685474395752, |
| "learning_rate": 1.5699188228345765e-05, |
| "loss": 0.6886429786682129, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.6405693950177936, |
| "grad_norm": 0.5950825810432434, |
| "learning_rate": 1.5675782487040337e-05, |
| "loss": 0.6522699356079101, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6423487544483986, |
| "grad_norm": 0.6900691390037537, |
| "learning_rate": 1.565233078499103e-05, |
| "loss": 0.6545799255371094, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.6441281138790036, |
| "grad_norm": 0.6600697040557861, |
| "learning_rate": 1.5628833312102526e-05, |
| "loss": 0.690045976638794, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.6459074733096085, |
| "grad_norm": 0.7045279145240784, |
| "learning_rate": 1.5605290258650144e-05, |
| "loss": 0.6638887405395508, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.6476868327402135, |
| "grad_norm": 0.6415700316429138, |
| "learning_rate": 1.5581701815278302e-05, |
| "loss": 0.6451498508453369, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.6494661921708185, |
| "grad_norm": 0.6846213340759277, |
| "learning_rate": 1.5558068172998977e-05, |
| "loss": 0.6113666534423828, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.6512455516014235, |
| "grad_norm": 0.584511935710907, |
| "learning_rate": 1.5534389523190142e-05, |
| "loss": 0.7053666591644288, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.6530249110320284, |
| "grad_norm": 0.7102410793304443, |
| "learning_rate": 1.551066605759424e-05, |
| "loss": 0.6806889533996582, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.6548042704626335, |
| "grad_norm": 0.5693709254264832, |
| "learning_rate": 1.5486897968316604e-05, |
| "loss": 0.6717594623565674, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.6565836298932385, |
| "grad_norm": 0.6451858878135681, |
| "learning_rate": 1.546308544782392e-05, |
| "loss": 0.69324951171875, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.6583629893238434, |
| "grad_norm": 0.5983235836029053, |
| "learning_rate": 1.543922868894268e-05, |
| "loss": 0.6728285312652588, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.6601423487544484, |
| "grad_norm": 0.5725272297859192, |
| "learning_rate": 1.541532788485758e-05, |
| "loss": 0.6560508728027343, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.6619217081850534, |
| "grad_norm": 0.6461522579193115, |
| "learning_rate": 1.5391383229110005e-05, |
| "loss": 0.6883309364318848, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.6637010676156584, |
| "grad_norm": 0.6512270569801331, |
| "learning_rate": 1.5367394915596414e-05, |
| "loss": 0.6622300624847413, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.6654804270462633, |
| "grad_norm": 0.6106224656105042, |
| "learning_rate": 1.534336313856681e-05, |
| "loss": 0.6878883361816406, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.6672597864768683, |
| "grad_norm": 0.6902608275413513, |
| "learning_rate": 1.5319288092623142e-05, |
| "loss": 0.6434782981872559, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.6690391459074733, |
| "grad_norm": 0.5784608721733093, |
| "learning_rate": 1.5295169972717743e-05, |
| "loss": 0.6367124557495117, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.6708185053380783, |
| "grad_norm": 0.6913280487060547, |
| "learning_rate": 1.5271008974151744e-05, |
| "loss": 0.630396556854248, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.6725978647686833, |
| "grad_norm": 0.7483834624290466, |
| "learning_rate": 1.5246805292573487e-05, |
| "loss": 0.6295557975769043, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.6743772241992882, |
| "grad_norm": 0.7149993777275085, |
| "learning_rate": 1.5222559123976962e-05, |
| "loss": 0.6769547939300538, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.6761565836298933, |
| "grad_norm": 0.6534271836280823, |
| "learning_rate": 1.5198270664700187e-05, |
| "loss": 0.7023432731628418, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.6779359430604982, |
| "grad_norm": 0.6507487297058105, |
| "learning_rate": 1.5173940111423657e-05, |
| "loss": 0.6808289527893067, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.6797153024911032, |
| "grad_norm": 0.5977747440338135, |
| "learning_rate": 1.5149567661168715e-05, |
| "loss": 0.6398194313049317, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.6814946619217082, |
| "grad_norm": 0.6036016941070557, |
| "learning_rate": 1.5125153511295989e-05, |
| "loss": 0.6317630767822265, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.6832740213523132, |
| "grad_norm": 0.8481062054634094, |
| "learning_rate": 1.5100697859503762e-05, |
| "loss": 0.6741732597351074, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.6850533807829181, |
| "grad_norm": 0.5988038182258606, |
| "learning_rate": 1.5076200903826391e-05, |
| "loss": 0.6638431072235107, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.6868327402135231, |
| "grad_norm": 0.6785585880279541, |
| "learning_rate": 1.5051662842632709e-05, |
| "loss": 0.6523032665252686, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.6886120996441281, |
| "grad_norm": 0.6749284863471985, |
| "learning_rate": 1.5027083874624392e-05, |
| "loss": 0.6517146110534668, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.6903914590747331, |
| "grad_norm": 0.6880838871002197, |
| "learning_rate": 1.5002464198834383e-05, |
| "loss": 0.6895311355590821, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.6921708185053381, |
| "grad_norm": 0.6296641826629639, |
| "learning_rate": 1.4977804014625257e-05, |
| "loss": 0.691303300857544, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.693950177935943, |
| "grad_norm": 0.62867671251297, |
| "learning_rate": 1.4953103521687612e-05, |
| "loss": 0.6549241065979003, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.6957295373665481, |
| "grad_norm": 0.6810404658317566, |
| "learning_rate": 1.4928362920038455e-05, |
| "loss": 0.6266399383544922, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.697508896797153, |
| "grad_norm": 0.7017929553985596, |
| "learning_rate": 1.4903582410019586e-05, |
| "loss": 0.6477387905120849, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.699288256227758, |
| "grad_norm": 0.5611311197280884, |
| "learning_rate": 1.487876219229596e-05, |
| "loss": 0.6562673568725585, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.701067615658363, |
| "grad_norm": 0.5652351975440979, |
| "learning_rate": 1.4853902467854084e-05, |
| "loss": 0.6768126487731934, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.702846975088968, |
| "grad_norm": 0.7303992509841919, |
| "learning_rate": 1.4829003438000374e-05, |
| "loss": 0.6467350959777832, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.7046263345195729, |
| "grad_norm": 0.5587636232376099, |
| "learning_rate": 1.4804065304359525e-05, |
| "loss": 0.6700319766998291, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.7064056939501779, |
| "grad_norm": 0.7100324630737305, |
| "learning_rate": 1.477908826887289e-05, |
| "loss": 0.6796034812927246, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.708185053380783, |
| "grad_norm": 0.6912830471992493, |
| "learning_rate": 1.4754072533796833e-05, |
| "loss": 0.6907623291015625, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.7099644128113879, |
| "grad_norm": 0.744030773639679, |
| "learning_rate": 1.4729018301701093e-05, |
| "loss": 0.6747735023498536, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.7117437722419929, |
| "grad_norm": 0.6915676593780518, |
| "learning_rate": 1.4703925775467149e-05, |
| "loss": 0.6172840595245361, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7117437722419929, |
| "eval_loss": 0.6436223387718201, |
| "eval_runtime": 412.5315, |
| "eval_samples_per_second": 12.191, |
| "eval_steps_per_second": 6.097, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7135231316725978, |
| "grad_norm": 0.5881261229515076, |
| "learning_rate": 1.4678795158286579e-05, |
| "loss": 0.6607324123382569, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.7153024911032029, |
| "grad_norm": 0.7214511036872864, |
| "learning_rate": 1.4653626653659399e-05, |
| "loss": 0.6573444366455078, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.7170818505338078, |
| "grad_norm": 0.6598398089408875, |
| "learning_rate": 1.4628420465392432e-05, |
| "loss": 0.6589923858642578, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.7188612099644128, |
| "grad_norm": 0.6202049255371094, |
| "learning_rate": 1.4603176797597654e-05, |
| "loss": 0.6494176387786865, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.7206405693950177, |
| "grad_norm": 0.6085755825042725, |
| "learning_rate": 1.4577895854690536e-05, |
| "loss": 0.6625244140625, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.7224199288256228, |
| "grad_norm": 0.799647331237793, |
| "learning_rate": 1.4552577841388388e-05, |
| "loss": 0.6426272392272949, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.7241992882562278, |
| "grad_norm": 0.5650002956390381, |
| "learning_rate": 1.4527222962708714e-05, |
| "loss": 0.6512536525726318, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.7259786476868327, |
| "grad_norm": 0.7105417251586914, |
| "learning_rate": 1.4501831423967531e-05, |
| "loss": 0.639466667175293, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.7277580071174378, |
| "grad_norm": 0.6997768878936768, |
| "learning_rate": 1.4476403430777729e-05, |
| "loss": 0.6274962425231934, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.7295373665480427, |
| "grad_norm": 0.6622691750526428, |
| "learning_rate": 1.4450939189047379e-05, |
| "loss": 0.6450656890869141, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.7313167259786477, |
| "grad_norm": 0.669937789440155, |
| "learning_rate": 1.4425438904978103e-05, |
| "loss": 0.6467487335205078, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.7330960854092526, |
| "grad_norm": 0.6928410530090332, |
| "learning_rate": 1.4399902785063366e-05, |
| "loss": 0.6678302764892579, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.7348754448398577, |
| "grad_norm": 0.7608679533004761, |
| "learning_rate": 1.4374331036086831e-05, |
| "loss": 0.6973666191101074, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.7366548042704626, |
| "grad_norm": 0.7009332180023193, |
| "learning_rate": 1.4348723865120663e-05, |
| "loss": 0.6456516742706299, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.7384341637010676, |
| "grad_norm": 0.7158817648887634, |
| "learning_rate": 1.4323081479523878e-05, |
| "loss": 0.6287565708160401, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.7402135231316725, |
| "grad_norm": 0.5314562916755676, |
| "learning_rate": 1.4297404086940635e-05, |
| "loss": 0.6649733543395996, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.7419928825622776, |
| "grad_norm": 0.6606214642524719, |
| "learning_rate": 1.4271691895298573e-05, |
| "loss": 0.6175446033477783, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.7437722419928826, |
| "grad_norm": 0.618217945098877, |
| "learning_rate": 1.4245945112807133e-05, |
| "loss": 0.6604631423950196, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.7455516014234875, |
| "grad_norm": 0.5399601459503174, |
| "learning_rate": 1.422016394795585e-05, |
| "loss": 0.6682997226715088, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.7473309608540926, |
| "grad_norm": 0.5808553695678711, |
| "learning_rate": 1.419434860951268e-05, |
| "loss": 0.6625009059906006, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.7491103202846975, |
| "grad_norm": 0.670625627040863, |
| "learning_rate": 1.416849930652231e-05, |
| "loss": 0.6778800010681152, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.7508896797153025, |
| "grad_norm": 0.6508112549781799, |
| "learning_rate": 1.4142616248304459e-05, |
| "loss": 0.6265085220336915, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.7526690391459074, |
| "grad_norm": 0.5993587970733643, |
| "learning_rate": 1.4116699644452182e-05, |
| "loss": 0.656840181350708, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.7544483985765125, |
| "grad_norm": 0.6819363236427307, |
| "learning_rate": 1.4090749704830184e-05, |
| "loss": 0.6575029373168946, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.7562277580071174, |
| "grad_norm": 0.6625942587852478, |
| "learning_rate": 1.4064766639573104e-05, |
| "loss": 0.6340457916259765, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.7580071174377224, |
| "grad_norm": 0.8185866475105286, |
| "learning_rate": 1.4038750659083831e-05, |
| "loss": 0.6835246086120605, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.7597864768683275, |
| "grad_norm": 0.8232684135437012, |
| "learning_rate": 1.4012701974031782e-05, |
| "loss": 0.6450761795043946, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.7615658362989324, |
| "grad_norm": 0.6913644671440125, |
| "learning_rate": 1.3986620795351214e-05, |
| "loss": 0.6313485145568848, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.7633451957295374, |
| "grad_norm": 0.641167163848877, |
| "learning_rate": 1.3960507334239501e-05, |
| "loss": 0.6450904846191406, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.7651245551601423, |
| "grad_norm": 0.5932332873344421, |
| "learning_rate": 1.3934361802155436e-05, |
| "loss": 0.6665386199951172, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.7669039145907474, |
| "grad_norm": 0.5669364929199219, |
| "learning_rate": 1.3908184410817511e-05, |
| "loss": 0.6564301013946533, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.7686832740213523, |
| "grad_norm": 0.6267620325088501, |
| "learning_rate": 1.3881975372202201e-05, |
| "loss": 0.6448751449584961, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.7704626334519573, |
| "grad_norm": 0.8893764615058899, |
| "learning_rate": 1.3855734898542252e-05, |
| "loss": 0.6395359516143799, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.7722419928825622, |
| "grad_norm": 0.6805179119110107, |
| "learning_rate": 1.3829463202324967e-05, |
| "loss": 0.6458981990814209, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.7740213523131673, |
| "grad_norm": 0.6416231393814087, |
| "learning_rate": 1.3803160496290472e-05, |
| "loss": 0.6462121963500976, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.7758007117437722, |
| "grad_norm": 0.7718709707260132, |
| "learning_rate": 1.3776826993430006e-05, |
| "loss": 0.6271074295043946, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.7775800711743772, |
| "grad_norm": 0.6111568212509155, |
| "learning_rate": 1.375046290698419e-05, |
| "loss": 0.6282791137695313, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.7793594306049823, |
| "grad_norm": 0.7178627252578735, |
| "learning_rate": 1.3724068450441303e-05, |
| "loss": 0.6567965507507324, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.7811387900355872, |
| "grad_norm": 0.6303468346595764, |
| "learning_rate": 1.3697643837535546e-05, |
| "loss": 0.6168845653533935, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.7829181494661922, |
| "grad_norm": 0.6654033660888672, |
| "learning_rate": 1.3671189282245326e-05, |
| "loss": 0.6331443309783935, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.7846975088967971, |
| "grad_norm": 0.7051156163215637, |
| "learning_rate": 1.3644704998791501e-05, |
| "loss": 0.6948952198028564, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.7864768683274022, |
| "grad_norm": 0.5967740416526794, |
| "learning_rate": 1.361819120163567e-05, |
| "loss": 0.6140963077545166, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.7882562277580071, |
| "grad_norm": 0.688831090927124, |
| "learning_rate": 1.3591648105478423e-05, |
| "loss": 0.6627942085266113, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.7900355871886121, |
| "grad_norm": 0.5357785820960999, |
| "learning_rate": 1.3565075925257605e-05, |
| "loss": 0.6558830261230468, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.791814946619217, |
| "grad_norm": 0.6449471712112427, |
| "learning_rate": 1.3538474876146567e-05, |
| "loss": 0.6528484344482421, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.7935943060498221, |
| "grad_norm": 0.6996321678161621, |
| "learning_rate": 1.3511845173552446e-05, |
| "loss": 0.6519684314727783, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.7953736654804271, |
| "grad_norm": 0.6798763871192932, |
| "learning_rate": 1.348518703311439e-05, |
| "loss": 0.6224774360656739, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.797153024911032, |
| "grad_norm": 0.698722243309021, |
| "learning_rate": 1.3458500670701833e-05, |
| "loss": 0.6481215953826904, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.798932384341637, |
| "grad_norm": 0.6421968340873718, |
| "learning_rate": 1.3431786302412749e-05, |
| "loss": 0.6016243934631348, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.800711743772242, |
| "grad_norm": 0.6529412269592285, |
| "learning_rate": 1.3405044144571888e-05, |
| "loss": 0.6958633899688721, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.800711743772242, |
| "eval_loss": 0.6375713348388672, |
| "eval_runtime": 411.392, |
| "eval_samples_per_second": 12.224, |
| "eval_steps_per_second": 6.113, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.802491103202847, |
| "grad_norm": 0.7270268797874451, |
| "learning_rate": 1.3378274413729036e-05, |
| "loss": 0.6834945201873779, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.8042704626334519, |
| "grad_norm": 0.6442169547080994, |
| "learning_rate": 1.335147732665725e-05, |
| "loss": 0.6535075187683106, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.806049822064057, |
| "grad_norm": 0.7817319631576538, |
| "learning_rate": 1.3324653100351117e-05, |
| "loss": 0.6588070869445801, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.8078291814946619, |
| "grad_norm": 0.6025936603546143, |
| "learning_rate": 1.3297801952024983e-05, |
| "loss": 0.6654253482818604, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.8096085409252669, |
| "grad_norm": 0.6981809735298157, |
| "learning_rate": 1.3270924099111204e-05, |
| "loss": 0.6565302848815918, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.8113879003558719, |
| "grad_norm": 0.7100082635879517, |
| "learning_rate": 1.3244019759258378e-05, |
| "loss": 0.6451606273651123, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.8131672597864769, |
| "grad_norm": 0.7197165489196777, |
| "learning_rate": 1.3217089150329589e-05, |
| "loss": 0.6707216739654541, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.8149466192170819, |
| "grad_norm": 0.6494991183280945, |
| "learning_rate": 1.3190132490400642e-05, |
| "loss": 0.6538206577301026, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.8167259786476868, |
| "grad_norm": 0.610467255115509, |
| "learning_rate": 1.316314999775829e-05, |
| "loss": 0.6370253562927246, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.8185053380782918, |
| "grad_norm": 0.765133261680603, |
| "learning_rate": 1.3136141890898473e-05, |
| "loss": 0.6969471454620362, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.8202846975088968, |
| "grad_norm": 0.6949911117553711, |
| "learning_rate": 1.3109108388524551e-05, |
| "loss": 0.6275270462036133, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.8220640569395018, |
| "grad_norm": 0.6720005869865417, |
| "learning_rate": 1.3082049709545524e-05, |
| "loss": 0.6380300521850586, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.8238434163701067, |
| "grad_norm": 0.7357544898986816, |
| "learning_rate": 1.3054966073074264e-05, |
| "loss": 0.6391136169433593, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.8256227758007118, |
| "grad_norm": 0.6352887749671936, |
| "learning_rate": 1.3027857698425748e-05, |
| "loss": 0.6527080059051513, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.8274021352313167, |
| "grad_norm": 0.6615159511566162, |
| "learning_rate": 1.3000724805115265e-05, |
| "loss": 0.6786240100860595, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.8291814946619217, |
| "grad_norm": 0.7004484534263611, |
| "learning_rate": 1.2973567612856659e-05, |
| "loss": 0.6390516281127929, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.8309608540925267, |
| "grad_norm": 0.7463746070861816, |
| "learning_rate": 1.294638634156053e-05, |
| "loss": 0.6174628734588623, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.8327402135231317, |
| "grad_norm": 0.7666842341423035, |
| "learning_rate": 1.2919181211332474e-05, |
| "loss": 0.6169525623321533, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.8345195729537367, |
| "grad_norm": 0.6636411547660828, |
| "learning_rate": 1.2891952442471274e-05, |
| "loss": 0.6446310043334961, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.8362989323843416, |
| "grad_norm": 0.9001480937004089, |
| "learning_rate": 1.2864700255467148e-05, |
| "loss": 0.6689016342163085, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.8380782918149466, |
| "grad_norm": 0.551397442817688, |
| "learning_rate": 1.2837424870999933e-05, |
| "loss": 0.6616122245788574, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.8398576512455516, |
| "grad_norm": 0.7142363786697388, |
| "learning_rate": 1.281012650993732e-05, |
| "loss": 0.690334415435791, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.8416370106761566, |
| "grad_norm": 0.701836347579956, |
| "learning_rate": 1.2782805393333054e-05, |
| "loss": 0.6755175113677978, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.8434163701067615, |
| "grad_norm": 0.6752656102180481, |
| "learning_rate": 1.2755461742425147e-05, |
| "loss": 0.6132485866546631, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.8451957295373665, |
| "grad_norm": 0.6739411354064941, |
| "learning_rate": 1.2728095778634094e-05, |
| "loss": 0.6823519706726074, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.8469750889679716, |
| "grad_norm": 0.6728504300117493, |
| "learning_rate": 1.2700707723561064e-05, |
| "loss": 0.6264513492584228, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.8487544483985765, |
| "grad_norm": 0.6379041075706482, |
| "learning_rate": 1.2673297798986118e-05, |
| "loss": 0.6372091770172119, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.8505338078291815, |
| "grad_norm": 0.6896198987960815, |
| "learning_rate": 1.2645866226866405e-05, |
| "loss": 0.6795585632324219, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.8523131672597865, |
| "grad_norm": 0.6732060313224792, |
| "learning_rate": 1.261841322933438e-05, |
| "loss": 0.6782153129577637, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.8540925266903915, |
| "grad_norm": 0.7170758247375488, |
| "learning_rate": 1.2590939028695987e-05, |
| "loss": 0.6489102840423584, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.8558718861209964, |
| "grad_norm": 0.7446131110191345, |
| "learning_rate": 1.2563443847428862e-05, |
| "loss": 0.6556113719940185, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.8576512455516014, |
| "grad_norm": 0.5690045952796936, |
| "learning_rate": 1.2535927908180547e-05, |
| "loss": 0.6207235813140869, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.8594306049822064, |
| "grad_norm": 0.7088342308998108, |
| "learning_rate": 1.2508391433766667e-05, |
| "loss": 0.6439788341522217, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.8612099644128114, |
| "grad_norm": 0.573747992515564, |
| "learning_rate": 1.2480834647169134e-05, |
| "loss": 0.6493591785430908, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.8629893238434164, |
| "grad_norm": 0.6367696523666382, |
| "learning_rate": 1.2453257771534348e-05, |
| "loss": 0.6573034286499023, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.8647686832740213, |
| "grad_norm": 0.6127105355262756, |
| "learning_rate": 1.2425661030171382e-05, |
| "loss": 0.6419090747833252, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.8665480427046264, |
| "grad_norm": 0.6644669771194458, |
| "learning_rate": 1.2398044646550167e-05, |
| "loss": 0.6521550178527832, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.8683274021352313, |
| "grad_norm": 0.6144851446151733, |
| "learning_rate": 1.2370408844299705e-05, |
| "loss": 0.6388635158538818, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.8701067615658363, |
| "grad_norm": 0.748590886592865, |
| "learning_rate": 1.2342753847206236e-05, |
| "loss": 0.6553171157836915, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.8718861209964412, |
| "grad_norm": 0.7603781819343567, |
| "learning_rate": 1.2315079879211435e-05, |
| "loss": 0.639424467086792, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.8736654804270463, |
| "grad_norm": 0.60029137134552, |
| "learning_rate": 1.2287387164410597e-05, |
| "loss": 0.6580222606658935, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.8754448398576512, |
| "grad_norm": 0.5586308836936951, |
| "learning_rate": 1.2259675927050829e-05, |
| "loss": 0.6166585445404053, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.8772241992882562, |
| "grad_norm": 0.6883022785186768, |
| "learning_rate": 1.2231946391529213e-05, |
| "loss": 0.651360559463501, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.8790035587188612, |
| "grad_norm": 0.7347468733787537, |
| "learning_rate": 1.2204198782391018e-05, |
| "loss": 0.6587865352630615, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.8807829181494662, |
| "grad_norm": 0.8330339193344116, |
| "learning_rate": 1.2176433324327868e-05, |
| "loss": 0.6858234405517578, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.8825622775800712, |
| "grad_norm": 0.6569383144378662, |
| "learning_rate": 1.2148650242175908e-05, |
| "loss": 0.7256248950958252, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.8843416370106761, |
| "grad_norm": 0.7147039771080017, |
| "learning_rate": 1.2120849760914013e-05, |
| "loss": 0.6601557254791259, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.8861209964412812, |
| "grad_norm": 0.7715662717819214, |
| "learning_rate": 1.2093032105661944e-05, |
| "loss": 0.6112511634826661, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.8879003558718861, |
| "grad_norm": 0.7267486453056335, |
| "learning_rate": 1.2065197501678529e-05, |
| "loss": 0.6230842590332031, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.8896797153024911, |
| "grad_norm": 0.7366806268692017, |
| "learning_rate": 1.203734617435985e-05, |
| "loss": 0.6849907875061035, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.8896797153024911, |
| "eval_loss": 0.6321616172790527, |
| "eval_runtime": 411.8441, |
| "eval_samples_per_second": 12.211, |
| "eval_steps_per_second": 6.107, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.891459074733096, |
| "grad_norm": 0.7812427282333374, |
| "learning_rate": 1.2009478349237397e-05, |
| "loss": 0.6636211395263671, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.8932384341637011, |
| "grad_norm": 0.6625553965568542, |
| "learning_rate": 1.1981594251976265e-05, |
| "loss": 0.6543920040130615, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.895017793594306, |
| "grad_norm": 0.6391006112098694, |
| "learning_rate": 1.1953694108373313e-05, |
| "loss": 0.653505516052246, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.896797153024911, |
| "grad_norm": 0.763852059841156, |
| "learning_rate": 1.1925778144355338e-05, |
| "loss": 0.6871216773986817, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.8985765124555161, |
| "grad_norm": 0.7798473834991455, |
| "learning_rate": 1.189784658597724e-05, |
| "loss": 0.6243946552276611, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.900355871886121, |
| "grad_norm": 0.6348104476928711, |
| "learning_rate": 1.1869899659420208e-05, |
| "loss": 0.6567151069641113, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.902135231316726, |
| "grad_norm": 0.6433530449867249, |
| "learning_rate": 1.1841937590989873e-05, |
| "loss": 0.677039909362793, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.9039145907473309, |
| "grad_norm": 0.8366072177886963, |
| "learning_rate": 1.1813960607114476e-05, |
| "loss": 0.6381283760070801, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.905693950177936, |
| "grad_norm": 0.7176342606544495, |
| "learning_rate": 1.1785968934343045e-05, |
| "loss": 0.6601722717285157, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.9074733096085409, |
| "grad_norm": 0.6531491279602051, |
| "learning_rate": 1.1757962799343548e-05, |
| "loss": 0.6662145137786866, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.9092526690391459, |
| "grad_norm": 0.6918512582778931, |
| "learning_rate": 1.1729942428901068e-05, |
| "loss": 0.6182730674743653, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.9110320284697508, |
| "grad_norm": 0.5597010850906372, |
| "learning_rate": 1.1701908049915964e-05, |
| "loss": 0.6443261623382568, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.9128113879003559, |
| "grad_norm": 0.6807442307472229, |
| "learning_rate": 1.1673859889402028e-05, |
| "loss": 0.6334492206573487, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.9145907473309609, |
| "grad_norm": 0.6249061226844788, |
| "learning_rate": 1.1645798174484653e-05, |
| "loss": 0.614483642578125, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.9163701067615658, |
| "grad_norm": 0.5527177453041077, |
| "learning_rate": 1.161772313239899e-05, |
| "loss": 0.650807237625122, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.9181494661921709, |
| "grad_norm": 0.7590207457542419, |
| "learning_rate": 1.1589634990488107e-05, |
| "loss": 0.6382132053375245, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.9199288256227758, |
| "grad_norm": 0.7238802909851074, |
| "learning_rate": 1.1561533976201157e-05, |
| "loss": 0.6336176872253418, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.9217081850533808, |
| "grad_norm": 0.8075314164161682, |
| "learning_rate": 1.1533420317091519e-05, |
| "loss": 0.6544758796691894, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.9234875444839857, |
| "grad_norm": 0.7770307064056396, |
| "learning_rate": 1.1505294240814979e-05, |
| "loss": 0.6220970153808594, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.9252669039145908, |
| "grad_norm": 0.5794429779052734, |
| "learning_rate": 1.1477155975127866e-05, |
| "loss": 0.6260251045227051, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.9270462633451957, |
| "grad_norm": 0.6863322257995605, |
| "learning_rate": 1.1449005747885212e-05, |
| "loss": 0.6177238464355469, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.9288256227758007, |
| "grad_norm": 0.6806472539901733, |
| "learning_rate": 1.142084378703892e-05, |
| "loss": 0.6731130599975585, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.9306049822064056, |
| "grad_norm": 0.6430651545524597, |
| "learning_rate": 1.1392670320635894e-05, |
| "loss": 0.6535952091217041, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.9323843416370107, |
| "grad_norm": 0.6920055150985718, |
| "learning_rate": 1.1364485576816225e-05, |
| "loss": 0.6301285266876221, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.9341637010676157, |
| "grad_norm": 0.7043541669845581, |
| "learning_rate": 1.1336289783811303e-05, |
| "loss": 0.6545778751373291, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.9359430604982206, |
| "grad_norm": 0.7749147415161133, |
| "learning_rate": 1.130808316994201e-05, |
| "loss": 0.6474496841430664, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.9377224199288257, |
| "grad_norm": 0.6907658576965332, |
| "learning_rate": 1.127986596361684e-05, |
| "loss": 0.6155929565429688, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.9395017793594306, |
| "grad_norm": 0.6255580186843872, |
| "learning_rate": 1.125163839333007e-05, |
| "loss": 0.6644204139709473, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.9412811387900356, |
| "grad_norm": 0.5674806833267212, |
| "learning_rate": 1.1223400687659898e-05, |
| "loss": 0.6196834564208984, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.9430604982206405, |
| "grad_norm": 0.7296446561813354, |
| "learning_rate": 1.1195153075266591e-05, |
| "loss": 0.6685366153717041, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.9448398576512456, |
| "grad_norm": 0.5960925817489624, |
| "learning_rate": 1.1166895784890644e-05, |
| "loss": 0.6431320190429688, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.9466192170818505, |
| "grad_norm": 0.6691383719444275, |
| "learning_rate": 1.1138629045350911e-05, |
| "loss": 0.6147652626037597, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.9483985765124555, |
| "grad_norm": 0.7819799184799194, |
| "learning_rate": 1.1110353085542778e-05, |
| "loss": 0.6021285057067871, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.9501779359430605, |
| "grad_norm": 0.690168023109436, |
| "learning_rate": 1.1082068134436281e-05, |
| "loss": 0.6405185699462891, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.9519572953736655, |
| "grad_norm": 0.6121358871459961, |
| "learning_rate": 1.1053774421074272e-05, |
| "loss": 0.6638952255249023, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.9537366548042705, |
| "grad_norm": 0.6227392554283142, |
| "learning_rate": 1.1025472174570554e-05, |
| "loss": 0.6735183715820312, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.9555160142348754, |
| "grad_norm": 0.7254964113235474, |
| "learning_rate": 1.099716162410803e-05, |
| "loss": 0.6874561309814453, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.9572953736654805, |
| "grad_norm": 0.916182816028595, |
| "learning_rate": 1.0968842998936843e-05, |
| "loss": 0.6294379234313965, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.9590747330960854, |
| "grad_norm": 0.7955939769744873, |
| "learning_rate": 1.0940516528372527e-05, |
| "loss": 0.648458194732666, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.9608540925266904, |
| "grad_norm": 0.5914123058319092, |
| "learning_rate": 1.0912182441794141e-05, |
| "loss": 0.641319465637207, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.9626334519572953, |
| "grad_norm": 0.8009176850318909, |
| "learning_rate": 1.0883840968642416e-05, |
| "loss": 0.6294963836669922, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.9644128113879004, |
| "grad_norm": 0.8268579244613647, |
| "learning_rate": 1.0855492338417905e-05, |
| "loss": 0.6227912425994873, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.9661921708185054, |
| "grad_norm": 0.7227009534835815, |
| "learning_rate": 1.0827136780679109e-05, |
| "loss": 0.6420284271240234, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.9679715302491103, |
| "grad_norm": 0.7250238060951233, |
| "learning_rate": 1.0798774525040628e-05, |
| "loss": 0.6643415927886963, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.9697508896797153, |
| "grad_norm": 0.6804770827293396, |
| "learning_rate": 1.07704058011713e-05, |
| "loss": 0.6299617290496826, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.9715302491103203, |
| "grad_norm": 0.8592752814292908, |
| "learning_rate": 1.0742030838792343e-05, |
| "loss": 0.6287535667419434, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.9733096085409253, |
| "grad_norm": 0.6754792928695679, |
| "learning_rate": 1.0713649867675483e-05, |
| "loss": 0.6633755207061768, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.9750889679715302, |
| "grad_norm": 0.8593308925628662, |
| "learning_rate": 1.0685263117641117e-05, |
| "loss": 0.6187657833099365, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.9768683274021353, |
| "grad_norm": 0.6328932642936707, |
| "learning_rate": 1.065687081855643e-05, |
| "loss": 0.618121862411499, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.9786476868327402, |
| "grad_norm": 0.7381909489631653, |
| "learning_rate": 1.062847320033354e-05, |
| "loss": 0.6680717945098877, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9786476868327402, |
| "eval_loss": 0.6279275417327881, |
| "eval_runtime": 413.1424, |
| "eval_samples_per_second": 12.173, |
| "eval_steps_per_second": 6.087, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9804270462633452, |
| "grad_norm": 0.8082507848739624, |
| "learning_rate": 1.0600070492927644e-05, |
| "loss": 0.619424295425415, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.9822064056939501, |
| "grad_norm": 0.782515823841095, |
| "learning_rate": 1.0571662926335138e-05, |
| "loss": 0.6681380271911621, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.9839857651245552, |
| "grad_norm": 0.756851851940155, |
| "learning_rate": 1.0543250730591778e-05, |
| "loss": 0.648841381072998, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.9857651245551602, |
| "grad_norm": 0.6018344759941101, |
| "learning_rate": 1.0514834135770805e-05, |
| "loss": 0.6303011417388916, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.9875444839857651, |
| "grad_norm": 0.6782673597335815, |
| "learning_rate": 1.0486413371981077e-05, |
| "loss": 0.6756137371063232, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.9893238434163701, |
| "grad_norm": 0.680172860622406, |
| "learning_rate": 1.045798866936521e-05, |
| "loss": 0.62680082321167, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.9911032028469751, |
| "grad_norm": 0.7947545647621155, |
| "learning_rate": 1.042956025809772e-05, |
| "loss": 0.6698834419250488, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.9928825622775801, |
| "grad_norm": 0.6484361290931702, |
| "learning_rate": 1.0401128368383151e-05, |
| "loss": 0.6242072582244873, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.994661921708185, |
| "grad_norm": 0.8095009326934814, |
| "learning_rate": 1.037269323045422e-05, |
| "loss": 0.6368332862854004, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.99644128113879, |
| "grad_norm": 0.7750351428985596, |
| "learning_rate": 1.034425507456994e-05, |
| "loss": 0.6159787654876709, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.998220640569395, |
| "grad_norm": 0.6969480514526367, |
| "learning_rate": 1.0315814131013768e-05, |
| "loss": 0.6518408298492432, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7439318299293518, |
| "learning_rate": 1.0287370630091731e-05, |
| "loss": 0.6812664031982422, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.001779359430605, |
| "grad_norm": 0.7261248826980591, |
| "learning_rate": 1.0258924802130565e-05, |
| "loss": 0.6570511817932129, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.00355871886121, |
| "grad_norm": 0.6743627190589905, |
| "learning_rate": 1.0230476877475854e-05, |
| "loss": 0.6013021469116211, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.0053380782918149, |
| "grad_norm": 0.698407769203186, |
| "learning_rate": 1.0202027086490154e-05, |
| "loss": 0.6084145069122314, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.00711743772242, |
| "grad_norm": 0.7206895351409912, |
| "learning_rate": 1.0173575659551137e-05, |
| "loss": 0.642551326751709, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.008896797153025, |
| "grad_norm": 0.8103647232055664, |
| "learning_rate": 1.0145122827049725e-05, |
| "loss": 0.5881889820098877, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.01067615658363, |
| "grad_norm": 0.7496983408927917, |
| "learning_rate": 1.0116668819388218e-05, |
| "loss": 0.6021871089935302, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.0124555160142348, |
| "grad_norm": 0.6843361258506775, |
| "learning_rate": 1.0088213866978435e-05, |
| "loss": 0.6468405723571777, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.0142348754448398, |
| "grad_norm": 0.6858223676681519, |
| "learning_rate": 1.0059758200239842e-05, |
| "loss": 0.6358915328979492, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.0160142348754448, |
| "grad_norm": 0.9082915186882019, |
| "learning_rate": 1.0031302049597691e-05, |
| "loss": 0.6340798377990723, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.0177935943060499, |
| "grad_norm": 0.6842049360275269, |
| "learning_rate": 1.0002845645481152e-05, |
| "loss": 0.6286153793334961, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.019572953736655, |
| "grad_norm": 0.7649215459823608, |
| "learning_rate": 9.974389218321453e-06, |
| "loss": 0.6225019931793213, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.0213523131672597, |
| "grad_norm": 0.6515229344367981, |
| "learning_rate": 9.945932998549996e-06, |
| "loss": 0.6120016574859619, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.0231316725978647, |
| "grad_norm": 0.5917989015579224, |
| "learning_rate": 9.917477216596521e-06, |
| "loss": 0.6404437065124512, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.0249110320284698, |
| "grad_norm": 0.7759416699409485, |
| "learning_rate": 9.889022102887205e-06, |
| "loss": 0.6269901275634766, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.0266903914590748, |
| "grad_norm": 0.7149049043655396, |
| "learning_rate": 9.860567887842827e-06, |
| "loss": 0.6032210350036621, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.0284697508896796, |
| "grad_norm": 0.6775475144386292, |
| "learning_rate": 9.832114801876877e-06, |
| "loss": 0.6163185119628907, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.0302491103202847, |
| "grad_norm": 0.7221093773841858, |
| "learning_rate": 9.80366307539372e-06, |
| "loss": 0.5833635330200195, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.0320284697508897, |
| "grad_norm": 0.7127135396003723, |
| "learning_rate": 9.775212938786689e-06, |
| "loss": 0.6052863597869873, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.0338078291814947, |
| "grad_norm": 0.7285045981407166, |
| "learning_rate": 9.746764622436265e-06, |
| "loss": 0.6380964756011963, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.0355871886120998, |
| "grad_norm": 0.7396540641784668, |
| "learning_rate": 9.718318356708172e-06, |
| "loss": 0.6616352081298829, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.0373665480427046, |
| "grad_norm": 0.6225027441978455, |
| "learning_rate": 9.689874371951541e-06, |
| "loss": 0.6689383029937744, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.0391459074733096, |
| "grad_norm": 0.8353135585784912, |
| "learning_rate": 9.661432898497024e-06, |
| "loss": 0.6042938232421875, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.0409252669039146, |
| "grad_norm": 0.6610475778579712, |
| "learning_rate": 9.632994166654941e-06, |
| "loss": 0.6156399726867676, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.0427046263345197, |
| "grad_norm": 0.7322497963905334, |
| "learning_rate": 9.604558406713413e-06, |
| "loss": 0.6352502822875976, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.0444839857651245, |
| "grad_norm": 0.7838056087493896, |
| "learning_rate": 9.576125848936484e-06, |
| "loss": 0.6298691272735596, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.0462633451957295, |
| "grad_norm": 0.81569504737854, |
| "learning_rate": 9.547696723562289e-06, |
| "loss": 0.6307301998138428, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.0480427046263345, |
| "grad_norm": 0.8897931575775146, |
| "learning_rate": 9.519271260801146e-06, |
| "loss": 0.6494285106658936, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.0498220640569396, |
| "grad_norm": 0.7175611257553101, |
| "learning_rate": 9.490849690833732e-06, |
| "loss": 0.5930293083190918, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.0516014234875444, |
| "grad_norm": 0.6708109378814697, |
| "learning_rate": 9.462432243809191e-06, |
| "loss": 0.595799732208252, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.0533807829181494, |
| "grad_norm": 0.6808570623397827, |
| "learning_rate": 9.434019149843285e-06, |
| "loss": 0.6564432621002197, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.0551601423487544, |
| "grad_norm": 0.8115124106407166, |
| "learning_rate": 9.405610639016522e-06, |
| "loss": 0.6025516033172608, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.0569395017793595, |
| "grad_norm": 0.7574615478515625, |
| "learning_rate": 9.377206941372306e-06, |
| "loss": 0.6670190811157226, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.0587188612099645, |
| "grad_norm": 0.8795053362846375, |
| "learning_rate": 9.34880828691505e-06, |
| "loss": 0.6606448650360107, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.0604982206405693, |
| "grad_norm": 0.7287918925285339, |
| "learning_rate": 9.320414905608348e-06, |
| "loss": 0.6295819759368897, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.0622775800711743, |
| "grad_norm": 0.7272006869316101, |
| "learning_rate": 9.292027027373075e-06, |
| "loss": 0.6425396919250488, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.0640569395017794, |
| "grad_norm": 0.6797536015510559, |
| "learning_rate": 9.263644882085564e-06, |
| "loss": 0.5847621917724609, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.0658362989323844, |
| "grad_norm": 0.6698694229125977, |
| "learning_rate": 9.235268699575704e-06, |
| "loss": 0.6509596824645996, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.0676156583629894, |
| "grad_norm": 0.7406882643699646, |
| "learning_rate": 9.206898709625109e-06, |
| "loss": 0.6330312252044678, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0676156583629894, |
| "eval_loss": 0.6239920854568481, |
| "eval_runtime": 411.471, |
| "eval_samples_per_second": 12.222, |
| "eval_steps_per_second": 6.112, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0693950177935942, |
| "grad_norm": 0.695398211479187, |
| "learning_rate": 9.17853514196525e-06, |
| "loss": 0.6093502998352051, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.0711743772241993, |
| "grad_norm": 0.839644730091095, |
| "learning_rate": 9.150178226275584e-06, |
| "loss": 0.6351033687591553, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.0729537366548043, |
| "grad_norm": 0.6793309450149536, |
| "learning_rate": 9.121828192181716e-06, |
| "loss": 0.6527360916137696, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.0747330960854093, |
| "grad_norm": 0.8601770401000977, |
| "learning_rate": 9.093485269253508e-06, |
| "loss": 0.6375271320343018, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.0765124555160142, |
| "grad_norm": 0.8268119692802429, |
| "learning_rate": 9.065149687003256e-06, |
| "loss": 0.6043989181518554, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.0782918149466192, |
| "grad_norm": 0.6872331500053406, |
| "learning_rate": 9.0368216748838e-06, |
| "loss": 0.5880636215209961, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.0800711743772242, |
| "grad_norm": 0.7637034058570862, |
| "learning_rate": 9.008501462286682e-06, |
| "loss": 0.6080229759216309, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.0818505338078293, |
| "grad_norm": 0.7357282042503357, |
| "learning_rate": 8.980189278540294e-06, |
| "loss": 0.6133236408233642, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.083629893238434, |
| "grad_norm": 0.7250421643257141, |
| "learning_rate": 8.951885352908006e-06, |
| "loss": 0.6319203853607178, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.085409252669039, |
| "grad_norm": 0.6590055227279663, |
| "learning_rate": 8.92358991458631e-06, |
| "loss": 0.649744987487793, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.0871886120996441, |
| "grad_norm": 0.7239564061164856, |
| "learning_rate": 8.895303192702988e-06, |
| "loss": 0.6277432918548584, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.0889679715302492, |
| "grad_norm": 0.8234860301017761, |
| "learning_rate": 8.867025416315221e-06, |
| "loss": 0.6343496799468994, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.0907473309608542, |
| "grad_norm": 0.8464725613594055, |
| "learning_rate": 8.838756814407766e-06, |
| "loss": 0.6384105205535888, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.092526690391459, |
| "grad_norm": 0.8043434619903564, |
| "learning_rate": 8.810497615891078e-06, |
| "loss": 0.6212152957916259, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.094306049822064, |
| "grad_norm": 0.7087031602859497, |
| "learning_rate": 8.782248049599468e-06, |
| "loss": 0.5984238624572754, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.096085409252669, |
| "grad_norm": 0.7631204724311829, |
| "learning_rate": 8.754008344289253e-06, |
| "loss": 0.6375543594360351, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.097864768683274, |
| "grad_norm": 0.6403396725654602, |
| "learning_rate": 8.725778728636893e-06, |
| "loss": 0.6312360763549805, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.099644128113879, |
| "grad_norm": 0.8257510662078857, |
| "learning_rate": 8.697559431237153e-06, |
| "loss": 0.6133918762207031, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.101423487544484, |
| "grad_norm": 0.6782218813896179, |
| "learning_rate": 8.66935068060123e-06, |
| "loss": 0.6308522701263428, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.103202846975089, |
| "grad_norm": 0.7291736602783203, |
| "learning_rate": 8.641152705154935e-06, |
| "loss": 0.6390894889831543, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.104982206405694, |
| "grad_norm": 0.7014942169189453, |
| "learning_rate": 8.612965733236811e-06, |
| "loss": 0.6089354038238526, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.106761565836299, |
| "grad_norm": 0.6617055535316467, |
| "learning_rate": 8.584789993096303e-06, |
| "loss": 0.6288234710693359, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.1085409252669038, |
| "grad_norm": 0.7303082942962646, |
| "learning_rate": 8.5566257128919e-06, |
| "loss": 0.6383206367492675, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.1103202846975089, |
| "grad_norm": 0.7110517024993896, |
| "learning_rate": 8.528473120689302e-06, |
| "loss": 0.6590532779693603, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.112099644128114, |
| "grad_norm": 0.779214084148407, |
| "learning_rate": 8.50033244445955e-06, |
| "loss": 0.6261368274688721, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.113879003558719, |
| "grad_norm": 0.819240152835846, |
| "learning_rate": 8.472203912077205e-06, |
| "loss": 0.6361266136169433, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.1156583629893237, |
| "grad_norm": 0.7131257653236389, |
| "learning_rate": 8.444087751318484e-06, |
| "loss": 0.6407057285308838, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.1174377224199288, |
| "grad_norm": 0.9261246919631958, |
| "learning_rate": 8.415984189859418e-06, |
| "loss": 0.5832745552062988, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.1192170818505338, |
| "grad_norm": 0.8466585278511047, |
| "learning_rate": 8.38789345527403e-06, |
| "loss": 0.6238195419311523, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.1209964412811388, |
| "grad_norm": 0.7208281755447388, |
| "learning_rate": 8.359815775032457e-06, |
| "loss": 0.6571403980255127, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.1227758007117439, |
| "grad_norm": 0.6916828155517578, |
| "learning_rate": 8.331751376499131e-06, |
| "loss": 0.6353542327880859, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.1245551601423487, |
| "grad_norm": 0.804423987865448, |
| "learning_rate": 8.303700486930935e-06, |
| "loss": 0.6343297481536865, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.1263345195729537, |
| "grad_norm": 0.8437130451202393, |
| "learning_rate": 8.275663333475365e-06, |
| "loss": 0.6212164878845214, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.1281138790035588, |
| "grad_norm": 0.8472525477409363, |
| "learning_rate": 8.247640143168675e-06, |
| "loss": 0.6239662647247315, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.1298932384341638, |
| "grad_norm": 0.696539044380188, |
| "learning_rate": 8.219631142934062e-06, |
| "loss": 0.6344510078430176, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.1316725978647686, |
| "grad_norm": 0.7422542572021484, |
| "learning_rate": 8.191636559579802e-06, |
| "loss": 0.6504097938537597, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.1334519572953736, |
| "grad_norm": 0.801693856716156, |
| "learning_rate": 8.163656619797444e-06, |
| "loss": 0.6314446449279785, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.1352313167259787, |
| "grad_norm": 0.786159098148346, |
| "learning_rate": 8.135691550159943e-06, |
| "loss": 0.6034214973449707, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.1370106761565837, |
| "grad_norm": 0.7948549389839172, |
| "learning_rate": 8.107741577119853e-06, |
| "loss": 0.653587532043457, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.1387900355871885, |
| "grad_norm": 0.6635374426841736, |
| "learning_rate": 8.079806927007469e-06, |
| "loss": 0.6300637722015381, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.1405693950177935, |
| "grad_norm": 0.7938548922538757, |
| "learning_rate": 8.051887826029006e-06, |
| "loss": 0.6417815685272217, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.1423487544483986, |
| "grad_norm": 0.7719388008117676, |
| "learning_rate": 8.023984500264782e-06, |
| "loss": 0.624653434753418, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.1441281138790036, |
| "grad_norm": 0.8489981293678284, |
| "learning_rate": 7.996097175667352e-06, |
| "loss": 0.6066908836364746, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.1459074733096086, |
| "grad_norm": 0.7810853123664856, |
| "learning_rate": 7.968226078059716e-06, |
| "loss": 0.5920337677001953, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.1476868327402134, |
| "grad_norm": 0.7887650728225708, |
| "learning_rate": 7.940371433133459e-06, |
| "loss": 0.6641497135162353, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.1494661921708185, |
| "grad_norm": 0.7652443051338196, |
| "learning_rate": 7.912533466446947e-06, |
| "loss": 0.6473179340362549, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.1512455516014235, |
| "grad_norm": 0.7206646800041199, |
| "learning_rate": 7.88471240342348e-06, |
| "loss": 0.5841953754425049, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.1530249110320285, |
| "grad_norm": 0.7867498993873596, |
| "learning_rate": 7.856908469349495e-06, |
| "loss": 0.6486004829406739, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.1548042704626336, |
| "grad_norm": 0.8296671509742737, |
| "learning_rate": 7.829121889372702e-06, |
| "loss": 0.6224873065948486, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.1565836298932384, |
| "grad_norm": 0.8746348023414612, |
| "learning_rate": 7.801352888500304e-06, |
| "loss": 0.640509843826294, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.1565836298932384, |
| "eval_loss": 0.620580792427063, |
| "eval_runtime": 411.917, |
| "eval_samples_per_second": 12.209, |
| "eval_steps_per_second": 6.106, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.1583629893238434, |
| "grad_norm": 0.7378506660461426, |
| "learning_rate": 7.773601691597135e-06, |
| "loss": 0.6328612327575683, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.1601423487544484, |
| "grad_norm": 0.7263290882110596, |
| "learning_rate": 7.745868523383876e-06, |
| "loss": 0.6071421146392822, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.1619217081850535, |
| "grad_norm": 0.8653813004493713, |
| "learning_rate": 7.7181536084352e-06, |
| "loss": 0.6067211627960205, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.1637010676156583, |
| "grad_norm": 0.9452764987945557, |
| "learning_rate": 7.690457171177984e-06, |
| "loss": 0.6291831970214844, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.1654804270462633, |
| "grad_norm": 0.65375155210495, |
| "learning_rate": 7.66277943588947e-06, |
| "loss": 0.6606705665588379, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.1672597864768683, |
| "grad_norm": 0.978212833404541, |
| "learning_rate": 7.635120626695456e-06, |
| "loss": 0.5956210613250732, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.1690391459074734, |
| "grad_norm": 0.7400839328765869, |
| "learning_rate": 7.607480967568497e-06, |
| "loss": 0.6242622852325439, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.1708185053380782, |
| "grad_norm": 1.0911694765090942, |
| "learning_rate": 7.579860682326055e-06, |
| "loss": 0.6239947319030762, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.1725978647686832, |
| "grad_norm": 0.7858565449714661, |
| "learning_rate": 7.552259994628728e-06, |
| "loss": 0.6366580963134766, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.1743772241992882, |
| "grad_norm": 0.8578958511352539, |
| "learning_rate": 7.5246791279784056e-06, |
| "loss": 0.6067252635955811, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.1761565836298933, |
| "grad_norm": 0.7615208029747009, |
| "learning_rate": 7.4971183057164785e-06, |
| "loss": 0.6032675266265869, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.1779359430604983, |
| "grad_norm": 0.8318284749984741, |
| "learning_rate": 7.469577751022024e-06, |
| "loss": 0.5974441528320312, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.1797153024911031, |
| "grad_norm": 0.882125198841095, |
| "learning_rate": 7.442057686909998e-06, |
| "loss": 0.6573843002319336, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.1814946619217082, |
| "grad_norm": 0.7975313663482666, |
| "learning_rate": 7.414558336229426e-06, |
| "loss": 0.6807239055633545, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.1832740213523132, |
| "grad_norm": 0.7822281122207642, |
| "learning_rate": 7.387079921661614e-06, |
| "loss": 0.6663013458251953, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.1850533807829182, |
| "grad_norm": 0.8564392328262329, |
| "learning_rate": 7.359622665718317e-06, |
| "loss": 0.6218353271484375, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.1868327402135233, |
| "grad_norm": 0.7377424836158752, |
| "learning_rate": 7.332186790739973e-06, |
| "loss": 0.5968504905700683, |
| "step": 6670 |
| }, |
| { |
| "epoch": 1.188612099644128, |
| "grad_norm": 0.730351984500885, |
| "learning_rate": 7.304772518893866e-06, |
| "loss": 0.6158394813537598, |
| "step": 6680 |
| }, |
| { |
| "epoch": 1.190391459074733, |
| "grad_norm": 0.7883412837982178, |
| "learning_rate": 7.277380072172354e-06, |
| "loss": 0.6515108585357666, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.1921708185053381, |
| "grad_norm": 0.9434972405433655, |
| "learning_rate": 7.250009672391063e-06, |
| "loss": 0.6276324272155762, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.193950177935943, |
| "grad_norm": 0.8042780756950378, |
| "learning_rate": 7.2226615411870796e-06, |
| "loss": 0.6224194526672363, |
| "step": 6710 |
| }, |
| { |
| "epoch": 1.195729537366548, |
| "grad_norm": 0.8094464540481567, |
| "learning_rate": 7.195335900017181e-06, |
| "loss": 0.6250015735626221, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.197508896797153, |
| "grad_norm": 0.8359760046005249, |
| "learning_rate": 7.168032970156011e-06, |
| "loss": 0.6454795837402344, |
| "step": 6730 |
| }, |
| { |
| "epoch": 1.199288256227758, |
| "grad_norm": 0.728487491607666, |
| "learning_rate": 7.140752972694325e-06, |
| "loss": 0.6341996192932129, |
| "step": 6740 |
| }, |
| { |
| "epoch": 1.201067615658363, |
| "grad_norm": 0.8493318557739258, |
| "learning_rate": 7.113496128537154e-06, |
| "loss": 0.6078832626342774, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.2028469750889679, |
| "grad_norm": 0.7177495360374451, |
| "learning_rate": 7.086262658402059e-06, |
| "loss": 0.6244680881500244, |
| "step": 6760 |
| }, |
| { |
| "epoch": 1.204626334519573, |
| "grad_norm": 0.7511991858482361, |
| "learning_rate": 7.059052782817317e-06, |
| "loss": 0.6233652591705322, |
| "step": 6770 |
| }, |
| { |
| "epoch": 1.206405693950178, |
| "grad_norm": 1.0159603357315063, |
| "learning_rate": 7.0318667221201505e-06, |
| "loss": 0.6203540802001953, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.208185053380783, |
| "grad_norm": 0.9800230264663696, |
| "learning_rate": 7.004704696454924e-06, |
| "loss": 0.621929931640625, |
| "step": 6790 |
| }, |
| { |
| "epoch": 1.209964412811388, |
| "grad_norm": 0.740306556224823, |
| "learning_rate": 6.97756692577139e-06, |
| "loss": 0.5935471534729004, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.2117437722419928, |
| "grad_norm": 0.7026015520095825, |
| "learning_rate": 6.950453629822874e-06, |
| "loss": 0.5996420383453369, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.2135231316725978, |
| "grad_norm": 0.7063615918159485, |
| "learning_rate": 6.923365028164532e-06, |
| "loss": 0.6140129089355468, |
| "step": 6820 |
| }, |
| { |
| "epoch": 1.2153024911032029, |
| "grad_norm": 0.6547632217407227, |
| "learning_rate": 6.8963013401515365e-06, |
| "loss": 0.6896812915802002, |
| "step": 6830 |
| }, |
| { |
| "epoch": 1.217081850533808, |
| "grad_norm": 0.7407922744750977, |
| "learning_rate": 6.8692627849373226e-06, |
| "loss": 0.6203756332397461, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.2188612099644127, |
| "grad_norm": 0.7740945219993591, |
| "learning_rate": 6.842249581471814e-06, |
| "loss": 0.6261481761932373, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.2206405693950177, |
| "grad_norm": 0.8180769085884094, |
| "learning_rate": 6.815261948499628e-06, |
| "loss": 0.6112605094909668, |
| "step": 6860 |
| }, |
| { |
| "epoch": 1.2224199288256228, |
| "grad_norm": 0.7909367084503174, |
| "learning_rate": 6.788300104558341e-06, |
| "loss": 0.6290336132049561, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.2241992882562278, |
| "grad_norm": 0.7958800792694092, |
| "learning_rate": 6.76136426797668e-06, |
| "loss": 0.6461628913879395, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.2259786476868326, |
| "grad_norm": 0.83876633644104, |
| "learning_rate": 6.734454656872784e-06, |
| "loss": 0.6532453060150146, |
| "step": 6890 |
| }, |
| { |
| "epoch": 1.2277580071174377, |
| "grad_norm": 0.8222100138664246, |
| "learning_rate": 6.707571489152418e-06, |
| "loss": 0.6066458225250244, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.2295373665480427, |
| "grad_norm": 0.7338365316390991, |
| "learning_rate": 6.680714982507219e-06, |
| "loss": 0.6187913417816162, |
| "step": 6910 |
| }, |
| { |
| "epoch": 1.2313167259786477, |
| "grad_norm": 1.0070669651031494, |
| "learning_rate": 6.653885354412935e-06, |
| "loss": 0.638453197479248, |
| "step": 6920 |
| }, |
| { |
| "epoch": 1.2330960854092528, |
| "grad_norm": 0.8759623169898987, |
| "learning_rate": 6.627082822127657e-06, |
| "loss": 0.6360817909240722, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.2348754448398576, |
| "grad_norm": 0.7884645462036133, |
| "learning_rate": 6.600307602690057e-06, |
| "loss": 0.6332939147949219, |
| "step": 6940 |
| }, |
| { |
| "epoch": 1.2366548042704626, |
| "grad_norm": 0.7928617596626282, |
| "learning_rate": 6.573559912917648e-06, |
| "loss": 0.6071663856506347, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.2384341637010676, |
| "grad_norm": 0.8969078063964844, |
| "learning_rate": 6.546839969405003e-06, |
| "loss": 0.6088360786437989, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.2402135231316727, |
| "grad_norm": 0.6923132538795471, |
| "learning_rate": 6.520147988522021e-06, |
| "loss": 0.602042293548584, |
| "step": 6970 |
| }, |
| { |
| "epoch": 1.2419928825622777, |
| "grad_norm": 0.8759524822235107, |
| "learning_rate": 6.493484186412172e-06, |
| "loss": 0.6699591636657715, |
| "step": 6980 |
| }, |
| { |
| "epoch": 1.2437722419928825, |
| "grad_norm": 0.9501714706420898, |
| "learning_rate": 6.466848778990734e-06, |
| "loss": 0.631440258026123, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.2455516014234875, |
| "grad_norm": 0.8564425706863403, |
| "learning_rate": 6.440241981943063e-06, |
| "loss": 0.6175348281860351, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.2455516014234875, |
| "eval_loss": 0.6177652478218079, |
| "eval_runtime": 410.1105, |
| "eval_samples_per_second": 12.263, |
| "eval_steps_per_second": 6.132, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.2473309608540926, |
| "grad_norm": 0.8425670266151428, |
| "learning_rate": 6.413664010722825e-06, |
| "loss": 0.6334220886230468, |
| "step": 7010 |
| }, |
| { |
| "epoch": 1.2491103202846976, |
| "grad_norm": 0.6265377998352051, |
| "learning_rate": 6.3871150805502765e-06, |
| "loss": 0.6206889629364014, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.2508896797153026, |
| "grad_norm": 0.7120670676231384, |
| "learning_rate": 6.360595406410497e-06, |
| "loss": 0.609261417388916, |
| "step": 7030 |
| }, |
| { |
| "epoch": 1.2526690391459074, |
| "grad_norm": 0.7951711416244507, |
| "learning_rate": 6.334105203051673e-06, |
| "loss": 0.6361230850219727, |
| "step": 7040 |
| }, |
| { |
| "epoch": 1.2544483985765125, |
| "grad_norm": 0.9487454891204834, |
| "learning_rate": 6.307644684983329e-06, |
| "loss": 0.5716474533081055, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.2562277580071175, |
| "grad_norm": 0.7447927594184875, |
| "learning_rate": 6.281214066474623e-06, |
| "loss": 0.6536383152008056, |
| "step": 7060 |
| }, |
| { |
| "epoch": 1.2580071174377223, |
| "grad_norm": 0.8137961626052856, |
| "learning_rate": 6.254813561552585e-06, |
| "loss": 0.6361209869384765, |
| "step": 7070 |
| }, |
| { |
| "epoch": 1.2597864768683273, |
| "grad_norm": 0.7065114378929138, |
| "learning_rate": 6.2284433840004e-06, |
| "loss": 0.6489431858062744, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.2615658362989324, |
| "grad_norm": 0.7409806847572327, |
| "learning_rate": 6.202103747355667e-06, |
| "loss": 0.6485908031463623, |
| "step": 7090 |
| }, |
| { |
| "epoch": 1.2633451957295374, |
| "grad_norm": 0.662041962146759, |
| "learning_rate": 6.175794864908672e-06, |
| "loss": 0.6204521179199218, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.2651245551601424, |
| "grad_norm": 0.7687442898750305, |
| "learning_rate": 6.1495169497006755e-06, |
| "loss": 0.6361071109771729, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.2669039145907472, |
| "grad_norm": 0.9222472906112671, |
| "learning_rate": 6.123270214522159e-06, |
| "loss": 0.6162978172302246, |
| "step": 7120 |
| }, |
| { |
| "epoch": 1.2686832740213523, |
| "grad_norm": 0.819275975227356, |
| "learning_rate": 6.097054871911132e-06, |
| "loss": 0.6023138523101806, |
| "step": 7130 |
| }, |
| { |
| "epoch": 1.2704626334519573, |
| "grad_norm": 0.8842992782592773, |
| "learning_rate": 6.070871134151386e-06, |
| "loss": 0.658526086807251, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.2722419928825623, |
| "grad_norm": 0.7101578116416931, |
| "learning_rate": 6.044719213270791e-06, |
| "loss": 0.6004268169403076, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.2740213523131674, |
| "grad_norm": 0.7537800669670105, |
| "learning_rate": 6.018599321039569e-06, |
| "loss": 0.6223325729370117, |
| "step": 7160 |
| }, |
| { |
| "epoch": 1.2758007117437722, |
| "grad_norm": 0.6810758709907532, |
| "learning_rate": 5.9925116689685925e-06, |
| "loss": 0.6302705764770508, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.2775800711743772, |
| "grad_norm": 0.8179675936698914, |
| "learning_rate": 5.966456468307653e-06, |
| "loss": 0.6177227020263671, |
| "step": 7180 |
| }, |
| { |
| "epoch": 1.2793594306049823, |
| "grad_norm": 0.7833004593849182, |
| "learning_rate": 5.940433930043772e-06, |
| "loss": 0.6414045333862305, |
| "step": 7190 |
| }, |
| { |
| "epoch": 1.281138790035587, |
| "grad_norm": 0.9323675036430359, |
| "learning_rate": 5.914444264899466e-06, |
| "loss": 0.603922176361084, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.282918149466192, |
| "grad_norm": 0.844142735004425, |
| "learning_rate": 5.888487683331072e-06, |
| "loss": 0.6308831691741943, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.2846975088967971, |
| "grad_norm": 0.8182924389839172, |
| "learning_rate": 5.862564395527013e-06, |
| "loss": 0.6259811401367188, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.2864768683274022, |
| "grad_norm": 0.8091204166412354, |
| "learning_rate": 5.836674611406117e-06, |
| "loss": 0.6158743858337402, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.2882562277580072, |
| "grad_norm": 0.9385861754417419, |
| "learning_rate": 5.810818540615903e-06, |
| "loss": 0.6304144382476806, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.290035587188612, |
| "grad_norm": 1.0828267335891724, |
| "learning_rate": 5.784996392530892e-06, |
| "loss": 0.6548801898956299, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.291814946619217, |
| "grad_norm": 0.7489603161811829, |
| "learning_rate": 5.759208376250916e-06, |
| "loss": 0.6102576732635498, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.293594306049822, |
| "grad_norm": 0.7216728329658508, |
| "learning_rate": 5.733454700599408e-06, |
| "loss": 0.6362377166748047, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.295373665480427, |
| "grad_norm": 0.8442836403846741, |
| "learning_rate": 5.707735574121732e-06, |
| "loss": 0.6299904346466064, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.2971530249110321, |
| "grad_norm": 0.6905636787414551, |
| "learning_rate": 5.68205120508347e-06, |
| "loss": 0.6162694454193115, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.298932384341637, |
| "grad_norm": 0.8710734844207764, |
| "learning_rate": 5.656401801468764e-06, |
| "loss": 0.5905096054077148, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.300711743772242, |
| "grad_norm": 0.7497642040252686, |
| "learning_rate": 5.630787570978601e-06, |
| "loss": 0.6081714630126953, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.302491103202847, |
| "grad_norm": 0.7930024266242981, |
| "learning_rate": 5.605208721029162e-06, |
| "loss": 0.6393218517303467, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.304270462633452, |
| "grad_norm": 0.622921347618103, |
| "learning_rate": 5.579665458750119e-06, |
| "loss": 0.6169804096221924, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.306049822064057, |
| "grad_norm": 0.7385269403457642, |
| "learning_rate": 5.554157990982964e-06, |
| "loss": 0.6091341495513916, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.3078291814946619, |
| "grad_norm": 0.839229941368103, |
| "learning_rate": 5.528686524279336e-06, |
| "loss": 0.6216320037841797, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.309608540925267, |
| "grad_norm": 0.8395034670829773, |
| "learning_rate": 5.503251264899361e-06, |
| "loss": 0.6268163204193116, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.311387900355872, |
| "grad_norm": 0.7301527261734009, |
| "learning_rate": 5.477852418809955e-06, |
| "loss": 0.6226423740386963, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.3131672597864767, |
| "grad_norm": 0.9010869860649109, |
| "learning_rate": 5.452490191683171e-06, |
| "loss": 0.6155654907226562, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.3149466192170818, |
| "grad_norm": 0.7506121397018433, |
| "learning_rate": 5.427164788894543e-06, |
| "loss": 0.6015159130096436, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.3167259786476868, |
| "grad_norm": 0.8243475556373596, |
| "learning_rate": 5.401876415521402e-06, |
| "loss": 0.6120582103729248, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.3185053380782918, |
| "grad_norm": 0.877004086971283, |
| "learning_rate": 5.3766252763412375e-06, |
| "loss": 0.6368375301361084, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.3202846975088969, |
| "grad_norm": 0.6776864528656006, |
| "learning_rate": 5.351411575830015e-06, |
| "loss": 0.5798074722290039, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.3220640569395017, |
| "grad_norm": 0.7413604259490967, |
| "learning_rate": 5.326235518160543e-06, |
| "loss": 0.6417413711547851, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.3238434163701067, |
| "grad_norm": 0.680547297000885, |
| "learning_rate": 5.301097307200804e-06, |
| "loss": 0.6709810256958008, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.3256227758007118, |
| "grad_norm": 0.8982633352279663, |
| "learning_rate": 5.275997146512317e-06, |
| "loss": 0.6244979858398437, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.3274021352313168, |
| "grad_norm": 0.7878574728965759, |
| "learning_rate": 5.250935239348469e-06, |
| "loss": 0.6555228233337402, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.3291814946619218, |
| "grad_norm": 0.7958076000213623, |
| "learning_rate": 5.2259117886528974e-06, |
| "loss": 0.6336334228515625, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.3309608540925266, |
| "grad_norm": 0.8020284175872803, |
| "learning_rate": 5.200926997057818e-06, |
| "loss": 0.6250798225402832, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.3327402135231317, |
| "grad_norm": 0.9492243528366089, |
| "learning_rate": 5.175981066882403e-06, |
| "loss": 0.5951490879058838, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.3345195729537367, |
| "grad_norm": 0.8778184056282043, |
| "learning_rate": 5.15107420013113e-06, |
| "loss": 0.5949514865875244, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.3345195729537367, |
| "eval_loss": 0.6148350238800049, |
| "eval_runtime": 411.3736, |
| "eval_samples_per_second": 12.225, |
| "eval_steps_per_second": 6.114, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.3362989323843417, |
| "grad_norm": 0.888278603553772, |
| "learning_rate": 5.126206598492167e-06, |
| "loss": 0.6134849071502686, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.3380782918149468, |
| "grad_norm": 0.8123595714569092, |
| "learning_rate": 5.101378463335713e-06, |
| "loss": 0.6279965877532959, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.3398576512455516, |
| "grad_norm": 0.7082033753395081, |
| "learning_rate": 5.07658999571238e-06, |
| "loss": 0.6239530086517334, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.3416370106761566, |
| "grad_norm": 0.8400927782058716, |
| "learning_rate": 5.051841396351574e-06, |
| "loss": 0.5917630195617676, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.3434163701067616, |
| "grad_norm": 0.8962631225585938, |
| "learning_rate": 5.027132865659847e-06, |
| "loss": 0.6083420753479004, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.3451957295373664, |
| "grad_norm": 0.9693284630775452, |
| "learning_rate": 5.0024646037193025e-06, |
| "loss": 0.606035566329956, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.3469750889679715, |
| "grad_norm": 0.8629393577575684, |
| "learning_rate": 4.977836810285942e-06, |
| "loss": 0.6264309406280517, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.3487544483985765, |
| "grad_norm": 0.7961446642875671, |
| "learning_rate": 4.953249684788084e-06, |
| "loss": 0.6090545654296875, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.3505338078291815, |
| "grad_norm": 0.7653380036354065, |
| "learning_rate": 4.928703426324712e-06, |
| "loss": 0.6106627941131592, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.3523131672597866, |
| "grad_norm": 0.8642345070838928, |
| "learning_rate": 4.9041982336639004e-06, |
| "loss": 0.5813540458679199, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.3540925266903914, |
| "grad_norm": 0.7263400554656982, |
| "learning_rate": 4.8797343052411675e-06, |
| "loss": 0.685100507736206, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.3558718861209964, |
| "grad_norm": 0.8232746720314026, |
| "learning_rate": 4.855311839157899e-06, |
| "loss": 0.6377671718597412, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.3576512455516014, |
| "grad_norm": 0.73633873462677, |
| "learning_rate": 4.830931033179725e-06, |
| "loss": 0.6779125690460205, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.3594306049822065, |
| "grad_norm": 0.8550395965576172, |
| "learning_rate": 4.806592084734928e-06, |
| "loss": 0.6058492660522461, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.3612099644128115, |
| "grad_norm": 0.7828453779220581, |
| "learning_rate": 4.782295190912831e-06, |
| "loss": 0.6215229034423828, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.3629893238434163, |
| "grad_norm": 0.8796820640563965, |
| "learning_rate": 4.758040548462233e-06, |
| "loss": 0.5914856433868408, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.3647686832740213, |
| "grad_norm": 0.7559328675270081, |
| "learning_rate": 4.733828353789772e-06, |
| "loss": 0.6324697017669678, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.3665480427046264, |
| "grad_norm": 0.9165831804275513, |
| "learning_rate": 4.709658802958366e-06, |
| "loss": 0.6462049961090088, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.3683274021352312, |
| "grad_norm": 0.9339064359664917, |
| "learning_rate": 4.685532091685625e-06, |
| "loss": 0.6461282253265381, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.3701067615658362, |
| "grad_norm": 0.8191850781440735, |
| "learning_rate": 4.66144841534224e-06, |
| "loss": 0.6682465553283692, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.3718861209964412, |
| "grad_norm": 0.9715204238891602, |
| "learning_rate": 4.637407968950434e-06, |
| "loss": 0.5913180351257324, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.3736654804270463, |
| "grad_norm": 0.7584412693977356, |
| "learning_rate": 4.613410947182354e-06, |
| "loss": 0.5695419311523438, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.3754448398576513, |
| "grad_norm": 0.6991924047470093, |
| "learning_rate": 4.589457544358521e-06, |
| "loss": 0.6356719493865967, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.3772241992882561, |
| "grad_norm": 0.8555135726928711, |
| "learning_rate": 4.56554795444623e-06, |
| "loss": 0.5793665409088135, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.3790035587188612, |
| "grad_norm": 0.9497565031051636, |
| "learning_rate": 4.5416823710580046e-06, |
| "loss": 0.6186023712158203, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.3807829181494662, |
| "grad_norm": 0.8282408118247986, |
| "learning_rate": 4.517860987450007e-06, |
| "loss": 0.6287422180175781, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.3825622775800712, |
| "grad_norm": 0.7512138485908508, |
| "learning_rate": 4.494083996520492e-06, |
| "loss": 0.6511450290679932, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.3843416370106763, |
| "grad_norm": 0.8342724442481995, |
| "learning_rate": 4.470351590808228e-06, |
| "loss": 0.6355900764465332, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.386120996441281, |
| "grad_norm": 0.9951474666595459, |
| "learning_rate": 4.446663962490951e-06, |
| "loss": 0.6075921058654785, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.387900355871886, |
| "grad_norm": 0.7905099391937256, |
| "learning_rate": 4.423021303383799e-06, |
| "loss": 0.5925492286682129, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.3896797153024911, |
| "grad_norm": 0.8992406129837036, |
| "learning_rate": 4.3994238049377715e-06, |
| "loss": 0.6204987049102784, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.3914590747330962, |
| "grad_norm": 1.124568223953247, |
| "learning_rate": 4.375871658238163e-06, |
| "loss": 0.6461961746215821, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.3932384341637012, |
| "grad_norm": 0.7916883230209351, |
| "learning_rate": 4.352365054003022e-06, |
| "loss": 0.6402833461761475, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.395017793594306, |
| "grad_norm": 1.0332368612289429, |
| "learning_rate": 4.328904182581619e-06, |
| "loss": 0.6191961288452148, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.396797153024911, |
| "grad_norm": 1.0284537076950073, |
| "learning_rate": 4.305489233952881e-06, |
| "loss": 0.634841012954712, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.398576512455516, |
| "grad_norm": 0.8202362656593323, |
| "learning_rate": 4.282120397723879e-06, |
| "loss": 0.5904129028320313, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.4003558718861209, |
| "grad_norm": 0.8149864077568054, |
| "learning_rate": 4.258797863128266e-06, |
| "loss": 0.6212067127227783, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.402135231316726, |
| "grad_norm": 0.750307023525238, |
| "learning_rate": 4.235521819024776e-06, |
| "loss": 0.6193465232849121, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.403914590747331, |
| "grad_norm": 0.719935417175293, |
| "learning_rate": 4.212292453895658e-06, |
| "loss": 0.6318532466888428, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.405693950177936, |
| "grad_norm": 0.8568369150161743, |
| "learning_rate": 4.189109955845186e-06, |
| "loss": 0.6267857074737548, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.407473309608541, |
| "grad_norm": 0.8335803151130676, |
| "learning_rate": 4.165974512598102e-06, |
| "loss": 0.6253261089324951, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.4092526690391458, |
| "grad_norm": 0.9222790598869324, |
| "learning_rate": 4.142886311498133e-06, |
| "loss": 0.6295660972595215, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.4110320284697508, |
| "grad_norm": 0.7769973874092102, |
| "learning_rate": 4.119845539506436e-06, |
| "loss": 0.6295949935913085, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.4128113879003559, |
| "grad_norm": 0.8693393468856812, |
| "learning_rate": 4.096852383200106e-06, |
| "loss": 0.5903539180755615, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.414590747330961, |
| "grad_norm": 0.8232077956199646, |
| "learning_rate": 4.073907028770671e-06, |
| "loss": 0.5986703395843506, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.416370106761566, |
| "grad_norm": 0.9507026672363281, |
| "learning_rate": 4.051009662022559e-06, |
| "loss": 0.6164099693298339, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.4181494661921707, |
| "grad_norm": 0.9032939672470093, |
| "learning_rate": 4.028160468371618e-06, |
| "loss": 0.5873546600341797, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.4199288256227758, |
| "grad_norm": 0.7442892789840698, |
| "learning_rate": 4.005359632843598e-06, |
| "loss": 0.6300751209259033, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.4217081850533808, |
| "grad_norm": 0.870611310005188, |
| "learning_rate": 3.982607340072673e-06, |
| "loss": 0.6464887142181397, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.4234875444839858, |
| "grad_norm": 0.9346739053726196, |
| "learning_rate": 3.959903774299914e-06, |
| "loss": 0.5978512763977051, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4234875444839858, |
| "eval_loss": 0.6126887202262878, |
| "eval_runtime": 408.0866, |
| "eval_samples_per_second": 12.323, |
| "eval_steps_per_second": 6.163, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4252669039145909, |
| "grad_norm": 0.7768172025680542, |
| "learning_rate": 3.937249119371837e-06, |
| "loss": 0.6360022544860839, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.4270462633451957, |
| "grad_norm": 0.9232711791992188, |
| "learning_rate": 3.914643558738871e-06, |
| "loss": 0.6255642414093018, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.4288256227758007, |
| "grad_norm": 0.6983018517494202, |
| "learning_rate": 3.892087275453913e-06, |
| "loss": 0.5994945526123047, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.4306049822064058, |
| "grad_norm": 1.073339819908142, |
| "learning_rate": 3.869580452170813e-06, |
| "loss": 0.6356189250946045, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.4323843416370106, |
| "grad_norm": 0.6503099799156189, |
| "learning_rate": 3.847123271142921e-06, |
| "loss": 0.6280940055847168, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.4341637010676156, |
| "grad_norm": 0.7652536630630493, |
| "learning_rate": 3.824715914221593e-06, |
| "loss": 0.633913803100586, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.4359430604982206, |
| "grad_norm": 0.7776927351951599, |
| "learning_rate": 3.802358562854719e-06, |
| "loss": 0.6014345169067383, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.4377224199288257, |
| "grad_norm": 0.9608566164970398, |
| "learning_rate": 3.780051398085274e-06, |
| "loss": 0.6504060745239257, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.4395017793594307, |
| "grad_norm": 1.0493940114974976, |
| "learning_rate": 3.7577946005498224e-06, |
| "loss": 0.6023014545440674, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.4412811387900355, |
| "grad_norm": 0.6732901930809021, |
| "learning_rate": 3.735588350477083e-06, |
| "loss": 0.618662166595459, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.4430604982206405, |
| "grad_norm": 0.7948004007339478, |
| "learning_rate": 3.7134328276864474e-06, |
| "loss": 0.6384446144104003, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.4448398576512456, |
| "grad_norm": 0.8854271769523621, |
| "learning_rate": 3.691328211586537e-06, |
| "loss": 0.6317539215087891, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.4466192170818506, |
| "grad_norm": 0.849312961101532, |
| "learning_rate": 3.669274681173741e-06, |
| "loss": 0.632611894607544, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.4483985765124556, |
| "grad_norm": 0.8737841844558716, |
| "learning_rate": 3.647272415030787e-06, |
| "loss": 0.6378211498260498, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.4501779359430604, |
| "grad_norm": 0.9041767716407776, |
| "learning_rate": 3.6253215913252614e-06, |
| "loss": 0.6225557804107666, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.4519572953736655, |
| "grad_norm": 0.9240919947624207, |
| "learning_rate": 3.603422387808203e-06, |
| "loss": 0.6011790752410888, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.4537366548042705, |
| "grad_norm": 0.7381271719932556, |
| "learning_rate": 3.5815749818126298e-06, |
| "loss": 0.6100322723388671, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.4555160142348753, |
| "grad_norm": 0.766522228717804, |
| "learning_rate": 3.559779550252135e-06, |
| "loss": 0.6223373413085938, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.4572953736654806, |
| "grad_norm": 0.722597599029541, |
| "learning_rate": 3.5380362696194246e-06, |
| "loss": 0.6096580982208252, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.4590747330960854, |
| "grad_norm": 0.848345935344696, |
| "learning_rate": 3.5163453159849158e-06, |
| "loss": 0.5932037830352783, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.4608540925266904, |
| "grad_norm": 0.8649978637695312, |
| "learning_rate": 3.4947068649952875e-06, |
| "loss": 0.616939926147461, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.4626334519572954, |
| "grad_norm": 1.0290734767913818, |
| "learning_rate": 3.473121091872068e-06, |
| "loss": 0.5814998149871826, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.4644128113879002, |
| "grad_norm": 1.0387070178985596, |
| "learning_rate": 3.4515881714102283e-06, |
| "loss": 0.602289867401123, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.4661921708185053, |
| "grad_norm": 0.7532253861427307, |
| "learning_rate": 3.4301082779767394e-06, |
| "loss": 0.6335249423980713, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.4679715302491103, |
| "grad_norm": 0.8629032373428345, |
| "learning_rate": 3.4086815855091906e-06, |
| "loss": 0.6227278709411621, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.4697508896797153, |
| "grad_norm": 0.9435595870018005, |
| "learning_rate": 3.3873082675143533e-06, |
| "loss": 0.6437982559204102, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.4715302491103204, |
| "grad_norm": 0.9069509506225586, |
| "learning_rate": 3.3659884970667955e-06, |
| "loss": 0.5948871612548828, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.4733096085409252, |
| "grad_norm": 0.7571467757225037, |
| "learning_rate": 3.344722446807469e-06, |
| "loss": 0.6302636623382568, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.4750889679715302, |
| "grad_norm": 0.7709717154502869, |
| "learning_rate": 3.3235102889423263e-06, |
| "loss": 0.6287346839904785, |
| "step": 8290 |
| }, |
| { |
| "epoch": 1.4768683274021353, |
| "grad_norm": 0.9050244688987732, |
| "learning_rate": 3.302352195240901e-06, |
| "loss": 0.6431692123413086, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.4786476868327403, |
| "grad_norm": 0.8429788947105408, |
| "learning_rate": 3.281248337034947e-06, |
| "loss": 0.6204410076141358, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.4804270462633453, |
| "grad_norm": 0.7680916786193848, |
| "learning_rate": 3.2601988852170207e-06, |
| "loss": 0.6185726642608642, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.4822064056939501, |
| "grad_norm": 0.7400604486465454, |
| "learning_rate": 3.2392040102391278e-06, |
| "loss": 0.711566686630249, |
| "step": 8330 |
| }, |
| { |
| "epoch": 1.4839857651245552, |
| "grad_norm": 0.7004432082176208, |
| "learning_rate": 3.2182638821113156e-06, |
| "loss": 0.5973163604736328, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.4857651245551602, |
| "grad_norm": 0.8438174724578857, |
| "learning_rate": 3.1973786704003086e-06, |
| "loss": 0.6273365497589112, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.487544483985765, |
| "grad_norm": 0.9420303702354431, |
| "learning_rate": 3.1765485442281453e-06, |
| "loss": 0.6080061435699463, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.48932384341637, |
| "grad_norm": 0.8402919769287109, |
| "learning_rate": 3.1557736722707843e-06, |
| "loss": 0.6027824401855468, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.491103202846975, |
| "grad_norm": 1.0150177478790283, |
| "learning_rate": 3.1350542227567693e-06, |
| "loss": 0.629277515411377, |
| "step": 8380 |
| }, |
| { |
| "epoch": 1.49288256227758, |
| "grad_norm": 0.8253095149993896, |
| "learning_rate": 3.1143903634658314e-06, |
| "loss": 0.6055563926696778, |
| "step": 8390 |
| }, |
| { |
| "epoch": 1.4946619217081851, |
| "grad_norm": 0.7219249606132507, |
| "learning_rate": 3.093782261727567e-06, |
| "loss": 0.6305870532989502, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.49644128113879, |
| "grad_norm": 0.7794970870018005, |
| "learning_rate": 3.073230084420051e-06, |
| "loss": 0.6157556533813476, |
| "step": 8410 |
| }, |
| { |
| "epoch": 1.498220640569395, |
| "grad_norm": 0.8257132172584534, |
| "learning_rate": 3.0527339979685068e-06, |
| "loss": 0.628666877746582, |
| "step": 8420 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.7853142619132996, |
| "learning_rate": 3.0322941683439455e-06, |
| "loss": 0.6387495040893555, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.501779359430605, |
| "grad_norm": 0.9834029674530029, |
| "learning_rate": 3.011910761061837e-06, |
| "loss": 0.6235998153686524, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.50355871886121, |
| "grad_norm": 0.7815040349960327, |
| "learning_rate": 2.99158394118075e-06, |
| "loss": 0.6229785919189453, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.5053380782918149, |
| "grad_norm": 0.6993287801742554, |
| "learning_rate": 2.9713138733010373e-06, |
| "loss": 0.6498080730438233, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.50711743772242, |
| "grad_norm": 0.8505419492721558, |
| "learning_rate": 2.951100721563479e-06, |
| "loss": 0.6163151264190674, |
| "step": 8470 |
| }, |
| { |
| "epoch": 1.508896797153025, |
| "grad_norm": 0.7740962505340576, |
| "learning_rate": 2.930944649647971e-06, |
| "loss": 0.6463754177093506, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.5106761565836297, |
| "grad_norm": 0.9081360101699829, |
| "learning_rate": 2.9108458207722013e-06, |
| "loss": 0.5963332653045654, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.512455516014235, |
| "grad_norm": 0.7486013770103455, |
| "learning_rate": 2.8908043976903065e-06, |
| "loss": 0.5907905101776123, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.512455516014235, |
| "eval_loss": 0.6111557483673096, |
| "eval_runtime": 408.1089, |
| "eval_samples_per_second": 12.323, |
| "eval_steps_per_second": 6.163, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.5142348754448398, |
| "grad_norm": 0.6754533648490906, |
| "learning_rate": 2.8708205426915858e-06, |
| "loss": 0.5735606670379638, |
| "step": 8510 |
| }, |
| { |
| "epoch": 1.5160142348754448, |
| "grad_norm": 0.9227625727653503, |
| "learning_rate": 2.850894417599154e-06, |
| "loss": 0.586240005493164, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.5177935943060499, |
| "grad_norm": 1.1501375436782837, |
| "learning_rate": 2.8310261837686594e-06, |
| "loss": 0.5986839771270752, |
| "step": 8530 |
| }, |
| { |
| "epoch": 1.5195729537366547, |
| "grad_norm": 0.8052434325218201, |
| "learning_rate": 2.811216002086954e-06, |
| "loss": 0.6387444972991944, |
| "step": 8540 |
| }, |
| { |
| "epoch": 1.52135231316726, |
| "grad_norm": 0.9873703718185425, |
| "learning_rate": 2.791464032970812e-06, |
| "loss": 0.6114506244659423, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.5231316725978647, |
| "grad_norm": 0.8305763006210327, |
| "learning_rate": 2.771770436365612e-06, |
| "loss": 0.65102219581604, |
| "step": 8560 |
| }, |
| { |
| "epoch": 1.5249110320284698, |
| "grad_norm": 0.9540684223175049, |
| "learning_rate": 2.7521353717440523e-06, |
| "loss": 0.6242643833160401, |
| "step": 8570 |
| }, |
| { |
| "epoch": 1.5266903914590748, |
| "grad_norm": 0.764707624912262, |
| "learning_rate": 2.732558998104855e-06, |
| "loss": 0.6340816020965576, |
| "step": 8580 |
| }, |
| { |
| "epoch": 1.5284697508896796, |
| "grad_norm": 0.8800462484359741, |
| "learning_rate": 2.7130414739714884e-06, |
| "loss": 0.6269487857818603, |
| "step": 8590 |
| }, |
| { |
| "epoch": 1.5302491103202847, |
| "grad_norm": 0.7068182826042175, |
| "learning_rate": 2.6935829573908645e-06, |
| "loss": 0.6376915454864502, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.5320284697508897, |
| "grad_norm": 0.9243912100791931, |
| "learning_rate": 2.6741836059320813e-06, |
| "loss": 0.6650017261505127, |
| "step": 8610 |
| }, |
| { |
| "epoch": 1.5338078291814945, |
| "grad_norm": 0.8034710884094238, |
| "learning_rate": 2.654843576685129e-06, |
| "loss": 0.6140787601470947, |
| "step": 8620 |
| }, |
| { |
| "epoch": 1.5355871886120998, |
| "grad_norm": 0.6924305558204651, |
| "learning_rate": 2.635563026259622e-06, |
| "loss": 0.592177438735962, |
| "step": 8630 |
| }, |
| { |
| "epoch": 1.5373665480427046, |
| "grad_norm": 0.9960291385650635, |
| "learning_rate": 2.616342110783544e-06, |
| "loss": 0.620767879486084, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.5391459074733096, |
| "grad_norm": 0.9700925350189209, |
| "learning_rate": 2.5971809859019616e-06, |
| "loss": 0.6059544563293457, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.5409252669039146, |
| "grad_norm": 0.8979527354240417, |
| "learning_rate": 2.578079806775786e-06, |
| "loss": 0.6165768146514893, |
| "step": 8660 |
| }, |
| { |
| "epoch": 1.5427046263345194, |
| "grad_norm": 0.7694889903068542, |
| "learning_rate": 2.559038728080495e-06, |
| "loss": 0.6269434452056885, |
| "step": 8670 |
| }, |
| { |
| "epoch": 1.5444839857651247, |
| "grad_norm": 0.7812192440032959, |
| "learning_rate": 2.5400579040049045e-06, |
| "loss": 0.6156674385070801, |
| "step": 8680 |
| }, |
| { |
| "epoch": 1.5462633451957295, |
| "grad_norm": 0.8593633770942688, |
| "learning_rate": 2.521137488249892e-06, |
| "loss": 0.5995303630828858, |
| "step": 8690 |
| }, |
| { |
| "epoch": 1.5480427046263345, |
| "grad_norm": 0.7315478920936584, |
| "learning_rate": 2.5022776340271827e-06, |
| "loss": 0.6255430698394775, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.5498220640569396, |
| "grad_norm": 0.7979263663291931, |
| "learning_rate": 2.483478494058081e-06, |
| "loss": 0.617695426940918, |
| "step": 8710 |
| }, |
| { |
| "epoch": 1.5516014234875444, |
| "grad_norm": 0.791521430015564, |
| "learning_rate": 2.4647402205722513e-06, |
| "loss": 0.5985545158386231, |
| "step": 8720 |
| }, |
| { |
| "epoch": 1.5533807829181496, |
| "grad_norm": 0.8115281462669373, |
| "learning_rate": 2.446062965306476e-06, |
| "loss": 0.6137414932250976, |
| "step": 8730 |
| }, |
| { |
| "epoch": 1.5551601423487544, |
| "grad_norm": 0.73805832862854, |
| "learning_rate": 2.427446879503439e-06, |
| "loss": 0.6292818069458008, |
| "step": 8740 |
| }, |
| { |
| "epoch": 1.5569395017793595, |
| "grad_norm": 0.7681689262390137, |
| "learning_rate": 2.408892113910485e-06, |
| "loss": 0.6184986114501954, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.5587188612099645, |
| "grad_norm": 0.8392589688301086, |
| "learning_rate": 2.390398818778403e-06, |
| "loss": 0.6432001113891601, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.5604982206405693, |
| "grad_norm": 0.7140183448791504, |
| "learning_rate": 2.3719671438602287e-06, |
| "loss": 0.5931034088134766, |
| "step": 8770 |
| }, |
| { |
| "epoch": 1.5622775800711743, |
| "grad_norm": 1.0340561866760254, |
| "learning_rate": 2.353597238409997e-06, |
| "loss": 0.6184981346130372, |
| "step": 8780 |
| }, |
| { |
| "epoch": 1.5640569395017794, |
| "grad_norm": 1.3788442611694336, |
| "learning_rate": 2.3352892511815695e-06, |
| "loss": 0.6254217147827148, |
| "step": 8790 |
| }, |
| { |
| "epoch": 1.5658362989323842, |
| "grad_norm": 0.8685413599014282, |
| "learning_rate": 2.3170433304274e-06, |
| "loss": 0.6440535545349121, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.5676156583629894, |
| "grad_norm": 0.8243607878684998, |
| "learning_rate": 2.298859623897357e-06, |
| "loss": 0.6024580001831055, |
| "step": 8810 |
| }, |
| { |
| "epoch": 1.5693950177935942, |
| "grad_norm": 0.9618055820465088, |
| "learning_rate": 2.280738278837508e-06, |
| "loss": 0.6213048934936524, |
| "step": 8820 |
| }, |
| { |
| "epoch": 1.5711743772241993, |
| "grad_norm": 0.8591263294219971, |
| "learning_rate": 2.2626794419889463e-06, |
| "loss": 0.6279497146606445, |
| "step": 8830 |
| }, |
| { |
| "epoch": 1.5729537366548043, |
| "grad_norm": 0.8621962070465088, |
| "learning_rate": 2.2446832595865833e-06, |
| "loss": 0.5932358741760254, |
| "step": 8840 |
| }, |
| { |
| "epoch": 1.5747330960854091, |
| "grad_norm": 0.9073888659477234, |
| "learning_rate": 2.226749877357983e-06, |
| "loss": 0.6451629161834717, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.5765124555160144, |
| "grad_norm": 0.9106960892677307, |
| "learning_rate": 2.208879440522167e-06, |
| "loss": 0.6058315753936767, |
| "step": 8860 |
| }, |
| { |
| "epoch": 1.5782918149466192, |
| "grad_norm": 0.7805183529853821, |
| "learning_rate": 2.1910720937884432e-06, |
| "loss": 0.6308177947998047, |
| "step": 8870 |
| }, |
| { |
| "epoch": 1.5800711743772242, |
| "grad_norm": 0.8798645734786987, |
| "learning_rate": 2.1733279813552386e-06, |
| "loss": 0.6120688438415527, |
| "step": 8880 |
| }, |
| { |
| "epoch": 1.5818505338078293, |
| "grad_norm": 0.7852058410644531, |
| "learning_rate": 2.1556472469089305e-06, |
| "loss": 0.6294644832611084, |
| "step": 8890 |
| }, |
| { |
| "epoch": 1.583629893238434, |
| "grad_norm": 1.0219141244888306, |
| "learning_rate": 2.1380300336226756e-06, |
| "loss": 0.6379860877990723, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.585409252669039, |
| "grad_norm": 0.8301982879638672, |
| "learning_rate": 2.120476484155255e-06, |
| "loss": 0.5950196743011474, |
| "step": 8910 |
| }, |
| { |
| "epoch": 1.5871886120996441, |
| "grad_norm": 0.781911313533783, |
| "learning_rate": 2.102986740649928e-06, |
| "loss": 0.6155390739440918, |
| "step": 8920 |
| }, |
| { |
| "epoch": 1.5889679715302492, |
| "grad_norm": 0.8553647994995117, |
| "learning_rate": 2.0855609447332635e-06, |
| "loss": 0.6030520439147949, |
| "step": 8930 |
| }, |
| { |
| "epoch": 1.5907473309608542, |
| "grad_norm": 0.8010233640670776, |
| "learning_rate": 2.06819923751401e-06, |
| "loss": 0.5782717227935791, |
| "step": 8940 |
| }, |
| { |
| "epoch": 1.592526690391459, |
| "grad_norm": 0.7767868041992188, |
| "learning_rate": 2.050901759581937e-06, |
| "loss": 0.5862733364105225, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.594306049822064, |
| "grad_norm": 0.894256591796875, |
| "learning_rate": 2.033668651006715e-06, |
| "loss": 0.6170249938964844, |
| "step": 8960 |
| }, |
| { |
| "epoch": 1.596085409252669, |
| "grad_norm": 0.8816112875938416, |
| "learning_rate": 2.0165000513367604e-06, |
| "loss": 0.5960803508758545, |
| "step": 8970 |
| }, |
| { |
| "epoch": 1.5978647686832739, |
| "grad_norm": 0.7646706700325012, |
| "learning_rate": 1.9993960995981287e-06, |
| "loss": 0.609787130355835, |
| "step": 8980 |
| }, |
| { |
| "epoch": 1.5996441281138791, |
| "grad_norm": 0.8380435109138489, |
| "learning_rate": 1.9823569342933624e-06, |
| "loss": 0.5809425354003906, |
| "step": 8990 |
| }, |
| { |
| "epoch": 1.601423487544484, |
| "grad_norm": 0.7763661742210388, |
| "learning_rate": 1.965382693400396e-06, |
| "loss": 0.6282608032226562, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.601423487544484, |
| "eval_loss": 0.6098406314849854, |
| "eval_runtime": 407.9599, |
| "eval_samples_per_second": 12.327, |
| "eval_steps_per_second": 6.165, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.603202846975089, |
| "grad_norm": 0.7980552315711975, |
| "learning_rate": 1.9484735143714184e-06, |
| "loss": 0.5892675876617431, |
| "step": 9010 |
| }, |
| { |
| "epoch": 1.604982206405694, |
| "grad_norm": 0.7350971102714539, |
| "learning_rate": 1.931629534131769e-06, |
| "loss": 0.615026330947876, |
| "step": 9020 |
| }, |
| { |
| "epoch": 1.6067615658362988, |
| "grad_norm": 0.9350213408470154, |
| "learning_rate": 1.9148508890788263e-06, |
| "loss": 0.5945257663726806, |
| "step": 9030 |
| }, |
| { |
| "epoch": 1.608540925266904, |
| "grad_norm": 0.8562370538711548, |
| "learning_rate": 1.8981377150809111e-06, |
| "loss": 0.6524335384368897, |
| "step": 9040 |
| }, |
| { |
| "epoch": 1.6103202846975089, |
| "grad_norm": 0.8481477499008179, |
| "learning_rate": 1.8814901474761704e-06, |
| "loss": 0.572668981552124, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.612099644128114, |
| "grad_norm": 0.8405110239982605, |
| "learning_rate": 1.8649083210714946e-06, |
| "loss": 0.6025730609893799, |
| "step": 9060 |
| }, |
| { |
| "epoch": 1.613879003558719, |
| "grad_norm": 0.6442246437072754, |
| "learning_rate": 1.8483923701414274e-06, |
| "loss": 0.619115161895752, |
| "step": 9070 |
| }, |
| { |
| "epoch": 1.6156583629893237, |
| "grad_norm": 0.9033611416816711, |
| "learning_rate": 1.8319424284270638e-06, |
| "loss": 0.6313360214233399, |
| "step": 9080 |
| }, |
| { |
| "epoch": 1.6174377224199288, |
| "grad_norm": 0.8982630968093872, |
| "learning_rate": 1.8155586291349836e-06, |
| "loss": 0.6707229137420654, |
| "step": 9090 |
| }, |
| { |
| "epoch": 1.6192170818505338, |
| "grad_norm": 0.8548517823219299, |
| "learning_rate": 1.7992411049361612e-06, |
| "loss": 0.5913959503173828, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.6209964412811388, |
| "grad_norm": 0.8158797025680542, |
| "learning_rate": 1.7829899879649005e-06, |
| "loss": 0.6455716133117676, |
| "step": 9110 |
| }, |
| { |
| "epoch": 1.6227758007117439, |
| "grad_norm": 1.0375958681106567, |
| "learning_rate": 1.7668054098177512e-06, |
| "loss": 0.6378812789916992, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.6245551601423487, |
| "grad_norm": 0.9441247582435608, |
| "learning_rate": 1.7506875015524649e-06, |
| "loss": 0.6197398662567138, |
| "step": 9130 |
| }, |
| { |
| "epoch": 1.6263345195729537, |
| "grad_norm": 0.7956865429878235, |
| "learning_rate": 1.7346363936869082e-06, |
| "loss": 0.6085912704467773, |
| "step": 9140 |
| }, |
| { |
| "epoch": 1.6281138790035588, |
| "grad_norm": 0.7922634482383728, |
| "learning_rate": 1.7186522161980279e-06, |
| "loss": 0.6055495738983154, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.6298932384341636, |
| "grad_norm": 0.9479956030845642, |
| "learning_rate": 1.7027350985207847e-06, |
| "loss": 0.5977864265441895, |
| "step": 9160 |
| }, |
| { |
| "epoch": 1.6316725978647688, |
| "grad_norm": 0.8339085578918457, |
| "learning_rate": 1.68688516954711e-06, |
| "loss": 0.6118191242218017, |
| "step": 9170 |
| }, |
| { |
| "epoch": 1.6334519572953736, |
| "grad_norm": 0.8879317045211792, |
| "learning_rate": 1.6711025576248586e-06, |
| "loss": 0.607643461227417, |
| "step": 9180 |
| }, |
| { |
| "epoch": 1.6352313167259787, |
| "grad_norm": 0.7563620805740356, |
| "learning_rate": 1.655387390556782e-06, |
| "loss": 0.566606092453003, |
| "step": 9190 |
| }, |
| { |
| "epoch": 1.6370106761565837, |
| "grad_norm": 0.8990421295166016, |
| "learning_rate": 1.6397397955994742e-06, |
| "loss": 0.6038555145263672, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.6387900355871885, |
| "grad_norm": 0.8692913055419922, |
| "learning_rate": 1.624159899462353e-06, |
| "loss": 0.6045926570892334, |
| "step": 9210 |
| }, |
| { |
| "epoch": 1.6405693950177938, |
| "grad_norm": 0.7604990005493164, |
| "learning_rate": 1.6086478283066386e-06, |
| "loss": 0.6289362907409668, |
| "step": 9220 |
| }, |
| { |
| "epoch": 1.6423487544483986, |
| "grad_norm": 0.8552067875862122, |
| "learning_rate": 1.5932037077443163e-06, |
| "loss": 0.5995774269104004, |
| "step": 9230 |
| }, |
| { |
| "epoch": 1.6441281138790036, |
| "grad_norm": 0.9507073760032654, |
| "learning_rate": 1.577827662837136e-06, |
| "loss": 0.6592823028564453, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.6459074733096086, |
| "grad_norm": 0.7741204500198364, |
| "learning_rate": 1.5625198180955836e-06, |
| "loss": 0.5817698001861572, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.6476868327402134, |
| "grad_norm": 0.906514048576355, |
| "learning_rate": 1.5472802974778911e-06, |
| "loss": 0.6105581760406494, |
| "step": 9260 |
| }, |
| { |
| "epoch": 1.6494661921708185, |
| "grad_norm": 1.1253999471664429, |
| "learning_rate": 1.5321092243890112e-06, |
| "loss": 0.6198730945587159, |
| "step": 9270 |
| }, |
| { |
| "epoch": 1.6512455516014235, |
| "grad_norm": 0.7494739294052124, |
| "learning_rate": 1.5170067216796425e-06, |
| "loss": 0.5921574115753174, |
| "step": 9280 |
| }, |
| { |
| "epoch": 1.6530249110320283, |
| "grad_norm": 0.674996018409729, |
| "learning_rate": 1.5019729116452086e-06, |
| "loss": 0.6231479644775391, |
| "step": 9290 |
| }, |
| { |
| "epoch": 1.6548042704626336, |
| "grad_norm": 0.8674888610839844, |
| "learning_rate": 1.4870079160248918e-06, |
| "loss": 0.5954656600952148, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.6565836298932384, |
| "grad_norm": 0.8911182284355164, |
| "learning_rate": 1.4721118560006298e-06, |
| "loss": 0.5914658069610595, |
| "step": 9310 |
| }, |
| { |
| "epoch": 1.6583629893238434, |
| "grad_norm": 0.9357933402061462, |
| "learning_rate": 1.4572848521961414e-06, |
| "loss": 0.6310736656188964, |
| "step": 9320 |
| }, |
| { |
| "epoch": 1.6601423487544484, |
| "grad_norm": 0.936082124710083, |
| "learning_rate": 1.4425270246759549e-06, |
| "loss": 0.5978594303131104, |
| "step": 9330 |
| }, |
| { |
| "epoch": 1.6619217081850532, |
| "grad_norm": 0.8404752016067505, |
| "learning_rate": 1.4278384929444233e-06, |
| "loss": 0.5916398048400879, |
| "step": 9340 |
| }, |
| { |
| "epoch": 1.6637010676156585, |
| "grad_norm": 0.6900134086608887, |
| "learning_rate": 1.4132193759447665e-06, |
| "loss": 0.6098292350769043, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.6654804270462633, |
| "grad_norm": 0.8345168828964233, |
| "learning_rate": 1.398669792058105e-06, |
| "loss": 0.6169853687286377, |
| "step": 9360 |
| }, |
| { |
| "epoch": 1.6672597864768683, |
| "grad_norm": 0.8186418414115906, |
| "learning_rate": 1.384189859102505e-06, |
| "loss": 0.6554735660552978, |
| "step": 9370 |
| }, |
| { |
| "epoch": 1.6690391459074734, |
| "grad_norm": 0.8094502687454224, |
| "learning_rate": 1.369779694332012e-06, |
| "loss": 0.57339506149292, |
| "step": 9380 |
| }, |
| { |
| "epoch": 1.6708185053380782, |
| "grad_norm": 0.8150178790092468, |
| "learning_rate": 1.3554394144357218e-06, |
| "loss": 0.5624317169189453, |
| "step": 9390 |
| }, |
| { |
| "epoch": 1.6725978647686834, |
| "grad_norm": 0.7492024898529053, |
| "learning_rate": 1.341169135536814e-06, |
| "loss": 0.619264554977417, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.6743772241992882, |
| "grad_norm": 0.7049137949943542, |
| "learning_rate": 1.326968973191628e-06, |
| "loss": 0.5918323040008545, |
| "step": 9410 |
| }, |
| { |
| "epoch": 1.6761565836298933, |
| "grad_norm": 0.8279508948326111, |
| "learning_rate": 1.312839042388716e-06, |
| "loss": 0.6143953800201416, |
| "step": 9420 |
| }, |
| { |
| "epoch": 1.6779359430604983, |
| "grad_norm": 1.0093584060668945, |
| "learning_rate": 1.2987794575479218e-06, |
| "loss": 0.6140541076660156, |
| "step": 9430 |
| }, |
| { |
| "epoch": 1.6797153024911031, |
| "grad_norm": 0.9923004508018494, |
| "learning_rate": 1.2847903325194455e-06, |
| "loss": 0.6000154495239258, |
| "step": 9440 |
| }, |
| { |
| "epoch": 1.6814946619217082, |
| "grad_norm": 0.9289199113845825, |
| "learning_rate": 1.270871780582924e-06, |
| "loss": 0.6113490104675293, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.6832740213523132, |
| "grad_norm": 0.6759699583053589, |
| "learning_rate": 1.2570239144465212e-06, |
| "loss": 0.6019508838653564, |
| "step": 9460 |
| }, |
| { |
| "epoch": 1.685053380782918, |
| "grad_norm": 0.8999039530754089, |
| "learning_rate": 1.2432468462460024e-06, |
| "loss": 0.6267662048339844, |
| "step": 9470 |
| }, |
| { |
| "epoch": 1.6868327402135233, |
| "grad_norm": 0.9530540704727173, |
| "learning_rate": 1.2295406875438377e-06, |
| "loss": 0.5737581729888916, |
| "step": 9480 |
| }, |
| { |
| "epoch": 1.688612099644128, |
| "grad_norm": 0.9010872840881348, |
| "learning_rate": 1.2159055493282911e-06, |
| "loss": 0.6142421722412109, |
| "step": 9490 |
| }, |
| { |
| "epoch": 1.690391459074733, |
| "grad_norm": 0.9528436660766602, |
| "learning_rate": 1.2023415420125262e-06, |
| "loss": 0.5961336612701416, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.690391459074733, |
| "eval_loss": 0.6090449094772339, |
| "eval_runtime": 409.4467, |
| "eval_samples_per_second": 12.282, |
| "eval_steps_per_second": 6.142, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.6921708185053381, |
| "grad_norm": 0.863777220249176, |
| "learning_rate": 1.1888487754337052e-06, |
| "loss": 0.6345890045166016, |
| "step": 9510 |
| }, |
| { |
| "epoch": 1.693950177935943, |
| "grad_norm": 1.0439646244049072, |
| "learning_rate": 1.1754273588521149e-06, |
| "loss": 0.6450316429138183, |
| "step": 9520 |
| }, |
| { |
| "epoch": 1.6957295373665482, |
| "grad_norm": 0.8258497714996338, |
| "learning_rate": 1.162077400950261e-06, |
| "loss": 0.6218976497650146, |
| "step": 9530 |
| }, |
| { |
| "epoch": 1.697508896797153, |
| "grad_norm": 0.8564954996109009, |
| "learning_rate": 1.148799009832009e-06, |
| "loss": 0.6231951713562012, |
| "step": 9540 |
| }, |
| { |
| "epoch": 1.699288256227758, |
| "grad_norm": 0.7925037741661072, |
| "learning_rate": 1.1355922930216867e-06, |
| "loss": 0.5838134765625, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.701067615658363, |
| "grad_norm": 0.8268294930458069, |
| "learning_rate": 1.122457357463238e-06, |
| "loss": 0.6027390480041503, |
| "step": 9560 |
| }, |
| { |
| "epoch": 1.7028469750889679, |
| "grad_norm": 0.9756558537483215, |
| "learning_rate": 1.1093943095193328e-06, |
| "loss": 0.6181758403778076, |
| "step": 9570 |
| }, |
| { |
| "epoch": 1.704626334519573, |
| "grad_norm": 0.8576071262359619, |
| "learning_rate": 1.096403254970526e-06, |
| "loss": 0.6374680519104003, |
| "step": 9580 |
| }, |
| { |
| "epoch": 1.706405693950178, |
| "grad_norm": 0.9196457266807556, |
| "learning_rate": 1.083484299014389e-06, |
| "loss": 0.6039985179901123, |
| "step": 9590 |
| }, |
| { |
| "epoch": 1.708185053380783, |
| "grad_norm": 0.9652523994445801, |
| "learning_rate": 1.0706375462646557e-06, |
| "loss": 0.6314795970916748, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.709964412811388, |
| "grad_norm": 0.9042948484420776, |
| "learning_rate": 1.0578631007503914e-06, |
| "loss": 0.6144063472747803, |
| "step": 9610 |
| }, |
| { |
| "epoch": 1.7117437722419928, |
| "grad_norm": 1.0348018407821655, |
| "learning_rate": 1.045161065915129e-06, |
| "loss": 0.6448927879333496, |
| "step": 9620 |
| }, |
| { |
| "epoch": 1.7135231316725978, |
| "grad_norm": 0.9222016334533691, |
| "learning_rate": 1.03253154461605e-06, |
| "loss": 0.6264961719512939, |
| "step": 9630 |
| }, |
| { |
| "epoch": 1.7153024911032029, |
| "grad_norm": 0.8399068117141724, |
| "learning_rate": 1.019974639123138e-06, |
| "loss": 0.6070163726806641, |
| "step": 9640 |
| }, |
| { |
| "epoch": 1.7170818505338077, |
| "grad_norm": 0.8186811208724976, |
| "learning_rate": 1.0074904511183592e-06, |
| "loss": 0.6530916690826416, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.718861209964413, |
| "grad_norm": 0.8317114114761353, |
| "learning_rate": 9.95079081694832e-07, |
| "loss": 0.6258822917938233, |
| "step": 9660 |
| }, |
| { |
| "epoch": 1.7206405693950177, |
| "grad_norm": 0.8472376465797424, |
| "learning_rate": 9.827406313560194e-07, |
| "loss": 0.6343019485473633, |
| "step": 9670 |
| }, |
| { |
| "epoch": 1.7224199288256228, |
| "grad_norm": 0.7907516956329346, |
| "learning_rate": 9.704752000148998e-07, |
| "loss": 0.5924992561340332, |
| "step": 9680 |
| }, |
| { |
| "epoch": 1.7241992882562278, |
| "grad_norm": 0.8266814351081848, |
| "learning_rate": 9.582828869931749e-07, |
| "loss": 0.63353590965271, |
| "step": 9690 |
| }, |
| { |
| "epoch": 1.7259786476868326, |
| "grad_norm": 0.8828052878379822, |
| "learning_rate": 9.461637910204468e-07, |
| "loss": 0.5913454532623291, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.7277580071174379, |
| "grad_norm": 0.795305609703064, |
| "learning_rate": 9.341180102334391e-07, |
| "loss": 0.625618314743042, |
| "step": 9710 |
| }, |
| { |
| "epoch": 1.7295373665480427, |
| "grad_norm": 0.8134214878082275, |
| "learning_rate": 9.221456421751828e-07, |
| "loss": 0.6241415023803711, |
| "step": 9720 |
| }, |
| { |
| "epoch": 1.7313167259786477, |
| "grad_norm": 0.8993518948554993, |
| "learning_rate": 9.102467837942364e-07, |
| "loss": 0.6320163249969483, |
| "step": 9730 |
| }, |
| { |
| "epoch": 1.7330960854092528, |
| "grad_norm": 0.8220566511154175, |
| "learning_rate": 8.984215314439071e-07, |
| "loss": 0.5945559501647949, |
| "step": 9740 |
| }, |
| { |
| "epoch": 1.7348754448398576, |
| "grad_norm": 0.8112704753875732, |
| "learning_rate": 8.866699808814516e-07, |
| "loss": 0.5934808731079102, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.7366548042704626, |
| "grad_norm": 0.9084371328353882, |
| "learning_rate": 8.749922272673228e-07, |
| "loss": 0.614768123626709, |
| "step": 9760 |
| }, |
| { |
| "epoch": 1.7384341637010676, |
| "grad_norm": 1.0116448402404785, |
| "learning_rate": 8.633883651643815e-07, |
| "loss": 0.6284510612487793, |
| "step": 9770 |
| }, |
| { |
| "epoch": 1.7402135231316724, |
| "grad_norm": 0.7356001734733582, |
| "learning_rate": 8.518584885371417e-07, |
| "loss": 0.5880512237548828, |
| "step": 9780 |
| }, |
| { |
| "epoch": 1.7419928825622777, |
| "grad_norm": 1.018543004989624, |
| "learning_rate": 8.404026907510043e-07, |
| "loss": 0.6093903064727784, |
| "step": 9790 |
| }, |
| { |
| "epoch": 1.7437722419928825, |
| "grad_norm": 0.7327563166618347, |
| "learning_rate": 8.290210645715002e-07, |
| "loss": 0.6004554748535156, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.7455516014234875, |
| "grad_norm": 0.8729904294013977, |
| "learning_rate": 8.177137021635439e-07, |
| "loss": 0.6066318035125733, |
| "step": 9810 |
| }, |
| { |
| "epoch": 1.7473309608540926, |
| "grad_norm": 0.9604983925819397, |
| "learning_rate": 8.064806950906867e-07, |
| "loss": 0.617271900177002, |
| "step": 9820 |
| }, |
| { |
| "epoch": 1.7491103202846974, |
| "grad_norm": 0.8511675000190735, |
| "learning_rate": 7.95322134314368e-07, |
| "loss": 0.6143304824829101, |
| "step": 9830 |
| }, |
| { |
| "epoch": 1.7508896797153026, |
| "grad_norm": 1.240357518196106, |
| "learning_rate": 7.842381101931895e-07, |
| "loss": 0.5943353652954102, |
| "step": 9840 |
| }, |
| { |
| "epoch": 1.7526690391459074, |
| "grad_norm": 0.7550404667854309, |
| "learning_rate": 7.732287124821747e-07, |
| "loss": 0.6033238887786865, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.7544483985765125, |
| "grad_norm": 0.8071495294570923, |
| "learning_rate": 7.622940303320426e-07, |
| "loss": 0.6051639080047607, |
| "step": 9860 |
| }, |
| { |
| "epoch": 1.7562277580071175, |
| "grad_norm": 1.0281059741973877, |
| "learning_rate": 7.514341522884971e-07, |
| "loss": 0.6654407501220703, |
| "step": 9870 |
| }, |
| { |
| "epoch": 1.7580071174377223, |
| "grad_norm": 0.8573912978172302, |
| "learning_rate": 7.406491662914917e-07, |
| "loss": 0.5752899646759033, |
| "step": 9880 |
| }, |
| { |
| "epoch": 1.7597864768683276, |
| "grad_norm": 0.9136864542961121, |
| "learning_rate": 7.299391596745342e-07, |
| "loss": 0.5918805599212646, |
| "step": 9890 |
| }, |
| { |
| "epoch": 1.7615658362989324, |
| "grad_norm": 0.7273184657096863, |
| "learning_rate": 7.193042191639677e-07, |
| "loss": 0.6129786491394043, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.7633451957295374, |
| "grad_norm": 0.8380533456802368, |
| "learning_rate": 7.087444308782787e-07, |
| "loss": 0.6275459289550781, |
| "step": 9910 |
| }, |
| { |
| "epoch": 1.7651245551601424, |
| "grad_norm": 0.8067348003387451, |
| "learning_rate": 6.982598803273854e-07, |
| "loss": 0.6274401187896729, |
| "step": 9920 |
| }, |
| { |
| "epoch": 1.7669039145907472, |
| "grad_norm": 0.9172580242156982, |
| "learning_rate": 6.878506524119644e-07, |
| "loss": 0.6016909122467041, |
| "step": 9930 |
| }, |
| { |
| "epoch": 1.7686832740213523, |
| "grad_norm": 0.8141267895698547, |
| "learning_rate": 6.775168314227442e-07, |
| "loss": 0.5785222053527832, |
| "step": 9940 |
| }, |
| { |
| "epoch": 1.7704626334519573, |
| "grad_norm": 0.8267232179641724, |
| "learning_rate": 6.672585010398347e-07, |
| "loss": 0.59950852394104, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.7722419928825621, |
| "grad_norm": 1.015816569328308, |
| "learning_rate": 6.570757443320441e-07, |
| "loss": 0.5815055847167969, |
| "step": 9960 |
| }, |
| { |
| "epoch": 1.7740213523131674, |
| "grad_norm": 0.9419827461242676, |
| "learning_rate": 6.469686437562117e-07, |
| "loss": 0.6218266963958741, |
| "step": 9970 |
| }, |
| { |
| "epoch": 1.7758007117437722, |
| "grad_norm": 0.8901230692863464, |
| "learning_rate": 6.369372811565333e-07, |
| "loss": 0.6288963317871094, |
| "step": 9980 |
| }, |
| { |
| "epoch": 1.7775800711743772, |
| "grad_norm": 0.8378164172172546, |
| "learning_rate": 6.269817377639054e-07, |
| "loss": 0.5956534385681153, |
| "step": 9990 |
| }, |
| { |
| "epoch": 1.7793594306049823, |
| "grad_norm": 0.6490439772605896, |
| "learning_rate": 6.171020941952611e-07, |
| "loss": 0.6209733486175537, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.7793594306049823, |
| "eval_loss": 0.6085147261619568, |
| "eval_runtime": 413.8517, |
| "eval_samples_per_second": 12.152, |
| "eval_steps_per_second": 6.077, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 11240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.602790940448358e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |