Invalid JSON:Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998899122967965, | |
| "eval_steps": 500, | |
| "global_step": 3406, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0029356720854280577, | |
| "grad_norm": 2.3989222049713135, | |
| "learning_rate": 1.9999583060217186e-05, | |
| "loss": 1.4046, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.005871344170856115, | |
| "grad_norm": 2.4013407230377197, | |
| "learning_rate": 1.9998315387870395e-05, | |
| "loss": 1.2805, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008807016256284174, | |
| "grad_norm": 2.5987584590911865, | |
| "learning_rate": 1.9996197048273697e-05, | |
| "loss": 1.2627, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01174268834171223, | |
| "grad_norm": 2.5626957416534424, | |
| "learning_rate": 1.999322822165767e-05, | |
| "loss": 1.34, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.014678360427140288, | |
| "grad_norm": 2.4768199920654297, | |
| "learning_rate": 1.998940916061322e-05, | |
| "loss": 1.2963, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.017614032512568347, | |
| "grad_norm": 2.1327669620513916, | |
| "learning_rate": 1.9984740190070102e-05, | |
| "loss": 1.3513, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.020549704597996404, | |
| "grad_norm": 2.7165908813476562, | |
| "learning_rate": 1.9979221707269273e-05, | |
| "loss": 1.2629, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02348537668342446, | |
| "grad_norm": 2.5773496627807617, | |
| "learning_rate": 1.997285418172908e-05, | |
| "loss": 1.2504, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02642104876885252, | |
| "grad_norm": 2.7524304389953613, | |
| "learning_rate": 1.9965638155205335e-05, | |
| "loss": 1.2854, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.029356720854280576, | |
| "grad_norm": 2.542572498321533, | |
| "learning_rate": 1.995757424164521e-05, | |
| "loss": 1.2583, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03229239293970863, | |
| "grad_norm": 3.595125198364258, | |
| "learning_rate": 1.9948663127135003e-05, | |
| "loss": 1.2612, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.035228065025136694, | |
| "grad_norm": 2.8669538497924805, | |
| "learning_rate": 1.9938905569841754e-05, | |
| "loss": 1.237, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03816373711056475, | |
| "grad_norm": 3.1492984294891357, | |
| "learning_rate": 1.9928302399948767e-05, | |
| "loss": 1.2394, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04109940919599281, | |
| "grad_norm": 3.0048630237579346, | |
| "learning_rate": 1.991685451958495e-05, | |
| "loss": 1.1899, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04403508128142086, | |
| "grad_norm": 2.9907774925231934, | |
| "learning_rate": 1.990456290274808e-05, | |
| "loss": 1.1939, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04697075336684892, | |
| "grad_norm": 3.010820150375366, | |
| "learning_rate": 1.9891428595221914e-05, | |
| "loss": 1.2212, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.049906425452276984, | |
| "grad_norm": 2.486607551574707, | |
| "learning_rate": 1.9877452714487232e-05, | |
| "loss": 1.1824, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05284209753770504, | |
| "grad_norm": 2.491534471511841, | |
| "learning_rate": 1.9862636449626752e-05, | |
| "loss": 1.2118, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0557777696231331, | |
| "grad_norm": 2.5148768424987793, | |
| "learning_rate": 1.9846981061223958e-05, | |
| "loss": 1.2377, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05871344170856115, | |
| "grad_norm": 2.640007734298706, | |
| "learning_rate": 1.9830487881255864e-05, | |
| "loss": 1.1995, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06164911379398921, | |
| "grad_norm": 2.6175191402435303, | |
| "learning_rate": 1.981315831297966e-05, | |
| "loss": 1.1114, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.06458478587941727, | |
| "grad_norm": 2.8480780124664307, | |
| "learning_rate": 1.9794993830813358e-05, | |
| "loss": 1.1897, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06752045796484532, | |
| "grad_norm": 2.635657787322998, | |
| "learning_rate": 1.9775995980210306e-05, | |
| "loss": 1.1592, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07045613005027339, | |
| "grad_norm": 2.8263065814971924, | |
| "learning_rate": 1.9756166377527734e-05, | |
| "loss": 1.1712, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07339180213570144, | |
| "grad_norm": 3.2777886390686035, | |
| "learning_rate": 1.9735506709889213e-05, | |
| "loss": 1.1724, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0763274742211295, | |
| "grad_norm": 2.587409019470215, | |
| "learning_rate": 1.9714018735041125e-05, | |
| "loss": 1.1767, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07926314630655756, | |
| "grad_norm": 2.9432199001312256, | |
| "learning_rate": 1.9691704281203098e-05, | |
| "loss": 1.1553, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08219881839198562, | |
| "grad_norm": 3.1995699405670166, | |
| "learning_rate": 1.966856524691247e-05, | |
| "loss": 1.1577, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08513449047741367, | |
| "grad_norm": 2.805522918701172, | |
| "learning_rate": 1.9644603600862753e-05, | |
| "loss": 1.1627, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08807016256284173, | |
| "grad_norm": 2.6327457427978516, | |
| "learning_rate": 1.961982138173615e-05, | |
| "loss": 1.1483, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09100583464826979, | |
| "grad_norm": 4.197879314422607, | |
| "learning_rate": 1.959422069803007e-05, | |
| "loss": 1.2015, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09394150673369785, | |
| "grad_norm": 3.2353358268737793, | |
| "learning_rate": 1.956780372787777e-05, | |
| "loss": 1.1068, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.0968771788191259, | |
| "grad_norm": 2.8246076107025146, | |
| "learning_rate": 1.9540572718863012e-05, | |
| "loss": 1.1424, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.09981285090455397, | |
| "grad_norm": 4.32204532623291, | |
| "learning_rate": 1.9512529987828853e-05, | |
| "loss": 1.1508, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.10274852298998202, | |
| "grad_norm": 2.8142335414886475, | |
| "learning_rate": 1.9483677920680512e-05, | |
| "loss": 1.1623, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10568419507541008, | |
| "grad_norm": 2.9336957931518555, | |
| "learning_rate": 1.9454018972182383e-05, | |
| "loss": 1.161, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.10861986716083813, | |
| "grad_norm": 2.9903533458709717, | |
| "learning_rate": 1.9423555665749182e-05, | |
| "loss": 1.1444, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1115555392462662, | |
| "grad_norm": 2.8897149562835693, | |
| "learning_rate": 1.939229059323124e-05, | |
| "loss": 1.171, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.11449121133169425, | |
| "grad_norm": 3.222294807434082, | |
| "learning_rate": 1.9360226414694008e-05, | |
| "loss": 1.1365, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1174268834171223, | |
| "grad_norm": 2.9000742435455322, | |
| "learning_rate": 1.932736585819171e-05, | |
| "loss": 1.1342, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12036255550255036, | |
| "grad_norm": 2.682969093322754, | |
| "learning_rate": 1.929371171953526e-05, | |
| "loss": 1.0428, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.12329822758797843, | |
| "grad_norm": 2.8611629009246826, | |
| "learning_rate": 1.9259266862054366e-05, | |
| "loss": 1.1135, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.12623389967340648, | |
| "grad_norm": 2.9910166263580322, | |
| "learning_rate": 1.9224034216353947e-05, | |
| "loss": 1.1154, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.12916957175883453, | |
| "grad_norm": 3.245227336883545, | |
| "learning_rate": 1.9188016780064768e-05, | |
| "loss": 1.2029, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1321052438442626, | |
| "grad_norm": 3.2252321243286133, | |
| "learning_rate": 1.9151217617588412e-05, | |
| "loss": 1.1272, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.13504091592969064, | |
| "grad_norm": 3.23498272895813, | |
| "learning_rate": 1.9113639859836544e-05, | |
| "loss": 1.1421, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.13797658801511872, | |
| "grad_norm": 3.0721843242645264, | |
| "learning_rate": 1.9075286703964554e-05, | |
| "loss": 1.14, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.14091226010054678, | |
| "grad_norm": 3.130610704421997, | |
| "learning_rate": 1.9036161413099512e-05, | |
| "loss": 1.1699, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.14384793218597483, | |
| "grad_norm": 3.054914712905884, | |
| "learning_rate": 1.899626731606255e-05, | |
| "loss": 1.0919, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.14678360427140288, | |
| "grad_norm": 3.3167009353637695, | |
| "learning_rate": 1.895560780708565e-05, | |
| "loss": 1.0625, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14971927635683094, | |
| "grad_norm": 3.075392484664917, | |
| "learning_rate": 1.8914186345522846e-05, | |
| "loss": 1.0899, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.152654948442259, | |
| "grad_norm": 3.1269266605377197, | |
| "learning_rate": 1.8872006455555906e-05, | |
| "loss": 1.1148, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.15559062052768705, | |
| "grad_norm": 3.867361068725586, | |
| "learning_rate": 1.8829071725894483e-05, | |
| "loss": 1.02, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.15852629261311513, | |
| "grad_norm": 3.529639720916748, | |
| "learning_rate": 1.87853858094708e-05, | |
| "loss": 1.1167, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.16146196469854318, | |
| "grad_norm": 3.098249673843384, | |
| "learning_rate": 1.8740952423128842e-05, | |
| "loss": 1.0181, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.16439763678397123, | |
| "grad_norm": 3.1614904403686523, | |
| "learning_rate": 1.869577534730812e-05, | |
| "loss": 1.1118, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.1673333088693993, | |
| "grad_norm": 3.054616928100586, | |
| "learning_rate": 1.8649858425722033e-05, | |
| "loss": 1.0666, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.17026898095482734, | |
| "grad_norm": 3.479527711868286, | |
| "learning_rate": 1.8603205565030846e-05, | |
| "loss": 1.108, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.1732046530402554, | |
| "grad_norm": 2.9523024559020996, | |
| "learning_rate": 1.8555820734509297e-05, | |
| "loss": 1.0833, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.17614032512568345, | |
| "grad_norm": 2.9907584190368652, | |
| "learning_rate": 1.8507707965708892e-05, | |
| "loss": 1.0283, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.17907599721111153, | |
| "grad_norm": 3.2911376953125, | |
| "learning_rate": 1.8458871352114894e-05, | |
| "loss": 1.0747, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.18201166929653959, | |
| "grad_norm": 3.1361849308013916, | |
| "learning_rate": 1.840931504879806e-05, | |
| "loss": 1.11, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.18494734138196764, | |
| "grad_norm": 3.527332067489624, | |
| "learning_rate": 1.8359043272061086e-05, | |
| "loss": 1.0424, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1878830134673957, | |
| "grad_norm": 3.5494275093078613, | |
| "learning_rate": 1.8308060299079926e-05, | |
| "loss": 1.0818, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.19081868555282375, | |
| "grad_norm": 3.4427106380462646, | |
| "learning_rate": 1.8256370467539847e-05, | |
| "loss": 1.0883, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.1937543576382518, | |
| "grad_norm": 3.092515230178833, | |
| "learning_rate": 1.82039781752664e-05, | |
| "loss": 1.0285, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.19669002972367985, | |
| "grad_norm": 2.667904853820801, | |
| "learning_rate": 1.815088787985124e-05, | |
| "loss": 0.9751, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.19962570180910794, | |
| "grad_norm": 3.5892174243927, | |
| "learning_rate": 1.809710409827285e-05, | |
| "loss": 1.0603, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.202561373894536, | |
| "grad_norm": 3.122434616088867, | |
| "learning_rate": 1.804263140651227e-05, | |
| "loss": 1.0919, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.20549704597996404, | |
| "grad_norm": 2.9182698726654053, | |
| "learning_rate": 1.798747443916374e-05, | |
| "loss": 1.0553, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2084327180653921, | |
| "grad_norm": 3.260917901992798, | |
| "learning_rate": 1.793163788904038e-05, | |
| "loss": 1.0711, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.21136839015082015, | |
| "grad_norm": 3.4142649173736572, | |
| "learning_rate": 1.7875126506774956e-05, | |
| "loss": 1.0423, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2143040622362482, | |
| "grad_norm": 3.0127294063568115, | |
| "learning_rate": 1.781794510041564e-05, | |
| "loss": 1.0679, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.21723973432167626, | |
| "grad_norm": 3.398015022277832, | |
| "learning_rate": 1.776009853501698e-05, | |
| "loss": 1.0558, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2201754064071043, | |
| "grad_norm": 3.6017568111419678, | |
| "learning_rate": 1.770159173222595e-05, | |
| "loss": 1.0198, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2231110784925324, | |
| "grad_norm": 3.5204339027404785, | |
| "learning_rate": 1.7642429669863225e-05, | |
| "loss": 0.9951, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.22604675057796045, | |
| "grad_norm": 3.3134777545928955, | |
| "learning_rate": 1.7582617381499655e-05, | |
| "loss": 0.9906, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2289824226633885, | |
| "grad_norm": 3.5017244815826416, | |
| "learning_rate": 1.7522159956028003e-05, | |
| "loss": 1.0711, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.23191809474881656, | |
| "grad_norm": 3.1634137630462646, | |
| "learning_rate": 1.7461062537229987e-05, | |
| "loss": 0.9909, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2348537668342446, | |
| "grad_norm": 3.368623971939087, | |
| "learning_rate": 1.739933032333863e-05, | |
| "loss": 0.9815, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.23778943891967266, | |
| "grad_norm": 3.1064817905426025, | |
| "learning_rate": 1.733696856659599e-05, | |
| "loss": 1.0191, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.24072511100510072, | |
| "grad_norm": 3.2074899673461914, | |
| "learning_rate": 1.7273982572806303e-05, | |
| "loss": 1.0314, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2436607830905288, | |
| "grad_norm": 2.5882649421691895, | |
| "learning_rate": 1.721037770088455e-05, | |
| "loss": 0.958, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.24659645517595685, | |
| "grad_norm": 3.730363130569458, | |
| "learning_rate": 1.7146159362400515e-05, | |
| "loss": 1.0272, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2495321272613849, | |
| "grad_norm": 2.940425395965576, | |
| "learning_rate": 1.708133302111837e-05, | |
| "loss": 1.0437, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.25246779934681296, | |
| "grad_norm": 4.833681106567383, | |
| "learning_rate": 1.7015904192531814e-05, | |
| "loss": 1.0393, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.255403471432241, | |
| "grad_norm": 3.417707681655884, | |
| "learning_rate": 1.694987844339479e-05, | |
| "loss": 1.0602, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.25833914351766907, | |
| "grad_norm": 3.239388942718506, | |
| "learning_rate": 1.6883261391247888e-05, | |
| "loss": 0.9515, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2612748156030971, | |
| "grad_norm": 3.1867291927337646, | |
| "learning_rate": 1.6816058703940366e-05, | |
| "loss": 0.9961, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2642104876885252, | |
| "grad_norm": 3.193343162536621, | |
| "learning_rate": 1.6748276099147952e-05, | |
| "loss": 1.0066, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.26714615977395323, | |
| "grad_norm": 3.1413753032684326, | |
| "learning_rate": 1.6679919343886376e-05, | |
| "loss": 0.9714, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2700818318593813, | |
| "grad_norm": 3.0826566219329834, | |
| "learning_rate": 1.661099425402067e-05, | |
| "loss": 0.9689, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2730175039448094, | |
| "grad_norm": 3.5959160327911377, | |
| "learning_rate": 1.6541506693770403e-05, | |
| "loss": 0.9867, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.27595317603023745, | |
| "grad_norm": 3.8435122966766357, | |
| "learning_rate": 1.647146257521071e-05, | |
| "loss": 1.0281, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.2788888481156655, | |
| "grad_norm": 3.396488904953003, | |
| "learning_rate": 1.6400867857769287e-05, | |
| "loss": 0.975, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.28182452020109355, | |
| "grad_norm": 3.2766590118408203, | |
| "learning_rate": 1.6329728547719375e-05, | |
| "loss": 0.9373, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.2847601922865216, | |
| "grad_norm": 3.673755645751953, | |
| "learning_rate": 1.625805069766873e-05, | |
| "loss": 0.9651, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.28769586437194966, | |
| "grad_norm": 3.8751864433288574, | |
| "learning_rate": 1.6185840406044657e-05, | |
| "loss": 0.9262, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2906315364573777, | |
| "grad_norm": 3.708500623703003, | |
| "learning_rate": 1.611310381657515e-05, | |
| "loss": 0.9972, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.29356720854280577, | |
| "grad_norm": 3.4258711338043213, | |
| "learning_rate": 1.60398471177662e-05, | |
| "loss": 0.9331, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2965028806282338, | |
| "grad_norm": 3.4662258625030518, | |
| "learning_rate": 1.596607654237522e-05, | |
| "loss": 0.9592, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.2994385527136619, | |
| "grad_norm": 2.938396453857422, | |
| "learning_rate": 1.589179836688081e-05, | |
| "loss": 0.9568, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.30237422479908993, | |
| "grad_norm": 3.248762845993042, | |
| "learning_rate": 1.5817018910948712e-05, | |
| "loss": 0.9928, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.305309896884518, | |
| "grad_norm": 3.423213243484497, | |
| "learning_rate": 1.574174453689415e-05, | |
| "loss": 0.9387, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.30824556896994604, | |
| "grad_norm": 3.249216318130493, | |
| "learning_rate": 1.566598164914049e-05, | |
| "loss": 0.8925, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.3111812410553741, | |
| "grad_norm": 3.6318016052246094, | |
| "learning_rate": 1.5589736693674372e-05, | |
| "loss": 1.0153, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.31411691314080215, | |
| "grad_norm": 3.9752533435821533, | |
| "learning_rate": 1.551301615749726e-05, | |
| "loss": 0.9323, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.31705258522623025, | |
| "grad_norm": 3.46864914894104, | |
| "learning_rate": 1.5435826568073532e-05, | |
| "loss": 0.8901, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3199882573116583, | |
| "grad_norm": 4.399304389953613, | |
| "learning_rate": 1.535817449277511e-05, | |
| "loss": 0.9118, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.32292392939708636, | |
| "grad_norm": 3.2890026569366455, | |
| "learning_rate": 1.5280066538322703e-05, | |
| "loss": 0.8655, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.3258596014825144, | |
| "grad_norm": 3.491983652114868, | |
| "learning_rate": 1.5201509350223708e-05, | |
| "loss": 0.9217, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.32879527356794247, | |
| "grad_norm": 5.0014824867248535, | |
| "learning_rate": 1.5122509612206785e-05, | |
| "loss": 0.9362, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3317309456533705, | |
| "grad_norm": 4.092339038848877, | |
| "learning_rate": 1.5043074045653215e-05, | |
| "loss": 0.9262, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3346666177387986, | |
| "grad_norm": 3.286433219909668, | |
| "learning_rate": 1.496320940902503e-05, | |
| "loss": 0.8891, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.33760228982422663, | |
| "grad_norm": 3.6521873474121094, | |
| "learning_rate": 1.4882922497290007e-05, | |
| "loss": 0.9281, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3405379619096547, | |
| "grad_norm": 3.8015809059143066, | |
| "learning_rate": 1.4802220141343516e-05, | |
| "loss": 0.8949, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.34347363399508274, | |
| "grad_norm": 4.149661064147949, | |
| "learning_rate": 1.472110920742738e-05, | |
| "loss": 0.8889, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.3464093060805108, | |
| "grad_norm": 3.5252785682678223, | |
| "learning_rate": 1.4639596596545656e-05, | |
| "loss": 0.8397, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.34934497816593885, | |
| "grad_norm": 3.6884541511535645, | |
| "learning_rate": 1.4557689243877507e-05, | |
| "loss": 0.9142, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3522806502513669, | |
| "grad_norm": 3.9577550888061523, | |
| "learning_rate": 1.4475394118187146e-05, | |
| "loss": 0.9809, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.35521632233679495, | |
| "grad_norm": 3.6897339820861816, | |
| "learning_rate": 1.4392718221230917e-05, | |
| "loss": 0.9141, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.35815199442222306, | |
| "grad_norm": 3.061516046524048, | |
| "learning_rate": 1.4309668587161596e-05, | |
| "loss": 0.8669, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.3610876665076511, | |
| "grad_norm": 3.1191623210906982, | |
| "learning_rate": 1.4226252281929902e-05, | |
| "loss": 0.8384, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.36402333859307917, | |
| "grad_norm": 4.198310852050781, | |
| "learning_rate": 1.4142476402683327e-05, | |
| "loss": 0.8971, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.3669590106785072, | |
| "grad_norm": 3.8184337615966797, | |
| "learning_rate": 1.4058348077162301e-05, | |
| "loss": 0.8783, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3698946827639353, | |
| "grad_norm": 3.842637777328491, | |
| "learning_rate": 1.3973874463093747e-05, | |
| "loss": 0.9623, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.37283035484936333, | |
| "grad_norm": 3.5173559188842773, | |
| "learning_rate": 1.3889062747582118e-05, | |
| "loss": 0.8092, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3757660269347914, | |
| "grad_norm": 3.8953890800476074, | |
| "learning_rate": 1.3803920146497887e-05, | |
| "loss": 0.8762, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.37870169902021944, | |
| "grad_norm": 3.0550928115844727, | |
| "learning_rate": 1.3718453903863616e-05, | |
| "loss": 0.8321, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3816373711056475, | |
| "grad_norm": 3.9677445888519287, | |
| "learning_rate": 1.3632671291237645e-05, | |
| "loss": 0.8566, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.38457304319107555, | |
| "grad_norm": 3.887268304824829, | |
| "learning_rate": 1.35465796070954e-05, | |
| "loss": 0.8944, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.3875087152765036, | |
| "grad_norm": 3.1006393432617188, | |
| "learning_rate": 1.3460186176208439e-05, | |
| "loss": 0.7583, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.39044438736193166, | |
| "grad_norm": 3.7594895362854004, | |
| "learning_rate": 1.337349834902125e-05, | |
| "loss": 0.814, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.3933800594473597, | |
| "grad_norm": 4.34951114654541, | |
| "learning_rate": 1.328652350102588e-05, | |
| "loss": 0.8006, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.39631573153278776, | |
| "grad_norm": 2.9645636081695557, | |
| "learning_rate": 1.3199269032134395e-05, | |
| "loss": 0.8129, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.39925140361821587, | |
| "grad_norm": 3.858602285385132, | |
| "learning_rate": 1.3111742366049317e-05, | |
| "loss": 0.8366, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.4021870757036439, | |
| "grad_norm": 3.2778103351593018, | |
| "learning_rate": 1.3023950949631979e-05, | |
| "loss": 0.8551, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.405122747789072, | |
| "grad_norm": 3.2402875423431396, | |
| "learning_rate": 1.2935902252268965e-05, | |
| "loss": 0.8398, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.40805841987450003, | |
| "grad_norm": 4.325957775115967, | |
| "learning_rate": 1.2847603765236589e-05, | |
| "loss": 0.836, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.4109940919599281, | |
| "grad_norm": 3.5310022830963135, | |
| "learning_rate": 1.2759063001063531e-05, | |
| "loss": 0.8369, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.41392976404535614, | |
| "grad_norm": 3.5352087020874023, | |
| "learning_rate": 1.2670287492891675e-05, | |
| "loss": 0.8988, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.4168654361307842, | |
| "grad_norm": 3.190788745880127, | |
| "learning_rate": 1.258128479383516e-05, | |
| "loss": 0.8352, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.41980110821621225, | |
| "grad_norm": 3.459728240966797, | |
| "learning_rate": 1.249206247633778e-05, | |
| "loss": 0.8295, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.4227367803016403, | |
| "grad_norm": 3.5794529914855957, | |
| "learning_rate": 1.2402628131528686e-05, | |
| "loss": 0.8103, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.42567245238706836, | |
| "grad_norm": 4.169612407684326, | |
| "learning_rate": 1.2312989368576547e-05, | |
| "loss": 0.7757, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4286081244724964, | |
| "grad_norm": 3.301011562347412, | |
| "learning_rate": 1.2223153814042137e-05, | |
| "loss": 0.7871, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.43154379655792446, | |
| "grad_norm": 4.524185657501221, | |
| "learning_rate": 1.2133129111229466e-05, | |
| "loss": 0.851, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.4344794686433525, | |
| "grad_norm": 3.72041392326355, | |
| "learning_rate": 1.2042922919535484e-05, | |
| "loss": 0.803, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.43741514072878057, | |
| "grad_norm": 3.926424503326416, | |
| "learning_rate": 1.1952542913798406e-05, | |
| "loss": 0.761, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.4403508128142086, | |
| "grad_norm": 3.5725414752960205, | |
| "learning_rate": 1.1861996783644727e-05, | |
| "loss": 0.8086, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.44328648489963673, | |
| "grad_norm": 4.109748363494873, | |
| "learning_rate": 1.1771292232834983e-05, | |
| "loss": 0.8483, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.4462221569850648, | |
| "grad_norm": 3.673794984817505, | |
| "learning_rate": 1.1680436978608314e-05, | |
| "loss": 0.738, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.44915782907049284, | |
| "grad_norm": 3.831571102142334, | |
| "learning_rate": 1.1589438751025852e-05, | |
| "loss": 0.7462, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.4520935011559209, | |
| "grad_norm": 4.181507587432861, | |
| "learning_rate": 1.149830529231307e-05, | |
| "loss": 0.7707, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.45502917324134895, | |
| "grad_norm": 3.3295936584472656, | |
| "learning_rate": 1.140704435620104e-05, | |
| "loss": 0.7832, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.457964845326777, | |
| "grad_norm": 4.025683403015137, | |
| "learning_rate": 1.1315663707266742e-05, | |
| "loss": 0.74, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.46090051741220506, | |
| "grad_norm": 3.792701244354248, | |
| "learning_rate": 1.1224171120272455e-05, | |
| "loss": 0.6698, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.4638361894976331, | |
| "grad_norm": 3.7220959663391113, | |
| "learning_rate": 1.1132574379504269e-05, | |
| "loss": 0.7604, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.46677186158306117, | |
| "grad_norm": 4.423033714294434, | |
| "learning_rate": 1.1040881278109784e-05, | |
| "loss": 0.7466, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4697075336684892, | |
| "grad_norm": 3.633347272872925, | |
| "learning_rate": 1.0949099617435062e-05, | |
| "loss": 0.7452, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4726432057539173, | |
| "grad_norm": 3.661238431930542, | |
| "learning_rate": 1.0857237206360885e-05, | |
| "loss": 0.7637, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4755788778393453, | |
| "grad_norm": 4.33590030670166, | |
| "learning_rate": 1.0765301860638364e-05, | |
| "loss": 0.7364, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.4785145499247734, | |
| "grad_norm": 3.7030036449432373, | |
| "learning_rate": 1.0673301402223964e-05, | |
| "loss": 0.7356, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.48145022201020143, | |
| "grad_norm": 4.784999847412109, | |
| "learning_rate": 1.0581243658614013e-05, | |
| "loss": 0.765, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.48438589409562954, | |
| "grad_norm": 3.2158679962158203, | |
| "learning_rate": 1.0489136462178718e-05, | |
| "loss": 0.75, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4873215661810576, | |
| "grad_norm": 4.584315299987793, | |
| "learning_rate": 1.039698764949579e-05, | |
| "loss": 0.7347, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.49025723826648565, | |
| "grad_norm": 3.4453585147857666, | |
| "learning_rate": 1.0304805060683692e-05, | |
| "loss": 0.7887, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.4931929103519137, | |
| "grad_norm": 3.9263744354248047, | |
| "learning_rate": 1.021259653873459e-05, | |
| "loss": 0.7492, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.49612858243734176, | |
| "grad_norm": 4.6535539627075195, | |
| "learning_rate": 1.012036992884708e-05, | |
| "loss": 0.7676, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.4990642545227698, | |
| "grad_norm": 4.22018575668335, | |
| "learning_rate": 1.0028133077758688e-05, | |
| "loss": 0.7088, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5019999266081978, | |
| "grad_norm": 4.408539295196533, | |
| "learning_rate": 9.935893833078284e-06, | |
| "loss": 0.7646, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.5049355986936259, | |
| "grad_norm": 5.264422416687012, | |
| "learning_rate": 9.843660042618372e-06, | |
| "loss": 0.8147, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.507871270779054, | |
| "grad_norm": 4.2693047523498535, | |
| "learning_rate": 9.75143955372742e-06, | |
| "loss": 0.7104, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.510806942864482, | |
| "grad_norm": 4.856871128082275, | |
| "learning_rate": 9.659240212622175e-06, | |
| "loss": 0.7367, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.5137426149499101, | |
| "grad_norm": 2.8976457118988037, | |
| "learning_rate": 9.567069863720113e-06, | |
| "loss": 0.7564, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5166782870353381, | |
| "grad_norm": 5.992892742156982, | |
| "learning_rate": 9.474936348972021e-06, | |
| "loss": 0.7735, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5196139591207662, | |
| "grad_norm": 3.6526339054107666, | |
| "learning_rate": 9.382847507194797e-06, | |
| "loss": 0.7035, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5225496312061942, | |
| "grad_norm": 4.040701389312744, | |
| "learning_rate": 9.290811173404513e-06, | |
| "loss": 0.6347, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5254853032916224, | |
| "grad_norm": 3.848483085632324, | |
| "learning_rate": 9.198835178149807e-06, | |
| "loss": 0.6359, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5284209753770504, | |
| "grad_norm": 3.2821764945983887, | |
| "learning_rate": 9.106927346845663e-06, | |
| "loss": 0.7137, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5313566474624785, | |
| "grad_norm": 4.672881603240967, | |
| "learning_rate": 9.015095499107578e-06, | |
| "loss": 0.7085, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5342923195479065, | |
| "grad_norm": 3.976231098175049, | |
| "learning_rate": 8.923347448086311e-06, | |
| "loss": 0.6501, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5372279916333346, | |
| "grad_norm": 4.726049423217773, | |
| "learning_rate": 8.831690999803101e-06, | |
| "loss": 0.8129, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5401636637187626, | |
| "grad_norm": 6.278385162353516, | |
| "learning_rate": 8.740133952485515e-06, | |
| "loss": 0.6732, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5430993358041907, | |
| "grad_norm": 4.620763301849365, | |
| "learning_rate": 8.648684095904001e-06, | |
| "loss": 0.6872, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5460350078896188, | |
| "grad_norm": 4.494777679443359, | |
| "learning_rate": 8.557349210709098e-06, | |
| "loss": 0.6686, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5489706799750468, | |
| "grad_norm": 4.2295637130737305, | |
| "learning_rate": 8.46613706776945e-06, | |
| "loss": 0.6853, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5519063520604749, | |
| "grad_norm": 3.5783040523529053, | |
| "learning_rate": 8.375055427510673e-06, | |
| "loss": 0.6923, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5548420241459029, | |
| "grad_norm": 3.5585546493530273, | |
| "learning_rate": 8.284112039255071e-06, | |
| "loss": 0.6744, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.557777696231331, | |
| "grad_norm": 3.939253330230713, | |
| "learning_rate": 8.193314640562315e-06, | |
| "loss": 0.627, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.560713368316759, | |
| "grad_norm": 3.630519390106201, | |
| "learning_rate": 8.102670956571139e-06, | |
| "loss": 0.6627, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5636490404021871, | |
| "grad_norm": 11.943046569824219, | |
| "learning_rate": 8.012188699342072e-06, | |
| "loss": 0.6476, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5665847124876151, | |
| "grad_norm": 5.358550071716309, | |
| "learning_rate": 7.92187556720126e-06, | |
| "loss": 0.6968, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5695203845730432, | |
| "grad_norm": 3.8031585216522217, | |
| "learning_rate": 7.831739244085534e-06, | |
| "loss": 0.6811, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5724560566584712, | |
| "grad_norm": 3.1659951210021973, | |
| "learning_rate": 7.741787398888617e-06, | |
| "loss": 0.6501, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5753917287438993, | |
| "grad_norm": 3.7877001762390137, | |
| "learning_rate": 7.652027684808644e-06, | |
| "loss": 0.6496, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5783274008293273, | |
| "grad_norm": 4.701345920562744, | |
| "learning_rate": 7.56246773869705e-06, | |
| "loss": 0.659, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5812630729147554, | |
| "grad_norm": 4.617175579071045, | |
| "learning_rate": 7.47311518040879e-06, | |
| "loss": 0.6429, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5841987450001834, | |
| "grad_norm": 5.269269943237305, | |
| "learning_rate": 7.3839776121540385e-06, | |
| "loss": 0.6845, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5871344170856115, | |
| "grad_norm": 3.911558151245117, | |
| "learning_rate": 7.2950626178514e-06, | |
| "loss": 0.6536, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5900700891710396, | |
| "grad_norm": null, | |
| "learning_rate": 7.215235676567183e-06, | |
| "loss": 0.6691, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5930057612564676, | |
| "grad_norm": 5.29760217666626, | |
| "learning_rate": 7.126764398128368e-06, | |
| "loss": 0.6483, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5959414333418958, | |
| "grad_norm": 3.4294636249542236, | |
| "learning_rate": 7.038537577614009e-06, | |
| "loss": 0.5965, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5988771054273238, | |
| "grad_norm": 3.6569931507110596, | |
| "learning_rate": 6.950562721455325e-06, | |
| "loss": 0.5782, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.6018127775127519, | |
| "grad_norm": 3.845431089401245, | |
| "learning_rate": 6.86284731464614e-06, | |
| "loss": 0.6419, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.6047484495981799, | |
| "grad_norm": 3.8947107791900635, | |
| "learning_rate": 6.775398820106065e-06, | |
| "loss": 0.5942, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.607684121683608, | |
| "grad_norm": 5.501591682434082, | |
| "learning_rate": 6.688224678045507e-06, | |
| "loss": 0.5874, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.610619793769036, | |
| "grad_norm": 4.684408187866211, | |
| "learning_rate": 6.6013323053327065e-06, | |
| "loss": 0.6178, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.6135554658544641, | |
| "grad_norm": 4.132544040679932, | |
| "learning_rate": 6.5147290948626365e-06, | |
| "loss": 0.5972, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.6164911379398921, | |
| "grad_norm": 3.2844135761260986, | |
| "learning_rate": 6.428422414928066e-06, | |
| "loss": 0.5808, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6194268100253202, | |
| "grad_norm": 4.8152289390563965, | |
| "learning_rate": 6.342419608592626e-06, | |
| "loss": 0.6407, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6223624821107482, | |
| "grad_norm": 4.975841999053955, | |
| "learning_rate": 6.25672799306605e-06, | |
| "loss": 0.5792, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6252981541961763, | |
| "grad_norm": 3.772268772125244, | |
| "learning_rate": 6.171354859081639e-06, | |
| "loss": 0.7062, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6282338262816043, | |
| "grad_norm": 3.6091275215148926, | |
| "learning_rate": 6.086307470275947e-06, | |
| "loss": 0.6015, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6311694983670324, | |
| "grad_norm": 3.9650683403015137, | |
| "learning_rate": 6.001593062570776e-06, | |
| "loss": 0.699, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6341051704524605, | |
| "grad_norm": 3.4142041206359863, | |
| "learning_rate": 5.917218843557551e-06, | |
| "loss": 0.5912, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6370408425378885, | |
| "grad_norm": 3.6262595653533936, | |
| "learning_rate": 5.8415788415375744e-06, | |
| "loss": 0.6029, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6399765146233166, | |
| "grad_norm": 3.612025737762451, | |
| "learning_rate": 5.757870733799642e-06, | |
| "loss": 0.6054, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6429121867087446, | |
| "grad_norm": 3.7721731662750244, | |
| "learning_rate": 5.6745235509072135e-06, | |
| "loss": 0.5703, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6458478587941727, | |
| "grad_norm": 4.44386100769043, | |
| "learning_rate": 5.591544384126769e-06, | |
| "loss": 0.6101, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6487835308796007, | |
| "grad_norm": 3.6553893089294434, | |
| "learning_rate": 5.508940293413603e-06, | |
| "loss": 0.6131, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6517192029650288, | |
| "grad_norm": 4.550465106964111, | |
| "learning_rate": 5.426718306811134e-06, | |
| "loss": 0.5761, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6546548750504568, | |
| "grad_norm": 3.433598279953003, | |
| "learning_rate": 5.344885419852961e-06, | |
| "loss": 0.6456, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6575905471358849, | |
| "grad_norm": 5.087676048278809, | |
| "learning_rate": 5.263448594967673e-06, | |
| "loss": 0.657, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6605262192213129, | |
| "grad_norm": 4.578396797180176, | |
| "learning_rate": 5.182414760886484e-06, | |
| "loss": 0.6083, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.663461891306741, | |
| "grad_norm": 6.043960094451904, | |
| "learning_rate": 5.1017908120537105e-06, | |
| "loss": 0.5721, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.666397563392169, | |
| "grad_norm": 3.624394178390503, | |
| "learning_rate": 5.021583608040208e-06, | |
| "loss": 0.5952, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6693332354775972, | |
| "grad_norm": 2.965820074081421, | |
| "learning_rate": 4.941799972959752e-06, | |
| "loss": 0.5074, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6722689075630253, | |
| "grad_norm": 5.590756416320801, | |
| "learning_rate": 4.862446694888403e-06, | |
| "loss": 0.5274, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6752045796484533, | |
| "grad_norm": 4.188043594360352, | |
| "learning_rate": 4.783530525287006e-06, | |
| "loss": 0.5694, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6781402517338814, | |
| "grad_norm": 3.925184488296509, | |
| "learning_rate": 4.705058178426753e-06, | |
| "loss": 0.55, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6810759238193094, | |
| "grad_norm": 4.226954936981201, | |
| "learning_rate": 4.627036330817926e-06, | |
| "loss": 0.5432, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6840115959047375, | |
| "grad_norm": 3.9109609127044678, | |
| "learning_rate": 4.5494716206418555e-06, | |
| "loss": 0.5332, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6869472679901655, | |
| "grad_norm": 4.698592662811279, | |
| "learning_rate": 4.4723706471861385e-06, | |
| "loss": 0.5744, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6898829400755936, | |
| "grad_norm": 4.461889266967773, | |
| "learning_rate": 4.3957399702831505e-06, | |
| "loss": 0.5314, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6928186121610216, | |
| "grad_norm": 4.412221908569336, | |
| "learning_rate": 4.31958610975195e-06, | |
| "loss": 0.554, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.6957542842464497, | |
| "grad_norm": 3.776421308517456, | |
| "learning_rate": 4.243915544843549e-06, | |
| "loss": 0.4857, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6986899563318777, | |
| "grad_norm": 4.851159572601318, | |
| "learning_rate": 4.168734713689658e-06, | |
| "loss": 0.5484, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.7016256284173058, | |
| "grad_norm": 3.8917558193206787, | |
| "learning_rate": 4.094050012754925e-06, | |
| "loss": 0.4888, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.7045613005027338, | |
| "grad_norm": 4.396358966827393, | |
| "learning_rate": 4.019867796292709e-06, | |
| "loss": 0.5125, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.7074969725881619, | |
| "grad_norm": 4.374291896820068, | |
| "learning_rate": 3.946194375804452e-06, | |
| "loss": 0.5262, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.7104326446735899, | |
| "grad_norm": 4.330350875854492, | |
| "learning_rate": 3.873036019502716e-06, | |
| "loss": 0.5581, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.713368316759018, | |
| "grad_norm": 4.86287784576416, | |
| "learning_rate": 3.800398951777845e-06, | |
| "loss": 0.5687, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.7163039888444461, | |
| "grad_norm": 5.004453659057617, | |
| "learning_rate": 3.7282893526683914e-06, | |
| "loss": 0.5136, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.7192396609298741, | |
| "grad_norm": 3.035261631011963, | |
| "learning_rate": 3.656713357335334e-06, | |
| "loss": 0.5358, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7221753330153022, | |
| "grad_norm": 3.3477425575256348, | |
| "learning_rate": 3.585677055540072e-06, | |
| "loss": 0.5214, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7251110051007302, | |
| "grad_norm": 3.495814323425293, | |
| "learning_rate": 3.5151864911263066e-06, | |
| "loss": 0.5048, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7280466771861583, | |
| "grad_norm": 3.35532283782959, | |
| "learning_rate": 3.4452476615058316e-06, | |
| "loss": 0.509, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7309823492715863, | |
| "grad_norm": 3.0357613563537598, | |
| "learning_rate": 3.3758665171482474e-06, | |
| "loss": 0.5361, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7339180213570144, | |
| "grad_norm": 4.02761173248291, | |
| "learning_rate": 3.3070489610747146e-06, | |
| "loss": 0.5033, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7368536934424424, | |
| "grad_norm": 4.085331916809082, | |
| "learning_rate": 3.238800848355702e-06, | |
| "loss": 0.526, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7397893655278706, | |
| "grad_norm": 4.161253929138184, | |
| "learning_rate": 3.1711279856128387e-06, | |
| "loss": 0.5014, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7427250376132986, | |
| "grad_norm": 3.7220897674560547, | |
| "learning_rate": 3.10403613052487e-06, | |
| "loss": 0.4514, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7456607096987267, | |
| "grad_norm": 4.337230682373047, | |
| "learning_rate": 3.037530991337807e-06, | |
| "loss": 0.5645, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7485963817841547, | |
| "grad_norm": 4.30481481552124, | |
| "learning_rate": 2.9716182263792314e-06, | |
| "loss": 0.5026, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7515320538695828, | |
| "grad_norm": 3.3447349071502686, | |
| "learning_rate": 2.9063034435769242e-06, | |
| "loss": 0.5318, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7544677259550108, | |
| "grad_norm": 3.936032295227051, | |
| "learning_rate": 2.8415921999816966e-06, | |
| "loss": 0.5106, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7574033980404389, | |
| "grad_norm": 3.9542150497436523, | |
| "learning_rate": 2.7774900012946037e-06, | |
| "loss": 0.5299, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.760339070125867, | |
| "grad_norm": 4.351448059082031, | |
| "learning_rate": 2.714002301398524e-06, | |
| "loss": 0.5211, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.763274742211295, | |
| "grad_norm": 4.167295932769775, | |
| "learning_rate": 2.6511345018941225e-06, | |
| "loss": 0.5071, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7662104142967231, | |
| "grad_norm": 5.125722408294678, | |
| "learning_rate": 2.588891951640288e-06, | |
| "loss": 0.5199, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7691460863821511, | |
| "grad_norm": 4.25960111618042, | |
| "learning_rate": 2.527279946299037e-06, | |
| "loss": 0.4537, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7720817584675792, | |
| "grad_norm": 3.875459909439087, | |
| "learning_rate": 2.4663037278849665e-06, | |
| "loss": 0.4993, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7750174305530072, | |
| "grad_norm": 4.285188674926758, | |
| "learning_rate": 2.405968484319231e-06, | |
| "loss": 0.4596, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.7779531026384353, | |
| "grad_norm": 3.827913284301758, | |
| "learning_rate": 2.3462793489881884e-06, | |
| "loss": 0.5141, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7808887747238633, | |
| "grad_norm": 4.174901485443115, | |
| "learning_rate": 2.2872414003066146e-06, | |
| "loss": 0.4483, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7838244468092914, | |
| "grad_norm": 3.4712812900543213, | |
| "learning_rate": 2.2288596612856306e-06, | |
| "loss": 0.4834, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7867601188947194, | |
| "grad_norm": 4.7577972412109375, | |
| "learning_rate": 2.1711390991053547e-06, | |
| "loss": 0.4913, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7896957909801475, | |
| "grad_norm": 3.8983209133148193, | |
| "learning_rate": 2.1140846246922774e-06, | |
| "loss": 0.4748, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7926314630655755, | |
| "grad_norm": 3.3365228176116943, | |
| "learning_rate": 2.0577010923014353e-06, | |
| "loss": 0.5014, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7955671351510036, | |
| "grad_norm": 3.7394635677337646, | |
| "learning_rate": 2.001993299103411e-06, | |
| "loss": 0.4524, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7985028072364317, | |
| "grad_norm": 3.325190544128418, | |
| "learning_rate": 1.946965984776181e-06, | |
| "loss": 0.486, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.8014384793218597, | |
| "grad_norm": 3.337636947631836, | |
| "learning_rate": 1.8926238311018551e-06, | |
| "loss": 0.4112, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.8043741514072879, | |
| "grad_norm": 4.279343128204346, | |
| "learning_rate": 1.8443055276768218e-06, | |
| "loss": 0.4758, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.8073098234927159, | |
| "grad_norm": 3.3319621086120605, | |
| "learning_rate": 1.7912778684550137e-06, | |
| "loss": 0.4209, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.810245495578144, | |
| "grad_norm": 4.332451343536377, | |
| "learning_rate": 1.7389486159957436e-06, | |
| "loss": 0.4913, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.813181167663572, | |
| "grad_norm": 5.22000789642334, | |
| "learning_rate": 1.6873222225271656e-06, | |
| "loss": 0.4523, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.8161168397490001, | |
| "grad_norm": 4.080671787261963, | |
| "learning_rate": 1.63640308047745e-06, | |
| "loss": 0.4545, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.8190525118344281, | |
| "grad_norm": 5.591613292694092, | |
| "learning_rate": 1.5861955221010671e-06, | |
| "loss": 0.5272, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.8219881839198562, | |
| "grad_norm": 3.4515106678009033, | |
| "learning_rate": 1.536703819110198e-06, | |
| "loss": 0.4166, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.8249238560052842, | |
| "grad_norm": 3.1613569259643555, | |
| "learning_rate": 1.4879321823112802e-06, | |
| "loss": 0.5194, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.8278595280907123, | |
| "grad_norm": 4.931222915649414, | |
| "learning_rate": 1.4398847612467736e-06, | |
| "loss": 0.4626, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.8307952001761403, | |
| "grad_norm": 3.088315963745117, | |
| "learning_rate": 1.3925656438420876e-06, | |
| "loss": 0.4246, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8337308722615684, | |
| "grad_norm": 3.7036452293395996, | |
| "learning_rate": 1.3459788560577847e-06, | |
| "loss": 0.431, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8366665443469964, | |
| "grad_norm": 4.452617168426514, | |
| "learning_rate": 1.3001283615470517e-06, | |
| "loss": 0.4478, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8396022164324245, | |
| "grad_norm": 3.2161977291107178, | |
| "learning_rate": 1.255018061318467e-06, | |
| "loss": 0.4432, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8425378885178526, | |
| "grad_norm": 4.302596092224121, | |
| "learning_rate": 1.2106517934040917e-06, | |
| "loss": 0.4598, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8454735606032806, | |
| "grad_norm": 4.297342300415039, | |
| "learning_rate": 1.1670333325329353e-06, | |
| "loss": 0.4908, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8484092326887087, | |
| "grad_norm": 3.9199209213256836, | |
| "learning_rate": 1.1241663898097865e-06, | |
| "loss": 0.4239, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.8513449047741367, | |
| "grad_norm": 4.693470001220703, | |
| "learning_rate": 1.08205461239948e-06, | |
| "loss": 0.4636, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8542805768595648, | |
| "grad_norm": 4.2040510177612305, | |
| "learning_rate": 1.04070158321659e-06, | |
| "loss": 0.4595, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8572162489449928, | |
| "grad_norm": 4.8676252365112305, | |
| "learning_rate": 1.00011082062058e-06, | |
| "loss": 0.4699, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8601519210304209, | |
| "grad_norm": 3.176576852798462, | |
| "learning_rate": 9.602857781164721e-07, | |
| "loss": 0.4599, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.8630875931158489, | |
| "grad_norm": 4.111423969268799, | |
| "learning_rate": 9.212298440610101e-07, | |
| "loss": 0.4601, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.866023265201277, | |
| "grad_norm": 3.106792449951172, | |
| "learning_rate": 8.829463413743811e-07, | |
| "loss": 0.453, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.868958937286705, | |
| "grad_norm": 4.940300941467285, | |
| "learning_rate": 8.454385272574906e-07, | |
| "loss": 0.4298, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8718946093721331, | |
| "grad_norm": 4.7473249435424805, | |
| "learning_rate": 8.087095929148436e-07, | |
| "loss": 0.457, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.8748302814575611, | |
| "grad_norm": 4.263439655303955, | |
| "learning_rate": 7.727626632830221e-07, | |
| "loss": 0.4194, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.8777659535429893, | |
| "grad_norm": 4.69775390625, | |
| "learning_rate": 7.376007967648302e-07, | |
| "loss": 0.4457, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.8807016256284173, | |
| "grad_norm": 4.177097320556641, | |
| "learning_rate": 7.032269849690654e-07, | |
| "loss": 0.4532, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8836372977138454, | |
| "grad_norm": 4.312076568603516, | |
| "learning_rate": 6.696441524559983e-07, | |
| "loss": 0.4772, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8865729697992735, | |
| "grad_norm": 4.328220367431641, | |
| "learning_rate": 6.368551564885439e-07, | |
| "loss": 0.4239, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8895086418847015, | |
| "grad_norm": 3.3847310543060303, | |
| "learning_rate": 6.048627867891665e-07, | |
| "loss": 0.4564, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8924443139701296, | |
| "grad_norm": 3.408613681793213, | |
| "learning_rate": 5.736697653025192e-07, | |
| "loss": 0.4206, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8953799860555576, | |
| "grad_norm": 4.045165061950684, | |
| "learning_rate": 5.432787459638722e-07, | |
| "loss": 0.4751, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.8983156581409857, | |
| "grad_norm": 3.965830087661743, | |
| "learning_rate": 5.136923144732997e-07, | |
| "loss": 0.4273, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.9012513302264137, | |
| "grad_norm": 3.462986707687378, | |
| "learning_rate": 4.849129880756886e-07, | |
| "loss": 0.472, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.9041870023118418, | |
| "grad_norm": 5.114270210266113, | |
| "learning_rate": 4.569432153465736e-07, | |
| "loss": 0.5233, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.9071226743972698, | |
| "grad_norm": 4.655681610107422, | |
| "learning_rate": 4.297853759838055e-07, | |
| "loss": 0.4543, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.9100583464826979, | |
| "grad_norm": 4.586308002471924, | |
| "learning_rate": 4.034417806050872e-07, | |
| "loss": 0.4383, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.9129940185681259, | |
| "grad_norm": 3.405298948287964, | |
| "learning_rate": 3.779146705513814e-07, | |
| "loss": 0.4644, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.915929690653554, | |
| "grad_norm": 3.8995399475097656, | |
| "learning_rate": 3.532062176962159e-07, | |
| "loss": 0.4591, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.918865362738982, | |
| "grad_norm": 3.6494014263153076, | |
| "learning_rate": 3.293185242608954e-07, | |
| "loss": 0.4354, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.9218010348244101, | |
| "grad_norm": 4.192446708679199, | |
| "learning_rate": 3.062536226356472e-07, | |
| "loss": 0.4466, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.9247367069098382, | |
| "grad_norm": 3.4682350158691406, | |
| "learning_rate": 2.8401347520670253e-07, | |
| "loss": 0.4629, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.9276723789952662, | |
| "grad_norm": 3.987903594970703, | |
| "learning_rate": 2.625999741893304e-07, | |
| "loss": 0.5156, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.9306080510806943, | |
| "grad_norm": 4.3802103996276855, | |
| "learning_rate": 2.420149414668493e-07, | |
| "loss": 0.42, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9335437231661223, | |
| "grad_norm": 4.355963230133057, | |
| "learning_rate": 2.222601284356185e-07, | |
| "loss": 0.4408, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.9364793952515504, | |
| "grad_norm": 5.095834255218506, | |
| "learning_rate": 2.0333721585602984e-07, | |
| "loss": 0.4558, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.9394150673369784, | |
| "grad_norm": 3.3932993412017822, | |
| "learning_rate": 1.8524781370950262e-07, | |
| "loss": 0.4475, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9423507394224065, | |
| "grad_norm": 5.2142014503479, | |
| "learning_rate": 1.679934610615064e-07, | |
| "loss": 0.4351, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9452864115078345, | |
| "grad_norm": 4.271505832672119, | |
| "learning_rate": 1.515756259306178e-07, | |
| "loss": 0.4431, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9482220835932627, | |
| "grad_norm": 4.888089656829834, | |
| "learning_rate": 1.3599570516361737e-07, | |
| "loss": 0.4256, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9511577556786907, | |
| "grad_norm": 4.216527938842773, | |
| "learning_rate": 1.212550243166455e-07, | |
| "loss": 0.4811, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9540934277641188, | |
| "grad_norm": 4.079187393188477, | |
| "learning_rate": 1.0735483754242049e-07, | |
| "loss": 0.4435, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9570290998495468, | |
| "grad_norm": 4.337707042694092, | |
| "learning_rate": 9.429632748354068e-08, | |
| "loss": 0.4152, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9599647719349749, | |
| "grad_norm": 3.4174439907073975, | |
| "learning_rate": 8.208060517185146e-08, | |
| "loss": 0.4579, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9629004440204029, | |
| "grad_norm": 4.021118640899658, | |
| "learning_rate": 7.070870993393209e-08, | |
| "loss": 0.4531, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.965836116105831, | |
| "grad_norm": 3.2883527278900146, | |
| "learning_rate": 6.01816093026586e-08, | |
| "loss": 0.4384, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.9687717881912591, | |
| "grad_norm": 3.5450029373168945, | |
| "learning_rate": 5.0500198934889665e-08, | |
| "loss": 0.4028, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9717074602766871, | |
| "grad_norm": 3.9169414043426514, | |
| "learning_rate": 4.16653025352598e-08, | |
| "loss": 0.4894, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.9746431323621152, | |
| "grad_norm": 4.527153015136719, | |
| "learning_rate": 3.367767178609982e-08, | |
| "loss": 0.4403, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.9775788044475432, | |
| "grad_norm": 4.728188514709473, | |
| "learning_rate": 2.6537986283485805e-08, | |
| "loss": 0.4123, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.9805144765329713, | |
| "grad_norm": 4.7194037437438965, | |
| "learning_rate": 2.024685347941202e-08, | |
| "loss": 0.4456, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.9834501486183993, | |
| "grad_norm": 4.34934139251709, | |
| "learning_rate": 1.4804808630112244e-08, | |
| "loss": 0.4383, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.9863858207038274, | |
| "grad_norm": 4.218658924102783, | |
| "learning_rate": 1.0212314750518426e-08, | |
| "loss": 0.4368, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.9893214927892554, | |
| "grad_norm": 4.213326454162598, | |
| "learning_rate": 6.469762574868866e-09, | |
| "loss": 0.4769, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.9922571648746835, | |
| "grad_norm": 3.9344699382781982, | |
| "learning_rate": 3.5774705234625783e-09, | |
| "loss": 0.4412, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.9951928369601115, | |
| "grad_norm": 3.622770071029663, | |
| "learning_rate": 1.5356846755654187e-09, | |
| "loss": 0.5059, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.9981285090455396, | |
| "grad_norm": 5.872491359710693, | |
| "learning_rate": 3.4457874847793063e-10, | |
| "loss": 0.4697, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9998899122967965, | |
| "step": 3406, | |
| "total_flos": 3.2891568428128666e+18, | |
| "train_loss": 0.7668657067657847, | |
| "train_runtime": 24228.5265, | |
| "train_samples_per_second": 4.499, | |
| "train_steps_per_second": 0.141 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3406, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.2891568428128666e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |