| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.9961261759822913, |
| "eval_steps": 500, |
| "global_step": 3612, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011068068622025456, |
| "grad_norm": 111.91503449772634, |
| "learning_rate": 9.94475138121547e-07, |
| "loss": 3.4549, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02213613724405091, |
| "grad_norm": 7.922807222950788, |
| "learning_rate": 2.0994475138121547e-06, |
| "loss": 1.1747, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03320420586607637, |
| "grad_norm": 4.5465846973535, |
| "learning_rate": 3.204419889502763e-06, |
| "loss": 0.6454, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04427227448810182, |
| "grad_norm": 5.530354729245049, |
| "learning_rate": 4.309392265193371e-06, |
| "loss": 0.5239, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05534034311012728, |
| "grad_norm": 5.077908329619468, |
| "learning_rate": 5.414364640883978e-06, |
| "loss": 0.4425, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06640841173215274, |
| "grad_norm": 9.192826009944866, |
| "learning_rate": 6.5193370165745865e-06, |
| "loss": 0.3716, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0774764803541782, |
| "grad_norm": 3.003413039290293, |
| "learning_rate": 7.624309392265194e-06, |
| "loss": 0.3749, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08854454897620365, |
| "grad_norm": 3.723093767166122, |
| "learning_rate": 8.729281767955802e-06, |
| "loss": 0.3347, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0996126175982291, |
| "grad_norm": 2.9657326866531983, |
| "learning_rate": 9.834254143646411e-06, |
| "loss": 0.2678, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.11068068622025456, |
| "grad_norm": 5.900212741354982, |
| "learning_rate": 1.0939226519337018e-05, |
| "loss": 0.2791, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12174875484228002, |
| "grad_norm": 2.69943519276576, |
| "learning_rate": 1.2044198895027625e-05, |
| "loss": 0.2175, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.13281682346430548, |
| "grad_norm": 2.734146918784431, |
| "learning_rate": 1.3149171270718234e-05, |
| "loss": 0.2148, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.14388489208633093, |
| "grad_norm": 4.568726147434186, |
| "learning_rate": 1.425414364640884e-05, |
| "loss": 0.1885, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1549529607083564, |
| "grad_norm": 2.826200663681499, |
| "learning_rate": 1.535911602209945e-05, |
| "loss": 0.1842, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16602102933038185, |
| "grad_norm": 4.656857528648145, |
| "learning_rate": 1.6464088397790058e-05, |
| "loss": 0.1586, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1770890979524073, |
| "grad_norm": 2.6844969949298103, |
| "learning_rate": 1.7569060773480663e-05, |
| "loss": 0.1412, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18815716657443277, |
| "grad_norm": 3.213817850319435, |
| "learning_rate": 1.8674033149171272e-05, |
| "loss": 0.1398, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1992252351964582, |
| "grad_norm": 2.1465861431244138, |
| "learning_rate": 1.977900552486188e-05, |
| "loss": 0.1242, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.21029330381848368, |
| "grad_norm": 2.6793701438567465, |
| "learning_rate": 1.9999731708850868e-05, |
| "loss": 0.1232, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.22136137244050913, |
| "grad_norm": 1.5667080473228716, |
| "learning_rate": 1.999864180073034e-05, |
| "loss": 0.1174, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23242944106253458, |
| "grad_norm": 2.037240256627066, |
| "learning_rate": 1.9996713598750047e-05, |
| "loss": 0.1067, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.24349750968456005, |
| "grad_norm": 1.8670470335951264, |
| "learning_rate": 1.999394726457193e-05, |
| "loss": 0.1005, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2545655783065855, |
| "grad_norm": 4.718021586405129, |
| "learning_rate": 1.9990343030127588e-05, |
| "loss": 0.0966, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.26563364692861097, |
| "grad_norm": 1.5167562544095952, |
| "learning_rate": 1.998590119759882e-05, |
| "loss": 0.084, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.27670171555063644, |
| "grad_norm": 1.474380476616429, |
| "learning_rate": 1.998062213939231e-05, |
| "loss": 0.0843, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.28776978417266186, |
| "grad_norm": 1.7009367999004055, |
| "learning_rate": 1.997450629810837e-05, |
| "loss": 0.077, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.29883785279468733, |
| "grad_norm": 1.3054232841373299, |
| "learning_rate": 1.9967554186503864e-05, |
| "loss": 0.0645, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3099059214167128, |
| "grad_norm": 1.4759142904818798, |
| "learning_rate": 1.9959766387449203e-05, |
| "loss": 0.0652, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3209739900387382, |
| "grad_norm": 3.1917585407150324, |
| "learning_rate": 1.995114355387949e-05, |
| "loss": 0.0646, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3320420586607637, |
| "grad_norm": 1.897149045890919, |
| "learning_rate": 1.9941686408739748e-05, |
| "loss": 0.0636, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.34311012728278917, |
| "grad_norm": 1.2297497325088935, |
| "learning_rate": 1.9931395744924345e-05, |
| "loss": 0.0573, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3541781959048146, |
| "grad_norm": 2.120351393194103, |
| "learning_rate": 1.992027242521049e-05, |
| "loss": 0.0552, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.36524626452684006, |
| "grad_norm": 1.1544320003426745, |
| "learning_rate": 1.9908317382185904e-05, |
| "loss": 0.0509, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.37631433314886553, |
| "grad_norm": 1.5719324649495832, |
| "learning_rate": 1.9895531618170646e-05, |
| "loss": 0.055, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.387382401770891, |
| "grad_norm": 2.088937323356875, |
| "learning_rate": 1.9881916205133057e-05, |
| "loss": 0.0507, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3984504703929164, |
| "grad_norm": 1.0024237634001862, |
| "learning_rate": 1.9867472284599888e-05, |
| "loss": 0.0458, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4095185390149419, |
| "grad_norm": 0.8620888697564176, |
| "learning_rate": 1.9852201067560607e-05, |
| "loss": 0.0432, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.42058660763696737, |
| "grad_norm": 0.6606971753051816, |
| "learning_rate": 1.9836103834365864e-05, |
| "loss": 0.0444, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4316546762589928, |
| "grad_norm": 0.9387070037035307, |
| "learning_rate": 1.9819181934620128e-05, |
| "loss": 0.0403, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.44272274488101826, |
| "grad_norm": 0.9093244429730171, |
| "learning_rate": 1.9801436787068563e-05, |
| "loss": 0.0372, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.45379081350304373, |
| "grad_norm": 1.2531686851495056, |
| "learning_rate": 1.9782869879478058e-05, |
| "loss": 0.0415, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.46485888212506915, |
| "grad_norm": 0.907356018111985, |
| "learning_rate": 1.9763482768512507e-05, |
| "loss": 0.0374, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4759269507470946, |
| "grad_norm": 0.5914417873894128, |
| "learning_rate": 1.974327707960228e-05, |
| "loss": 0.0383, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4869950193691201, |
| "grad_norm": 0.9164172705484467, |
| "learning_rate": 1.972225450680796e-05, |
| "loss": 0.03, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.49806308799114557, |
| "grad_norm": 1.1027419481663168, |
| "learning_rate": 1.9700416812678303e-05, |
| "loss": 0.0349, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.509131156613171, |
| "grad_norm": 0.8422050935800688, |
| "learning_rate": 1.9677765828102477e-05, |
| "loss": 0.0346, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5201992252351965, |
| "grad_norm": 0.6238402717183468, |
| "learning_rate": 1.9654303452156535e-05, |
| "loss": 0.0315, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5312672938572219, |
| "grad_norm": 1.2101485253397948, |
| "learning_rate": 1.963003165194422e-05, |
| "loss": 0.0348, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5423353624792474, |
| "grad_norm": 0.697551293992083, |
| "learning_rate": 1.9604952462432032e-05, |
| "loss": 0.0325, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5534034311012729, |
| "grad_norm": 0.5906516672972566, |
| "learning_rate": 1.957906798627861e-05, |
| "loss": 0.0239, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5644714997232982, |
| "grad_norm": 1.117787904345762, |
| "learning_rate": 1.955238039365845e-05, |
| "loss": 0.0286, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5755395683453237, |
| "grad_norm": 0.7157423697495711, |
| "learning_rate": 1.952489192207995e-05, |
| "loss": 0.0342, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5866076369673492, |
| "grad_norm": 1.0572948484139497, |
| "learning_rate": 1.9496604876197826e-05, |
| "loss": 0.0272, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5976757055893747, |
| "grad_norm": 1.031127851727682, |
| "learning_rate": 1.9467521627619874e-05, |
| "loss": 0.0266, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6087437742114001, |
| "grad_norm": 0.6403187680270812, |
| "learning_rate": 1.9437644614708143e-05, |
| "loss": 0.0272, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6198118428334256, |
| "grad_norm": 1.062190185802606, |
| "learning_rate": 1.94069763423745e-05, |
| "loss": 0.0298, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.630879911455451, |
| "grad_norm": 0.7801274055237428, |
| "learning_rate": 1.9375519381870608e-05, |
| "loss": 0.0242, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6419479800774764, |
| "grad_norm": 1.2015873253294118, |
| "learning_rate": 1.9343276370572357e-05, |
| "loss": 0.0227, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6530160486995019, |
| "grad_norm": 0.6941953274977626, |
| "learning_rate": 1.9310250011758752e-05, |
| "loss": 0.0235, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6640841173215274, |
| "grad_norm": 0.7439604839898615, |
| "learning_rate": 1.9276443074385246e-05, |
| "loss": 0.0225, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6751521859435529, |
| "grad_norm": 0.4090201825493306, |
| "learning_rate": 1.9241858392851612e-05, |
| "loss": 0.0171, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6862202545655783, |
| "grad_norm": 0.86586702597483, |
| "learning_rate": 1.920649886676429e-05, |
| "loss": 0.0204, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6972883231876038, |
| "grad_norm": 0.5684391358174806, |
| "learning_rate": 1.917036746069329e-05, |
| "loss": 0.0193, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7083563918096292, |
| "grad_norm": 1.2006453142034454, |
| "learning_rate": 1.913346720392363e-05, |
| "loss": 0.0183, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7194244604316546, |
| "grad_norm": 0.5095199603894148, |
| "learning_rate": 1.909580119020138e-05, |
| "loss": 0.0171, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7304925290536801, |
| "grad_norm": 0.6222791362666046, |
| "learning_rate": 1.9057372577474244e-05, |
| "loss": 0.0203, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7415605976757056, |
| "grad_norm": 0.5784262806138188, |
| "learning_rate": 1.901818458762683e-05, |
| "loss": 0.0187, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7526286662977311, |
| "grad_norm": 0.3384795920975776, |
| "learning_rate": 1.897824050621051e-05, |
| "loss": 0.0205, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7636967349197565, |
| "grad_norm": 0.7403008730689107, |
| "learning_rate": 1.893754368216796e-05, |
| "loss": 0.0144, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.774764803541782, |
| "grad_norm": 0.6886290123638243, |
| "learning_rate": 1.8896097527552362e-05, |
| "loss": 0.018, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7858328721638074, |
| "grad_norm": 0.7094773397852386, |
| "learning_rate": 1.8853905517241384e-05, |
| "loss": 0.019, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7969009407858328, |
| "grad_norm": 0.37028282697220893, |
| "learning_rate": 1.8810971188645775e-05, |
| "loss": 0.016, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8079690094078583, |
| "grad_norm": 0.5783298917051538, |
| "learning_rate": 1.876729814141286e-05, |
| "loss": 0.0155, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8190370780298838, |
| "grad_norm": 0.817934182382838, |
| "learning_rate": 1.8722890037124674e-05, |
| "loss": 0.0181, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8301051466519093, |
| "grad_norm": 0.42757623486015045, |
| "learning_rate": 1.8677750598991023e-05, |
| "loss": 0.0121, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8411732152739347, |
| "grad_norm": 0.6738036479530188, |
| "learning_rate": 1.863188361153731e-05, |
| "loss": 0.0168, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8522412838959601, |
| "grad_norm": 0.4240964815744989, |
| "learning_rate": 1.8585292920287217e-05, |
| "loss": 0.0121, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8633093525179856, |
| "grad_norm": 0.6974893779227371, |
| "learning_rate": 1.8537982431440333e-05, |
| "loss": 0.0134, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.874377421140011, |
| "grad_norm": 0.6497026126998792, |
| "learning_rate": 1.8489956111544624e-05, |
| "loss": 0.0117, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8854454897620365, |
| "grad_norm": 0.6623978615404418, |
| "learning_rate": 1.8441217987163874e-05, |
| "loss": 0.0123, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.896513558384062, |
| "grad_norm": 0.5858726137972703, |
| "learning_rate": 1.8391772144540127e-05, |
| "loss": 0.0171, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9075816270060875, |
| "grad_norm": 1.103716010768291, |
| "learning_rate": 1.8341622729251062e-05, |
| "loss": 0.0142, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9186496956281129, |
| "grad_norm": 0.5032363986196893, |
| "learning_rate": 1.8290773945862428e-05, |
| "loss": 0.0149, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9297177642501383, |
| "grad_norm": 0.5622836058996923, |
| "learning_rate": 1.8239230057575542e-05, |
| "loss": 0.0136, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9407858328721638, |
| "grad_norm": 0.6104096448589974, |
| "learning_rate": 1.8186995385869857e-05, |
| "loss": 0.0127, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9518539014941892, |
| "grad_norm": 0.7843382280116427, |
| "learning_rate": 1.8134074310140638e-05, |
| "loss": 0.0139, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9629219701162147, |
| "grad_norm": 0.5746764736583982, |
| "learning_rate": 1.8080471267331792e-05, |
| "loss": 0.0125, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9739900387382402, |
| "grad_norm": 0.43873748930902157, |
| "learning_rate": 1.8026190751563874e-05, |
| "loss": 0.0127, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9850581073602657, |
| "grad_norm": 0.9328533127381619, |
| "learning_rate": 1.79712373137573e-05, |
| "loss": 0.0097, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9961261759822911, |
| "grad_norm": 0.684178143244021, |
| "learning_rate": 1.7915615561250783e-05, |
| "loss": 0.0085, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0066408411732153, |
| "grad_norm": 0.21673732820638353, |
| "learning_rate": 1.7859330157415065e-05, |
| "loss": 0.0092, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.0177089097952408, |
| "grad_norm": 0.27867910779840166, |
| "learning_rate": 1.7802385821261922e-05, |
| "loss": 0.0096, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.0287769784172662, |
| "grad_norm": 0.11162636675632644, |
| "learning_rate": 1.7744787327048533e-05, |
| "loss": 0.0084, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0398450470392917, |
| "grad_norm": 0.22048120465803261, |
| "learning_rate": 1.768653950387718e-05, |
| "loss": 0.0078, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0509131156613172, |
| "grad_norm": 0.6765205253168616, |
| "learning_rate": 1.7627647235290407e-05, |
| "loss": 0.0068, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0619811842833426, |
| "grad_norm": 0.4689878039036384, |
| "learning_rate": 1.7568115458861542e-05, |
| "loss": 0.0074, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.073049252905368, |
| "grad_norm": 0.5676023150735331, |
| "learning_rate": 1.7507949165780753e-05, |
| "loss": 0.007, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.0841173215273934, |
| "grad_norm": 0.7911642935038967, |
| "learning_rate": 1.7447153400436577e-05, |
| "loss": 0.0088, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.0951853901494188, |
| "grad_norm": 0.5077417518483057, |
| "learning_rate": 1.738573325999299e-05, |
| "loss": 0.0076, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.1062534587714443, |
| "grad_norm": 0.5625579004448811, |
| "learning_rate": 1.7323693893962055e-05, |
| "loss": 0.0089, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1173215273934698, |
| "grad_norm": 0.39598238596354546, |
| "learning_rate": 1.7261040503772187e-05, |
| "loss": 0.008, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1283895960154953, |
| "grad_norm": 0.39572470853097774, |
| "learning_rate": 1.7197778342332075e-05, |
| "loss": 0.0068, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.1394576646375207, |
| "grad_norm": 0.6313738026150261, |
| "learning_rate": 1.7133912713590243e-05, |
| "loss": 0.0122, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.1505257332595462, |
| "grad_norm": 0.6255276960016989, |
| "learning_rate": 1.7069448972090387e-05, |
| "loss": 0.0085, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1615938018815717, |
| "grad_norm": 0.4559474668836657, |
| "learning_rate": 1.700439252252244e-05, |
| "loss": 0.0069, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1726618705035972, |
| "grad_norm": 0.2959471392332271, |
| "learning_rate": 1.6938748819269436e-05, |
| "loss": 0.0082, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.1837299391256226, |
| "grad_norm": 0.4089461613308411, |
| "learning_rate": 1.6872523365950218e-05, |
| "loss": 0.0081, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.194798007747648, |
| "grad_norm": 0.1656790995662235, |
| "learning_rate": 1.6805721714957995e-05, |
| "loss": 0.006, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.2058660763696736, |
| "grad_norm": 0.9361330580444202, |
| "learning_rate": 1.6738349466994837e-05, |
| "loss": 0.0064, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.2169341449916988, |
| "grad_norm": 0.21493739963899747, |
| "learning_rate": 1.6670412270602115e-05, |
| "loss": 0.0071, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2280022136137245, |
| "grad_norm": 0.558379730610169, |
| "learning_rate": 1.6601915821686895e-05, |
| "loss": 0.0056, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.2390702822357498, |
| "grad_norm": 0.4220154829452254, |
| "learning_rate": 1.6532865863044424e-05, |
| "loss": 0.0074, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.2501383508577752, |
| "grad_norm": 0.08715888601622518, |
| "learning_rate": 1.6463268183876627e-05, |
| "loss": 0.0095, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2612064194798007, |
| "grad_norm": 0.9325238058616875, |
| "learning_rate": 1.6393128619306734e-05, |
| "loss": 0.0087, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.2722744881018262, |
| "grad_norm": 0.3113643778329001, |
| "learning_rate": 1.6322453049890078e-05, |
| "loss": 0.0073, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2833425567238517, |
| "grad_norm": 0.2476875728900236, |
| "learning_rate": 1.625124740112104e-05, |
| "loss": 0.0062, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2944106253458771, |
| "grad_norm": 0.21826634116425128, |
| "learning_rate": 1.617951764293628e-05, |
| "loss": 0.008, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.3054786939679026, |
| "grad_norm": 0.48793358414259447, |
| "learning_rate": 1.610726978921418e-05, |
| "loss": 0.0085, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.316546762589928, |
| "grad_norm": 0.4618692190051027, |
| "learning_rate": 1.603450989727066e-05, |
| "loss": 0.0079, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.3276148312119536, |
| "grad_norm": 0.5286134233635146, |
| "learning_rate": 1.5961244067351326e-05, |
| "loss": 0.0072, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.338682899833979, |
| "grad_norm": 0.16659699644844692, |
| "learning_rate": 1.5887478442120007e-05, |
| "loss": 0.0074, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.3497509684560045, |
| "grad_norm": 0.48249847302009274, |
| "learning_rate": 1.5813219206143755e-05, |
| "loss": 0.0076, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.3608190370780298, |
| "grad_norm": 0.3524591713873095, |
| "learning_rate": 1.5738472585374334e-05, |
| "loss": 0.0058, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.3718871057000555, |
| "grad_norm": 0.08214795303298088, |
| "learning_rate": 1.566324484662624e-05, |
| "loss": 0.0061, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.3829551743220807, |
| "grad_norm": 0.4243354207285399, |
| "learning_rate": 1.5587542297051233e-05, |
| "loss": 0.0082, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3940232429441062, |
| "grad_norm": 0.2496071649771549, |
| "learning_rate": 1.5511371283609622e-05, |
| "loss": 0.0058, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.4050913115661317, |
| "grad_norm": 0.1955901791520104, |
| "learning_rate": 1.5434738192538067e-05, |
| "loss": 0.0041, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.4161593801881571, |
| "grad_norm": 0.5300487568144705, |
| "learning_rate": 1.5357649448814177e-05, |
| "loss": 0.0111, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.4272274488101826, |
| "grad_norm": 0.39956657844001175, |
| "learning_rate": 1.5280111515617835e-05, |
| "loss": 0.0095, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.438295517432208, |
| "grad_norm": 0.5470821515861973, |
| "learning_rate": 1.520213089378931e-05, |
| "loss": 0.0079, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4493635860542335, |
| "grad_norm": 0.31965121329752255, |
| "learning_rate": 1.512371412128424e-05, |
| "loss": 0.0063, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.460431654676259, |
| "grad_norm": 0.2941816190422621, |
| "learning_rate": 1.5044867772625455e-05, |
| "loss": 0.0061, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.4714997232982845, |
| "grad_norm": 0.3564346350090648, |
| "learning_rate": 1.4965598458351797e-05, |
| "loss": 0.0057, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.48256779192031, |
| "grad_norm": 0.28234215930894024, |
| "learning_rate": 1.4885912824463875e-05, |
| "loss": 0.0059, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.4936358605423354, |
| "grad_norm": 0.8066370524907985, |
| "learning_rate": 1.4805817551866839e-05, |
| "loss": 0.0148, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.5047039291643607, |
| "grad_norm": 0.5977966209875002, |
| "learning_rate": 1.4725319355810282e-05, |
| "loss": 0.0218, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.5157719977863864, |
| "grad_norm": 0.6001415723968081, |
| "learning_rate": 1.4644424985325198e-05, |
| "loss": 0.019, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.5268400664084116, |
| "grad_norm": 0.4494259779868367, |
| "learning_rate": 1.4563141222658163e-05, |
| "loss": 0.0107, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.5379081350304373, |
| "grad_norm": 0.6458187453604811, |
| "learning_rate": 1.4481474882702688e-05, |
| "loss": 0.0135, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.5489762036524626, |
| "grad_norm": 0.45245571994179906, |
| "learning_rate": 1.4399432812427862e-05, |
| "loss": 0.0134, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.560044272274488, |
| "grad_norm": 0.4600542174327164, |
| "learning_rate": 1.4317021890304294e-05, |
| "loss": 0.0072, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.5711123408965135, |
| "grad_norm": 0.31360771518664865, |
| "learning_rate": 1.4234249025727419e-05, |
| "loss": 0.0088, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.582180409518539, |
| "grad_norm": 0.3841495404827664, |
| "learning_rate": 1.4151121158438195e-05, |
| "loss": 0.0056, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.5932484781405645, |
| "grad_norm": 0.46969355836910825, |
| "learning_rate": 1.4067645257941308e-05, |
| "loss": 0.0054, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.60431654676259, |
| "grad_norm": 0.3163537210189324, |
| "learning_rate": 1.3983828322920786e-05, |
| "loss": 0.005, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 0.15773797744824763, |
| "learning_rate": 1.3899677380653276e-05, |
| "loss": 0.0035, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.626452684006641, |
| "grad_norm": 0.49536783037004123, |
| "learning_rate": 1.3815199486418851e-05, |
| "loss": 0.0045, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.6375207526286664, |
| "grad_norm": 0.10176197013762307, |
| "learning_rate": 1.3730401722909479e-05, |
| "loss": 0.0032, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.6485888212506916, |
| "grad_norm": 0.6691956653694718, |
| "learning_rate": 1.3645291199635218e-05, |
| "loss": 0.0045, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.6596568898727173, |
| "grad_norm": 0.3794166881995434, |
| "learning_rate": 1.355987505232815e-05, |
| "loss": 0.0076, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6707249584947426, |
| "grad_norm": 0.2887141429200375, |
| "learning_rate": 1.3474160442344118e-05, |
| "loss": 0.0065, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.6817930271167683, |
| "grad_norm": 0.38976801790866195, |
| "learning_rate": 1.3388154556062292e-05, |
| "loss": 0.0037, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.6928610957387935, |
| "grad_norm": 0.2228416804146198, |
| "learning_rate": 1.330186460428268e-05, |
| "loss": 0.0032, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.703929164360819, |
| "grad_norm": 0.09090228950977586, |
| "learning_rate": 1.3215297821621565e-05, |
| "loss": 0.0037, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.7149972329828445, |
| "grad_norm": 0.6251199746615973, |
| "learning_rate": 1.3128461465904938e-05, |
| "loss": 0.0037, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.72606530160487, |
| "grad_norm": 0.2501159476688824, |
| "learning_rate": 1.3041362817560007e-05, |
| "loss": 0.0033, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.7371333702268954, |
| "grad_norm": 0.32559821371482117, |
| "learning_rate": 1.2954009179004794e-05, |
| "loss": 0.0025, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.7482014388489209, |
| "grad_norm": 0.3658704749959391, |
| "learning_rate": 1.2866407874035904e-05, |
| "loss": 0.0051, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.7592695074709463, |
| "grad_norm": 0.17147399439018965, |
| "learning_rate": 1.2778566247214474e-05, |
| "loss": 0.0045, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.7703375760929718, |
| "grad_norm": 0.1551464032194942, |
| "learning_rate": 1.2690491663250428e-05, |
| "loss": 0.0018, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7814056447149973, |
| "grad_norm": 0.5301425104181171, |
| "learning_rate": 1.260219150638498e-05, |
| "loss": 0.0044, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.7924737133370225, |
| "grad_norm": 0.44158919846599415, |
| "learning_rate": 1.2513673179771555e-05, |
| "loss": 0.0062, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.8035417819590482, |
| "grad_norm": 0.1730696100779873, |
| "learning_rate": 1.2424944104855107e-05, |
| "loss": 0.0032, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.8146098505810735, |
| "grad_norm": 0.05809913714960146, |
| "learning_rate": 1.2336011720749881e-05, |
| "loss": 0.0032, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.8256779192030992, |
| "grad_norm": 0.15220878659480627, |
| "learning_rate": 1.2246883483615731e-05, |
| "loss": 0.0024, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8367459878251244, |
| "grad_norm": 0.44313561326203266, |
| "learning_rate": 1.215756686603296e-05, |
| "loss": 0.0034, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.8478140564471501, |
| "grad_norm": 1.1214134555289978, |
| "learning_rate": 1.2068069356375864e-05, |
| "loss": 0.0044, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.8588821250691754, |
| "grad_norm": 0.37158504715550505, |
| "learning_rate": 1.1978398458184848e-05, |
| "loss": 0.0063, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.8699501936912009, |
| "grad_norm": 0.050902224376628516, |
| "learning_rate": 1.188856168953735e-05, |
| "loss": 0.0026, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.8810182623132263, |
| "grad_norm": 0.16578056285472928, |
| "learning_rate": 1.1798566582417521e-05, |
| "loss": 0.0038, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8920863309352518, |
| "grad_norm": 0.29387428387180203, |
| "learning_rate": 1.1708420682084722e-05, |
| "loss": 0.0032, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.9031543995572773, |
| "grad_norm": 0.3729500071630355, |
| "learning_rate": 1.1618131546440949e-05, |
| "loss": 0.0033, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.9142224681793027, |
| "grad_norm": 0.8756632832474044, |
| "learning_rate": 1.1527706745397143e-05, |
| "loss": 0.0021, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.9252905368013282, |
| "grad_norm": 0.29073014043576567, |
| "learning_rate": 1.1437153860238541e-05, |
| "loss": 0.0041, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.9363586054233535, |
| "grad_norm": 0.2886330704551084, |
| "learning_rate": 1.1346480482989055e-05, |
| "loss": 0.003, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.9474266740453792, |
| "grad_norm": 0.08054022503195961, |
| "learning_rate": 1.1255694215774743e-05, |
| "loss": 0.0033, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.9584947426674044, |
| "grad_norm": 0.34933906854298136, |
| "learning_rate": 1.1164802670186448e-05, |
| "loss": 0.0042, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.9695628112894301, |
| "grad_norm": 0.26977238441720924, |
| "learning_rate": 1.1073813466641633e-05, |
| "loss": 0.0037, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.9806308799114554, |
| "grad_norm": 0.24400676151661493, |
| "learning_rate": 1.0982734233745473e-05, |
| "loss": 0.0024, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.991698948533481, |
| "grad_norm": 0.32209501603512203, |
| "learning_rate": 1.0891572607651281e-05, |
| "loss": 0.0031, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.002213613724405, |
| "grad_norm": 0.04360741823732105, |
| "learning_rate": 1.0800336231420278e-05, |
| "loss": 0.0029, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.0132816823464306, |
| "grad_norm": 0.16855758165616114, |
| "learning_rate": 1.0709032754380797e-05, |
| "loss": 0.0025, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.024349750968456, |
| "grad_norm": 0.15666932651188484, |
| "learning_rate": 1.0617669831486944e-05, |
| "loss": 0.0013, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.0354178195904815, |
| "grad_norm": 0.20181179745149389, |
| "learning_rate": 1.0526255122676823e-05, |
| "loss": 0.0026, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.0464858882125068, |
| "grad_norm": 0.15003685273332168, |
| "learning_rate": 1.0434796292230303e-05, |
| "loss": 0.0008, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.0575539568345325, |
| "grad_norm": 0.00804358006410917, |
| "learning_rate": 1.0343301008126447e-05, |
| "loss": 0.0016, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.0686220254565577, |
| "grad_norm": 0.16774600983369314, |
| "learning_rate": 1.025177694140062e-05, |
| "loss": 0.0016, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.0796900940785834, |
| "grad_norm": 0.20346726127119819, |
| "learning_rate": 1.0160231765501345e-05, |
| "loss": 0.0019, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.0907581627006087, |
| "grad_norm": 0.08982622627628309, |
| "learning_rate": 1.006867315564696e-05, |
| "loss": 0.0012, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.1018262313226344, |
| "grad_norm": 0.12775435302757068, |
| "learning_rate": 9.977108788182104e-06, |
| "loss": 0.0017, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.1128942999446596, |
| "grad_norm": 0.2643672894526569, |
| "learning_rate": 9.885546339934145e-06, |
| "loss": 0.0016, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.1239623685666853, |
| "grad_norm": 0.03906918847071518, |
| "learning_rate": 9.793993487569544e-06, |
| "loss": 0.0023, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.1350304371887106, |
| "grad_norm": 0.01975929518029572, |
| "learning_rate": 9.702457906950235e-06, |
| "loss": 0.0006, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.146098505810736, |
| "grad_norm": 0.015510480606972247, |
| "learning_rate": 9.610947272490077e-06, |
| "loss": 0.0025, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.1571665744327615, |
| "grad_norm": 0.3546534218940891, |
| "learning_rate": 9.519469256511415e-06, |
| "loss": 0.0023, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.1682346430547867, |
| "grad_norm": 0.01060473919463944, |
| "learning_rate": 9.428031528601846e-06, |
| "loss": 0.0004, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.1793027116768124, |
| "grad_norm": 0.22104920814353005, |
| "learning_rate": 9.336641754971183e-06, |
| "loss": 0.0014, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.1903707802988377, |
| "grad_norm": 0.08689911675197709, |
| "learning_rate": 9.245307597808702e-06, |
| "loss": 0.0005, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.2014388489208634, |
| "grad_norm": 0.0883619792463627, |
| "learning_rate": 9.154036714640768e-06, |
| "loss": 0.0007, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.2125069175428886, |
| "grad_norm": 0.15082305658963324, |
| "learning_rate": 9.0628367576888e-06, |
| "loss": 0.0012, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.2235749861649143, |
| "grad_norm": 0.07102549960011285, |
| "learning_rate": 8.971715373227704e-06, |
| "loss": 0.0023, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.2346430547869396, |
| "grad_norm": 0.27718384861440826, |
| "learning_rate": 8.880680200944812e-06, |
| "loss": 0.001, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.2457111234089653, |
| "grad_norm": 0.39784384927805344, |
| "learning_rate": 8.789738873299356e-06, |
| "loss": 0.0014, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.2567791920309905, |
| "grad_norm": 0.10575032505942293, |
| "learning_rate": 8.698899014882572e-06, |
| "loss": 0.0007, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.2678472606530162, |
| "grad_norm": 0.015437632384868543, |
| "learning_rate": 8.60816824177842e-06, |
| "loss": 0.0004, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.2789153292750415, |
| "grad_norm": 0.013827548095505848, |
| "learning_rate": 8.517554160925073e-06, |
| "loss": 0.0013, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.2899833978970667, |
| "grad_norm": 0.1548641691681526, |
| "learning_rate": 8.42706436947714e-06, |
| "loss": 0.0009, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.3010514665190924, |
| "grad_norm": 0.1347709020457703, |
| "learning_rate": 8.336706454168701e-06, |
| "loss": 0.0012, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.312119535141118, |
| "grad_norm": 0.008104450008335772, |
| "learning_rate": 8.246487990677242e-06, |
| "loss": 0.0008, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.3231876037631434, |
| "grad_norm": 0.0069746001251187765, |
| "learning_rate": 8.156416542988505e-06, |
| "loss": 0.0005, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.3342556723851686, |
| "grad_norm": 0.06413164810520909, |
| "learning_rate": 8.066499662762312e-06, |
| "loss": 0.0017, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.3453237410071943, |
| "grad_norm": 0.27922715524820757, |
| "learning_rate": 7.976744888699416e-06, |
| "loss": 0.0005, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.3563918096292196, |
| "grad_norm": 0.1170676822125543, |
| "learning_rate": 7.887159745909484e-06, |
| "loss": 0.0023, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.3674598782512453, |
| "grad_norm": 0.09899495190658357, |
| "learning_rate": 7.797751745280153e-06, |
| "loss": 0.0012, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.3785279468732705, |
| "grad_norm": 0.1669162943718525, |
| "learning_rate": 7.708528382847333e-06, |
| "loss": 0.0017, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.389596015495296, |
| "grad_norm": 0.04531028241647312, |
| "learning_rate": 7.6194971391667126e-06, |
| "loss": 0.0012, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.4006640841173215, |
| "grad_norm": 0.024530073059923975, |
| "learning_rate": 7.530665478686613e-06, |
| "loss": 0.0006, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.411732152739347, |
| "grad_norm": 0.1393568845665947, |
| "learning_rate": 7.442040849122127e-06, |
| "loss": 0.0007, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.4228002213613724, |
| "grad_norm": 0.11837513907578925, |
| "learning_rate": 7.3536306808307256e-06, |
| "loss": 0.0008, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.4338682899833977, |
| "grad_norm": 0.2239767609538588, |
| "learning_rate": 7.265442386189281e-06, |
| "loss": 0.0006, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.4449363586054234, |
| "grad_norm": 0.03502224363169152, |
| "learning_rate": 7.177483358972607e-06, |
| "loss": 0.0007, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.456004427227449, |
| "grad_norm": 0.004121217145321139, |
| "learning_rate": 7.089760973733553e-06, |
| "loss": 0.0005, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.4670724958494743, |
| "grad_norm": 0.0080795156182359, |
| "learning_rate": 7.002282585184731e-06, |
| "loss": 0.0015, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.4781405644714996, |
| "grad_norm": 0.018145033752889977, |
| "learning_rate": 6.915055527581878e-06, |
| "loss": 0.0006, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.4892086330935252, |
| "grad_norm": 0.019850233754552235, |
| "learning_rate": 6.8280871141089415e-06, |
| "loss": 0.0008, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.5002767017155505, |
| "grad_norm": 0.1741781894645272, |
| "learning_rate": 6.741384636264961e-06, |
| "loss": 0.0031, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.511344770337576, |
| "grad_norm": 0.04992280498804918, |
| "learning_rate": 6.6549553632527154e-06, |
| "loss": 0.0006, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.5224128389596014, |
| "grad_norm": 0.01759604589558905, |
| "learning_rate": 6.568806541369287e-06, |
| "loss": 0.0004, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.533480907581627, |
| "grad_norm": 0.005607314564319438, |
| "learning_rate": 6.4829453933985096e-06, |
| "loss": 0.0003, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.5445489762036524, |
| "grad_norm": 0.08607954611806186, |
| "learning_rate": 6.397379118005423e-06, |
| "loss": 0.0008, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.555617044825678, |
| "grad_norm": 0.2820276784438115, |
| "learning_rate": 6.312114889132721e-06, |
| "loss": 0.0005, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.5666851134477033, |
| "grad_norm": 0.006054427468933993, |
| "learning_rate": 6.227159855399276e-06, |
| "loss": 0.0004, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.5777531820697286, |
| "grad_norm": 0.020395979259911268, |
| "learning_rate": 6.142521139500803e-06, |
| "loss": 0.0005, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.5888212506917543, |
| "grad_norm": 0.005378706050493182, |
| "learning_rate": 6.058205837612694e-06, |
| "loss": 0.0017, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.59988931931378, |
| "grad_norm": 0.6929959495629392, |
| "learning_rate": 5.974221018795048e-06, |
| "loss": 0.0018, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.6109573879358052, |
| "grad_norm": 0.1430231752480323, |
| "learning_rate": 5.89057372440002e-06, |
| "loss": 0.0006, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.6220254565578305, |
| "grad_norm": 0.08736899606365109, |
| "learning_rate": 5.807270967481442e-06, |
| "loss": 0.0005, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.633093525179856, |
| "grad_norm": 0.20917966364677992, |
| "learning_rate": 5.724319732206878e-06, |
| "loss": 0.0003, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.6441615938018814, |
| "grad_norm": 0.020443284638463004, |
| "learning_rate": 5.6417269732720204e-06, |
| "loss": 0.0008, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.655229662423907, |
| "grad_norm": 0.006232574325332509, |
| "learning_rate": 5.559499615317652e-06, |
| "loss": 0.0015, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.6662977310459324, |
| "grad_norm": 0.09483140471952521, |
| "learning_rate": 5.477644552349033e-06, |
| "loss": 0.001, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.677365799667958, |
| "grad_norm": 0.01497083814614671, |
| "learning_rate": 5.396168647157942e-06, |
| "loss": 0.0001, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.6884338682899833, |
| "grad_norm": 0.062082528516403966, |
| "learning_rate": 5.315078730747268e-06, |
| "loss": 0.0014, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.699501936912009, |
| "grad_norm": 0.016768871806098665, |
| "learning_rate": 5.234381601758306e-06, |
| "loss": 0.0004, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.7105700055340343, |
| "grad_norm": 0.021142976199730865, |
| "learning_rate": 5.154084025900742e-06, |
| "loss": 0.0004, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.7216380741560595, |
| "grad_norm": 0.005663906697363443, |
| "learning_rate": 5.0741927353854305e-06, |
| "loss": 0.0001, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.732706142778085, |
| "grad_norm": 0.0015717000409481488, |
| "learning_rate": 4.994714428359936e-06, |
| "loss": 0.0007, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.743774211400111, |
| "grad_norm": 0.003253438286001577, |
| "learning_rate": 4.915655768346975e-06, |
| "loss": 0.0005, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.754842280022136, |
| "grad_norm": 0.30553434446246647, |
| "learning_rate": 4.837023383685736e-06, |
| "loss": 0.0004, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.7659103486441614, |
| "grad_norm": 0.0015060930301738903, |
| "learning_rate": 4.758823866976152e-06, |
| "loss": 0.0003, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.776978417266187, |
| "grad_norm": 0.0012735780749710956, |
| "learning_rate": 4.681063774526166e-06, |
| "loss": 0.0, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.7880464858882124, |
| "grad_norm": 0.09126349400935568, |
| "learning_rate": 4.603749625802051e-06, |
| "loss": 0.0003, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.799114554510238, |
| "grad_norm": 0.14258230843971909, |
| "learning_rate": 4.526887902881822e-06, |
| "loss": 0.0002, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.8101826231322633, |
| "grad_norm": 0.00165028758610408, |
| "learning_rate": 4.450485049911757e-06, |
| "loss": 0.0011, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.821250691754289, |
| "grad_norm": 0.0067956970685621836, |
| "learning_rate": 4.374547472566129e-06, |
| "loss": 0.0004, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.8323187603763142, |
| "grad_norm": 0.003682680710757902, |
| "learning_rate": 4.299081537510143e-06, |
| "loss": 0.0005, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.84338682899834, |
| "grad_norm": 0.2401069296675629, |
| "learning_rate": 4.2240935718661365e-06, |
| "loss": 0.0003, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.854454897620365, |
| "grad_norm": 0.005953507115694622, |
| "learning_rate": 4.149589862683141e-06, |
| "loss": 0.0002, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.8655229662423904, |
| "grad_norm": 0.0014731662378259323, |
| "learning_rate": 4.075576656409733e-06, |
| "loss": 0.0001, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.876591034864416, |
| "grad_norm": 0.0023579910574366416, |
| "learning_rate": 4.002060158370361e-06, |
| "loss": 0.0001, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.887659103486442, |
| "grad_norm": 0.0011945901783503858, |
| "learning_rate": 3.9290465322450685e-06, |
| "loss": 0.0004, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.898727172108467, |
| "grad_norm": 0.03407895985174983, |
| "learning_rate": 3.8565418995527185e-06, |
| "loss": 0.0001, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.9097952407304923, |
| "grad_norm": 0.004566715125346021, |
| "learning_rate": 3.7845523391377815e-06, |
| "loss": 0.0005, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.920863309352518, |
| "grad_norm": 0.005795874642656273, |
| "learning_rate": 3.7130838866606665e-06, |
| "loss": 0.0004, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.9319313779745433, |
| "grad_norm": 0.008770525441123975, |
| "learning_rate": 3.642142534091695e-06, |
| "loss": 0.0001, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.942999446596569, |
| "grad_norm": 0.0013301081718034352, |
| "learning_rate": 3.571734229208712e-06, |
| "loss": 0.0002, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.9540675152185942, |
| "grad_norm": 0.022671106846233965, |
| "learning_rate": 3.5018648750984473e-06, |
| "loss": 0.0, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.96513558384062, |
| "grad_norm": 0.0019084527889065164, |
| "learning_rate": 3.4325403296615677e-06, |
| "loss": 0.0001, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.976203652462645, |
| "grad_norm": 0.0012586049007444003, |
| "learning_rate": 3.3637664051215703e-06, |
| "loss": 0.0002, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.987271721084671, |
| "grad_norm": 0.008633880712666318, |
| "learning_rate": 3.2955488675374635e-06, |
| "loss": 0.0005, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.998339789706696, |
| "grad_norm": 0.006829643276901058, |
| "learning_rate": 3.227893436320353e-06, |
| "loss": 0.0004, |
| "step": 2710 |
| }, |
| { |
| "epoch": 3.0088544548976204, |
| "grad_norm": 0.0856770046438516, |
| "learning_rate": 3.1608057837538976e-06, |
| "loss": 0.0001, |
| "step": 2720 |
| }, |
| { |
| "epoch": 3.0199225235196456, |
| "grad_norm": 0.0009597305762044255, |
| "learning_rate": 3.0942915345187617e-06, |
| "loss": 0.0001, |
| "step": 2730 |
| }, |
| { |
| "epoch": 3.0309905921416713, |
| "grad_norm": 0.016142733928253637, |
| "learning_rate": 3.028356265221033e-06, |
| "loss": 0.0001, |
| "step": 2740 |
| }, |
| { |
| "epoch": 3.0420586607636966, |
| "grad_norm": 0.0007246525915734167, |
| "learning_rate": 2.963005503924674e-06, |
| "loss": 0.0, |
| "step": 2750 |
| }, |
| { |
| "epoch": 3.0531267293857223, |
| "grad_norm": 0.0042929326717048525, |
| "learning_rate": 2.8982447296880423e-06, |
| "loss": 0.0001, |
| "step": 2760 |
| }, |
| { |
| "epoch": 3.0641947980077475, |
| "grad_norm": 0.0030026445487718596, |
| "learning_rate": 2.8340793721045266e-06, |
| "loss": 0.0001, |
| "step": 2770 |
| }, |
| { |
| "epoch": 3.075262866629773, |
| "grad_norm": 0.0012849609984761843, |
| "learning_rate": 2.7705148108473177e-06, |
| "loss": 0.0002, |
| "step": 2780 |
| }, |
| { |
| "epoch": 3.0863309352517985, |
| "grad_norm": 0.008697418155296581, |
| "learning_rate": 2.70755637521838e-06, |
| "loss": 0.0, |
| "step": 2790 |
| }, |
| { |
| "epoch": 3.097399003873824, |
| "grad_norm": 0.007466710314150477, |
| "learning_rate": 2.645209343701638e-06, |
| "loss": 0.0, |
| "step": 2800 |
| }, |
| { |
| "epoch": 3.1084670724958494, |
| "grad_norm": 0.000836180974578036, |
| "learning_rate": 2.5834789435204245e-06, |
| "loss": 0.0002, |
| "step": 2810 |
| }, |
| { |
| "epoch": 3.119535141117875, |
| "grad_norm": 0.10328226368751944, |
| "learning_rate": 2.5223703501992234e-06, |
| "loss": 0.0005, |
| "step": 2820 |
| }, |
| { |
| "epoch": 3.1306032097399004, |
| "grad_norm": 0.0014833894480815533, |
| "learning_rate": 2.4618886871297454e-06, |
| "loss": 0.0, |
| "step": 2830 |
| }, |
| { |
| "epoch": 3.141671278361926, |
| "grad_norm": 0.0012426287285033058, |
| "learning_rate": 2.4020390251413893e-06, |
| "loss": 0.0002, |
| "step": 2840 |
| }, |
| { |
| "epoch": 3.1527393469839513, |
| "grad_norm": 0.0012341856887167653, |
| "learning_rate": 2.342826382076098e-06, |
| "loss": 0.0001, |
| "step": 2850 |
| }, |
| { |
| "epoch": 3.1638074156059766, |
| "grad_norm": 0.0012545195151238747, |
| "learning_rate": 2.284255722367643e-06, |
| "loss": 0.0002, |
| "step": 2860 |
| }, |
| { |
| "epoch": 3.1748754842280023, |
| "grad_norm": 0.028964927877798387, |
| "learning_rate": 2.226331956625427e-06, |
| "loss": 0.0, |
| "step": 2870 |
| }, |
| { |
| "epoch": 3.1859435528500275, |
| "grad_norm": 0.0021281137508232786, |
| "learning_rate": 2.16905994122276e-06, |
| "loss": 0.0, |
| "step": 2880 |
| }, |
| { |
| "epoch": 3.197011621472053, |
| "grad_norm": 0.017846974643844316, |
| "learning_rate": 2.1124444778896914e-06, |
| "loss": 0.0001, |
| "step": 2890 |
| }, |
| { |
| "epoch": 3.2080796900940785, |
| "grad_norm": 0.04653872502302951, |
| "learning_rate": 2.0564903133104474e-06, |
| "loss": 0.0001, |
| "step": 2900 |
| }, |
| { |
| "epoch": 3.219147758716104, |
| "grad_norm": 0.00975360262148922, |
| "learning_rate": 2.001202138725451e-06, |
| "loss": 0.0001, |
| "step": 2910 |
| }, |
| { |
| "epoch": 3.2302158273381294, |
| "grad_norm": 0.0011681464770861175, |
| "learning_rate": 1.946584589538013e-06, |
| "loss": 0.0001, |
| "step": 2920 |
| }, |
| { |
| "epoch": 3.241283895960155, |
| "grad_norm": 0.002047929485430651, |
| "learning_rate": 1.8926422449256842e-06, |
| "loss": 0.0001, |
| "step": 2930 |
| }, |
| { |
| "epoch": 3.2523519645821803, |
| "grad_norm": 0.0011644854590848986, |
| "learning_rate": 1.8393796274563458e-06, |
| "loss": 0.0011, |
| "step": 2940 |
| }, |
| { |
| "epoch": 3.263420033204206, |
| "grad_norm": 0.006586949100274725, |
| "learning_rate": 1.786801202709032e-06, |
| "loss": 0.0004, |
| "step": 2950 |
| }, |
| { |
| "epoch": 3.2744881018262313, |
| "grad_norm": 0.05963749406991675, |
| "learning_rate": 1.7349113788995288e-06, |
| "loss": 0.0001, |
| "step": 2960 |
| }, |
| { |
| "epoch": 3.285556170448257, |
| "grad_norm": 0.0008960384323782237, |
| "learning_rate": 1.6837145065107862e-06, |
| "loss": 0.0, |
| "step": 2970 |
| }, |
| { |
| "epoch": 3.2966242390702822, |
| "grad_norm": 0.0010892427464036952, |
| "learning_rate": 1.6332148779281765e-06, |
| "loss": 0.0, |
| "step": 2980 |
| }, |
| { |
| "epoch": 3.3076923076923075, |
| "grad_norm": 0.005552386026199114, |
| "learning_rate": 1.583416727079602e-06, |
| "loss": 0.0, |
| "step": 2990 |
| }, |
| { |
| "epoch": 3.318760376314333, |
| "grad_norm": 0.0009765106236611565, |
| "learning_rate": 1.5343242290805348e-06, |
| "loss": 0.0, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.3298284449363584, |
| "grad_norm": 0.022067256735230037, |
| "learning_rate": 1.4859414998839694e-06, |
| "loss": 0.0003, |
| "step": 3010 |
| }, |
| { |
| "epoch": 3.340896513558384, |
| "grad_norm": 0.0006437609301820168, |
| "learning_rate": 1.4382725959353305e-06, |
| "loss": 0.0, |
| "step": 3020 |
| }, |
| { |
| "epoch": 3.3519645821804094, |
| "grad_norm": 0.0020417629447496534, |
| "learning_rate": 1.3913215138323877e-06, |
| "loss": 0.0, |
| "step": 3030 |
| }, |
| { |
| "epoch": 3.363032650802435, |
| "grad_norm": 0.0005098328209815861, |
| "learning_rate": 1.3450921899901637e-06, |
| "loss": 0.0001, |
| "step": 3040 |
| }, |
| { |
| "epoch": 3.3741007194244603, |
| "grad_norm": 0.0014165659540379754, |
| "learning_rate": 1.2995885003109166e-06, |
| "loss": 0.0002, |
| "step": 3050 |
| }, |
| { |
| "epoch": 3.385168788046486, |
| "grad_norm": 0.02746560881484342, |
| "learning_rate": 1.254814259859175e-06, |
| "loss": 0.0001, |
| "step": 3060 |
| }, |
| { |
| "epoch": 3.3962368566685113, |
| "grad_norm": 0.01739320847725493, |
| "learning_rate": 1.2107732225418766e-06, |
| "loss": 0.0, |
| "step": 3070 |
| }, |
| { |
| "epoch": 3.407304925290537, |
| "grad_norm": 0.0006431886745766922, |
| "learning_rate": 1.167469080793645e-06, |
| "loss": 0.0, |
| "step": 3080 |
| }, |
| { |
| "epoch": 3.418372993912562, |
| "grad_norm": 0.003257490633909583, |
| "learning_rate": 1.1249054652672097e-06, |
| "loss": 0.0, |
| "step": 3090 |
| }, |
| { |
| "epoch": 3.429441062534588, |
| "grad_norm": 0.00043153195343080357, |
| "learning_rate": 1.0830859445290044e-06, |
| "loss": 0.0001, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.440509131156613, |
| "grad_norm": 0.0016745154217626835, |
| "learning_rate": 1.0420140247599842e-06, |
| "loss": 0.0, |
| "step": 3110 |
| }, |
| { |
| "epoch": 3.4515771997786384, |
| "grad_norm": 0.009523095826316882, |
| "learning_rate": 1.0016931494616644e-06, |
| "loss": 0.0001, |
| "step": 3120 |
| }, |
| { |
| "epoch": 3.462645268400664, |
| "grad_norm": 0.0025193648225743472, |
| "learning_rate": 9.621266991674017e-07, |
| "loss": 0.0, |
| "step": 3130 |
| }, |
| { |
| "epoch": 3.4737133370226894, |
| "grad_norm": 0.004360486555943711, |
| "learning_rate": 9.233179911589874e-07, |
| "loss": 0.0, |
| "step": 3140 |
| }, |
| { |
| "epoch": 3.484781405644715, |
| "grad_norm": 0.0011194325613279412, |
| "learning_rate": 8.852702791885048e-07, |
| "loss": 0.0, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.4958494742667403, |
| "grad_norm": 0.006186485269155249, |
| "learning_rate": 8.479867532055452e-07, |
| "loss": 0.0001, |
| "step": 3160 |
| }, |
| { |
| "epoch": 3.506917542888766, |
| "grad_norm": 0.0707236321603267, |
| "learning_rate": 8.114705390897581e-07, |
| "loss": 0.0001, |
| "step": 3170 |
| }, |
| { |
| "epoch": 3.5179856115107913, |
| "grad_norm": 0.0008315016846056175, |
| "learning_rate": 7.757246983887679e-07, |
| "loss": 0.0001, |
| "step": 3180 |
| }, |
| { |
| "epoch": 3.529053680132817, |
| "grad_norm": 0.003081278609407882, |
| "learning_rate": 7.40752228061502e-07, |
| "loss": 0.0, |
| "step": 3190 |
| }, |
| { |
| "epoch": 3.540121748754842, |
| "grad_norm": 0.03262274667027854, |
| "learning_rate": 7.065560602269106e-07, |
| "loss": 0.0001, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.551189817376868, |
| "grad_norm": 0.0049780417471171675, |
| "learning_rate": 6.731390619181466e-07, |
| "loss": 0.0, |
| "step": 3210 |
| }, |
| { |
| "epoch": 3.562257885998893, |
| "grad_norm": 0.0006067046828209429, |
| "learning_rate": 6.405040348421876e-07, |
| "loss": 0.0004, |
| "step": 3220 |
| }, |
| { |
| "epoch": 3.573325954620919, |
| "grad_norm": 0.10601414226734147, |
| "learning_rate": 6.08653715144939e-07, |
| "loss": 0.0001, |
| "step": 3230 |
| }, |
| { |
| "epoch": 3.584394023242944, |
| "grad_norm": 0.014565859239713545, |
| "learning_rate": 5.775907731818308e-07, |
| "loss": 0.0006, |
| "step": 3240 |
| }, |
| { |
| "epoch": 3.5954620918649693, |
| "grad_norm": 0.0006330087021193012, |
| "learning_rate": 5.47317813293935e-07, |
| "loss": 0.0, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.606530160486995, |
| "grad_norm": 0.0008371382231997046, |
| "learning_rate": 5.17837373589618e-07, |
| "loss": 0.0001, |
| "step": 3260 |
| }, |
| { |
| "epoch": 3.6175982291090207, |
| "grad_norm": 0.0004617966536746336, |
| "learning_rate": 4.891519257317379e-07, |
| "loss": 0.0002, |
| "step": 3270 |
| }, |
| { |
| "epoch": 3.628666297731046, |
| "grad_norm": 0.04963699258614127, |
| "learning_rate": 4.612638747304243e-07, |
| "loss": 0.0001, |
| "step": 3280 |
| }, |
| { |
| "epoch": 3.6397343663530712, |
| "grad_norm": 0.00045115875633578936, |
| "learning_rate": 4.3417555874143644e-07, |
| "loss": 0.0001, |
| "step": 3290 |
| }, |
| { |
| "epoch": 3.650802434975097, |
| "grad_norm": 0.0003660195467245479, |
| "learning_rate": 4.078892488701347e-07, |
| "loss": 0.0, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.661870503597122, |
| "grad_norm": 0.005736554465665363, |
| "learning_rate": 3.824071489810599e-07, |
| "loss": 0.0, |
| "step": 3310 |
| }, |
| { |
| "epoch": 3.672938572219148, |
| "grad_norm": 0.007943425544438845, |
| "learning_rate": 3.5773139551317226e-07, |
| "loss": 0.0, |
| "step": 3320 |
| }, |
| { |
| "epoch": 3.684006640841173, |
| "grad_norm": 0.004240017703104073, |
| "learning_rate": 3.3386405730072237e-07, |
| "loss": 0.0, |
| "step": 3330 |
| }, |
| { |
| "epoch": 3.695074709463199, |
| "grad_norm": 0.006848121458682678, |
| "learning_rate": 3.108071353997999e-07, |
| "loss": 0.0, |
| "step": 3340 |
| }, |
| { |
| "epoch": 3.706142778085224, |
| "grad_norm": 0.0008720424724852751, |
| "learning_rate": 2.8856256292056797e-07, |
| "loss": 0.0006, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.7172108467072498, |
| "grad_norm": 0.0009071607947124355, |
| "learning_rate": 2.671322048651781e-07, |
| "loss": 0.0, |
| "step": 3360 |
| }, |
| { |
| "epoch": 3.728278915329275, |
| "grad_norm": 0.0006526998309807914, |
| "learning_rate": 2.4651785797142447e-07, |
| "loss": 0.0, |
| "step": 3370 |
| }, |
| { |
| "epoch": 3.7393469839513003, |
| "grad_norm": 0.000525162417622788, |
| "learning_rate": 2.267212505620886e-07, |
| "loss": 0.0, |
| "step": 3380 |
| }, |
| { |
| "epoch": 3.750415052573326, |
| "grad_norm": 0.0007723423555957404, |
| "learning_rate": 2.0774404240004432e-07, |
| "loss": 0.0001, |
| "step": 3390 |
| }, |
| { |
| "epoch": 3.7614831211953517, |
| "grad_norm": 0.0006151742760236711, |
| "learning_rate": 1.8958782454909563e-07, |
| "loss": 0.0, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.772551189817377, |
| "grad_norm": 0.019346914707342473, |
| "learning_rate": 1.72254119240588e-07, |
| "loss": 0.0, |
| "step": 3410 |
| }, |
| { |
| "epoch": 3.783619258439402, |
| "grad_norm": 0.000574300771385019, |
| "learning_rate": 1.5574437974577473e-07, |
| "loss": 0.0001, |
| "step": 3420 |
| }, |
| { |
| "epoch": 3.794687327061428, |
| "grad_norm": 0.0012025102235216681, |
| "learning_rate": 1.4005999025398231e-07, |
| "loss": 0.0004, |
| "step": 3430 |
| }, |
| { |
| "epoch": 3.805755395683453, |
| "grad_norm": 0.0019927107249734836, |
| "learning_rate": 1.2520226575655325e-07, |
| "loss": 0.0, |
| "step": 3440 |
| }, |
| { |
| "epoch": 3.816823464305479, |
| "grad_norm": 0.0004585933553038856, |
| "learning_rate": 1.1117245193659864e-07, |
| "loss": 0.0003, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.827891532927504, |
| "grad_norm": 0.0008599742748381332, |
| "learning_rate": 9.79717250645551e-08, |
| "loss": 0.0, |
| "step": 3460 |
| }, |
| { |
| "epoch": 3.8389596015495298, |
| "grad_norm": 0.005158902732885002, |
| "learning_rate": 8.56011918995725e-08, |
| "loss": 0.0001, |
| "step": 3470 |
| }, |
| { |
| "epoch": 3.850027670171555, |
| "grad_norm": 0.0005943589755676227, |
| "learning_rate": 7.406188959671601e-08, |
| "loss": 0.0, |
| "step": 3480 |
| }, |
| { |
| "epoch": 3.8610957387935807, |
| "grad_norm": 0.013582756443605442, |
| "learning_rate": 6.33547856200134e-08, |
| "loss": 0.0, |
| "step": 3490 |
| }, |
| { |
| "epoch": 3.872163807415606, |
| "grad_norm": 0.0015272986821739895, |
| "learning_rate": 5.3480777661341077e-08, |
| "loss": 0.0001, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.883231876037631, |
| "grad_norm": 0.023645089836197807, |
| "learning_rate": 4.4440693565160895e-08, |
| "loss": 0.0001, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.894299944659657, |
| "grad_norm": 0.004636458141232424, |
| "learning_rate": 3.6235291259113516e-08, |
| "loss": 0.0001, |
| "step": 3520 |
| }, |
| { |
| "epoch": 3.9053680132816826, |
| "grad_norm": 0.0005499173157726086, |
| "learning_rate": 2.886525869047363e-08, |
| "loss": 0.0, |
| "step": 3530 |
| }, |
| { |
| "epoch": 3.916436081903708, |
| "grad_norm": 0.0006622306223321599, |
| "learning_rate": 2.2331213768468363e-08, |
| "loss": 0.0, |
| "step": 3540 |
| }, |
| { |
| "epoch": 3.927504150525733, |
| "grad_norm": 0.0008544205486865539, |
| "learning_rate": 1.6633704312478683e-08, |
| "loss": 0.0001, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.938572219147759, |
| "grad_norm": 0.0027443144589558884, |
| "learning_rate": 1.177320800610171e-08, |
| "loss": 0.0, |
| "step": 3560 |
| }, |
| { |
| "epoch": 3.949640287769784, |
| "grad_norm": 0.0005251118311918424, |
| "learning_rate": 7.750132357106089e-09, |
| "loss": 0.0001, |
| "step": 3570 |
| }, |
| { |
| "epoch": 3.9607083563918097, |
| "grad_norm": 0.0012279003225779102, |
| "learning_rate": 4.5648146632648605e-09, |
| "loss": 0.0002, |
| "step": 3580 |
| }, |
| { |
| "epoch": 3.971776425013835, |
| "grad_norm": 0.001971040506480486, |
| "learning_rate": 2.217521984076987e-09, |
| "loss": 0.0, |
| "step": 3590 |
| }, |
| { |
| "epoch": 3.9828444936358607, |
| "grad_norm": 0.0005086949695235262, |
| "learning_rate": 7.08451118375253e-10, |
| "loss": 0.0, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.993912562257886, |
| "grad_norm": 0.001967814841098194, |
| "learning_rate": 3.772858782724598e-11, |
| "loss": 0.0, |
| "step": 3610 |
| }, |
| { |
| "epoch": 3.9961261759822913, |
| "step": 3612, |
| "total_flos": 3601561862275072.0, |
| "train_loss": 0.03465571804643162, |
| "train_runtime": 106374.8788, |
| "train_samples_per_second": 8.697, |
| "train_steps_per_second": 0.034 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3612, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 800, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3601561862275072.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|