| {"epoch": 0, "step": 3010, "loss": 1.7905868291854858, "aux_loss": null, "grad_norm": null, "learning_rate": 4.050073637702504e-06, "steps_per_sec": 0.06838124832090296, "eta_seconds": 128397.77300930182, "timestamp": 1762287277.2086208} |
| {"epoch": 0, "step": 3020, "loss": 0.9879558086395264, "aux_loss": null, "grad_norm": null, "learning_rate": 4.050073637702504e-06, "steps_per_sec": 0.06842042936399181, "eta_seconds": 128178.09068902834, "timestamp": 1762287398.157034} |
| {"epoch": 0, "step": 3030, "loss": 0.6812519431114197, "aux_loss": null, "grad_norm": null, "learning_rate": 4.042709867452136e-06, "steps_per_sec": 0.06844821648401, "eta_seconds": 127979.9598875801, "timestamp": 1762287526.3344395} |
| {"epoch": 0, "step": 3040, "loss": 0.9578639268875122, "aux_loss": null, "grad_norm": null, "learning_rate": 4.0353460972017674e-06, "steps_per_sec": 0.06848944736948888, "eta_seconds": 127756.9076122814, "timestamp": 1762287645.6932392} |
| {"epoch": 0, "step": 3050, "loss": 1.2844058275222778, "aux_loss": null, "grad_norm": null, "learning_rate": 4.0353460972017674e-06, "steps_per_sec": 0.06852618111869326, "eta_seconds": 127542.49335537269, "timestamp": 1762287767.829508} |
| {"epoch": 0, "step": 3060, "loss": 0.6171494722366333, "aux_loss": null, "grad_norm": null, "learning_rate": 4.0279823269514e-06, "steps_per_sec": 0.06856313771463715, "eta_seconds": 127327.89500291907, "timestamp": 1762287889.689558} |
| {"epoch": 0, "step": 3070, "loss": 0.7861765623092651, "aux_loss": null, "grad_norm": null, "learning_rate": 4.0279823269514e-06, "steps_per_sec": 0.06859477006086992, "eta_seconds": 127123.39428008883, "timestamp": 1762288014.8921301} |
| {"epoch": 0, "step": 3080, "loss": 1.2514609098434448, "aux_loss": null, "grad_norm": null, "learning_rate": 4.020618556701032e-06, "steps_per_sec": 0.06863161591426403, "eta_seconds": 126909.4408454655, "timestamp": 1762288136.5698197} |
| {"epoch": 0, "step": 3090, "loss": 1.1160598993301392, "aux_loss": null, "grad_norm": null, "learning_rate": 4.013254786450663e-06, "steps_per_sec": 0.06864109737159138, "eta_seconds": 126746.22541219286, "timestamp": 1762288276.0564067} |
| {"epoch": 0, "step": 3100, "loss": 0.7542535066604614, "aux_loss": null, "grad_norm": null, "learning_rate": 4.013254786450663e-06, "steps_per_sec": 0.06867648982718624, "eta_seconds": 126535.29645832279, "timestamp": 1762288398.466877} |
| {"epoch": 0, "step": 3110, "loss": 1.4957629442214966, "aux_loss": null, "grad_norm": null, "learning_rate": 4.005891016200295e-06, "steps_per_sec": 0.06870665340307405, "eta_seconds": 126334.1986558123, "timestamp": 1762288524.1964452} |
| {"epoch": 0, "step": 3120, "loss": 1.3032238483428955, "aux_loss": null, "grad_norm": null, "learning_rate": 3.998527245949927e-06, "steps_per_sec": 0.06873399499477868, "eta_seconds": 126138.45595121612, "timestamp": 1762288651.6790466} |
| {"epoch": 0, "step": 3130, "loss": 0.94720059633255, "aux_loss": null, "grad_norm": null, "learning_rate": 3.998527245949927e-06, "steps_per_sec": 0.06877214929799776, "eta_seconds": 125923.06752658272, "timestamp": 1762288771.9033043} |
| {"epoch": 0, "step": 3140, "loss": 1.679433822631836, "aux_loss": null, "grad_norm": null, "learning_rate": 3.991163475699558e-06, "steps_per_sec": 0.06879600028407853, "eta_seconds": 125734.0537862907, "timestamp": 1762288901.481811} |
| {"epoch": 0, "step": 3150, "loss": 1.5829728841781616, "aux_loss": null, "grad_norm": null, "learning_rate": 3.991163475699558e-06, "steps_per_sec": 0.06883004030041952, "eta_seconds": 125526.58639003208, "timestamp": 1762289024.1951203} |
| {"epoch": 0, "step": 3160, "loss": 1.3609462976455688, "aux_loss": null, "grad_norm": null, "learning_rate": 3.98379970544919e-06, "steps_per_sec": 0.06886988088198823, "eta_seconds": 125308.76907988139, "timestamp": 1762289142.9213157} |
| {"epoch": 0, "step": 3170, "loss": 0.7934466600418091, "aux_loss": null, "grad_norm": null, "learning_rate": 3.976435935198822e-06, "steps_per_sec": 0.06889894333965514, "eta_seconds": 125110.77212759973, "timestamp": 1762289268.7075028} |
| {"epoch": 0, "step": 3180, "loss": 0.8451188802719116, "aux_loss": null, "grad_norm": null, "learning_rate": 3.976435935198822e-06, "steps_per_sec": 0.0689376106877141, "eta_seconds": 124895.53835863438, "timestamp": 1762289387.95931} |
| {"epoch": 0, "step": 3190, "loss": 1.6783169507980347, "aux_loss": null, "grad_norm": null, "learning_rate": 3.969072164948453e-06, "steps_per_sec": 0.06896646681339474, "eta_seconds": 124698.28305500056, "timestamp": 1762289513.6567216} |
| {"epoch": 0, "step": 3200, "loss": 1.5136433839797974, "aux_loss": null, "grad_norm": null, "learning_rate": 3.961708394698086e-06, "steps_per_sec": 0.06898757944388659, "eta_seconds": 124515.16735685691, "timestamp": 1762289644.4546373} |
| {"epoch": 0, "step": 3210, "loss": 0.6686916947364807, "aux_loss": null, "grad_norm": null, "learning_rate": 3.961708394698086e-06, "steps_per_sec": 0.06902853133868946, "eta_seconds": 124296.42980381707, "timestamp": 1762289761.8039842} |
| {"epoch": 0, "step": 3220, "loss": 1.2500436305999756, "aux_loss": null, "grad_norm": null, "learning_rate": 3.9543446244477176e-06, "steps_per_sec": 0.06905081210068038, "eta_seconds": 124111.5019401134, "timestamp": 1762289891.6194885} |
| {"epoch": 0, "step": 3230, "loss": 1.924522042274475, "aux_loss": null, "grad_norm": null, "learning_rate": 3.9543446244477176e-06, "steps_per_sec": 0.06908885280907952, "eta_seconds": 123898.42430376934, "timestamp": 1762290010.68455} |
| {"epoch": 0, "step": 3240, "loss": 1.3434759378433228, "aux_loss": null, "grad_norm": null, "learning_rate": 3.946980854197349e-06, "steps_per_sec": 0.06911246394821859, "eta_seconds": 123711.40473888982, "timestamp": 1762290139.4049168} |
| {"epoch": 0, "step": 3250, "loss": 0.5981472730636597, "aux_loss": null, "grad_norm": null, "learning_rate": 3.939617083946981e-06, "steps_per_sec": 0.06912514833568827, "eta_seconds": 123544.03868368884, "timestamp": 1762290275.3852117} |
| {"epoch": 0, "step": 3260, "loss": 1.5535166263580322, "aux_loss": null, "grad_norm": null, "learning_rate": 3.939617083946981e-06, "steps_per_sec": 0.0691583649263252, "eta_seconds": 123340.10512086366, "timestamp": 1762290397.4812913} |
| {"epoch": 0, "step": 3270, "loss": 0.9952253699302673, "aux_loss": null, "grad_norm": null, "learning_rate": 3.9322533136966126e-06, "steps_per_sec": 0.06919233090087477, "eta_seconds": 123135.03373958869, "timestamp": 1762290518.865835} |
| {"epoch": 0, "step": 3280, "loss": 1.252375841140747, "aux_loss": null, "grad_norm": null, "learning_rate": 3.924889543446245e-06, "steps_per_sec": 0.06922020775123075, "eta_seconds": 122940.97744670075, "timestamp": 1762290644.3001163} |
| {"epoch": 0, "step": 3290, "loss": 0.9018387794494629, "aux_loss": null, "grad_norm": null, "learning_rate": 3.924889543446245e-06, "steps_per_sec": 0.06925687767502729, "eta_seconds": 122731.49303502224, "timestamp": 1762290763.6007388} |
| {"epoch": 0, "step": 3300, "loss": 0.9647537469863892, "aux_loss": null, "grad_norm": null, "learning_rate": 3.917525773195877e-06, "steps_per_sec": 0.06927627861833976, "eta_seconds": 122552.77231003587, "timestamp": 1762290894.6464443} |
| {"epoch": 0, "step": 3310, "loss": 0.9527402520179749, "aux_loss": null, "grad_norm": null, "learning_rate": 3.917525773195877e-06, "steps_per_sec": 0.06931039810556806, "eta_seconds": 122348.16465898725, "timestamp": 1762291015.4756951} |
| {"epoch": 0, "step": 3320, "loss": 0.5467164516448975, "aux_loss": null, "grad_norm": null, "learning_rate": 3.9101620029455084e-06, "steps_per_sec": 0.06934602243356425, "eta_seconds": 122141.1077775158, "timestamp": 1762291135.0647168} |
| {"epoch": 0, "step": 3330, "loss": 1.395045280456543, "aux_loss": null, "grad_norm": null, "learning_rate": 3.90279823269514e-06, "steps_per_sec": 0.06936377726145711, "eta_seconds": 121965.67623633308, "timestamp": 1762291267.0597827} |
| {"epoch": 0, "step": 3340, "loss": 1.1397160291671753, "aux_loss": null, "grad_norm": null, "learning_rate": 3.90279823269514e-06, "steps_per_sec": 0.06940190095231424, "eta_seconds": 121754.59006239555, "timestamp": 1762291384.7764711} |
| {"epoch": 0, "step": 3350, "loss": 0.9800203442573547, "aux_loss": null, "grad_norm": null, "learning_rate": 3.895434462444772e-06, "steps_per_sec": 0.06943071655829589, "eta_seconds": 121560.03017646448, "timestamp": 1762291508.8316185} |
| {"epoch": 0, "step": 3360, "loss": 1.1715285778045654, "aux_loss": null, "grad_norm": null, "learning_rate": 3.888070692194404e-06, "steps_per_sec": 0.06944562861480423, "eta_seconds": 121389.93005245712, "timestamp": 1762291642.4682097} |
| {"epoch": 0, "step": 3370, "loss": 2.462388753890991, "aux_loss": null, "grad_norm": null, "learning_rate": 3.888070692194404e-06, "steps_per_sec": 0.06948649046933471, "eta_seconds": 121174.63327228845, "timestamp": 1762291757.9290693} |
| {"epoch": 0, "step": 3380, "loss": 0.8288570642471313, "aux_loss": null, "grad_norm": null, "learning_rate": 3.880706921944036e-06, "steps_per_sec": 0.06950050411432677, "eta_seconds": 121006.31653211807, "timestamp": 1762291892.0337515} |
| {"epoch": 0, "step": 3390, "loss": 0.4193393290042877, "aux_loss": null, "grad_norm": null, "learning_rate": 3.880706921944036e-06, "steps_per_sec": 0.06953351722554127, "eta_seconds": 120805.04963891696, "timestamp": 1762292012.7602866} |
| {"epoch": 0, "step": 3400, "loss": 0.8877553343772888, "aux_loss": null, "grad_norm": null, "learning_rate": 3.873343151693668e-06, "steps_per_sec": 0.06955446254865338, "eta_seconds": 120624.89871345337, "timestamp": 1762292141.8505304} |
| {"epoch": 0, "step": 3410, "loss": 0.9264300465583801, "aux_loss": null, "grad_norm": null, "learning_rate": 3.865979381443299e-06, "steps_per_sec": 0.06957816856458479, "eta_seconds": 120440.07729552989, "timestamp": 1762292268.9192693} |
| {"epoch": 0, "step": 3420, "loss": 1.6867860555648804, "aux_loss": null, "grad_norm": null, "learning_rate": 3.865979381443299e-06, "steps_per_sec": 0.06960133823711936, "eta_seconds": 120256.30845609463, "timestamp": 1762292396.2797241} |
| {"epoch": 0, "step": 3430, "loss": 0.7585884928703308, "aux_loss": null, "grad_norm": null, "learning_rate": 3.858615611192931e-06, "steps_per_sec": 0.06963355219631534, "eta_seconds": 120057.06640429539, "timestamp": 1762292517.1568875} |
| {"epoch": 0, "step": 3440, "loss": 1.1949326992034912, "aux_loss": null, "grad_norm": null, "learning_rate": 3.8512518409425635e-06, "steps_per_sec": 0.06966302495638795, "eta_seconds": 119862.72495671068, "timestamp": 1762292639.865182} |
| {"epoch": 0, "step": 3450, "loss": 0.763168215751648, "aux_loss": null, "grad_norm": null, "learning_rate": 3.8512518409425635e-06, "steps_per_sec": 0.06969580173850017, "eta_seconds": 119662.87483558654, "timestamp": 1762292760.123095} |
| {"epoch": 0, "step": 3460, "loss": 1.3400377035140991, "aux_loss": null, "grad_norm": null, "learning_rate": 3.843888070692195e-06, "steps_per_sec": 0.06971939042033416, "eta_seconds": 119478.95628144355, "timestamp": 1762292886.8071306} |
| {"epoch": 0, "step": 3470, "loss": 0.8136744499206543, "aux_loss": null, "grad_norm": null, "learning_rate": 3.843888070692195e-06, "steps_per_sec": 0.0697557622900986, "eta_seconds": 119273.30053965983, "timestamp": 1762293004.287837} |
| {"epoch": 0, "step": 3480, "loss": 0.34827953577041626, "aux_loss": null, "grad_norm": null, "learning_rate": 3.836524300441827e-06, "steps_per_sec": 0.06977320968307026, "eta_seconds": 119100.15373732097, "timestamp": 1762293135.0879087} |
| {"epoch": 0, "step": 3490, "loss": 1.3931970596313477, "aux_loss": null, "grad_norm": null, "learning_rate": 3.8291605301914585e-06, "steps_per_sec": 0.06979942869663927, "eta_seconds": 118912.14806461062, "timestamp": 1762293259.702618} |
| {"epoch": 0, "step": 3500, "loss": 1.1523898839950562, "aux_loss": null, "grad_norm": null, "learning_rate": 3.8291605301914585e-06, "steps_per_sec": 0.0698303850404129, "eta_seconds": 118716.22926326888, "timestamp": 1762293380.7412844} |
| {"epoch": 0, "step": 3510, "loss": 1.018060326576233, "aux_loss": null, "grad_norm": null, "learning_rate": 3.82179675994109e-06, "steps_per_sec": 0.06984679994644605, "eta_seconds": 118545.15892422505, "timestamp": 1762293512.1325836} |
| {"epoch": 0, "step": 3520, "loss": 1.4063701629638672, "aux_loss": null, "grad_norm": null, "learning_rate": 3.814432989690722e-06, "steps_per_sec": 0.0698657323949578, "eta_seconds": 118369.90347784352, "timestamp": 1762293641.646556} |
| {"epoch": 0, "step": 3530, "loss": 0.6301552057266235, "aux_loss": null, "grad_norm": null, "learning_rate": 3.814432989690722e-06, "steps_per_sec": 0.06989347227647892, "eta_seconds": 118179.8490040066, "timestamp": 1762293764.7252922} |
| {"epoch": 0, "step": 3540, "loss": 1.3425190448760986, "aux_loss": null, "grad_norm": null, "learning_rate": 3.8070692194403536e-06, "steps_per_sec": 0.06992309534161445, "eta_seconds": 117986.76760080508, "timestamp": 1762293886.3429341} |
| {"epoch": 0, "step": 3550, "loss": 0.6646825075149536, "aux_loss": null, "grad_norm": null, "learning_rate": 3.8070692194403536e-06, "steps_per_sec": 0.06995016815810291, "eta_seconds": 117798.14426429641, "timestamp": 1762294009.7075508} |
| {"epoch": 0, "step": 3560, "loss": 0.8151065707206726, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7997054491899852e-06, "steps_per_sec": 0.06998077269316487, "eta_seconds": 117603.73147185665, "timestamp": 1762294130.4094715} |
| {"epoch": 0, "step": 3570, "loss": 0.8902726769447327, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7923416789396173e-06, "steps_per_sec": 0.0699938154262346, "eta_seconds": 117438.94728332006, "timestamp": 1762294263.7990925} |
| {"epoch": 0, "step": 3580, "loss": 1.2539348602294922, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7923416789396173e-06, "steps_per_sec": 0.07002761552752329, "eta_seconds": 117239.46243426186, "timestamp": 1762294381.9820871} |
| {"epoch": 0, "step": 3590, "loss": 1.65322744846344, "aux_loss": null, "grad_norm": null, "learning_rate": 3.784977908689249e-06, "steps_per_sec": 0.07003667809353721, "eta_seconds": 117081.50962055229, "timestamp": 1762294518.1492455} |
| {"epoch": 0, "step": 3600, "loss": 1.6736979484558105, "aux_loss": null, "grad_norm": null, "learning_rate": 3.777614138438881e-06, "steps_per_sec": 0.07006269653714273, "eta_seconds": 116895.30099170805, "timestamp": 1762294641.843035} |
| {"epoch": 0, "step": 3610, "loss": 1.3704173564910889, "aux_loss": null, "grad_norm": null, "learning_rate": 3.777614138438881e-06, "steps_per_sec": 0.07009425437721965, "eta_seconds": 116700.00733552944, "timestamp": 1762294761.374629} |
| {"epoch": 0, "step": 3620, "loss": 0.8424362540245056, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7702503681885128e-06, "steps_per_sec": 0.07010737227984727, "eta_seconds": 116535.53305903192, "timestamp": 1762294894.376253} |
| {"epoch": 0, "step": 3630, "loss": 1.129278302192688, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7702503681885128e-06, "steps_per_sec": 0.07013708869682815, "eta_seconds": 116343.5801459068, "timestamp": 1762295015.077068} |
| {"epoch": 0, "step": 3640, "loss": 1.287545084953308, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7628865979381445e-06, "steps_per_sec": 0.07015380957770637, "eta_seconds": 116173.30618335979, "timestamp": 1762295145.2851555} |
| {"epoch": 0, "step": 3650, "loss": 1.7461676597595215, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7555228276877766e-06, "steps_per_sec": 0.0701736157127404, "eta_seconds": 115998.01317523017, "timestamp": 1762295273.1440876} |
| {"epoch": 0, "step": 3660, "loss": 1.4394328594207764, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7555228276877766e-06, "steps_per_sec": 0.07020606239966902, "eta_seconds": 115801.96527356202, "timestamp": 1762295391.5435882} |
| {"epoch": 0, "step": 3670, "loss": 1.4037214517593384, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7481590574374082e-06, "steps_per_sec": 0.07021925567876465, "eta_seconds": 115637.79652047221, "timestamp": 1762295524.1594133} |
| {"epoch": 0, "step": 3680, "loss": 0.5572405457496643, "aux_loss": null, "grad_norm": null, "learning_rate": 3.74079528718704e-06, "steps_per_sec": 0.07023555864850359, "eta_seconds": 115468.57682996146, "timestamp": 1762295654.4052484} |
| {"epoch": 0, "step": 3690, "loss": 1.5607267618179321, "aux_loss": null, "grad_norm": null, "learning_rate": 3.74079528718704e-06, "steps_per_sec": 0.07025523300271996, "eta_seconds": 115293.90272873204, "timestamp": 1762295782.070967} |
| {"epoch": 0, "step": 3700, "loss": 1.1940839290618896, "aux_loss": null, "grad_norm": null, "learning_rate": 3.733431516936672e-06, "steps_per_sec": 0.070286086395862, "eta_seconds": 115101.01664269486, "timestamp": 1762295901.2909274} |
| {"epoch": 0, "step": 3710, "loss": 0.9461363554000854, "aux_loss": null, "grad_norm": null, "learning_rate": 3.733431516936672e-06, "steps_per_sec": 0.07031516771969991, "eta_seconds": 114911.19572109418, "timestamp": 1762296021.7353609} |
| {"epoch": 0, "step": 3720, "loss": 1.433383584022522, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7260677466863037e-06, "steps_per_sec": 0.07033653799466237, "eta_seconds": 114734.10875884179, "timestamp": 1762296147.878763} |
| {"epoch": 0, "step": 3730, "loss": 0.8512032628059387, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7187039764359358e-06, "steps_per_sec": 0.07036513012706808, "eta_seconds": 114545.37191141323, "timestamp": 1762296268.5039515} |
| {"epoch": 0, "step": 3740, "loss": 1.4598040580749512, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7187039764359358e-06, "steps_per_sec": 0.07039276475656296, "eta_seconds": 114358.34389853924, "timestamp": 1762296389.753616} |
| {"epoch": 0, "step": 3750, "loss": 1.4494574069976807, "aux_loss": null, "grad_norm": null, "learning_rate": 3.7113402061855674e-06, "steps_per_sec": 0.07041562088306046, "eta_seconds": 114179.21051000977, "timestamp": 1762296514.5219338} |
| {"epoch": 0, "step": 3760, "loss": 1.1962698698043823, "aux_loss": null, "grad_norm": null, "learning_rate": 3.703976435935199e-06, "steps_per_sec": 0.07042516061277283, "eta_seconds": 114021.7491892183, "timestamp": 1762296649.3028786} |
| {"epoch": 0, "step": 3770, "loss": 1.5250399112701416, "aux_loss": null, "grad_norm": null, "learning_rate": 3.703976435935199e-06, "steps_per_sec": 0.0704564189285061, "eta_seconds": 113829.23120373313, "timestamp": 1762296767.5479138} |
| {"epoch": 0, "step": 3780, "loss": 1.583367943763733, "aux_loss": null, "grad_norm": null, "learning_rate": 3.696612665684831e-06, "steps_per_sec": 0.0704699305025119, "eta_seconds": 113665.50162433443, "timestamp": 1762296899.1928306} |
| {"epoch": 0, "step": 3790, "loss": 0.7978602647781372, "aux_loss": null, "grad_norm": null, "learning_rate": 3.696612665684831e-06, "steps_per_sec": 0.07050281010021164, "eta_seconds": 113470.6544126244, "timestamp": 1762297016.0160213} |
| {"epoch": 0, "step": 3800, "loss": 0.7679869532585144, "aux_loss": null, "grad_norm": null, "learning_rate": 3.689248895434463e-06, "steps_per_sec": 0.07052043045651238, "eta_seconds": 113300.4995612891, "timestamp": 1762297144.3865619} |
| {"epoch": 0, "step": 3810, "loss": 0.9753299951553345, "aux_loss": null, "grad_norm": null, "learning_rate": 3.681885125184095e-06, "steps_per_sec": 0.07053432999650762, "eta_seconds": 113136.39755839625, "timestamp": 1762297275.5431848} |
| {"epoch": 0, "step": 3820, "loss": 0.9845657348632812, "aux_loss": null, "grad_norm": null, "learning_rate": 3.681885125184095e-06, "steps_per_sec": 0.0705613132955525, "eta_seconds": 112951.412434983, "timestamp": 1762297396.6076772} |
| {"epoch": 0, "step": 3830, "loss": 0.6917205452919006, "aux_loss": null, "grad_norm": null, "learning_rate": 3.6745213549337267e-06, "steps_per_sec": 0.07057283547852643, "eta_seconds": 112791.27366820951, "timestamp": 1762297529.3841302} |
| {"epoch": 0, "step": 3840, "loss": 0.7711517810821533, "aux_loss": null, "grad_norm": null, "learning_rate": 3.667157584683358e-06, "steps_per_sec": 0.07058926054550711, "eta_seconds": 112623.36421380751, "timestamp": 1762297658.5022984} |
| {"epoch": 0, "step": 3850, "loss": 1.886587381362915, "aux_loss": null, "grad_norm": null, "learning_rate": 3.667157584683358e-06, "steps_per_sec": 0.07061685694196343, "eta_seconds": 112437.7428257038, "timestamp": 1762297778.8536937} |
| {"epoch": 0, "step": 3860, "loss": 0.579103410243988, "aux_loss": null, "grad_norm": null, "learning_rate": 3.6597938144329896e-06, "steps_per_sec": 0.07063458677862151, "eta_seconds": 112267.94636533668, "timestamp": 1762297906.7425408} |
| {"epoch": 0, "step": 3870, "loss": 1.3592575788497925, "aux_loss": null, "grad_norm": null, "learning_rate": 3.6597938144329896e-06, "steps_per_sec": 0.07066528582532311, "eta_seconds": 112077.66171890081, "timestamp": 1762298024.5139048} |
| {"epoch": 0, "step": 3880, "loss": 1.4555362462997437, "aux_loss": null, "grad_norm": null, "learning_rate": 3.6524300441826217e-06, "steps_per_sec": 0.07068932377855539, "eta_seconds": 111898.0855550299, "timestamp": 1762298147.3550842} |
| {"epoch": 0, "step": 3890, "loss": 0.721537709236145, "aux_loss": null, "grad_norm": null, "learning_rate": 3.6450662739322534e-06, "steps_per_sec": 0.07070377075333394, "eta_seconds": 111733.78613088308, "timestamp": 1762298277.5749328} |
| {"epoch": 0, "step": 3900, "loss": 0.5428478121757507, "aux_loss": null, "grad_norm": null, "learning_rate": 3.6450662739322534e-06, "steps_per_sec": 0.07072726557201495, "eta_seconds": 111555.28120857934, "timestamp": 1762298400.6867373} |
| {"epoch": 0, "step": 3910, "loss": 0.9135788679122925, "aux_loss": null, "grad_norm": null, "learning_rate": 3.6377025036818854e-06, "steps_per_sec": 0.07075416821820592, "eta_seconds": 111371.53044747941, "timestamp": 1762298521.055439} |
| {"epoch": 0, "step": 3920, "loss": 1.6048812866210938, "aux_loss": null, "grad_norm": null, "learning_rate": 3.630338733431517e-06, "steps_per_sec": 0.070770737314295, "eta_seconds": 111204.15440988116, "timestamp": 1762298649.4184585} |
| {"epoch": 0, "step": 3000, "loss": 0.3086067736148834, "aux_loss": null, "grad_norm": null, "learning_rate": 0.0, "steps_per_sec": 123.1166376318561, "eta_seconds": 71.39571197748184, "timestamp": 1762331122.6745183} |
| {"epoch": 0, "step": 3000, "loss": 0.3086067736148834, "aux_loss": null, "grad_norm": null, "learning_rate": 0.0, "steps_per_sec": 130.28459519207033, "eta_seconds": 67.46768477916717, "timestamp": 1762331399.848369} |
| {"epoch": 0, "step": 3000, "loss": 0.3086067736148834, "aux_loss": null, "grad_norm": null, "learning_rate": 0.0, "steps_per_sec": 127.07859230151433, "eta_seconds": 38.24404970169067, "timestamp": 1762331875.4295733} |
|
|
|
|
|
|
|
|
|
|