Lanni-ni's picture
add remote code + model files
efc5e67 verified
{"step": 20971520, "train/token_count": 20971520, "train/batch_count": 10, "train/flop_count": 0, "train/total_time": 97.20068372006062, "train/update_time": 96.98365108307917, "train/lr": 0.0009000000000000001, "train/loss": 8.76564884185791, "train/global_grad_norm": 1.4130078554153442}
{"step": 41943040, "train/token_count": 41943040, "train/batch_count": 20, "train/flop_count": 0, "train/total_time": 190.58031271304935, "train/update_time": 190.21444558817893, "train/lr": 0.0009997960964140947, "train/loss": 7.474191188812256, "train/global_grad_norm": 0.8227528929710388}
{"step": 62914560, "train/token_count": 62914560, "train/batch_count": 30, "train/flop_count": 0, "train/total_time": 454.2337091190275, "train/update_time": 283.44298765144777, "train/lr": 0.0009990914580222257, "train/loss": 7.089620590209961, "train/global_grad_norm": 0.43369758129119873}
{"step": 83886080, "train/token_count": 83886080, "train/batch_count": 40, "train/flop_count": 0, "train/total_time": 547.6119900090853, "train/update_time": 376.67628215253353, "train/lr": 0.0009978842768382998, "train/loss": 6.745384216308594, "train/global_grad_norm": 0.30941468477249146}
{"step": 104857600, "train/token_count": 104857600, "train/batch_count": 50, "train/flop_count": 0, "train/total_time": 811.2910252050497, "train/update_time": 469.82893142174, "train/lr": 0.0009961757683914405, "train/loss": 6.485077857971191, "train/global_grad_norm": 0.3194890022277832}
{"step": 125829120, "train/token_count": 125829120, "train/batch_count": 60, "train/flop_count": 0, "train/total_time": 904.5979733880376, "train/update_time": 562.9825718456414, "train/lr": 0.00099396765300483, "train/loss": 6.212056636810303, "train/global_grad_norm": 0.42947375774383545}
{"step": 146800640, "train/token_count": 146800640, "train/batch_count": 70, "train/flop_count": 0, "train/total_time": 1167.0270900100004, "train/update_time": 656.2661910566967, "train/lr": 0.0009912621540634887, "train/loss": 6.0160932540893555, "train/global_grad_norm": 0.40116411447525024}
{"step": 167772160, "train/token_count": 167772160, "train/batch_count": 80, "train/flop_count": 0, "train/total_time": 1260.3716635780875, "train/update_time": 749.4701656188117, "train/lr": 0.000988061995775515, "train/loss": 5.953179359436035, "train/global_grad_norm": 1.0869561433792114}
{"step": 188743680, "train/token_count": 188743680, "train/batch_count": 90, "train/flop_count": 0, "train/total_time": 1522.4740127840778, "train/update_time": 842.772364483797, "train/lr": 0.0009843704004290394, "train/loss": 5.676019191741943, "train/global_grad_norm": 0.280754029750824}
{"step": 209715200, "train/token_count": 209715200, "train/batch_count": 100, "train/flop_count": 0, "train/total_time": 1615.8070381759899, "train/update_time": 935.9601778858341, "train/lr": 0.0009801910851476522, "train/loss": 5.544191837310791, "train/global_grad_norm": 0.46917492151260376}
{"step": 230686720, "train/token_count": 230686720, "train/batch_count": 110, "train/flop_count": 0, "train/total_time": 1880.450380234979, "train/update_time": 1029.0689218619373, "train/lr": 0.0009755282581475768, "train/loss": 5.429840087890625, "train/global_grad_norm": 0.7274503111839294}
{"step": 251658240, "train/token_count": 251658240, "train/batch_count": 120, "train/flop_count": 0, "train/total_time": 1973.9993880910333, "train/update_time": 1122.4686564019648, "train/lr": 0.0009703866145003512, "train/loss": 5.2810444831848145, "train/global_grad_norm": 0.2969799339771271}
{"step": 272629760, "train/token_count": 272629760, "train/batch_count": 130, "train/flop_count": 0, "train/total_time": 2237.285542534082, "train/update_time": 1215.6973058142466, "train/lr": 0.0009647713314052896, "train/loss": 5.193550109863281, "train/global_grad_norm": 0.3795139491558075}
{"step": 293601280, "train/token_count": 293601280, "train/batch_count": 140, "train/flop_count": 0, "train/total_time": 2330.883187837084, "train/update_time": 1309.140196379507, "train/lr": 0.0009586880629764817, "train/loss": 5.119898319244385, "train/global_grad_norm": 0.43129095435142517}
{"step": 314572800, "train/token_count": 314572800, "train/batch_count": 150, "train/flop_count": 0, "train/total_time": 2593.152709970018, "train/update_time": 1402.5526540756691, "train/lr": 0.0009521429345495787, "train/loss": 4.974904537200928, "train/global_grad_norm": 0.3256843686103821}
{"step": 335544320, "train/token_count": 335544320, "train/batch_count": 160, "train/flop_count": 0, "train/total_time": 2686.8860215520253, "train/update_time": 1496.1389014086453, "train/lr": 0.0009451425365140996, "train/loss": 4.9173712730407715, "train/global_grad_norm": 0.39894500374794006}
{"step": 356515840, "train/token_count": 356515840, "train/batch_count": 170, "train/flop_count": 0, "train/total_time": 2949.368387905997, "train/update_time": 1589.4967352375388, "train/lr": 0.000937693917677468, "train/loss": 4.806858539581299, "train/global_grad_norm": 0.3854577839374542}
{"step": 377487360, "train/token_count": 377487360, "train/batch_count": 180, "train/flop_count": 0, "train/total_time": 3042.959401597036, "train/update_time": 1682.9467192575103, "train/lr": 0.0009298045781674596, "train/loss": 4.758651256561279, "train/global_grad_norm": 0.3088115155696869}
{"step": 398458880, "train/token_count": 398458880, "train/batch_count": 190, "train/flop_count": 0, "train/total_time": 3305.444267996005, "train/update_time": 1776.3336466513574, "train/lr": 0.0009214824618802108, "train/loss": 4.72238826751709, "train/global_grad_norm": 0.37908124923706055}
{"step": 419430400, "train/token_count": 419430400, "train/batch_count": 200, "train/flop_count": 0, "train/total_time": 3399.0298356669955, "train/update_time": 1869.7792980262311, "train/lr": 0.000912735948481387, "train/loss": 4.6122002601623535, "train/global_grad_norm": 0.4956414997577667}
{"step": 440401920, "train/token_count": 440401920, "train/batch_count": 210, "train/flop_count": 0, "train/total_time": 3663.3731433160137, "train/update_time": 1963.0694206270855, "train/lr": 0.0009035738449685707, "train/loss": 4.518575191497803, "train/global_grad_norm": 0.33904722332954407}
{"step": 461373440, "train/token_count": 461373440, "train/batch_count": 220, "train/flop_count": 0, "train/total_time": 3756.8895379800815, "train/update_time": 2056.446317301248, "train/lr": 0.0008940053768033609, "train/loss": 4.449559211730957, "train/global_grad_norm": 0.5069319605827332}
{"step": 482344960, "train/token_count": 482344960, "train/batch_count": 230, "train/flop_count": 0, "train/total_time": 4019.5361906640464, "train/update_time": 2149.77923363226, "train/lr": 0.0008840401786221159, "train/loss": 4.342103004455566, "train/global_grad_norm": 0.29293933510780334}
{"step": 503316480, "train/token_count": 503316480, "train/batch_count": 240, "train/flop_count": 0, "train/total_time": 4112.964794773026, "train/update_time": 2243.0702600192744, "train/lr": 0.0008736882845346905, "train/loss": 4.284090518951416, "train/global_grad_norm": 0.36110353469848633}
{"step": 524288000, "train/token_count": 524288000, "train/batch_count": 250, "train/flop_count": 0, "train/total_time": 4376.393655605032, "train/update_time": 2336.412492537289, "train/lr": 0.0008629601180209381, "train/loss": 4.201655387878418, "train/global_grad_norm": 0.3496641218662262}
{"step": 545259520, "train/token_count": 545259520, "train/batch_count": 260, "train/flop_count": 0, "train/total_time": 4470.06449480704, "train/update_time": 2429.936827432248, "train/lr": 0.0008518664814351503, "train/loss": 4.142204761505127, "train/global_grad_norm": 0.39031898975372314}
{"step": 566231040, "train/token_count": 566231040, "train/batch_count": 270, "train/flop_count": 0, "train/total_time": 4734.132506006979, "train/update_time": 2523.346581262187, "train/lr": 0.0008404185451290017, "train/loss": 4.099599838256836, "train/global_grad_norm": 0.37471985816955566}
{"step": 587202560, "train/token_count": 587202560, "train/batch_count": 280, "train/flop_count": 0, "train/total_time": 4827.830736390082, "train/update_time": 2616.8884120163275, "train/lr": 0.0008286278362039527, "train/loss": 4.009082317352295, "train/global_grad_norm": 0.34484514594078064}
{"step": 608174080, "train/token_count": 608174080, "train/batch_count": 290, "train/flop_count": 0, "train/total_time": 5090.937317650998, "train/update_time": 2710.3318849493517, "train/lr": 0.0008165062269044352, "train/loss": 3.97619891166687, "train/global_grad_norm": 0.40576431155204773}
{"step": 629145600, "train/token_count": 629145600, "train/batch_count": 300, "train/flop_count": 0, "train/total_time": 5184.687426269054, "train/update_time": 2803.9371257049497, "train/lr": 0.0008040659226635089, "train/loss": 3.9234979152679443, "train/global_grad_norm": 0.44408538937568665}
{"step": 650117120, "train/token_count": 650117120, "train/batch_count": 310, "train/flop_count": 0, "train/total_time": 5450.616348118056, "train/update_time": 2897.275741666206, "train/lr": 0.0007913194498130252, "train/loss": 3.9410295486450195, "train/global_grad_norm": 0.43244609236717224}
{"step": 671088640, "train/token_count": 671088640, "train/batch_count": 320, "train/flop_count": 0, "train/total_time": 5543.975930526038, "train/update_time": 2990.483508925303, "train/lr": 0.000778279642970672, "train/loss": 3.8507468700408936, "train/global_grad_norm": 0.48972949385643005}
{"step": 692060160, "train/token_count": 692060160, "train/batch_count": 330, "train/flop_count": 0, "train/total_time": 5806.160792221082, "train/update_time": 3083.611473838333, "train/lr": 0.0007649596321166025, "train/loss": 3.8703322410583496, "train/global_grad_norm": 0.3615987300872803}
{"step": 713031680, "train/token_count": 713031680, "train/batch_count": 340, "train/flop_count": 0, "train/total_time": 5899.68232584605, "train/update_time": 3176.9836701630848, "train/lr": 0.0007513728293726579, "train/loss": 3.8107964992523193, "train/global_grad_norm": 0.36133530735969543}
{"step": 734003200, "train/token_count": 734003200, "train/batch_count": 350, "train/flop_count": 0, "train/total_time": 6162.256411690032, "train/update_time": 3270.438694642042, "train/lr": 0.0007375329154974975, "train/loss": 3.7763936519622803, "train/global_grad_norm": 0.40389642119407654}
{"step": 754974720, "train/token_count": 754974720, "train/batch_count": 360, "train/flop_count": 0, "train/total_time": 6255.94410856301, "train/update_time": 3363.9935755479382, "train/lr": 0.0007234538261112341, "train/loss": 3.7278685569763184, "train/global_grad_norm": 0.42608577013015747}
{"step": 775946240, "train/token_count": 775946240, "train/batch_count": 370, "train/flop_count": 0, "train/total_time": 6518.670226996997, "train/update_time": 3457.366311661084, "train/lr": 0.0007091497376634464, "train/loss": 3.7271111011505127, "train/global_grad_norm": 0.27613717317581177}
{"step": 796917760, "train/token_count": 796917760, "train/batch_count": 380, "train/flop_count": 0, "train/total_time": 6612.111398222041, "train/update_time": 3550.66807099001, "train/lr": 0.0006946350531586958, "train/loss": 3.699812412261963, "train/global_grad_norm": 0.3713103234767914}
{"step": 817889280, "train/token_count": 817889280, "train/batch_count": 390, "train/flop_count": 0, "train/total_time": 6874.636505312985, "train/update_time": 3643.886076678871, "train/lr": 0.0006799243876539214, "train/loss": 3.6990554332733154, "train/global_grad_norm": 0.2907034456729889}
{"step": 838860800, "train/token_count": 838860800, "train/batch_count": 400, "train/flop_count": 0, "train/total_time": 6968.171832425054, "train/update_time": 3737.2908036899753, "train/lr": 0.0006650325535423166, "train/loss": 3.581249475479126, "train/global_grad_norm": 0.4145627021789551}
{"step": 859832320, "train/token_count": 859832320, "train/batch_count": 410, "train/flop_count": 0, "train/total_time": 7233.323964387993, "train/update_time": 3830.6057550550904, "train/lr": 0.0006499745456385053, "train/loss": 3.623004913330078, "train/global_grad_norm": 0.3520248532295227}
{"step": 880803840, "train/token_count": 880803840, "train/batch_count": 420, "train/flop_count": 0, "train/total_time": 7326.922928820015, "train/update_time": 3924.0719714582665, "train/lr": 0.0006347655260800339, "train/loss": 3.6238791942596436, "train/global_grad_norm": 0.379281610250473}
{"step": 901775360, "train/token_count": 901775360, "train/batch_count": 430, "train/flop_count": 0, "train/total_time": 7591.2802984340815, "train/update_time": 4017.3633328623837, "train/lr": 0.0006194208090603844, "train/loss": 3.62347412109375, "train/global_grad_norm": 0.36047083139419556}
{"step": 922746880, "train/token_count": 922746880, "train/batch_count": 440, "train/flop_count": 0, "train/total_time": 7684.845341357053, "train/update_time": 4110.783207958448, "train/lr": 0.0006039558454088796, "train/loss": 3.6568057537078857, "train/global_grad_norm": 0.49488669633865356}
{"step": 943718400, "train/token_count": 943718400, "train/batch_count": 450, "train/flop_count": 0, "train/total_time": 7948.907636292046, "train/update_time": 4204.296549194376, "train/lr": 0.0005883862070330078, "train/loss": 3.6046764850616455, "train/global_grad_norm": 0.3740111291408539}
{"step": 964689920, "train/token_count": 964689920, "train/batch_count": 460, "train/flop_count": 0, "train/total_time": 8042.599143097992, "train/update_time": 4297.83159168635, "train/lr": 0.0005727275712388317, "train/loss": 3.572643280029297, "train/global_grad_norm": 0.3081468641757965}
{"step": 985661440, "train/token_count": 985661440, "train/batch_count": 470, "train/flop_count": 0, "train/total_time": 8305.275031784084, "train/update_time": 4391.248921588529, "train/lr": 0.0005569957049452703, "train/loss": 3.553377389907837, "train/global_grad_norm": 0.33262893557548523}
{"step": 1006632960, "train/token_count": 1006632960, "train/batch_count": 480, "train/flop_count": 0, "train/total_time": 8398.81305668701, "train/update_time": 4484.626386589487, "train/lr": 0.0005412064488081482, "train/loss": 3.564133644104004, "train/global_grad_norm": 0.4224238991737366}
{"step": 1027604480, "train/token_count": 1027604480, "train/batch_count": 490, "train/flop_count": 0, "train/total_time": 8662.347046800074, "train/update_time": 4577.979232876445, "train/lr": 0.0005253757012699972, "train/loss": 3.5546345710754395, "train/global_grad_norm": 0.2766055762767792}
{"step": 1048576000, "train/token_count": 1048576000, "train/batch_count": 500, "train/flop_count": 0, "train/total_time": 8755.968361919047, "train/update_time": 4671.454424570431, "train/lr": 0.0005095194025516734, "train/loss": 3.536339044570923, "train/global_grad_norm": 0.3986065685749054}
{"step": 1069547520, "train/token_count": 1069547520, "train/batch_count": 510, "train/flop_count": 0, "train/total_time": 9021.309424316045, "train/update_time": 4764.801301084226, "train/lr": 0.0004936535186019053, "train/loss": 3.5047459602355957, "train/global_grad_norm": 0.244685560464859}
{"step": 1090519040, "train/token_count": 1090519040, "train/batch_count": 520, "train/flop_count": 0, "train/total_time": 9114.625705051003, "train/update_time": 4857.963408218231, "train/lr": 0.00047779402502093696, "train/loss": 3.520270586013794, "train/global_grad_norm": 0.4062435030937195}
{"step": 1111490560, "train/token_count": 1111490560, "train/batch_count": 530, "train/flop_count": 0, "train/total_time": 9398.054264329025, "train/update_time": 4970.6049944133265, "train/lr": 0.0004619568909744525, "train/loss": 3.4980483055114746, "train/global_grad_norm": 0.3901892602443695}
{"step": 1132462080, "train/token_count": 1132462080, "train/batch_count": 540, "train/flop_count": 0, "train/total_time": 9606.105255214032, "train/update_time": 5178.118606777163, "train/lr": 0.00044615806311398067, "train/loss": 3.511883497238159, "train/global_grad_norm": 0.34147942066192627}
{"step": 1153433600, "train/token_count": 1153433600, "train/batch_count": 550, "train/flop_count": 0, "train/total_time": 10003.42233235808, "train/update_time": 5362.020449607167, "train/lr": 0.0004304134495199673, "train/loss": 3.468095064163208, "train/global_grad_norm": 0.3075481653213501}
{"step": 1174405120, "train/token_count": 1174405120, "train/batch_count": 560, "train/flop_count": 0, "train/total_time": 10150.524502051994, "train/update_time": 5508.941767138196, "train/lr": 0.0004147389036836882, "train/loss": 3.5062997341156006, "train/global_grad_norm": 0.2764611542224884}
{"step": 1195376640, "train/token_count": 1195376640, "train/batch_count": 570, "train/flop_count": 0, "train/total_time": 10566.198982721078, "train/update_time": 5678.196010601241, "train/lr": 0.0003991502085441259, "train/loss": 3.4300529956817627, "train/global_grad_norm": 0.29730668663978577}
{"step": 1216348160, "train/token_count": 1216348160, "train/batch_count": 580, "train/flop_count": 0, "train/total_time": 10760.646769667044, "train/update_time": 5872.135236821254, "train/lr": 0.0003836630605958888, "train/loss": 3.489516258239746, "train/global_grad_norm": 0.32831132411956787}
{"step": 1237319680, "train/token_count": 1237319680, "train/batch_count": 590, "train/flop_count": 0, "train/total_time": 11156.295638620039, "train/update_time": 6039.766134388163, "train/lr": 0.00036829305408417155, "train/loss": 3.5055065155029297, "train/global_grad_norm": 0.3446820080280304}
{"step": 1258291200, "train/token_count": 1258291200, "train/batch_count": 600, "train/flop_count": 0, "train/total_time": 11310.365665178979, "train/update_time": 6193.65441437799, "train/lr": 0.000353055665302672, "train/loss": 3.5088326930999756, "train/global_grad_norm": 0.2981908321380615}
{"step": 1279262720, "train/token_count": 1279262720, "train/batch_count": 610, "train/flop_count": 0, "train/total_time": 11736.789658116992, "train/update_time": 6379.89381142694, "train/lr": 0.0003379662370102746, "train/loss": 3.466792345046997, "train/global_grad_norm": 0.33163660764694214}
{"step": 1300234240, "train/token_count": 1300234240, "train/batch_count": 620, "train/flop_count": 0, "train/total_time": 11914.080722624087, "train/update_time": 6556.690899457899, "train/lr": 0.00032303996298219405, "train/loss": 3.4406988620758057, "train/global_grad_norm": 0.28660744428634644}
{"step": 1321205760, "train/token_count": 1321205760, "train/batch_count": 630, "train/flop_count": 0, "train/total_time": 12308.019568020012, "train/update_time": 6703.663481310825, "train/lr": 0.00030829187271113034, "train/loss": 3.4547548294067383, "train/global_grad_norm": 0.29186901450157166}
{"step": 1342177280, "train/token_count": 1342177280, "train/batch_count": 640, "train/flop_count": 0, "train/total_time": 12483.778021455975, "train/update_time": 6879.228759005899, "train/lr": 0.0002937368162738445, "train/loss": 3.4471235275268555, "train/global_grad_norm": 0.27723199129104614}
{"step": 1363148800, "train/token_count": 1363148800, "train/batch_count": 650, "train/flop_count": 0, "train/total_time": 12905.641336273053, "train/update_time": 7080.2444989720825, "train/lr": 0.0002793894493783894, "train/loss": 3.409696578979492, "train/global_grad_norm": 0.2166973203420639}
{"step": 1384120320, "train/token_count": 1384120320, "train/batch_count": 660, "train/flop_count": 0, "train/total_time": 13068.73947558098, "train/update_time": 7242.851195934112, "train/lr": 0.00026526421860705474, "train/loss": 3.4526355266571045, "train/global_grad_norm": 0.3342931270599365}
{"step": 1405091840, "train/token_count": 1405091840, "train/batch_count": 670, "train/flop_count": 0, "train/total_time": 13461.124407541007, "train/update_time": 7389.911005128175, "train/lr": 0.0002513753468698824, "train/loss": 3.3823962211608887, "train/global_grad_norm": 0.24517573416233063}
{"step": 1426063360, "train/token_count": 1426063360, "train/batch_count": 680, "train/flop_count": 0, "train/total_time": 13651.783316303976, "train/update_time": 7580.372131183161, "train/lr": 0.00023773681908340283, "train/loss": 3.412515163421631, "train/global_grad_norm": 0.3234269320964813}
{"step": 1447034880, "train/token_count": 1447034880, "train/batch_count": 690, "train/flop_count": 0, "train/total_time": 14071.216295699007, "train/update_time": 7793.8012771270005, "train/lr": 0.00022436236808900823, "train/loss": 3.4163782596588135, "train/global_grad_norm": 0.22176125645637512}
{"step": 1468006400, "train/token_count": 1468006400, "train/batch_count": 700, "train/flop_count": 0, "train/total_time": 14221.494554364006, "train/update_time": 7943.600337238051, "train/lr": 0.00021126546082514682, "train/loss": 3.4145960807800293, "train/global_grad_norm": 0.24960394203662872}
{"step": 1488977920, "train/token_count": 1488977920, "train/batch_count": 710, "train/flop_count": 0, "train/total_time": 14616.199775510002, "train/update_time": 8090.416964606033, "train/lr": 0.00019845928476725522, "train/loss": 3.4254729747772217, "train/global_grad_norm": 0.21741226315498352}
{"step": 1509949440, "train/token_count": 1509949440, "train/batch_count": 720, "train/flop_count": 0, "train/total_time": 14821.623726570979, "train/update_time": 8295.638736715424, "train/lr": 0.0001859567346490913, "train/loss": 3.3944356441497803, "train/global_grad_norm": 0.2173576056957245}
{"step": 1530920960, "train/token_count": 1530920960, "train/batch_count": 730, "train/flop_count": 0, "train/total_time": 15232.941850902047, "train/update_time": 8512.26620015048, "train/lr": 0.00017377039947882782, "train/loss": 3.4161322116851807, "train/global_grad_norm": 0.19619792699813843}
{"step": 1551892480, "train/token_count": 1551892480, "train/batch_count": 740, "train/flop_count": 0, "train/total_time": 15380.238736488041, "train/update_time": 8659.387788099353, "train/lr": 0.00016191254986299043, "train/loss": 3.377074718475342, "train/global_grad_norm": 0.18498826026916504}
{"step": 1572864000, "train/token_count": 1572864000, "train/batch_count": 750, "train/flop_count": 0, "train/total_time": 15773.814038521028, "train/update_time": 8806.402406750247, "train/lr": 0.00015039512565099468, "train/loss": 3.3878912925720215, "train/global_grad_norm": 0.23870347440242767}
{"step": 1593835520, "train/token_count": 1593835520, "train/batch_count": 760, "train/flop_count": 0, "train/total_time": 15988.252661015023, "train/update_time": 9020.618778260308, "train/lr": 0.00013922972391273224, "train/loss": 3.3374383449554443, "train/global_grad_norm": 0.19240827858448029}
{"step": 1614807040, "train/token_count": 1614807040, "train/batch_count": 770, "train/flop_count": 0, "train/total_time": 16392.140212937025, "train/update_time": 9237.03149983345, "train/lr": 0.00012842758726130281, "train/loss": 3.408134937286377, "train/global_grad_norm": 0.20149269700050354}
{"step": 1635778560, "train/token_count": 1635778560, "train/batch_count": 780, "train/flop_count": 0, "train/total_time": 16539.33981846203, "train/update_time": 9384.053210339276, "train/lr": 0.00011799959253265679, "train/loss": 3.354210615158081, "train/global_grad_norm": 0.18948209285736084}
{"step": 1656750080, "train/token_count": 1656750080, "train/batch_count": 790, "train/flop_count": 0, "train/total_time": 16932.73472051299, "train/update_time": 9530.820382265258, "train/lr": 0.00010795623983354214, "train/loss": 3.369267225265503, "train/global_grad_norm": 0.1685154139995575}
{"step": 1677721600, "train/token_count": 1677721600, "train/batch_count": 800, "train/flop_count": 0, "train/total_time": 17149.437690322986, "train/update_time": 9747.28814456216, "train/lr": 9.830764196878872e-05, "train/loss": 3.3556768894195557, "train/global_grad_norm": 0.1737906038761139}
{"step": 1698693120, "train/token_count": 1698693120, "train/batch_count": 810, "train/flop_count": 0, "train/total_time": 17549.297566386987, "train/update_time": 9954.213539150194, "train/lr": 8.906351425856951e-05, "train/loss": 3.3356785774230957, "train/global_grad_norm": 0.1702912300825119}
{"step": 1719664640, "train/token_count": 1719664640, "train/batch_count": 820, "train/flop_count": 0, "train/total_time": 17696.358517175075, "train/update_time": 10101.101109507843, "train/lr": 8.02331647558977e-05, "train/loss": 3.3448057174682617, "train/global_grad_norm": 0.16285409033298492}
{"step": 1740636160, "train/token_count": 1740636160, "train/batch_count": 830, "train/flop_count": 0, "train/total_time": 18089.859054057975, "train/update_time": 10248.098395384499, "train/lr": 7.182548487420554e-05, "train/loss": 3.379516363143921, "train/global_grad_norm": 0.1631559580564499}
{"step": 1761607680, "train/token_count": 1761607680, "train/batch_count": 840, "train/flop_count": 0, "train/total_time": 18306.998820198, "train/update_time": 10465.017247352516, "train/lr": 6.384894043444556e-05, "train/loss": 3.309666872024536, "train/global_grad_norm": 0.15072140097618103}
{"step": 1782579200, "train/token_count": 1782579200, "train/batch_count": 850, "train/flop_count": 0, "train/total_time": 18706.20748156903, "train/update_time": 10662.364285671501, "train/lr": 5.6311563140726166e-05, "train/loss": 3.3882648944854736, "train/global_grad_norm": 0.15730835497379303}
{"step": 1803550720, "train/token_count": 1803550720, "train/batch_count": 860, "train/flop_count": 0, "train/total_time": 18853.344159234082, "train/update_time": 10809.319413250545, "train/lr": 4.922094249306547e-05, "train/loss": 3.371917724609375, "train/global_grad_norm": 0.1638113260269165}
{"step": 1824522240, "train/token_count": 1824522240, "train/batch_count": 870, "train/flop_count": 0, "train/total_time": 19254.14253187005, "train/update_time": 10965.228872205596, "train/lr": 4.2584218145409916e-05, "train/loss": 3.314046621322632, "train/global_grad_norm": 0.14649039506912231}
{"step": 1845493760, "train/token_count": 1845493760, "train/batch_count": 880, "train/flop_count": 0, "train/total_time": 19462.212121990975, "train/update_time": 11172.7729124584, "train/lr": 3.6408072716606236e-05, "train/loss": 3.328913688659668, "train/global_grad_norm": 0.14682640135288239}
{"step": 1866465280, "train/token_count": 1866465280, "train/batch_count": 890, "train/flop_count": 0, "train/total_time": 19858.031892596046, "train/update_time": 11356.353174015298, "train/lr": 3.069872506157217e-05, "train/loss": 3.39823842048645, "train/global_grad_norm": 0.14888012409210205}
{"step": 1887436800, "train/token_count": 1887436800, "train/batch_count": 900, "train/flop_count": 0, "train/total_time": 20004.831071814988, "train/update_time": 11502.97223422851, "train/lr": 2.5461924009435368e-05, "train/loss": 3.309504508972168, "train/global_grad_norm": 0.13238337635993958}
{"step": 1908408320, "train/token_count": 1908408320, "train/batch_count": 910, "train/flop_count": 0, "train/total_time": 20424.197218824993, "train/update_time": 11674.226087162388, "train/lr": 2.0702942574950812e-05, "train/loss": 3.359825849533081, "train/global_grad_norm": 0.14266924560070038}
{"step": 1929379840, "train/token_count": 1929379840, "train/batch_count": 920, "train/flop_count": 0, "train/total_time": 20616.578735099058, "train/update_time": 11866.318235529237, "train/lr": 1.642657264902142e-05, "train/loss": 3.3724734783172607, "train/global_grad_norm": 0.1331755369901657}
{"step": 1950351360, "train/token_count": 1950351360, "train/batch_count": 930, "train/flop_count": 0, "train/total_time": 21010.326182788005, "train/update_time": 12031.375137096155, "train/lr": 1.2637120173670358e-05, "train/loss": 3.365408182144165, "train/global_grad_norm": 0.12957783043384552}
{"step": 1971322880, "train/token_count": 1971322880, "train/batch_count": 940, "train/flop_count": 0, "train/total_time": 21166.986645585042, "train/update_time": 12187.849889185163, "train/lr": 9.338400806321978e-06, "train/loss": 3.319882869720459, "train/global_grad_norm": 0.12174921482801437}
{"step": 1992294400, "train/token_count": 1992294400, "train/batch_count": 950, "train/flop_count": 0, "train/total_time": 21589.57625599904, "train/update_time": 12373.759245545021, "train/lr": 6.533736077758867e-06, "train/loss": 3.326748847961426, "train/global_grad_norm": 0.120763398706913}
{"step": 2013265920, "train/token_count": 2013265920, "train/batch_count": 960, "train/flop_count": 0, "train/total_time": 21767.68956994405, "train/update_time": 12551.347988351132, "train/lr": 4.2259500476214406e-06, "train/loss": 3.3314108848571777, "train/global_grad_norm": 0.12533509731292725}
{"step": 2034237440, "train/token_count": 2034237440, "train/batch_count": 970, "train/flop_count": 0, "train/total_time": 22159.499484215048, "train/update_time": 12698.275234204368, "train/lr": 2.417366460819359e-06, "train/loss": 3.3585853576660156, "train/global_grad_norm": 0.11902791261672974}
{"step": 2055208960, "train/token_count": 2055208960, "train/batch_count": 980, "train/flop_count": 0, "train/total_time": 22335.74923630804, "train/update_time": 12874.329680579598, "train/lr": 1.1098064077174619e-06, "train/loss": 3.3340089321136475, "train/global_grad_norm": 0.1221228837966919}