fullrun / eval_metrics.jsonl
huiting tang
Add files using upload-large-folder tool
fad46a0 verified
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 500, "epoch": 0, "val_loss": 6.963184833526611, "val_ppl": 1056.994553986817, "is_best": true, "timestamp": "2026-05-04T20:27:54.606992"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 1000, "epoch": 0, "val_loss": 6.023243689537049, "val_ppl": 412.91579647088025, "is_best": true, "timestamp": "2026-05-04T20:31:37.250611"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 1500, "epoch": 0, "val_loss": 5.245665073394775, "val_ppl": 189.74196560843794, "is_best": true, "timestamp": "2026-05-04T20:35:21.206122"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 2000, "epoch": 0, "val_loss": 4.888807916641236, "val_ppl": 132.79517673832615, "is_best": true, "timestamp": "2026-05-04T20:39:04.688350"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 2500, "epoch": 0, "val_loss": 4.763248586654663, "val_ppl": 117.12580185174781, "is_best": true, "timestamp": "2026-05-04T20:42:48.855070"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 3000, "epoch": 0, "val_loss": 4.52814793586731, "val_ppl": 92.58692526746094, "is_best": true, "timestamp": "2026-05-04T20:46:32.211049"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 3500, "epoch": 0, "val_loss": 4.4403280258178714, "val_ppl": 84.8027546048359, "is_best": true, "timestamp": "2026-05-04T20:50:16.662061"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 4000, "epoch": 0, "val_loss": 4.41436402797699, "val_ppl": 82.62927428570266, "is_best": true, "timestamp": "2026-05-04T20:54:00.610007"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 4500, "epoch": 0, "val_loss": 4.288580441474915, "val_ppl": 72.86296181151131, "is_best": true, "timestamp": "2026-05-04T20:57:45.016097"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 5000, "epoch": 0, "val_loss": 4.244233429431915, "val_ppl": 69.70230792129031, "is_best": true, "timestamp": "2026-05-04T21:01:29.803259"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 5500, "epoch": 0, "val_loss": 4.301356756687165, "val_ppl": 73.79985425110237, "is_best": false, "timestamp": "2026-05-04T21:05:13.866524"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 6000, "epoch": 0, "val_loss": 4.1867230296134945, "val_ppl": 65.80679033671602, "is_best": true, "timestamp": "2026-05-04T21:08:59.436201"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 6500, "epoch": 0, "val_loss": 4.182440400123596, "val_ppl": 65.52556685360001, "is_best": true, "timestamp": "2026-05-04T21:12:44.931234"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 7000, "epoch": 0, "val_loss": 4.069165503978729, "val_ppl": 58.50811742297585, "is_best": true, "timestamp": "2026-05-04T21:16:30.165588"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 7500, "epoch": 0, "val_loss": 4.176451122760772, "val_ppl": 65.13428896489941, "is_best": false, "timestamp": "2026-05-04T21:20:14.872220"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 8000, "epoch": 0, "val_loss": 4.200441122055054, "val_ppl": 66.7157543414742, "is_best": false, "timestamp": "2026-05-04T21:23:58.945632"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 8500, "epoch": 0, "val_loss": 3.970609200000763, "val_ppl": 53.016818849833456, "is_best": true, "timestamp": "2026-05-04T21:27:41.551048"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 9000, "epoch": 0, "val_loss": 3.967017316818237, "val_ppl": 52.82673022225897, "is_best": true, "timestamp": "2026-05-04T21:31:25.829859"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 9500, "epoch": 0, "val_loss": 3.9729262471199034, "val_ppl": 53.13980374308927, "is_best": false, "timestamp": "2026-05-04T21:35:09.699423"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 10000, "epoch": 0, "val_loss": 3.9935925126075746, "val_ppl": 54.24943147312409, "is_best": false, "timestamp": "2026-05-04T21:38:53.054619"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 10500, "epoch": 0, "val_loss": 4.043687236309052, "val_ppl": 57.036261732483744, "is_best": false, "timestamp": "2026-05-04T21:42:36.693184"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 11000, "epoch": 0, "val_loss": 4.042443215847015, "val_ppl": 56.96535157180087, "is_best": false, "timestamp": "2026-05-04T21:46:20.657926"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 11500, "epoch": 0, "val_loss": 4.129199600219726, "val_ppl": 62.12817565041951, "is_best": false, "timestamp": "2026-05-04T21:50:04.220375"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 12000, "epoch": 0, "val_loss": 3.9572473049163817, "val_ppl": 52.31312548686545, "is_best": true, "timestamp": "2026-05-04T21:53:47.679877"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 12500, "epoch": 0, "val_loss": 3.9124502658843996, "val_ppl": 50.02136758725016, "is_best": true, "timestamp": "2026-05-04T21:57:33.076374"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 13000, "epoch": 0, "val_loss": 3.902157258987427, "val_ppl": 49.50913801929759, "is_best": true, "timestamp": "2026-05-04T22:01:16.966381"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 13500, "epoch": 0, "val_loss": 3.9133140563964846, "val_ppl": 50.06459423667062, "is_best": false, "timestamp": "2026-05-04T22:05:01.873700"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 14000, "epoch": 0, "val_loss": 3.8813209891319276, "val_ppl": 48.48822520221194, "is_best": true, "timestamp": "2026-05-04T22:08:48.266269"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 14500, "epoch": 0, "val_loss": 3.9586165308952332, "val_ppl": 52.38480303750091, "is_best": false, "timestamp": "2026-05-04T22:12:34.779947"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 15000, "epoch": 0, "val_loss": 3.981242668628693, "val_ppl": 53.58357950532751, "is_best": false, "timestamp": "2026-05-04T22:16:18.288974"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 15500, "epoch": 0, "val_loss": 3.9764000415802, "val_ppl": 53.32472149613234, "is_best": false, "timestamp": "2026-05-04T22:20:01.878554"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 16000, "epoch": 0, "val_loss": 3.884390819072723, "val_ppl": 48.63730451470712, "is_best": false, "timestamp": "2026-05-04T22:23:46.166016"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 16500, "epoch": 0, "val_loss": 3.8880950450897216, "val_ppl": 48.81780217917117, "is_best": false, "timestamp": "2026-05-04T22:27:30.137373"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 17000, "epoch": 0, "val_loss": 3.9025931477546694, "val_ppl": 49.53072320046329, "is_best": false, "timestamp": "2026-05-04T22:31:12.857082"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 17500, "epoch": 0, "val_loss": 3.840912938117981, "val_ppl": 46.56796871323203, "is_best": true, "timestamp": "2026-05-04T22:34:56.919072"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 18000, "epoch": 0, "val_loss": 3.917324936389923, "val_ppl": 50.26580055353312, "is_best": false, "timestamp": "2026-05-04T22:38:42.037340"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 18500, "epoch": 0, "val_loss": 3.780675542354584, "val_ppl": 43.84565132771732, "is_best": true, "timestamp": "2026-05-04T22:42:25.551739"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 19000, "epoch": 0, "val_loss": 3.885009217262268, "val_ppl": 48.66739103753017, "is_best": false, "timestamp": "2026-05-04T22:46:10.373906"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 19500, "epoch": 0, "val_loss": 3.865347349643707, "val_ppl": 47.719845025447334, "is_best": false, "timestamp": "2026-05-04T22:49:53.544826"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 20000, "epoch": 0, "val_loss": 3.9186877608299255, "val_ppl": 50.334350715331894, "is_best": false, "timestamp": "2026-05-04T22:53:35.825741"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 20500, "epoch": 0, "val_loss": 3.881156694889069, "val_ppl": 48.48025952034054, "is_best": false, "timestamp": "2026-05-04T22:57:19.162225"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 21000, "epoch": 0, "val_loss": 3.8103327989578246, "val_ppl": 45.16546738591788, "is_best": false, "timestamp": "2026-05-04T23:01:02.308628"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 21500, "epoch": 0, "val_loss": 3.9301170110702515, "val_ppl": 50.912934697654116, "is_best": false, "timestamp": "2026-05-04T23:04:45.538060"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 22000, "epoch": 0, "val_loss": 3.7983683824539183, "val_ppl": 44.62830872520921, "is_best": false, "timestamp": "2026-05-04T23:08:27.690976"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 22500, "epoch": 0, "val_loss": 3.849896454811096, "val_ppl": 46.98819757788166, "is_best": false, "timestamp": "2026-05-04T23:12:08.483085"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 23000, "epoch": 0, "val_loss": 3.8871836066246033, "val_ppl": 48.7733280272877, "is_best": false, "timestamp": "2026-05-04T23:15:48.740565"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 23500, "epoch": 0, "val_loss": 3.753203237056732, "val_ppl": 42.65750548735234, "is_best": true, "timestamp": "2026-05-04T23:19:27.010926"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 24000, "epoch": 0, "val_loss": 3.869513010978699, "val_ppl": 47.919044349187736, "is_best": false, "timestamp": "2026-05-04T23:23:07.701143"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 24500, "epoch": 0, "val_loss": 3.8887292981147765, "val_ppl": 48.84877483909317, "is_best": false, "timestamp": "2026-05-04T23:26:47.717650"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 25000, "epoch": 0, "val_loss": 3.769439470767975, "val_ppl": 43.355755855007644, "is_best": false, "timestamp": "2026-05-04T23:30:27.035726"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 25500, "epoch": 0, "val_loss": 3.805223262310028, "val_ppl": 44.93528134788892, "is_best": false, "timestamp": "2026-05-04T23:34:07.758379"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 26000, "epoch": 0, "val_loss": 3.9492916226387025, "val_ppl": 51.89859002418288, "is_best": false, "timestamp": "2026-05-04T23:37:48.044118"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 26500, "epoch": 0, "val_loss": 3.974674201011658, "val_ppl": 53.23277089732805, "is_best": false, "timestamp": "2026-05-04T23:41:27.485880"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 27000, "epoch": 0, "val_loss": 3.8572953581809997, "val_ppl": 47.337148044775134, "is_best": false, "timestamp": "2026-05-04T23:45:06.826959"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 27500, "epoch": 0, "val_loss": 3.8848944664001466, "val_ppl": 48.66180673285943, "is_best": false, "timestamp": "2026-05-04T23:48:46.632746"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 28000, "epoch": 0, "val_loss": 3.832193064689636, "val_ppl": 46.163667210993054, "is_best": false, "timestamp": "2026-05-04T23:52:27.265508"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 28500, "epoch": 0, "val_loss": 3.773273742198944, "val_ppl": 43.52231269907017, "is_best": false, "timestamp": "2026-05-04T23:56:06.835699"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 29000, "epoch": 0, "val_loss": 3.8240302324295046, "val_ppl": 45.788374747617034, "is_best": false, "timestamp": "2026-05-04T23:59:46.994539"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 29500, "epoch": 0, "val_loss": 3.949233388900757, "val_ppl": 51.895567863288385, "is_best": false, "timestamp": "2026-05-05T00:03:26.586071"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 30000, "epoch": 0, "val_loss": 3.9040127277374266, "val_ppl": 49.60108595461253, "is_best": false, "timestamp": "2026-05-05T00:07:06.255833"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 30500, "epoch": 0, "val_loss": 3.710973834991455, "val_ppl": 40.89361077155776, "is_best": true, "timestamp": "2026-05-05T00:10:45.977140"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 31000, "epoch": 0, "val_loss": 3.7265238165855408, "val_ppl": 41.53447547002938, "is_best": false, "timestamp": "2026-05-05T00:14:27.059691"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 31500, "epoch": 0, "val_loss": 3.701988971233368, "val_ppl": 40.52783294287655, "is_best": true, "timestamp": "2026-05-05T00:18:05.628207"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 32000, "epoch": 0, "val_loss": 3.8078968644142153, "val_ppl": 45.05558115590273, "is_best": false, "timestamp": "2026-05-05T00:21:46.507808"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 32500, "epoch": 0, "val_loss": 3.8319401144981384, "val_ppl": 46.15199157927069, "is_best": false, "timestamp": "2026-05-05T00:25:25.954415"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 33000, "epoch": 0, "val_loss": 3.7400962829589846, "val_ppl": 42.10204367919633, "is_best": false, "timestamp": "2026-05-05T00:29:04.606309"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 33500, "epoch": 0, "val_loss": 3.743556320667267, "val_ppl": 42.24797064874599, "is_best": false, "timestamp": "2026-05-05T00:32:44.230910"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 34000, "epoch": 0, "val_loss": 3.7966277599334717, "val_ppl": 44.55069525346696, "is_best": false, "timestamp": "2026-05-05T00:36:27.445535"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 34500, "epoch": 0, "val_loss": 3.8384269952774046, "val_ppl": 46.45234717861651, "is_best": false, "timestamp": "2026-05-05T00:40:12.253865"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 35000, "epoch": 0, "val_loss": 3.612187314033508, "val_ppl": 37.04699767454925, "is_best": true, "timestamp": "2026-05-05T00:43:56.592096"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 35500, "epoch": 0, "val_loss": 3.8184075355529785, "val_ppl": 45.531643034617545, "is_best": false, "timestamp": "2026-05-05T00:47:43.514747"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 36000, "epoch": 0, "val_loss": 3.7444037795066833, "val_ppl": 42.28378924016693, "is_best": false, "timestamp": "2026-05-05T00:51:26.623633"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 36500, "epoch": 0, "val_loss": 3.830995798110962, "val_ppl": 46.1084300684829, "is_best": false, "timestamp": "2026-05-05T00:55:08.337363"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 37000, "epoch": 0, "val_loss": 3.677153217792511, "val_ppl": 39.53368992153608, "is_best": false, "timestamp": "2026-05-05T00:58:52.170450"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 37500, "epoch": 0, "val_loss": 3.739810216426849, "val_ppl": 42.09000141609155, "is_best": false, "timestamp": "2026-05-05T01:02:34.096647"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 38000, "epoch": 0, "val_loss": 3.7296661019325255, "val_ppl": 41.66519391345922, "is_best": false, "timestamp": "2026-05-05T01:06:16.836005"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 38500, "epoch": 0, "val_loss": 3.767585837841034, "val_ppl": 43.275464636595224, "is_best": false, "timestamp": "2026-05-05T01:09:58.217304"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 39000, "epoch": 0, "val_loss": 3.77775297164917, "val_ppl": 43.71769638135246, "is_best": false, "timestamp": "2026-05-05T01:13:40.082830"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 39500, "epoch": 0, "val_loss": 3.831667184829712, "val_ppl": 46.13939705030023, "is_best": false, "timestamp": "2026-05-05T01:17:23.936934"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 40000, "epoch": 0, "val_loss": 3.844462442398071, "val_ppl": 46.73355561935861, "is_best": false, "timestamp": "2026-05-05T01:21:08.814789"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 40500, "epoch": 0, "val_loss": 3.721152687072754, "val_ppl": 41.31198646654307, "is_best": false, "timestamp": "2026-05-05T01:24:53.178640"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 41000, "epoch": 0, "val_loss": 3.887613821029663, "val_ppl": 48.79431552982699, "is_best": false, "timestamp": "2026-05-05T01:28:39.392230"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 41500, "epoch": 0, "val_loss": 3.8255509734153748, "val_ppl": 45.858059978936, "is_best": false, "timestamp": "2026-05-05T01:32:35.306234"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 42000, "epoch": 0, "val_loss": 3.6802701473236086, "val_ppl": 39.657105886640835, "is_best": false, "timestamp": "2026-05-05T01:36:21.447124"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 42500, "epoch": 0, "val_loss": 3.919534695148468, "val_ppl": 50.376998661809736, "is_best": false, "timestamp": "2026-05-05T01:39:58.229277"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 43000, "epoch": 0, "val_loss": 3.849488317966461, "val_ppl": 46.969023876200346, "is_best": false, "timestamp": "2026-05-05T01:43:35.127810"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 43500, "epoch": 0, "val_loss": 3.81555757522583, "val_ppl": 45.40206439303897, "is_best": false, "timestamp": "2026-05-05T01:47:11.254590"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 44000, "epoch": 0, "val_loss": 3.770134484767914, "val_ppl": 43.38589918610998, "is_best": false, "timestamp": "2026-05-05T01:50:47.579975"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 44500, "epoch": 0, "val_loss": 3.8434589862823487, "val_ppl": 46.68668406786842, "is_best": false, "timestamp": "2026-05-05T01:54:24.221919"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 45000, "epoch": 0, "val_loss": 3.750814938545227, "val_ppl": 42.55574819225895, "is_best": false, "timestamp": "2026-05-05T01:58:05.517077"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 45500, "epoch": 0, "val_loss": 3.747899663448334, "val_ppl": 42.43186714073363, "is_best": false, "timestamp": "2026-05-05T02:01:44.523465"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 46000, "epoch": 0, "val_loss": 3.6734747171401976, "val_ppl": 39.38853236214147, "is_best": false, "timestamp": "2026-05-05T02:05:23.036056"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 46500, "epoch": 0, "val_loss": 3.8030235409736632, "val_ppl": 44.836544886915966, "is_best": false, "timestamp": "2026-05-05T02:09:02.516817"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 47000, "epoch": 0, "val_loss": 3.634922742843628, "val_ppl": 37.89892482087495, "is_best": false, "timestamp": "2026-05-05T02:12:41.277940"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 47500, "epoch": 0, "val_loss": 3.764600372314453, "val_ppl": 43.14645989419981, "is_best": false, "timestamp": "2026-05-05T02:16:20.219040"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 48000, "epoch": 0, "val_loss": 3.5797553420066834, "val_ppl": 35.864765172109244, "is_best": true, "timestamp": "2026-05-05T02:20:00.957892"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 48500, "epoch": 0, "val_loss": 3.748921608924866, "val_ppl": 42.475252360307486, "is_best": false, "timestamp": "2026-05-05T02:23:56.182059"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 49000, "epoch": 0, "val_loss": 3.740371787548065, "val_ppl": 42.11364458341741, "is_best": false, "timestamp": "2026-05-05T02:27:51.969067"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 49500, "epoch": 0, "val_loss": 3.7604124903678895, "val_ppl": 42.966145445098626, "is_best": false, "timestamp": "2026-05-05T02:31:36.100279"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 50000, "epoch": 0, "val_loss": 3.774665880203247, "val_ppl": 43.58294395836165, "is_best": false, "timestamp": "2026-05-05T02:35:18.644511"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 50500, "epoch": 0, "val_loss": 3.6694130897521973, "val_ppl": 39.22887527361407, "is_best": false, "timestamp": "2026-05-05T02:39:02.584135"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 51000, "epoch": 0, "val_loss": 3.6590145587921143, "val_ppl": 38.82306616549242, "is_best": false, "timestamp": "2026-05-05T02:42:46.534493"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 51500, "epoch": 0, "val_loss": 3.7529546499252318, "val_ppl": 42.64690269833933, "is_best": false, "timestamp": "2026-05-05T02:46:31.771492"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 52000, "epoch": 0, "val_loss": 3.8498271465301515, "val_ppl": 46.98494101953738, "is_best": false, "timestamp": "2026-05-05T02:50:13.574460"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 52500, "epoch": 0, "val_loss": 3.811490166187286, "val_ppl": 45.217770678994, "is_best": false, "timestamp": "2026-05-05T02:53:53.221245"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 53000, "epoch": 0, "val_loss": 3.8117419838905335, "val_ppl": 45.229158747950855, "is_best": false, "timestamp": "2026-05-05T02:57:33.337646"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 53500, "epoch": 0, "val_loss": 3.7191628217697144, "val_ppl": 41.22986291257675, "is_best": false, "timestamp": "2026-05-05T03:01:13.036547"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 54000, "epoch": 0, "val_loss": 3.8724397897720335, "val_ppl": 48.059498230445314, "is_best": false, "timestamp": "2026-05-05T03:04:53.502088"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 54500, "epoch": 0, "val_loss": 3.7079468250274656, "val_ppl": 40.77001256516523, "is_best": false, "timestamp": "2026-05-05T03:08:32.886643"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 55000, "epoch": 0, "val_loss": 3.6920026898384095, "val_ppl": 40.12512472427156, "is_best": false, "timestamp": "2026-05-05T03:12:13.766270"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 55500, "epoch": 0, "val_loss": 3.6871882915496825, "val_ppl": 39.93241066581467, "is_best": false, "timestamp": "2026-05-05T03:15:53.648800"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 56000, "epoch": 0, "val_loss": 3.781884014606476, "val_ppl": 43.89866960982203, "is_best": false, "timestamp": "2026-05-05T03:19:33.198311"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 56500, "epoch": 0, "val_loss": 3.6616137504577635, "val_ppl": 38.92410600958132, "is_best": false, "timestamp": "2026-05-05T03:23:13.145572"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 57000, "epoch": 0, "val_loss": 3.531203866004944, "val_ppl": 34.165073037065355, "is_best": true, "timestamp": "2026-05-05T03:26:52.867561"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 57500, "epoch": 0, "val_loss": 3.795089507102966, "val_ppl": 44.48221770178893, "is_best": false, "timestamp": "2026-05-05T03:30:32.851411"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 58000, "epoch": 0, "val_loss": 3.8297982692718504, "val_ppl": 46.05324694203715, "is_best": false, "timestamp": "2026-05-05T03:34:12.555698"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 58500, "epoch": 0, "val_loss": 3.7228244066238405, "val_ppl": 41.38110628038839, "is_best": false, "timestamp": "2026-05-05T03:37:52.723264"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 59000, "epoch": 0, "val_loss": 3.644485902786255, "val_ppl": 38.26309684282455, "is_best": false, "timestamp": "2026-05-05T03:41:31.937413"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 59500, "epoch": 0, "val_loss": 3.7388701915740965, "val_ppl": 42.05045435922832, "is_best": false, "timestamp": "2026-05-05T03:45:11.938148"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 60000, "epoch": 0, "val_loss": 3.713189721107483, "val_ppl": 40.984326827005454, "is_best": false, "timestamp": "2026-05-05T03:48:51.631351"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 60500, "epoch": 0, "val_loss": 3.6464321613311768, "val_ppl": 38.337639237866654, "is_best": false, "timestamp": "2026-05-05T03:52:30.988339"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 61000, "epoch": 0, "val_loss": 3.7975163221359254, "val_ppl": 44.59029890991259, "is_best": false, "timestamp": "2026-05-05T03:56:10.730758"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 61500, "epoch": 0, "val_loss": 3.561064159870148, "val_ppl": 35.20063632551809, "is_best": false, "timestamp": "2026-05-05T03:59:50.605190"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 62000, "epoch": 0, "val_loss": 3.744683527946472, "val_ppl": 42.29561971893706, "is_best": false, "timestamp": "2026-05-05T04:03:29.947486"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 62500, "epoch": 0, "val_loss": 3.8274471282958986, "val_ppl": 45.945096454417694, "is_best": false, "timestamp": "2026-05-05T04:07:09.097393"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 63000, "epoch": 0, "val_loss": 3.7136154055595396, "val_ppl": 41.00177693156932, "is_best": false, "timestamp": "2026-05-05T04:10:48.503565"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 63500, "epoch": 0, "val_loss": 3.9260930895805357, "val_ppl": 50.70847668300017, "is_best": false, "timestamp": "2026-05-05T04:14:28.417501"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 64000, "epoch": 0, "val_loss": 3.763543725013733, "val_ppl": 43.10089338192266, "is_best": false, "timestamp": "2026-05-05T04:18:07.310113"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 64500, "epoch": 0, "val_loss": 3.699180567264557, "val_ppl": 40.41417409068178, "is_best": false, "timestamp": "2026-05-05T04:21:47.877858"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 65000, "epoch": 0, "val_loss": 3.66406387090683, "val_ppl": 39.01959168561332, "is_best": false, "timestamp": "2026-05-05T04:25:27.919481"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 65500, "epoch": 0, "val_loss": 3.763517451286316, "val_ppl": 43.099760975674855, "is_best": false, "timestamp": "2026-05-05T04:29:06.666028"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 66000, "epoch": 0, "val_loss": 3.7695255279541016, "val_ppl": 43.359487089906395, "is_best": false, "timestamp": "2026-05-05T04:32:46.250910"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 66500, "epoch": 0, "val_loss": 3.745917534828186, "val_ppl": 42.34784502130058, "is_best": false, "timestamp": "2026-05-05T04:36:24.325984"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 67000, "epoch": 0, "val_loss": 3.764746975898743, "val_ppl": 43.152785783557405, "is_best": false, "timestamp": "2026-05-05T04:40:03.143328"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 67500, "epoch": 0, "val_loss": 3.7942780017852784, "val_ppl": 44.446134788299645, "is_best": false, "timestamp": "2026-05-05T04:43:41.447776"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 68000, "epoch": 0, "val_loss": 3.7456096172332765, "val_ppl": 42.33480738207428, "is_best": false, "timestamp": "2026-05-05T04:47:26.547091"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 68500, "epoch": 0, "val_loss": 3.7102041721343992, "val_ppl": 40.862148587446384, "is_best": false, "timestamp": "2026-05-05T04:51:12.596034"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 69000, "epoch": 0, "val_loss": 3.6849011063575743, "val_ppl": 39.841182215404714, "is_best": false, "timestamp": "2026-05-05T04:54:50.978629"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 69500, "epoch": 0, "val_loss": 3.8018407583236695, "val_ppl": 44.78354434977481, "is_best": false, "timestamp": "2026-05-05T04:58:31.751510"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 70000, "epoch": 0, "val_loss": 3.5476924657821653, "val_ppl": 34.73307718049491, "is_best": false, "timestamp": "2026-05-05T05:02:15.143915"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 70500, "epoch": 0, "val_loss": 3.7260159254074097, "val_ppl": 41.51338583242364, "is_best": false, "timestamp": "2026-05-05T05:05:57.806064"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 71000, "epoch": 0, "val_loss": 3.680525517463684, "val_ppl": 39.66723442053362, "is_best": false, "timestamp": "2026-05-05T05:09:50.093021"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 71500, "epoch": 0, "val_loss": 3.6530403971672056, "val_ppl": 38.59182232525244, "is_best": false, "timestamp": "2026-05-05T05:13:36.452799"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 72000, "epoch": 0, "val_loss": 3.703611981868744, "val_ppl": 40.59366345413084, "is_best": false, "timestamp": "2026-05-05T05:17:21.129969"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 72500, "epoch": 0, "val_loss": 3.707802879810333, "val_ppl": 40.764144339215704, "is_best": false, "timestamp": "2026-05-05T05:21:04.119358"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 73000, "epoch": 0, "val_loss": 3.605094015598297, "val_ppl": 36.785142071818896, "is_best": false, "timestamp": "2026-05-05T05:24:49.800968"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 73500, "epoch": 0, "val_loss": 3.7087042331695557, "val_ppl": 40.8009038017955, "is_best": false, "timestamp": "2026-05-05T05:28:40.456657"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 74000, "epoch": 0, "val_loss": 3.7855979561805726, "val_ppl": 44.06200983420766, "is_best": false, "timestamp": "2026-05-05T05:32:26.653532"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 74500, "epoch": 0, "val_loss": 3.7297961592674254, "val_ppl": 41.67061312993448, "is_best": false, "timestamp": "2026-05-05T05:36:12.315038"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 75000, "epoch": 0, "val_loss": 3.627360689640045, "val_ppl": 37.61341202706721, "is_best": false, "timestamp": "2026-05-05T05:39:56.730234"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 75500, "epoch": 0, "val_loss": 3.7224854111671446, "val_ppl": 41.36708065081295, "is_best": false, "timestamp": "2026-05-05T05:43:37.293236"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 76000, "epoch": 0, "val_loss": 4.322610449790955, "val_ppl": 75.38516083859768, "is_best": false, "timestamp": "2026-05-05T05:47:18.155784"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 76500, "epoch": 0, "val_loss": 3.610724115371704, "val_ppl": 36.992830195683446, "is_best": false, "timestamp": "2026-05-05T05:50:59.607889"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 77000, "epoch": 0, "val_loss": 3.6297528624534605, "val_ppl": 37.70349751580724, "is_best": false, "timestamp": "2026-05-05T05:54:40.026459"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 77500, "epoch": 0, "val_loss": 3.623749053478241, "val_ppl": 37.47781108604975, "is_best": false, "timestamp": "2026-05-05T05:58:21.443705"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 78000, "epoch": 0, "val_loss": 3.7540708899497988, "val_ppl": 42.694533456788896, "is_best": false, "timestamp": "2026-05-05T06:02:03.155138"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 78500, "epoch": 0, "val_loss": 3.7519269824028014, "val_ppl": 42.60309837350014, "is_best": false, "timestamp": "2026-05-05T06:05:44.011486"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 79000, "epoch": 0, "val_loss": 3.7970654726028443, "val_ppl": 44.57019992561841, "is_best": false, "timestamp": "2026-05-05T06:09:27.588297"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 79500, "epoch": 0, "val_loss": 3.728978431224823, "val_ppl": 41.63655182936403, "is_best": false, "timestamp": "2026-05-05T06:13:09.663470"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 80000, "epoch": 0, "val_loss": 3.770101523399353, "val_ppl": 43.38446915106466, "is_best": false, "timestamp": "2026-05-05T06:16:51.884800"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 80500, "epoch": 0, "val_loss": 3.725416195392609, "val_ppl": 41.48849647311785, "is_best": false, "timestamp": "2026-05-05T06:20:32.722979"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 81000, "epoch": 0, "val_loss": 3.675900602340698, "val_ppl": 39.4842004128058, "is_best": false, "timestamp": "2026-05-05T06:24:14.544122"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 81500, "epoch": 0, "val_loss": 3.5948123574256896, "val_ppl": 36.4088674932415, "is_best": false, "timestamp": "2026-05-05T06:27:54.969070"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 82000, "epoch": 0, "val_loss": 3.713227462768555, "val_ppl": 40.9858736727679, "is_best": false, "timestamp": "2026-05-05T06:31:35.763374"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 82500, "epoch": 0, "val_loss": 3.4857298374176025, "val_ppl": 32.64624486830563, "is_best": true, "timestamp": "2026-05-05T06:35:16.948693"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 83000, "epoch": 0, "val_loss": 3.8382776975631714, "val_ppl": 46.445412467043155, "is_best": false, "timestamp": "2026-05-05T06:38:59.855119"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 83500, "epoch": 0, "val_loss": 3.532168471813202, "val_ppl": 34.198044764759906, "is_best": false, "timestamp": "2026-05-05T06:42:39.826377"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 84000, "epoch": 0, "val_loss": 3.6334757924079897, "val_ppl": 37.84412660980362, "is_best": false, "timestamp": "2026-05-05T06:46:20.770169"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 84500, "epoch": 0, "val_loss": 3.5922468185424803, "val_ppl": 36.31557884698738, "is_best": false, "timestamp": "2026-05-05T06:50:01.909748"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 85000, "epoch": 0, "val_loss": 3.74115891456604, "val_ppl": 42.14680642047121, "is_best": false, "timestamp": "2026-05-05T06:53:42.998967"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 85500, "epoch": 0, "val_loss": 3.6651230454444885, "val_ppl": 39.06094213840417, "is_best": false, "timestamp": "2026-05-05T06:57:24.641496"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 86000, "epoch": 0, "val_loss": 3.67037273645401, "val_ppl": 39.26653920352882, "is_best": false, "timestamp": "2026-05-05T07:01:05.092682"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 86500, "epoch": 0, "val_loss": 3.701577532291412, "val_ppl": 40.51116164401673, "is_best": false, "timestamp": "2026-05-05T07:04:46.858029"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 87000, "epoch": 0, "val_loss": 3.474389135837555, "val_ppl": 32.278104975454525, "is_best": true, "timestamp": "2026-05-05T07:08:29.025951"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 87500, "epoch": 0, "val_loss": 3.466200029850006, "val_ppl": 32.01485551276493, "is_best": true, "timestamp": "2026-05-05T07:12:12.186007"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 88000, "epoch": 0, "val_loss": 3.7414761543273927, "val_ppl": 42.16017918435586, "is_best": false, "timestamp": "2026-05-05T07:15:54.203134"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 88500, "epoch": 0, "val_loss": 4.189276897907257, "val_ppl": 65.97506699882547, "is_best": false, "timestamp": "2026-05-05T07:19:34.138532"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 89000, "epoch": 0, "val_loss": 3.596878969669342, "val_ppl": 36.48418830723153, "is_best": false, "timestamp": "2026-05-05T07:23:16.076802"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 89500, "epoch": 0, "val_loss": 3.556162965297699, "val_ppl": 35.02853325764114, "is_best": false, "timestamp": "2026-05-05T07:26:56.481670"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 90000, "epoch": 0, "val_loss": 3.6374723434448244, "val_ppl": 37.9956752274201, "is_best": false, "timestamp": "2026-05-05T07:30:36.974144"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 90500, "epoch": 0, "val_loss": 3.4989893555641176, "val_ppl": 33.08200091782994, "is_best": false, "timestamp": "2026-05-05T07:34:17.234085"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 91000, "epoch": 0, "val_loss": 3.631144380569458, "val_ppl": 37.755999135642845, "is_best": false, "timestamp": "2026-05-05T07:37:58.234545"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 91500, "epoch": 0, "val_loss": 3.515159583091736, "val_ppl": 33.621292885699646, "is_best": false, "timestamp": "2026-05-05T07:41:38.715059"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 92000, "epoch": 0, "val_loss": 4.059471273422242, "val_ppl": 57.94366662184797, "is_best": false, "timestamp": "2026-05-05T07:45:19.259650"}
{"run_name": "final_c6_18l448_factorized_aggressive", "stage": "pretraining", "event": "eval", "step": 92500, "epoch": 0, "val_loss": 3.4958460688591004, "val_ppl": 32.9781779623105, "is_best": false, "timestamp": "2026-05-05T07:49:00.817899"}