{ "lane": "Lane-G-ref", "substrate": "PyTorch-CUDA", "note": "BASELINE REFERENCE \u2014 NOT the hexa-native flame+forge production artifact (a_train_flame_forge)", "config": { "vocab": 256, "d": 768, "n_layer": 12, "n_head": 12, "block": 512, "batch": 32, "steps": 3000, "n_params": 85645824 }, "descent": { "first_val_ce": 5.580406188964844, "last_val_ce": 1.5688461065292358, "F_CLM_REF_DESCENT": 1, "verdict": "PASS" }, "util": { "n": 89, "util_peak": 100.0, "util_mean": 98.85393258426966, "mem_peak_mib": 8529.0, "power_mean_w": 587.7849438202246 }, "throughput": { "total_s": 180.3, "tok_per_s_final": 272622.0, "tok_seen": 49152000 }, "ckpt": { "path": "/root/laneg_ref/clm_ref_pytorch_cuda.pt", "sha256": "9882f5cbfeb24283fe00d19bbaf6947fac339f5f06ef7a37b8a727aa2371d321", "bytes": 342636482 }, "curve": [ { "step": 0, "train_ce": 5.709890365600586, "val_ce": 5.580406188964844, "lr": 2.9999999999999997e-06, "elapsed_s": 0.5, "tok_per_s": 34154.1 }, { "step": 100, "train_ce": 2.6970996856689453, "val_ce": 2.8268299102783203, "lr": 0.000285, "elapsed_s": 6.5, "tok_per_s": 255507.4 }, { "step": 200, "train_ce": 2.5875449180603027, "val_ce": 2.643609046936035, "lr": 0.0002842086242158385, "elapsed_s": 12.5, "tok_per_s": 263980.9 }, { "step": 300, "train_ce": 2.3915350437164307, "val_ce": 2.399930477142334, "lr": 0.00028184377502086166, "elapsed_s": 18.5, "tok_per_s": 266946.5 }, { "step": 400, "train_ce": 2.365194320678711, "val_ce": 2.2105484008789062, "lr": 0.00027793317810967833, "elapsed_s": 24.5, "tok_per_s": 268463.1 }, { "step": 500, "train_ce": 2.119870901107788, "val_ce": 2.2813305854797363, "lr": 0.0002725226816555792, "elapsed_s": 30.5, "tok_per_s": 269381.8 }, { "step": 600, "train_ce": 1.9277604818344116, "val_ce": 2.1148173809051514, "lr": 0.0002656757187826245, "elapsed_s": 36.5, "tok_per_s": 269994.2 }, { "step": 700, "train_ce": 1.9212350845336914, "val_ce": 1.991155743598938, "lr": 0.00025747256387026186, "elapsed_s": 42.5, "tok_per_s": 270382.1 }, { "step": 800, "train_ce": 2.0267608165740967, "val_ce": 1.8992573022842407, "lr": 0.00024800939140962264, "elapsed_s": 48.5, "tok_per_s": 270654.9 }, { "step": 900, "train_ce": 1.825111985206604, "val_ce": 2.033867835998535, "lr": 0.00023739714844554674, "elapsed_s": 54.5, "tok_per_s": 270923.1 }, { "step": 1000, "train_ce": 1.9068242311477661, "val_ce": 1.785348653793335, "lr": 0.0002257602538239216, "elapsed_s": 60.5, "tok_per_s": 271018.6 }, { "step": 1100, "train_ce": 1.8573015928268433, "val_ce": 1.8098082542419434, "lr": 0.00021323513949447168, "elapsed_s": 66.5, "tok_per_s": 271212.5 }, { "step": 1200, "train_ce": 1.7617135047912598, "val_ce": 1.8456193208694458, "lr": 0.00019996865097088843, "elapsed_s": 72.5, "tok_per_s": 271376.3 }, { "step": 1300, "train_ce": 1.8214399814605713, "val_ce": 1.7033685445785522, "lr": 0.00018611632570144482, "elapsed_s": 78.5, "tok_per_s": 271514.4 }, { "step": 1400, "train_ce": 1.6957789659500122, "val_ce": 1.7081215381622314, "lr": 0.00017184056953462327, "elapsed_s": 84.5, "tok_per_s": 271660.1 }, { "step": 1500, "train_ce": 1.545870304107666, "val_ce": 1.6709611415863037, "lr": 0.00015730875265903134, "elapsed_s": 90.5, "tok_per_s": 271784.7 }, { "step": 1600, "train_ce": 1.6873689889907837, "val_ce": 1.5884978771209717, "lr": 0.00014269124734096864, "elapsed_s": 96.5, "tok_per_s": 271893.3 }, { "step": 1700, "train_ce": 1.6488295793533325, "val_ce": 1.6994558572769165, "lr": 0.00012815943046537675, "elapsed_s": 102.5, "tok_per_s": 271989.1 }, { "step": 1800, "train_ce": 1.6918094158172607, "val_ce": 1.634321689605713, "lr": 0.0001138836742985552, "elapsed_s": 108.5, "tok_per_s": 272074.2 }, { "step": 1900, "train_ce": 1.5671133995056152, "val_ce": 1.5018056631088257, "lr": 0.00010003134902911154, "elapsed_s": 114.4, "tok_per_s": 272151.1 }, { "step": 2000, "train_ce": 1.6319389343261719, "val_ce": 1.4990273714065552, "lr": 8.676486050552834e-05, "elapsed_s": 120.4, "tok_per_s": 272220.7 }, { "step": 2100, "train_ce": 1.6000128984451294, "val_ce": 1.63832426071167, "lr": 7.42397461760784e-05, "elapsed_s": 126.4, "tok_per_s": 272283.9 }, { "step": 2200, "train_ce": 1.5814253091812134, "val_ce": 1.617719292640686, "lr": 6.260285155445328e-05, "elapsed_s": 132.4, "tok_per_s": 272341.1 }, { "step": 2300, "train_ce": 1.6231330633163452, "val_ce": 1.6176594495773315, "lr": 5.199060859037736e-05, "elapsed_s": 138.4, "tok_per_s": 272392.6 }, { "step": 2400, "train_ce": 1.4534480571746826, "val_ce": 1.6460052728652954, "lr": 4.2527436129738086e-05, "elapsed_s": 144.4, "tok_per_s": 272415.0 }, { "step": 2500, "train_ce": 1.5609209537506104, "val_ce": 1.4581724405288696, "lr": 3.4324281217375475e-05, "elapsed_s": 150.4, "tok_per_s": 272441.1 }, { "step": 2600, "train_ce": 1.4334359169006348, "val_ce": 1.5472081899642944, "lr": 2.7477318344420816e-05, "elapsed_s": 156.4, "tok_per_s": 272482.8 }, { "step": 2700, "train_ce": 1.5293118953704834, "val_ce": 1.5294520854949951, "lr": 2.2066821890321682e-05, "elapsed_s": 162.4, "tok_per_s": 272522.4 }, { "step": 2800, "train_ce": 1.410809874534607, "val_ce": 1.4850399494171143, "lr": 1.81562249791383e-05, "elapsed_s": 168.4, "tok_per_s": 272557.7 }, { "step": 2900, "train_ce": 1.4719370603561401, "val_ce": 1.488185167312622, "lr": 1.5791375784161453e-05, "elapsed_s": 174.4, "tok_per_s": 272591.0 }, { "step": 2999, "train_ce": 1.419477939605713, "val_ce": 1.5688461065292358, "lr": 1.5000079215009745e-05, "elapsed_s": 180.3, "tok_per_s": 272622.6 } ] }