| { |
| "lane": "Lane-G-ref", |
| "substrate": "PyTorch-CUDA", |
| "note": "BASELINE REFERENCE \u2014 NOT the hexa-native flame+forge production artifact (a_train_flame_forge)", |
| "config": { |
| "vocab": 256, |
| "d": 768, |
| "n_layer": 12, |
| "n_head": 12, |
| "block": 512, |
| "batch": 32, |
| "steps": 3000, |
| "n_params": 85645824 |
| }, |
| "descent": { |
| "first_val_ce": 5.580406188964844, |
| "last_val_ce": 1.5688461065292358, |
| "F_CLM_REF_DESCENT": 1, |
| "verdict": "PASS" |
| }, |
| "util": { |
| "n": 89, |
| "util_peak": 100.0, |
| "util_mean": 98.85393258426966, |
| "mem_peak_mib": 8529.0, |
| "power_mean_w": 587.7849438202246 |
| }, |
| "throughput": { |
| "total_s": 180.3, |
| "tok_per_s_final": 272622.0, |
| "tok_seen": 49152000 |
| }, |
| "ckpt": { |
| "path": "/root/laneg_ref/clm_ref_pytorch_cuda.pt", |
| "sha256": "9882f5cbfeb24283fe00d19bbaf6947fac339f5f06ef7a37b8a727aa2371d321", |
| "bytes": 342636482 |
| }, |
| "curve": [ |
| { |
| "step": 0, |
| "train_ce": 5.709890365600586, |
| "val_ce": 5.580406188964844, |
| "lr": 2.9999999999999997e-06, |
| "elapsed_s": 0.5, |
| "tok_per_s": 34154.1 |
| }, |
| { |
| "step": 100, |
| "train_ce": 2.6970996856689453, |
| "val_ce": 2.8268299102783203, |
| "lr": 0.000285, |
| "elapsed_s": 6.5, |
| "tok_per_s": 255507.4 |
| }, |
| { |
| "step": 200, |
| "train_ce": 2.5875449180603027, |
| "val_ce": 2.643609046936035, |
| "lr": 0.0002842086242158385, |
| "elapsed_s": 12.5, |
| "tok_per_s": 263980.9 |
| }, |
| { |
| "step": 300, |
| "train_ce": 2.3915350437164307, |
| "val_ce": 2.399930477142334, |
| "lr": 0.00028184377502086166, |
| "elapsed_s": 18.5, |
| "tok_per_s": 266946.5 |
| }, |
| { |
| "step": 400, |
| "train_ce": 2.365194320678711, |
| "val_ce": 2.2105484008789062, |
| "lr": 0.00027793317810967833, |
| "elapsed_s": 24.5, |
| "tok_per_s": 268463.1 |
| }, |
| { |
| "step": 500, |
| "train_ce": 2.119870901107788, |
| "val_ce": 2.2813305854797363, |
| "lr": 0.0002725226816555792, |
| "elapsed_s": 30.5, |
| "tok_per_s": 269381.8 |
| }, |
| { |
| "step": 600, |
| "train_ce": 1.9277604818344116, |
| "val_ce": 2.1148173809051514, |
| "lr": 0.0002656757187826245, |
| "elapsed_s": 36.5, |
| "tok_per_s": 269994.2 |
| }, |
| { |
| "step": 700, |
| "train_ce": 1.9212350845336914, |
| "val_ce": 1.991155743598938, |
| "lr": 0.00025747256387026186, |
| "elapsed_s": 42.5, |
| "tok_per_s": 270382.1 |
| }, |
| { |
| "step": 800, |
| "train_ce": 2.0267608165740967, |
| "val_ce": 1.8992573022842407, |
| "lr": 0.00024800939140962264, |
| "elapsed_s": 48.5, |
| "tok_per_s": 270654.9 |
| }, |
| { |
| "step": 900, |
| "train_ce": 1.825111985206604, |
| "val_ce": 2.033867835998535, |
| "lr": 0.00023739714844554674, |
| "elapsed_s": 54.5, |
| "tok_per_s": 270923.1 |
| }, |
| { |
| "step": 1000, |
| "train_ce": 1.9068242311477661, |
| "val_ce": 1.785348653793335, |
| "lr": 0.0002257602538239216, |
| "elapsed_s": 60.5, |
| "tok_per_s": 271018.6 |
| }, |
| { |
| "step": 1100, |
| "train_ce": 1.8573015928268433, |
| "val_ce": 1.8098082542419434, |
| "lr": 0.00021323513949447168, |
| "elapsed_s": 66.5, |
| "tok_per_s": 271212.5 |
| }, |
| { |
| "step": 1200, |
| "train_ce": 1.7617135047912598, |
| "val_ce": 1.8456193208694458, |
| "lr": 0.00019996865097088843, |
| "elapsed_s": 72.5, |
| "tok_per_s": 271376.3 |
| }, |
| { |
| "step": 1300, |
| "train_ce": 1.8214399814605713, |
| "val_ce": 1.7033685445785522, |
| "lr": 0.00018611632570144482, |
| "elapsed_s": 78.5, |
| "tok_per_s": 271514.4 |
| }, |
| { |
| "step": 1400, |
| "train_ce": 1.6957789659500122, |
| "val_ce": 1.7081215381622314, |
| "lr": 0.00017184056953462327, |
| "elapsed_s": 84.5, |
| "tok_per_s": 271660.1 |
| }, |
| { |
| "step": 1500, |
| "train_ce": 1.545870304107666, |
| "val_ce": 1.6709611415863037, |
| "lr": 0.00015730875265903134, |
| "elapsed_s": 90.5, |
| "tok_per_s": 271784.7 |
| }, |
| { |
| "step": 1600, |
| "train_ce": 1.6873689889907837, |
| "val_ce": 1.5884978771209717, |
| "lr": 0.00014269124734096864, |
| "elapsed_s": 96.5, |
| "tok_per_s": 271893.3 |
| }, |
| { |
| "step": 1700, |
| "train_ce": 1.6488295793533325, |
| "val_ce": 1.6994558572769165, |
| "lr": 0.00012815943046537675, |
| "elapsed_s": 102.5, |
| "tok_per_s": 271989.1 |
| }, |
| { |
| "step": 1800, |
| "train_ce": 1.6918094158172607, |
| "val_ce": 1.634321689605713, |
| "lr": 0.0001138836742985552, |
| "elapsed_s": 108.5, |
| "tok_per_s": 272074.2 |
| }, |
| { |
| "step": 1900, |
| "train_ce": 1.5671133995056152, |
| "val_ce": 1.5018056631088257, |
| "lr": 0.00010003134902911154, |
| "elapsed_s": 114.4, |
| "tok_per_s": 272151.1 |
| }, |
| { |
| "step": 2000, |
| "train_ce": 1.6319389343261719, |
| "val_ce": 1.4990273714065552, |
| "lr": 8.676486050552834e-05, |
| "elapsed_s": 120.4, |
| "tok_per_s": 272220.7 |
| }, |
| { |
| "step": 2100, |
| "train_ce": 1.6000128984451294, |
| "val_ce": 1.63832426071167, |
| "lr": 7.42397461760784e-05, |
| "elapsed_s": 126.4, |
| "tok_per_s": 272283.9 |
| }, |
| { |
| "step": 2200, |
| "train_ce": 1.5814253091812134, |
| "val_ce": 1.617719292640686, |
| "lr": 6.260285155445328e-05, |
| "elapsed_s": 132.4, |
| "tok_per_s": 272341.1 |
| }, |
| { |
| "step": 2300, |
| "train_ce": 1.6231330633163452, |
| "val_ce": 1.6176594495773315, |
| "lr": 5.199060859037736e-05, |
| "elapsed_s": 138.4, |
| "tok_per_s": 272392.6 |
| }, |
| { |
| "step": 2400, |
| "train_ce": 1.4534480571746826, |
| "val_ce": 1.6460052728652954, |
| "lr": 4.2527436129738086e-05, |
| "elapsed_s": 144.4, |
| "tok_per_s": 272415.0 |
| }, |
| { |
| "step": 2500, |
| "train_ce": 1.5609209537506104, |
| "val_ce": 1.4581724405288696, |
| "lr": 3.4324281217375475e-05, |
| "elapsed_s": 150.4, |
| "tok_per_s": 272441.1 |
| }, |
| { |
| "step": 2600, |
| "train_ce": 1.4334359169006348, |
| "val_ce": 1.5472081899642944, |
| "lr": 2.7477318344420816e-05, |
| "elapsed_s": 156.4, |
| "tok_per_s": 272482.8 |
| }, |
| { |
| "step": 2700, |
| "train_ce": 1.5293118953704834, |
| "val_ce": 1.5294520854949951, |
| "lr": 2.2066821890321682e-05, |
| "elapsed_s": 162.4, |
| "tok_per_s": 272522.4 |
| }, |
| { |
| "step": 2800, |
| "train_ce": 1.410809874534607, |
| "val_ce": 1.4850399494171143, |
| "lr": 1.81562249791383e-05, |
| "elapsed_s": 168.4, |
| "tok_per_s": 272557.7 |
| }, |
| { |
| "step": 2900, |
| "train_ce": 1.4719370603561401, |
| "val_ce": 1.488185167312622, |
| "lr": 1.5791375784161453e-05, |
| "elapsed_s": 174.4, |
| "tok_per_s": 272591.0 |
| }, |
| { |
| "step": 2999, |
| "train_ce": 1.419477939605713, |
| "val_ce": 1.5688461065292358, |
| "lr": 1.5000079215009745e-05, |
| "elapsed_s": 180.3, |
| "tok_per_s": 272622.6 |
| } |
| ] |
| } |