clm-v1-ref-pytorch-cuda / clm_ref_train.log.json
dancinlife's picture
Upload clm_ref_train.log.json with huggingface_hub
ec8a7c5 verified
{
"lane": "Lane-G-ref",
"substrate": "PyTorch-CUDA",
"note": "BASELINE REFERENCE \u2014 NOT the hexa-native flame+forge production artifact (a_train_flame_forge)",
"config": {
"vocab": 256,
"d": 768,
"n_layer": 12,
"n_head": 12,
"block": 512,
"batch": 32,
"steps": 3000,
"n_params": 85645824
},
"descent": {
"first_val_ce": 5.580406188964844,
"last_val_ce": 1.5688461065292358,
"F_CLM_REF_DESCENT": 1,
"verdict": "PASS"
},
"util": {
"n": 89,
"util_peak": 100.0,
"util_mean": 98.85393258426966,
"mem_peak_mib": 8529.0,
"power_mean_w": 587.7849438202246
},
"throughput": {
"total_s": 180.3,
"tok_per_s_final": 272622.0,
"tok_seen": 49152000
},
"ckpt": {
"path": "/root/laneg_ref/clm_ref_pytorch_cuda.pt",
"sha256": "9882f5cbfeb24283fe00d19bbaf6947fac339f5f06ef7a37b8a727aa2371d321",
"bytes": 342636482
},
"curve": [
{
"step": 0,
"train_ce": 5.709890365600586,
"val_ce": 5.580406188964844,
"lr": 2.9999999999999997e-06,
"elapsed_s": 0.5,
"tok_per_s": 34154.1
},
{
"step": 100,
"train_ce": 2.6970996856689453,
"val_ce": 2.8268299102783203,
"lr": 0.000285,
"elapsed_s": 6.5,
"tok_per_s": 255507.4
},
{
"step": 200,
"train_ce": 2.5875449180603027,
"val_ce": 2.643609046936035,
"lr": 0.0002842086242158385,
"elapsed_s": 12.5,
"tok_per_s": 263980.9
},
{
"step": 300,
"train_ce": 2.3915350437164307,
"val_ce": 2.399930477142334,
"lr": 0.00028184377502086166,
"elapsed_s": 18.5,
"tok_per_s": 266946.5
},
{
"step": 400,
"train_ce": 2.365194320678711,
"val_ce": 2.2105484008789062,
"lr": 0.00027793317810967833,
"elapsed_s": 24.5,
"tok_per_s": 268463.1
},
{
"step": 500,
"train_ce": 2.119870901107788,
"val_ce": 2.2813305854797363,
"lr": 0.0002725226816555792,
"elapsed_s": 30.5,
"tok_per_s": 269381.8
},
{
"step": 600,
"train_ce": 1.9277604818344116,
"val_ce": 2.1148173809051514,
"lr": 0.0002656757187826245,
"elapsed_s": 36.5,
"tok_per_s": 269994.2
},
{
"step": 700,
"train_ce": 1.9212350845336914,
"val_ce": 1.991155743598938,
"lr": 0.00025747256387026186,
"elapsed_s": 42.5,
"tok_per_s": 270382.1
},
{
"step": 800,
"train_ce": 2.0267608165740967,
"val_ce": 1.8992573022842407,
"lr": 0.00024800939140962264,
"elapsed_s": 48.5,
"tok_per_s": 270654.9
},
{
"step": 900,
"train_ce": 1.825111985206604,
"val_ce": 2.033867835998535,
"lr": 0.00023739714844554674,
"elapsed_s": 54.5,
"tok_per_s": 270923.1
},
{
"step": 1000,
"train_ce": 1.9068242311477661,
"val_ce": 1.785348653793335,
"lr": 0.0002257602538239216,
"elapsed_s": 60.5,
"tok_per_s": 271018.6
},
{
"step": 1100,
"train_ce": 1.8573015928268433,
"val_ce": 1.8098082542419434,
"lr": 0.00021323513949447168,
"elapsed_s": 66.5,
"tok_per_s": 271212.5
},
{
"step": 1200,
"train_ce": 1.7617135047912598,
"val_ce": 1.8456193208694458,
"lr": 0.00019996865097088843,
"elapsed_s": 72.5,
"tok_per_s": 271376.3
},
{
"step": 1300,
"train_ce": 1.8214399814605713,
"val_ce": 1.7033685445785522,
"lr": 0.00018611632570144482,
"elapsed_s": 78.5,
"tok_per_s": 271514.4
},
{
"step": 1400,
"train_ce": 1.6957789659500122,
"val_ce": 1.7081215381622314,
"lr": 0.00017184056953462327,
"elapsed_s": 84.5,
"tok_per_s": 271660.1
},
{
"step": 1500,
"train_ce": 1.545870304107666,
"val_ce": 1.6709611415863037,
"lr": 0.00015730875265903134,
"elapsed_s": 90.5,
"tok_per_s": 271784.7
},
{
"step": 1600,
"train_ce": 1.6873689889907837,
"val_ce": 1.5884978771209717,
"lr": 0.00014269124734096864,
"elapsed_s": 96.5,
"tok_per_s": 271893.3
},
{
"step": 1700,
"train_ce": 1.6488295793533325,
"val_ce": 1.6994558572769165,
"lr": 0.00012815943046537675,
"elapsed_s": 102.5,
"tok_per_s": 271989.1
},
{
"step": 1800,
"train_ce": 1.6918094158172607,
"val_ce": 1.634321689605713,
"lr": 0.0001138836742985552,
"elapsed_s": 108.5,
"tok_per_s": 272074.2
},
{
"step": 1900,
"train_ce": 1.5671133995056152,
"val_ce": 1.5018056631088257,
"lr": 0.00010003134902911154,
"elapsed_s": 114.4,
"tok_per_s": 272151.1
},
{
"step": 2000,
"train_ce": 1.6319389343261719,
"val_ce": 1.4990273714065552,
"lr": 8.676486050552834e-05,
"elapsed_s": 120.4,
"tok_per_s": 272220.7
},
{
"step": 2100,
"train_ce": 1.6000128984451294,
"val_ce": 1.63832426071167,
"lr": 7.42397461760784e-05,
"elapsed_s": 126.4,
"tok_per_s": 272283.9
},
{
"step": 2200,
"train_ce": 1.5814253091812134,
"val_ce": 1.617719292640686,
"lr": 6.260285155445328e-05,
"elapsed_s": 132.4,
"tok_per_s": 272341.1
},
{
"step": 2300,
"train_ce": 1.6231330633163452,
"val_ce": 1.6176594495773315,
"lr": 5.199060859037736e-05,
"elapsed_s": 138.4,
"tok_per_s": 272392.6
},
{
"step": 2400,
"train_ce": 1.4534480571746826,
"val_ce": 1.6460052728652954,
"lr": 4.2527436129738086e-05,
"elapsed_s": 144.4,
"tok_per_s": 272415.0
},
{
"step": 2500,
"train_ce": 1.5609209537506104,
"val_ce": 1.4581724405288696,
"lr": 3.4324281217375475e-05,
"elapsed_s": 150.4,
"tok_per_s": 272441.1
},
{
"step": 2600,
"train_ce": 1.4334359169006348,
"val_ce": 1.5472081899642944,
"lr": 2.7477318344420816e-05,
"elapsed_s": 156.4,
"tok_per_s": 272482.8
},
{
"step": 2700,
"train_ce": 1.5293118953704834,
"val_ce": 1.5294520854949951,
"lr": 2.2066821890321682e-05,
"elapsed_s": 162.4,
"tok_per_s": 272522.4
},
{
"step": 2800,
"train_ce": 1.410809874534607,
"val_ce": 1.4850399494171143,
"lr": 1.81562249791383e-05,
"elapsed_s": 168.4,
"tok_per_s": 272557.7
},
{
"step": 2900,
"train_ce": 1.4719370603561401,
"val_ce": 1.488185167312622,
"lr": 1.5791375784161453e-05,
"elapsed_s": 174.4,
"tok_per_s": 272591.0
},
{
"step": 2999,
"train_ce": 1.419477939605713,
"val_ce": 1.5688461065292358,
"lr": 1.5000079215009745e-05,
"elapsed_s": 180.3,
"tok_per_s": 272622.6
}
]
}