atome-lm / atome_1m_v1.train.json
TilelliLab's picture
Atome LM v0.3.0 — checkpoints + honest model card
9e3a160 verified
{
"params": 944640,
"args": {
"data": "data/tinystories_full.txt",
"output": "checkpoints/atome_1m_v1.pt",
"steps": 30000,
"seq_len": 256,
"batch_size": 64,
"accum_steps": 4,
"lr": 0.0003,
"min_lr": 3e-05,
"warmup": 1000,
"weight_decay": 0.1,
"d_model": 256,
"n_layers": 8,
"d_head": 64,
"top_k": 4,
"bf16": true,
"eval_every": 1000,
"seed": 0
},
"log": [
{
"step": 1000,
"train_loss": 1.689065933227539,
"val_loss": 1.6851140782237053,
"val_ppl": 5.3930661286628725,
"lr": 0.0003
},
{
"step": 2000,
"train_loss": 1.475701928138733,
"val_loss": 1.4368714336305857,
"val_ppl": 4.207511724416042,
"lr": 0.0002992086242158385
},
{
"step": 3000,
"train_loss": 1.3402614891529083,
"val_loss": 1.355498529970646,
"val_ppl": 3.8786941199889884,
"lr": 0.00029684377502086165
},
{
"step": 4000,
"train_loss": 1.2906470894813538,
"val_loss": 1.298057682812214,
"val_ppl": 3.662176646542712,
"lr": 0.0002929331781096783
},
{
"step": 5000,
"train_loss": 1.2640663385391235,
"val_loss": 1.2564894184470177,
"val_ppl": 3.513066906295889,
"lr": 0.00028752268165557917
},
{
"step": 6000,
"train_loss": 1.205640196800232,
"val_loss": 1.2161348164081573,
"val_ppl": 3.374120900293555,
"lr": 0.0002806757187826245
},
{
"step": 7000,
"train_loss": 1.1917544305324554,
"val_loss": 1.1835042145103216,
"val_ppl": 3.2657982326287116,
"lr": 0.00027247256387026185
},
{
"step": 8000,
"train_loss": 1.1544596254825592,
"val_loss": 1.1677243299782276,
"val_ppl": 3.2146687829705525,
"lr": 0.0002630093914096226
},
{
"step": 9000,
"train_loss": 1.1510637402534485,
"val_loss": 1.1527819111943245,
"val_ppl": 3.166990953913901,
"lr": 0.0002523971484455467
},
{
"step": 10000,
"train_loss": 1.140123575925827,
"val_loss": 1.1461433116346598,
"val_ppl": 3.146036201225796,
"lr": 0.0002407602538239216
},
{
"step": 11000,
"train_loss": 1.1275735795497894,
"val_loss": 1.131921675056219,
"val_ppl": 3.1016110655411038,
"lr": 0.00022823513949447164
},
{
"step": 12000,
"train_loss": 1.1099890172481537,
"val_loss": 1.112453417852521,
"val_ppl": 3.041812083259338,
"lr": 0.00021496865097088842
},
{
"step": 13000,
"train_loss": 1.1127586960792542,
"val_loss": 1.112892348319292,
"val_ppl": 3.043147520317438,
"lr": 0.0002011163257014448
},
{
"step": 14000,
"train_loss": 1.0873990654945374,
"val_loss": 1.1024821121245623,
"val_ppl": 3.0116319626741244,
"lr": 0.00018684056953462323
},
{
"step": 15000,
"train_loss": 1.0949949026107788,
"val_loss": 1.1003286074846983,
"val_ppl": 3.0051533776041945,
"lr": 0.00017230875265903135
},
{
"step": 16000,
"train_loss": 1.092372715473175,
"val_loss": 1.0886210184544325,
"val_ppl": 2.9701754301311736,
"lr": 0.00015769124734096862
},
{
"step": 17000,
"train_loss": 1.0719301402568817,
"val_loss": 1.087962357327342,
"val_ppl": 2.968219735175533,
"lr": 0.00014315943046537674
},
{
"step": 18000,
"train_loss": 1.0894330739974976,
"val_loss": 1.0875801891088486,
"val_ppl": 2.9670855926576603,
"lr": 0.0001288836742985552
},
{
"step": 19000,
"train_loss": 1.0676527321338654,
"val_loss": 1.0716162715107203,
"val_ppl": 2.920095354830056,
"lr": 0.00011503134902911152
},
{
"step": 20000,
"train_loss": 1.0742259323596954,
"val_loss": 1.0812196973711252,
"val_ppl": 2.948273360207015,
"lr": 0.00010176486050552833
},
{
"step": 21000,
"train_loss": 1.0726729929447174,
"val_loss": 1.0718515273183584,
"val_ppl": 2.9207824050342435,
"lr": 8.923974617607838e-05
},
{
"step": 22000,
"train_loss": 1.0701198875904083,
"val_loss": 1.0739975553005934,
"val_ppl": 2.927057216357621,
"lr": 7.760285155445327e-05
},
{
"step": 23000,
"train_loss": 1.0675779581069946,
"val_loss": 1.0646078549325466,
"val_ppl": 2.899701657373658,
"lr": 6.699060859037736e-05
},
{
"step": 24000,
"train_loss": 1.0793527662754059,
"val_loss": 1.0707154776901007,
"val_ppl": 2.917466135348921,
"lr": 5.7527436129738084e-05
},
{
"step": 25000,
"train_loss": 1.0686360597610474,
"val_loss": 1.067691769450903,
"val_ppl": 2.9086578924472115,
"lr": 4.9324281217375474e-05
},
{
"step": 26000,
"train_loss": 1.079252928495407,
"val_loss": 1.064154027029872,
"val_ppl": 2.8983859904178786,
"lr": 4.247731834442082e-05
},
{
"step": 27000,
"train_loss": 1.0666958093643188,
"val_loss": 1.0639245696365833,
"val_ppl": 2.8977210106189566,
"lr": 3.7066821890321684e-05
},
{
"step": 28000,
"train_loss": 1.065284639596939,
"val_loss": 1.0690924655646086,
"val_ppl": 2.912734892906038,
"lr": 3.31562249791383e-05
},
{
"step": 29000,
"train_loss": 1.06133571267128,
"val_loss": 1.0545352958142757,
"val_ppl": 2.8706408450794916,
"lr": 3.0791375784161455e-05
}
],
"final_val": 1.0572172198444605,
"best_val": 1.0545352958142757
}