| { |
| "params": 944640, |
| "args": { |
| "data": "data/tinystories_full.txt", |
| "output": "checkpoints/atome_1m_v1.pt", |
| "steps": 30000, |
| "seq_len": 256, |
| "batch_size": 64, |
| "accum_steps": 4, |
| "lr": 0.0003, |
| "min_lr": 3e-05, |
| "warmup": 1000, |
| "weight_decay": 0.1, |
| "d_model": 256, |
| "n_layers": 8, |
| "d_head": 64, |
| "top_k": 4, |
| "bf16": true, |
| "eval_every": 1000, |
| "seed": 0 |
| }, |
| "log": [ |
| { |
| "step": 1000, |
| "train_loss": 1.689065933227539, |
| "val_loss": 1.6851140782237053, |
| "val_ppl": 5.3930661286628725, |
| "lr": 0.0003 |
| }, |
| { |
| "step": 2000, |
| "train_loss": 1.475701928138733, |
| "val_loss": 1.4368714336305857, |
| "val_ppl": 4.207511724416042, |
| "lr": 0.0002992086242158385 |
| }, |
| { |
| "step": 3000, |
| "train_loss": 1.3402614891529083, |
| "val_loss": 1.355498529970646, |
| "val_ppl": 3.8786941199889884, |
| "lr": 0.00029684377502086165 |
| }, |
| { |
| "step": 4000, |
| "train_loss": 1.2906470894813538, |
| "val_loss": 1.298057682812214, |
| "val_ppl": 3.662176646542712, |
| "lr": 0.0002929331781096783 |
| }, |
| { |
| "step": 5000, |
| "train_loss": 1.2640663385391235, |
| "val_loss": 1.2564894184470177, |
| "val_ppl": 3.513066906295889, |
| "lr": 0.00028752268165557917 |
| }, |
| { |
| "step": 6000, |
| "train_loss": 1.205640196800232, |
| "val_loss": 1.2161348164081573, |
| "val_ppl": 3.374120900293555, |
| "lr": 0.0002806757187826245 |
| }, |
| { |
| "step": 7000, |
| "train_loss": 1.1917544305324554, |
| "val_loss": 1.1835042145103216, |
| "val_ppl": 3.2657982326287116, |
| "lr": 0.00027247256387026185 |
| }, |
| { |
| "step": 8000, |
| "train_loss": 1.1544596254825592, |
| "val_loss": 1.1677243299782276, |
| "val_ppl": 3.2146687829705525, |
| "lr": 0.0002630093914096226 |
| }, |
| { |
| "step": 9000, |
| "train_loss": 1.1510637402534485, |
| "val_loss": 1.1527819111943245, |
| "val_ppl": 3.166990953913901, |
| "lr": 0.0002523971484455467 |
| }, |
| { |
| "step": 10000, |
| "train_loss": 1.140123575925827, |
| "val_loss": 1.1461433116346598, |
| "val_ppl": 3.146036201225796, |
| "lr": 0.0002407602538239216 |
| }, |
| { |
| "step": 11000, |
| "train_loss": 1.1275735795497894, |
| "val_loss": 1.131921675056219, |
| "val_ppl": 3.1016110655411038, |
| "lr": 0.00022823513949447164 |
| }, |
| { |
| "step": 12000, |
| "train_loss": 1.1099890172481537, |
| "val_loss": 1.112453417852521, |
| "val_ppl": 3.041812083259338, |
| "lr": 0.00021496865097088842 |
| }, |
| { |
| "step": 13000, |
| "train_loss": 1.1127586960792542, |
| "val_loss": 1.112892348319292, |
| "val_ppl": 3.043147520317438, |
| "lr": 0.0002011163257014448 |
| }, |
| { |
| "step": 14000, |
| "train_loss": 1.0873990654945374, |
| "val_loss": 1.1024821121245623, |
| "val_ppl": 3.0116319626741244, |
| "lr": 0.00018684056953462323 |
| }, |
| { |
| "step": 15000, |
| "train_loss": 1.0949949026107788, |
| "val_loss": 1.1003286074846983, |
| "val_ppl": 3.0051533776041945, |
| "lr": 0.00017230875265903135 |
| }, |
| { |
| "step": 16000, |
| "train_loss": 1.092372715473175, |
| "val_loss": 1.0886210184544325, |
| "val_ppl": 2.9701754301311736, |
| "lr": 0.00015769124734096862 |
| }, |
| { |
| "step": 17000, |
| "train_loss": 1.0719301402568817, |
| "val_loss": 1.087962357327342, |
| "val_ppl": 2.968219735175533, |
| "lr": 0.00014315943046537674 |
| }, |
| { |
| "step": 18000, |
| "train_loss": 1.0894330739974976, |
| "val_loss": 1.0875801891088486, |
| "val_ppl": 2.9670855926576603, |
| "lr": 0.0001288836742985552 |
| }, |
| { |
| "step": 19000, |
| "train_loss": 1.0676527321338654, |
| "val_loss": 1.0716162715107203, |
| "val_ppl": 2.920095354830056, |
| "lr": 0.00011503134902911152 |
| }, |
| { |
| "step": 20000, |
| "train_loss": 1.0742259323596954, |
| "val_loss": 1.0812196973711252, |
| "val_ppl": 2.948273360207015, |
| "lr": 0.00010176486050552833 |
| }, |
| { |
| "step": 21000, |
| "train_loss": 1.0726729929447174, |
| "val_loss": 1.0718515273183584, |
| "val_ppl": 2.9207824050342435, |
| "lr": 8.923974617607838e-05 |
| }, |
| { |
| "step": 22000, |
| "train_loss": 1.0701198875904083, |
| "val_loss": 1.0739975553005934, |
| "val_ppl": 2.927057216357621, |
| "lr": 7.760285155445327e-05 |
| }, |
| { |
| "step": 23000, |
| "train_loss": 1.0675779581069946, |
| "val_loss": 1.0646078549325466, |
| "val_ppl": 2.899701657373658, |
| "lr": 6.699060859037736e-05 |
| }, |
| { |
| "step": 24000, |
| "train_loss": 1.0793527662754059, |
| "val_loss": 1.0707154776901007, |
| "val_ppl": 2.917466135348921, |
| "lr": 5.7527436129738084e-05 |
| }, |
| { |
| "step": 25000, |
| "train_loss": 1.0686360597610474, |
| "val_loss": 1.067691769450903, |
| "val_ppl": 2.9086578924472115, |
| "lr": 4.9324281217375474e-05 |
| }, |
| { |
| "step": 26000, |
| "train_loss": 1.079252928495407, |
| "val_loss": 1.064154027029872, |
| "val_ppl": 2.8983859904178786, |
| "lr": 4.247731834442082e-05 |
| }, |
| { |
| "step": 27000, |
| "train_loss": 1.0666958093643188, |
| "val_loss": 1.0639245696365833, |
| "val_ppl": 2.8977210106189566, |
| "lr": 3.7066821890321684e-05 |
| }, |
| { |
| "step": 28000, |
| "train_loss": 1.065284639596939, |
| "val_loss": 1.0690924655646086, |
| "val_ppl": 2.912734892906038, |
| "lr": 3.31562249791383e-05 |
| }, |
| { |
| "step": 29000, |
| "train_loss": 1.06133571267128, |
| "val_loss": 1.0545352958142757, |
| "val_ppl": 2.8706408450794916, |
| "lr": 3.0791375784161455e-05 |
| } |
| ], |
| "final_val": 1.0572172198444605, |
| "best_val": 1.0545352958142757 |
| } |