{ "model": { "vocab_size": 50257, "max_seq_len": 256, "embed_dim": 384, "depth": 4, "edim": 16, "feat_dim": 96, "hidden": 384, "num_heads": 8, "num_blocks": 8, "dropout": 0.1, "params": 54107168 }, "training": { "batch_size": 12, "seq_len": 256, "lr": 0.0003, "weight_decay": 0.1, "num_epochs": 14, "grad_clip": 1.0, "ce_weight": 1.0, "validity_weight": 0.1 }, "data": { "train_tokens": 304222, "val_tokens": 33803, "vocab_size": 50257 }, "run_name": "run_1770236129" }