# Training run configuration with three top-level sections: data, model,
# and trainer.
#
# NOTE(review): this file had lost its line breaks and was reconstructed
# into block style. Placing `checkpointer`, `axis_resources`, and
# `parameter_axis_resources` under `trainer` (rather than at top level) is
# inferred from key order — confirm against the consumer's config schema.

# Dataset locations and tokenization options.
data:
  train_urls:
    - "gs://levanter-data/lakh-data-aar/train.txt"
  validation_urls:
    - "gs://levanter-data/lakh-data-aar/valid.txt"
  cache_dir: "gs://levanter-data/lakh-data-aar/cache/"
  tokenizer: "passthrough"
  plaintext: true
  enforce_eos: false

# Model hyperparameters.
model:
  hidden_dim: 1024
  num_heads: 16
  num_layers: 24
  seq_len: 1024
  scale_attn_by_inverse_layer_idx: true
  embed_pdrop: 0.1
  resid_pdrop: 0.1
  gradient_checkpointing: true

# Optimization, parallelism, and checkpointing settings.
trainer:
  # presumably a mixed-precision policy string — verify against the consumer
  mp: p=f32,c=bfloat16
  # Written with an explicit decimal point so YAML 1.1 loaders (e.g. PyYAML)
  # resolve it as a float rather than the string "3E-4".
  learning_rate: 3.0e-4
  weight_decay: 0.1

  model_axis_size: 1
  per_device_parallelism: 16

  num_train_steps: 100000

  checkpointer:
    base_path: gs://levanter-data/lakh-checkpoints/
    # presumably a duration ("30 minutes") — verify the accepted format
    save_interval: 30m

  axis_resources:
    batch: "data"
    vocab: "model"
    mlp: "model"
    heads: "model"
  parameter_axis_resources:
    embed: "data"