# Training run configuration: dataset locations, model hyperparameters, and
# trainer settings.
#
# NOTE(review): the nesting below was reconstructed from key order because the
# original indentation was lost. Confirm in particular that `checkpointer`,
# `axis_resources`, and `parameter_axis_resources` belong under `trainer`
# rather than at the top level.

data:
  train_urls:
    - "gs://levanter-data/lakh-data-aar/train.txt"
  validation_urls:
    - "gs://levanter-data/lakh-data-aar/valid.txt"
  cache_dir: "gs://levanter-data/lakh-data-aar/cache/"
  # NOTE(review): presumably selects a tokenizer that leaves the input
  # untouched -- verify against the consumer.
  tokenizer: "passthrough"
  plaintext: true
  enforce_eos: false

model:
  hidden_dim: 1024
  num_heads: 16
  num_layers: 24
  seq_len: 1024
  scale_attn_by_inverse_layer_idx: true
  embed_pdrop: 0.1
  resid_pdrop: 0.1
  gradient_checkpointing: true

trainer:
  # NOTE(review): looks like a mixed-precision policy string (parameters in
  # f32, compute in bfloat16) -- confirm the exact syntax with the consumer.
  mp: "p=f32,c=bfloat16"
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `3E-4` as a string, not a float.
  learning_rate: 3.0e-4
  weight_decay: 0.1
  model_axis_size: 1
  per_device_parallelism: 16
  num_train_steps: 100000
  checkpointer:
    base_path: "gs://levanter-data/lakh-checkpoints/"
    # Quoted so the value is unambiguously a string; presumably parsed as a
    # duration (30 minutes) by the consumer -- TODO confirm.
    save_interval: "30m"
  # NOTE(review): the two mappings below appear to assign named array axes to
  # device-mesh axes ("data" / "model") -- confirm against the trainer schema.
  axis_resources:
    batch: "data"
    vocab: "model"
    mlp: "model"
    heads: "model"
  parameter_axis_resources:
    embed: "data"