# beatalignment/train/lakh-medium.yaml
# william590y's picture
# Upload folder using huggingface_hub
# 151b875 verified
# Dataset section: input text locations, cache location, and tokenization flags.
# NOTE(review): nesting was reconstructed from a copy in which every key sat at
# column 0 (so `data:` parsed as null); confirm the layout against the
# consumer's config schema.
data:
  # Lists, so additional files can be appended as further `- ` items.
  train_urls:
    - "gs://levanter-data/lakh-data-aar/train.txt"
  validation_urls:
    - "gs://levanter-data/lakh-data-aar/valid.txt"
  cache_dir: "gs://levanter-data/lakh-data-aar/cache/"
  # presumably means the text is already tokenized and passed through as-is;
  # TODO confirm against the data loader.
  tokenizer: "passthrough"
  # Canonical lowercase booleans, matching the `true` values used under `model`.
  plaintext: true
  enforce_eos: false
# Model section: architecture sizes, dropout rates, and memory options.
# NOTE(review): nesting was reconstructed from a copy in which every key sat at
# column 0 (so `model:` parsed as null); confirm the layout against the
# consumer's config schema.
model:
  hidden_dim: 1024
  num_heads: 16
  num_layers: 24
  # presumably the maximum sequence length in tokens; TODO confirm.
  seq_len: 1024
  scale_attn_by_inverse_layer_idx: true
  # Dropout probabilities, judging by the `_pdrop` suffix; TODO confirm.
  embed_pdrop: 0.1
  resid_pdrop: 0.1
  gradient_checkpointing: true
# Trainer section: precision, optimization settings, checkpointing, and
# axis-to-mesh sharding maps.
# NOTE(review): nesting was reconstructed from a copy in which every key sat at
# column 0 (so `trainer:` parsed as null). Placing `checkpointer`,
# `axis_resources` and `parameter_axis_resources` directly under `trainer` is
# an assumption; confirm against the consumer's config schema.
trainer:
  # presumably a mixed-precision policy string (params f32, compute bfloat16);
  # TODO confirm the accepted syntax.
  mp: p=f32,c=bfloat16
  # Written with a decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve a bare
  # `3E-4` as the string "3E-4" rather than a float.
  learning_rate: 3.0E-4
  weight_decay: 0.1
  model_axis_size: 1
  per_device_parallelism: 16
  num_train_steps: 100000
  checkpointer:
    base_path: gs://levanter-data/lakh-checkpoints/
    # `30m` is a plain string at the YAML level; presumably parsed as a
    # 30-minute duration by the consumer (TODO confirm).
    save_interval: 30m
  # Maps named axes to the mesh axis they are sharded over
  # (assumption from key names; verify).
  axis_resources:
    batch: "data"
    vocab: "model"
    mlp: "model"
    heads: "model"
  parameter_axis_resources:
    embed: "data"