# beatalignment/train/lakh-small.yaml
# Uploaded by william590y with huggingface_hub (commit 151b875, verified).
# Dataset section: where the training/validation text lives and how it is read.
data:
  train_urls:
    - "gs://levanter-data/lakh-data-aar/train.txt"
  validation_urls:
    - "gs://levanter-data/lakh-data-aar/valid.txt"
  # Cache location for the processed dataset.
  cache_dir: "gs://levanter-data/lakh-data-aar/cache/"
  # NOTE(review): "passthrough" presumably means the input files already
  # contain token ids and no text tokenizer is applied - confirm in the loader.
  tokenizer: "passthrough"
  # NOTE(review): presumably selects plain-text (one example per line) input
  # instead of JSONL - confirm against the consuming data config.
  plaintext: true
  enforce_eos: false
# Model section: transformer architecture hyperparameters.
model:
  hidden_dim: 768
  num_heads: 12
  num_layers: 12
  seq_len: 1024
  scale_attn_by_inverse_layer_idx: true
  # Dropout probabilities (embedding and residual).
  embed_pdrop: 0.1
  resid_pdrop: 0.1
# Trainer section: precision, optimizer settings, schedule length,
# checkpointing, and axis-to-mesh sharding mappings.
trainer:
  # Mixed-precision policy string (p = parameters, c = compute).
  mp: p=f32,c=bfloat16
  # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve it
  # as a float rather than the string "6E-4".
  learning_rate: 6.0e-4
  weight_decay: 0.1
  model_axis_size: 1
  per_device_parallelism: 2
  num_train_steps: 100000
  checkpointer:
    base_path: gs://levanter-data/lakh-checkpoints/
    # Duration string; interpreted by the consumer (30 minutes).
    save_interval: 30m
  # Logical axis name -> device mesh axis name.
  axis_resources:
    batch: "data"
    vocab: "model"
    mlp: "model"
    heads: "model"
  # NOTE(review): presumably the sharding used for stored parameters
  # (embed axis split over the "data" mesh axis) - confirm with the trainer.
  parameter_axis_resources:
    embed: "data"