File size: 867 Bytes
151b875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Dataset settings: text sources per split, cache location, and tokenizer
# selection.
data:
  # Each split takes a list of source URLs; only one file per split is
  # configured here.
  train_urls:
    - "gs://levanter-data/lakh-data-aar/train.txt"
  validation_urls:
    - "gs://levanter-data/lakh-data-aar/valid.txt"
  cache_dir: "gs://levanter-data/lakh-data-aar/cache/"
  # NOTE(review): "passthrough" is interpreted by the consuming loader, not by
  # YAML; presumably the input is already tokenized -- confirm.
  tokenizer: "passthrough"
  # Booleans use the canonical lowercase spelling so every YAML loader and
  # linter reads them the same way.
  plaintext: true
  enforce_eos: false
# Model settings, grouped by kind. Key order inside a mapping carries no
# meaning, so the grouping is for readability only.
model:
  # Size and depth.
  num_layers: 24
  num_heads: 16
  hidden_dim: 1024
  seq_len: 1024

  # NOTE(review): presumably dropout probabilities -- confirm against the
  # model's config class.
  resid_pdrop: 0.1
  embed_pdrop: 0.1

  # Boolean switches.
  gradient_checkpointing: true
  scale_attn_by_inverse_layer_idx: true
# Trainer settings: optimizer values, step budget, checkpointing, and
# axis-name mappings.
trainer:
  # NOTE(review): presumably a mixed-precision policy string parsed by the
  # consumer -- confirm. Quoted so it is unambiguously a string.
  mp: "p=f32,c=bfloat16"
  # Written with an explicit mantissa dot: YAML 1.1 resolvers (e.g. PyYAML)
  # only treat exponent notation as a float when a "." is present, so the
  # dot-less form "3E-4" loads as a string there. Same numeric value (0.0003).
  learning_rate: 3.0e-4
  weight_decay: 0.1
  model_axis_size: 1
  per_device_parallelism: 16
  num_train_steps: 100000

  checkpointer:
    base_path: "gs://levanter-data/lakh-checkpoints/"
    # A duration-like string ("30m"); its interpretation is up to the
    # consumer. Quoted to make the string type explicit.
    save_interval: "30m"

  # NOTE(review): these look like mappings from logical axis names to mesh
  # axis names ("data" / "model") -- inferred from the key names; confirm.
  axis_resources:
    batch: "data"
    vocab: "model"
    mlp: "model"
    heads: "model"
  parameter_axis_resources:
    embed: "data"