# Template config: you need to change dump_dir, data.root_dir and tokenizer.path.
# Run-level settings.
# dump_dir is left commented out with a placeholder; per the header of this
# file it has to be set before the config is used.
# dump_dir: !!CHANGETHIS!!
name: debug
steps: 1000
# Disabled in this template:
# probe_freq: 100
seed: 777
# Optimizer section. The keys below are children of `optim`.
optim:
  # Written with an explicit mantissa dot so that YAML 1.1 loaders (for
  # example PyYAML) resolve it as a float; a bare `3e-4` is read as a string
  # by those loaders. The numeric value is unchanged.
  lr: 3.0e-4
  # NOTE(review): warmup (2000) is larger than the top-level `steps` (1000).
  # Presumably acceptable for a debug run -- confirm it is intended.
  warmup: 2000
  lr_min_ratio: 0.000001
  clip: 10.0
# Distributed-execution section. The keys below are children of `distributed`.
distributed:
  fsdp_type: full_shard
  compile: true
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: false
  tp_size: 1
# Model section. The keys below are children of `model`.
model:
  dim: 1024
  n_layers: 8
  n_heads: 8
  # NOTE(review): presumably the number of additional future-token heads.
  # In this file n_views under `data` is 4 (= n_future_head + 1); confirm
  # whether the two values are meant to be kept in sync.
  n_future_head: 3
# Data section. The keys below are children of `data`.
data:
  # The header of this file lists data.root_dir as a value to change.
  root_dir: data/shuffled
  # Mapping of source name to a numeric value (presumably a sampling
  # weight -- confirm against the consumer).
  sources:
    dclm_baseline_1.0: 1.0
  batch_size: 16
  prefetch_size: 64
  seq_len: 4096
  n_views: 4
  load_async: true
  # NOTE(review): `tokenizer` is nested under `data` here on the assumption
  # that the data loader owns the tokenizer settings; the header of this file
  # refers to it simply as tokenizer.path. Confirm against the config schema.
  tokenizer:
    name: sp
    path: tokenizers/tokenizer_final_32k.minus_inf_ws.model
# Profiling section; `run` is its only key in this file.
profiling:
  run: true
# Checkpoint section. `dump` and `eval` are separate sub-mappings, each with
# its own `every` / `keep` pair; they must stay nested so the repeated key
# names do not collide with each other or with the top-level `eval` section.
checkpoint:
  dump:
    every: 100
    keep: 2
  eval:
    every: 100
    keep: 1
# Logging section; `freq` is its only key in this file.
logging:
  freq: 10
# Evaluation section, split into `generator` and `harness` sub-mappings.
eval:
  generator:
    max_tokens: 8192
    dtype: bf16
    temperature: 1.0
    top_p: 0.95
  harness:
    # Entries are either a bare task name or a mapping with per-task options.
    tasks:
      - hellaswag
      - piqa
      # `num_fewshot` belongs to the same list item as `task: nq_open`.
      - task: nq_open
        num_fewshot: 5