# Byte-lingua-code/apps/mtp/configs/debug.yaml
# 2ira's picture
# offline_compression_graph_code
# 72c0672 verified
# Template config: set dump_dir, data.root_dir and tokenizer.path before use.
# dump_dir: !!CHANGETHIS!!
name: 'debug'
steps: 1000
# probe_freq: 100
seed: 777
# Optimizer settings.
optim:
  # Written with an explicit decimal point so that YAML 1.1 loaders
  # (e.g. PyYAML) resolve it as a float; a bare `3e-4` is a string there.
  lr: 3.0e-4
  # NOTE(review): warmup (2000) is larger than the top-level `steps` (1000);
  # confirm this is intended for the debug run.
  warmup: 2000
  lr_min_ratio: 0.000001
  clip: 10.0
# Distributed-training settings (sharding, compilation, dtype, parallelism).
distributed:
  fsdp_type: full_shard
  compile: true
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: false
  tp_size: 1
# Model architecture settings.
model:
  dim: 1024
  n_layers: 8
  n_heads: 8
  n_future_head: 3
# Data-loading settings.
data:
  root_dir: data/shuffled
  # One entry per data source; the value is presumably its sampling weight
  # (TODO confirm against the data loader).
  sources:
    dclm_baseline_1.0: 1.0
  batch_size: 16
  prefetch_size: 64
  seq_len: 4096
  # NOTE(review): n_views (4) equals model.n_future_head (3) + 1; presumably
  # the two are coupled, so confirm before changing either.
  n_views: 4
  load_async: true
  # NOTE(review): `tokenizer` is placed under `data` here; verify this nesting
  # against the config schema of the consuming script.
  tokenizer:
    name: sp
    path: tokenizers/tokenizer_final_32k.minus_inf_ws.model
# Profiling toggle.
profiling:
  run: true
# Checkpointing settings: separate `every`/`keep` values for the `dump` and
# `eval` checkpoint kinds.
checkpoint:
  dump:
    every: 100
    keep: 2
  eval:
    every: 100
    keep: 1
# Logging settings.
logging:
  freq: 10
# Evaluation settings: generator parameters and the harness task list.
eval:
  generator:
    max_tokens: 8192
    dtype: bf16
    temperature: 1.0
    top_p: 0.95
  harness:
    tasks:
      - hellaswag
      - piqa
      # Mapping-form entry: this task carries its own num_fewshot value.
      - task: nq_open
        num_fewshot: 5