# offline_compression_graph_code @ 72c0672 (non-YAML page residue from scraping, commented out so the file parses)
---
# Template config, need to change dump_dir, data.root_dir and tokenizer.path
# Evals can be activated by uncommenting its config
# python -m lingua.stool config=apps/main/configs/debug.yaml nodes=8 account=fair_amaia_cw_codegen qos=lowest
# dump_dir: !!CHANGETHIS!!
# Run identity and length.
name: "debug"
steps: 1000  # total optimizer steps for this debug run
probe_freq: null  # disabled; the commented-out value 10 is a suggested probe interval — TODO(review) confirm units are steps
seed: 777  # RNG seed for reproducibility
# Optimizer / LR schedule.
optim:
  # Mantissa dot added: bare "3e-4" is resolved as a *string* by YAML 1.1
  # loaders (e.g. PyYAML), whose float regex requires a "." before the exponent.
  lr: 3.0e-4
  # NOTE(review): warmup (2000) exceeds steps (1000) — this debug run never
  # leaves the warmup phase; confirm that is intentional.
  warmup: 2000
  lr_min_ratio: 0.000001  # final LR = lr * lr_min_ratio
  clip: 1.0  # gradient-norm clipping threshold
# Distributed training / parallelism settings.
distributed:
  fsdp_type: full_shard
  compile: true  # enable model compilation — presumably torch.compile; verify against trainer
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: true
  tp_size: 1  # tensor-parallel degree (1 = disabled)
# Model architecture. state_dim / conv_size / dt_* / A_init_* suggest an
# SSM (Mamba-style) block — TODO(review) confirm against the model class.
model:
  dim: 1024
  n_layers: 8
  n_heads: 16
  state_dim: 128
  conv_size: 4
  dt_bias: false
  # init_std_factor: "current_depth"
  init_args:
    dt_max: 0.01
    dt_min: 0.00001
    # Mantissa dot added: bare "1e-4" is a string, not a float, under
    # YAML 1.1 loaders (e.g. PyYAML).
    dt_init_floor: 1.0e-4
    A_init_min: 0.1
    A_init_max: 2  # NOTE(review): int while A_init_min is a float — loader likely coerces; confirm
# Training-data pipeline.
data:
  root_dir: data/shuffled/llama2  # template value — change per environment (see header comment)
  sources:  # per-source sampling weights
    wikipedia: 80.0
    arxiv: 20.0
  batch_size: 64
  prefetch_size: 64
  seq_len: 1024
  n_views: 2
  load_async: true
  tokenizer:
    name: sp  # presumably SentencePiece — verify against tokenizer loader
    path: tokenizers/tokenizer_final_32k.minus_inf_ws.model  # template value — change per environment
# Profiler toggle.
profiling:
  run: true
# Checkpointing cadence and retention.
checkpoint:
  dump:
    every: 100  # write a checkpoint every 100 steps
    keep: 2  # retain only the last 2 dumps
  eval:
    every: 100  # checkpoints kept for evaluation
    keep: 1
# Metric logging.
logging:
  freq: 10  # log every 10 steps
# Evaluation (per the header comment, sections can be commented out to disable).
eval:
  generator:
    max_tokens: 8192
    dtype: bf16
    temperature: 1.0
    top_p: 0.95  # nucleus sampling
  harness:  # task list — presumably lm-evaluation-harness style; confirm
    tasks:
      - hellaswag
      - piqa
      - task: nq_open
        num_fewshot: 5
  validation:
    max_steps: 100