# Template config: dump_dir, data.root_dir and tokenizer.path must be changed before use.
# Evals can be activated by uncommenting their config.
# Example launch command:
#   python -m lingua.stool config=apps/main/configs/debug.yaml nodes=8 account=fair_amaia_cw_codegen qos=lowest

# dump_dir: !!CHANGETHIS!!
# Top-level run settings.
name: 'debug'
steps: 1000
probe_freq: null  # alternative value left by the author: 10
seed: 777
# Optimisation settings.
optim:
  # Written with an explicit decimal point so that YAML 1.1 loaders (e.g.
  # PyYAML) resolve it as a float instead of the string "3e-4".
  lr: 3.0e-4
  # NOTE(review): warmup (2000) is larger than the top-level `steps` (1000);
  # confirm this is intended for the debug run.
  warmup: 2000
  lr_min_ratio: 0.000001
  clip: 1.0
# Distributed / compilation settings.
distributed:
  fsdp_type: full_shard
  compile: true
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: true
  tp_size: 1
# Model hyper-parameters.
model:
  dim: 1024
  n_layers: 8
  n_heads: 16
  state_dim: 128
  conv_size: 4
  dt_bias: false
  # init_std_factor: "current_depth"
  # Initialisation ranges (nesting under `init_args` reconstructed from the
  # flattened source — confirm against the consumer's schema).
  init_args:
    dt_max: 0.01
    dt_min: 0.00001
    # Explicit decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve this
    # as a float instead of the string "1e-4".
    dt_init_floor: 1.0e-4
    A_init_min: 0.1
    A_init_max: 2
# Data-loading settings. `root_dir` and `tokenizer.path` are placeholders
# that the header asks to be changed.
data:
  root_dir: data/shuffled/llama2
  # Per-source values; presumably sampling weights — confirm with the loader.
  sources:
    wikipedia: 80.0
    arxiv: 20.0
  batch_size: 64
  prefetch_size: 64
  seq_len: 1024
  n_views: 2
  load_async: true
  tokenizer:
    name: sp
    path: tokenizers/tokenizer_final_32k.minus_inf_ws.model
# Profiling is switched on for this config.
profiling:
  run: true
# Checkpoint settings, with separate `every` / `keep` values for `dump`
# and `eval`.
checkpoint:
  dump:
    every: 100
    keep: 2
  eval:
    every: 100
    keep: 1
# Logging settings.
logging:
  freq: 10
# Evaluation settings: generation parameters, harness task list and
# validation limit.
eval:
  generator:
    max_tokens: 8192
    dtype: bf16
    temperature: 1.0
    top_p: 0.95
  harness:
    tasks:
      - hellaswag
      - piqa
      # Mapping-form entry: this task carries its own few-shot count.
      - task: nq_open
        num_fewshot: 5
  validation:
    max_steps: 100