---
# Pretraining / fine-tuning configuration.
# NOTE(review): original file was wrapped in markdown-table pipes ("| key: value | |"),
# which is not valid YAML; restored to plain block-style mappings.

# Model architecture
vocab_size: 16000        # tokenizer vocabulary size (see tokenizer_path below)
block_size: 256          # context window length in tokens
n_layer: 6
n_head: 6
n_embed: 384             # NOTE(review): presumably must divide evenly by n_head — confirm in model code

# Optimization
batch_size: 32
micro_batches: 4         # NOTE(review): looks like gradient-accumulation count — confirm in trainer
lr: 3.0e-4               # peak learning rate
min_lr: 3.0e-5           # lower bound of the LR schedule
warmup_steps: 200
max_steps: 1000
weight_decay: 0.01
grad_clip: 1.0           # gradient-norm clipping threshold

# Runtime
dtype: "float32"
device: "auto"

# Paths
save_dir: "out/pretrain"
tokenizer_path: "out/tokenizer.json"
train_txt: "data/corpus_raw.txt"
sft_jsonl: "data/sft_train.jsonl"