Nano-168M / config_pretrain.json
bd4sur's picture
Upload 6 files
4cae7b8 verified
raw
history blame contribute delete
998 Bytes
{
"use_lora": false,
"lora_rank": 8,
"lora_alpha": 16,
"lora_dropout": 0.0,
"from_checkpoint": "/home/bd4sur/ai/Nano/checkpoint/checkpoint_20241122_232939_step_196000.pt",
"save_checkpoint_to": "/home/bd4sur/ai/Nano/checkpoint",
"dataset_path": [
["/home/bd4sur/ai/Nano/dataset_preprocessed/pt_train_0.base64", "/home/bd4sur/ai/Nano/dataset_preprocessed/pt_val_0.base64"]
],
"tokenizer_path": "/home/bd4sur/ai/Nano/tokenizer/tokenizer_16384.json",
"random_seed": 39,
"batch_size": 80,
"gradient_accumulation_steps": 1,
"grad_clip": 1.0,
"dropout": 0.0,
"learning_rate": 5e-4,
"weight_decay": 1e-1,
"beta1": 0.9,
"beta2": 0.95,
"decay_lr": true,
"warmup_iters": 500,
"lr_decay_iters": 1e9,
"min_lr": 6e-5,
"eval_interval": 500,
"log_interval": 10,
"eval_iters": 2,
"backend": "nccl",
"device": "cuda",
"sdp_kernel": "flash",
"dtype": "bfloat16",
"use_amp": true
}