spoomplesmaxx-base-qwen3-14b / hyperparameters.json
aimeri's picture
Upload folder using huggingface_hub
31a395a verified
raw
history blame contribute delete
487 Bytes
{
"stage": "CPT",
"model_id": "Qwen/Qwen3-14B-Base",
"num_epochs": 2,
"max_steps": -1,
"batch_size": 4,
"grad_accum": 8,
"effective_batch_size": 32,
"learning_rate": 3e-05,
"weight_decay": 0.1,
"warmup_ratio": 0.0,
"max_grad_norm": 1.0,
"seed": 42,
"cache_key": "20be9f8f8ac0d877_Qwen3-14B-",
"domain_counts": {},
"domain_eval_domains": [],
"max_seq_length": 3072,
"prepared_max_seq_length": 16384,
"chunked_loss": false,
"chunked_loss_size": 1024
}