modernbert-base-32k / training_config.json
HuaminChen's picture
Upload folder using huggingface_hub
cda0e01 verified
{
  "model_name_or_path": "answerdotai/ModernBERT-base",
  "rope_scaling_type": "yarn",
  "rope_scaling_factor": 4.0,
  "rope_original_max_position_embeddings": 8192,
  "yarn_beta_fast": 32.0,
  "yarn_beta_slow": 1.0,
  "yarn_extrapolation_factor": 1.0,
  "yarn_attn_factor": 1.0,
  "dataset_name": "/data/datasets/slimpajama_32k_1B",
  "model_max_length": 32768,
  "mlm_probability": 0.3,
  "max_train_samples": null,
  "preprocessing_num_workers": 4,
  "dataloader_num_workers": 4,
  "output_dir": "/data/outputs/modernbert-32k-retrieval",
  "per_device_train_batch_size": 6,
  "torch_compile": false,
  "gradient_accumulation_steps": 1,
  "learning_rate": 1e-05,
  "weight_decay": 0.01,
  "warmup_steps": 100,
  "warmup_ratio": 0.1,
  "num_train_epochs": 1,
  "max_grad_norm": 1.0,
  "lr_scheduler_type": "constant_with_warmup",
  "use_retrieval_masking": true,
  "retrieval_probability": 0.1,
  "min_distance_for_retrieval": 512,
  "use_ewc": true,
  "ewc_lambda": 1000.0,
  "ewc_samples": 100,
  "bf16": true,
  "logging_steps": 10,
  "save_steps": 500,
  "seed": 42
}