lilybert / training_config.json
matteospanio's picture
Upload folder using huggingface_hub
dcfdd2e verified
{
"data_dir": "/nfsd/voce/machine_learning/experiments/artifacts/processed",
"tokenizer_path": "/nfsd/voce/machine_learning/experiments/artifacts/tokenizer",
"output_dir": "/nfsd/voce/machine_learning/experiments/cb-pdmx-bm",
"model_architecture": "microsoft/codebert-base",
"random_init": false,
"hidden_size": 768,
"num_hidden_layers": 12,
"num_attention_heads": 12,
"intermediate_size": 3072,
"max_position_embeddings": 514,
"max_length": 512,
"mlm_probability": 0.15,
"per_device_train_batch_size": 72,
"per_device_eval_batch_size": 72,
"num_train_epochs": 10,
"learning_rate": 0.0002,
"lr_scheduler_type": "cosine",
"max_grad_norm": 1.0,
"weight_decay": 0.01,
"warmup_ratio": 0.1,
"max_steps": -1,
"logging_steps": 50,
"eval_steps": 1000,
"save_steps": 1000,
"seed": 42,
"pretokenized_shards_dir": "/nfsd/voce/machine_learning/experiments/codebert/baroque-music-shards/mlm",
"resume_from_checkpoint": null,
"dataloader_num_workers": 8,
"early_stopping": true,
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0,
"bf16": true,
"gradient_accumulation_steps": 2,
"optim": "adamw_torch_fused",
"dataloader_pin_memory": true,
"dataloader_prefetch_factor": 4,
"save_total_limit": 3,
"torch_compile": false,
"ddp_find_unused_parameters": false,
"wandb_enabled": true,
"wandb_project": "lilybert",
"wandb_entity": null,
"wandb_mode": "online",
"wandb_run_name": "cb-pdmx-baroquemusic",
"tensorboard_enabled": true,
"tensorboard_log_dir": "/nfsd/voce/machine_learning/experiments/cb-pdmx-bm/tensorboard"
}