{
  "data_dir": "/nfsd/voce/machine_learning/experiments/artifacts/processed",
  "tokenizer_path": "/nfsd/voce/machine_learning/experiments/artifacts/tokenizer",
  "output_dir": "/nfsd/voce/machine_learning/experiments/cb-pdmx-bm",
  "model_architecture": "microsoft/codebert-base",
  "random_init": false,
  "hidden_size": 768,
  "num_hidden_layers": 12,
  "num_attention_heads": 12,
  "intermediate_size": 3072,
  "max_position_embeddings": 514,
  "max_length": 512,
  "mlm_probability": 0.15,
  "per_device_train_batch_size": 72,
  "per_device_eval_batch_size": 72,
  "num_train_epochs": 10,
  "learning_rate": 0.0002,
  "lr_scheduler_type": "cosine",
  "max_grad_norm": 1.0,
  "weight_decay": 0.01,
  "warmup_ratio": 0.1,
  "max_steps": -1,
  "logging_steps": 50,
  "eval_steps": 1000,
  "save_steps": 1000,
  "seed": 42,
  "pretokenized_shards_dir": "/nfsd/voce/machine_learning/experiments/codebert/baroque-music-shards/mlm",
  "resume_from_checkpoint": null,
  "dataloader_num_workers": 8,
  "early_stopping": true,
  "early_stopping_patience": 5,
  "early_stopping_threshold": 0.0,
  "bf16": true,
  "gradient_accumulation_steps": 2,
  "optim": "adamw_torch_fused",
  "dataloader_pin_memory": true,
  "dataloader_prefetch_factor": 4,
  "save_total_limit": 3,
  "torch_compile": false,
  "ddp_find_unused_parameters": false,
  "wandb_enabled": true,
  "wandb_project": "lilybert",
  "wandb_entity": null,
  "wandb_mode": "online",
  "wandb_run_name": "cb-pdmx-baroquemusic",
  "tensorboard_enabled": true,
  "tensorboard_log_dir": "/nfsd/voce/machine_learning/experiments/cb-pdmx-bm/tensorboard"
}