# train_layerwise.yml — M3: Layerwise attention + mean pooling, no centering
# Project identifier for this run configuration.
# NOTE(review): presumably also the experiment-tracker project name
# (the config enables `use_wandb`) — confirm against the config loader.
project_name: "deep-stylometry"
# Dataset selection, tokenization and batching settings.
# The child keys are nested under `data:`; at column 0 they would parse as
# unrelated top-level keys and leave `data` itself null.
data:
  ds_name: "halvest"
  batch_size: 64
  # Same checkpoint string as `base_checkpoint` in the model section.
  tokenizer_name: "answerdotai/ModernBERT-base"
  # NOTE(review): max_length / padding / truncation / add_special_tokens
  # look like Hugging Face tokenizer call arguments — confirm they are
  # forwarded unchanged by the data module.
  max_length: 512
  padding: "do_not_pad"
  truncation: "longest_first"
  add_special_tokens: true
  # NOTE(review): presumably passed to the dataset `map` step (batch size
  # and cache reuse) — confirm against the preprocessing code.
  map_batch_size: 1000
  load_from_cache_file: true
  # Dataset subsets used for this run.
  subsets: ["base-2", "base-4", "base-6", "base-8", "base-10"]
  shuffle: true
# Model architecture settings for the layerwise-pooling variant.
# The child keys are nested under `model:`; at column 0 they would parse as
# unrelated top-level keys and leave `model` itself null.
model:
  # Same checkpoint string as `tokenizer_name` in the data section.
  base_checkpoint: "answerdotai/ModernBERT-base"
  dropout: 0.1
  # NOTE(review): presumably the hidden-size multiplier of a projection /
  # feed-forward head — confirm against the model definition.
  expansion_ratio: 4
  # Selects the layerwise pooling variant this config is named after.
  pooling_method: "layerwise"
  skip_list: false
  # Mean-centering is disabled for this variant.
  mean_center: false
# Optimization, hardware and logging settings for the training run.
# The child keys are nested under `train:`; at column 0 they would parse as
# unrelated top-level keys and leave `train` itself null.
train:
  # --- Objective ---
  loss: "info_nce"
  tau: 0.5
  # NOTE(review): a margin is not normally part of an InfoNCE objective —
  # confirm whether the selected loss reads this value or ignores it.
  margin: 0.32

  # --- Optimizer ---
  lr: 3.0e-5
  weight_decay: 0.1

  # --- Hardware / distribution ---
  device: "gpu"
  num_devices: 4
  strategy: "ddp_find_unused_parameters_true"
  process_group_backend: "nccl"

  # --- Schedule ---
  max_epochs: 1
  # NOTE(review): both validation-scheduling keys are null. Confirm how the
  # loader maps null here (framework default vs. validation disabled); some
  # trainers reject this combination when it is forwarded verbatim.
  val_check_interval: null
  check_val_every_n_epoch: null
  log_every_n_steps: 1
  accumulate_grad_batches: 1
  # null leaves gradient clipping unset in this config.
  gradient_clip_val: null
  precision: "16-mixed"

  # --- Experiment tracking ---
  use_wandb: true
  log_model: false
  watch: "all"