trainer: "any-order-flow"
dataset: "safe-drugs"

# HuggingFace dataset configuration
hf_dataset:
  name: "datamol-io/safe-gpt"
  smiles_column: "smiles"  # Adjust based on actual column name in the dataset

model:
  hidden_size: 768
  n_heads: 12
  cond_dim: 128
  dropout: 0.05
  n_blocks: 12
  torch_dtype: 'float32'  # Options: 'float32', 'float16', 'bfloat16'

interpolant:
  type: "any-order"
  tokens: null # filled in automatically
  pad_token: null # filled in automatically
  mask_token: null # filled in automatically
  max_length: 256
  insert_schedule:
    type: "linear"
  unmask_schedule:
    type: "linear"

training:
  only_embed_insert: true
  batch_size: 2048
  per_gpu_batch_size: 64 # Gradient accumulation happens automatically
  cpus: 4
  learning_rate: 3e-4
  nodes: 1
  devices: 2
  max_steps: 500000
  weight_decay: 0.03
  checkpoint_dir: "checkpoints/pretrain_mol"
  save_top_k: 3
  save_every_n_steps: 1000  # Save checkpoint every 1k steps (for streaming datasets)
  # save_every_n_epochs: 1  # Not used with streaming datasets
  loss_fn:
    unmask: "elbo"
    insert: "expectation"
  reset_lr: false
  warmup_steps: 2000
  ema_decay: 0.9999
  filter_max_length: false
  
wandb:
  entity: null  # set to your W&B entity, or leave null to use the default
  project: "a2d2-mol"
  name: "a2d2-mol"
  path: "./wandb"