benchmarks:
  batch_size: '32'
  targets:
  - scope
dataset:
  directory: ./tmp/data/mdcath/hf_dataset/mdcath_split/
  filter_on_column:
    replica:
    - '0'
    - '1'
    - '2'
    - '3'
    - '4'
    temperature:
    - '320'
metadata:
  adapter_name: ProtProfileMD_LoRA
  inference: false
  log_path: ./tmp/runs/ProtProfileMD_20251113_072928_large_batch_size_again/logs
  name: ProtProfileMD
  run_id: ProtProfileMD_20251113_072928_large_batch_size_again
  version: 1.0
  weights: null
model:
  base_model: Rostlab/ProstT5
  base_model_kwargs:
    output_loading_info: false
    use_safetensors: true
  loss_function: kldiv
  loss_function_kwargs:
    reduction: batchmean
  profile_head: linear
  profile_head_kwargs:
    dropout: 0.1
    hidden_size: 1024
    num_classes: 20
  tokenizer:
    tokenizer_model: Rostlab/ProstT5
    tokenizer_model_kwargs:
      do_lower_case: false
      legacy: false
      use_fast: true
training:
  data_collator:
    pad_to_multiple_of: 8
    padding: true
  ddp: true
  devices: 0,1,2,3
  lora:
    bias: none
    inference_mode: false
    lora_alpha: 16
    lora_dropout: 0.05
    modules_to_save:
    - profile_head
    r: 8
    target_modules:
    - q
    - v
    use_dora: false
    use_rslora: false
  quantize: null
  save_dir: ./tmp/runs/ProtProfileMD_20251113_072928_large_batch_size_again/model
  training_args:
    batch_eval_metrics: false
    eval_on_start: true
    eval_steps: 32
    eval_strategy: steps
    gradient_accumulation_steps: 10
    label_names:
    - profiles
    learning_rate: 0.001
    logging_steps: 1
    logging_strategy: steps
    lr_scheduler_type: cosine
    num_train_epochs: 6
    output_dir: ./tmp/runs/ProtProfileMD_20251113_072928_large_batch_size_again/training
    per_device_eval_batch_size: 32
    per_device_train_batch_size: 5
    remove_unused_columns: true
    report_to: wandb
    save_steps: 300
    save_strategy: steps
    save_total_limit: 16
    seed: 42
    warmup_steps: 200
  wandb:
    project: protprofilemd
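# Derived note (an assumption, not part of the logged config): if DDP runs on all
# four devices listed under training.devices, the effective global batch size is
#   per_device_train_batch_size (5) x devices (4) x gradient_accumulation_steps (10)
#   = 200 sequences per optimizer step.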