benchmarks:
  batch_size: '32'
  targets:
  - scope
dataset:
  directory: ./tmp/data/mdcath/hf_dataset/mdcath_split/
  filter_on_column:
    replica:
    - '0'
    - '1'
    - '2'
    - '3'
    - '4'
    temperature:
    - '320'
metadata:
  adapter_name: ProtProfileMD_LoRA
  inference: false
  log_path: ./tmp/runs/ProtProfileMD_20251113_072928_large_batch_size_again/logs
  name: ProtProfileMD
  run_id: ProtProfileMD_20251113_072928_large_batch_size_again
  version: 1.0
  weights: null
model:
  base_model: Rostlab/ProstT5
  base_model_kwargs:
    output_loading_info: false
    use_safetensors: true
  loss_function: kldiv
  loss_function_kwargs:
    reduction: batchmean
  profile_head: linear
  profile_head_kwargs:
    dropout: 0.1
    hidden_size: 1024
    num_classes: 20
  tokenizer:
    tokenizer_model: Rostlab/ProstT5
    tokenizer_model_kwargs:
      do_lower_case: false
      legacy: false
      use_fast: true
training:
  data_collator:
    pad_to_multiple_of: 8
    padding: true
  ddp: true
  devices: 0,1,2,3
  lora:
    bias: none
    inference_mode: false
    lora_alpha: 16
    lora_dropout: 0.05
    modules_to_save:
    - profile_head
    r: 8
    target_modules:
    - q
    - v
    use_dora: false
    use_rslora: false
  quantize: null
  save_dir: ./tmp/runs/ProtProfileMD_20251113_072928_large_batch_size_again/model
  training_args:
    batch_eval_metrics: false
    eval_on_start: true
    eval_steps: 32
    eval_strategy: steps
    gradient_accumulation_steps: 10
    label_names:
    - profiles
    learning_rate: 0.001
    logging_steps: 1
    logging_strategy: steps
    lr_scheduler_type: cosine
    num_train_epochs: 6
    output_dir: ./tmp/runs/ProtProfileMD_20251113_072928_large_batch_size_again/training
    per_device_eval_batch_size: 32
    per_device_train_batch_size: 5
    remove_unused_columns: true
    report_to: wandb
    save_steps: 300
    save_strategy: steps
    save_total_limit: 16
    seed: 42
    warmup_steps: 200
  wandb:
    project: protprofilemd
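# Derived note (an assumption, not part of the logged config): if DDP runs on all
# four devices listed under training.devices, the effective global batch size is
#   per_device_train_batch_size (5) x devices (4) x gradient_accumulation_steps (10)
#   = 200 sequences per optimizer step.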