---
# config.yaml

# General settings for the training run
general:
  # Directory to save the final model adapters
  output_dir: "/leonardo_scratch/large/userexternal/apetruzz/ale_priv/SpecialIssue/results/v3/user_gemma_3_1b_it"

# Model configuration
model:
  # Base model from Hugging Face Hub (here: a local snapshot path)
  name: "/leonardo_scratch/large/userexternal/apetruzz/ale_priv/base_models/gemma-3-1b-it"
  # Maximum sequence length for the tokenizer and model
  max_seq_length: 2048
  trust_remote_code: true
  chat_template_file: "/leonardo_work/IscrC_SYMBREC/ale/UserSimTraining/data/chat_template.jinja"

# Dataset configuration
dataset:
  # Dataset from Hugging Face Hub or local path
  name: "/leonardo_work/IscrC_SYMBREC/ale/UserSimTraining/data/all_processed_prompts_new.jsonl"
  # The name of the column in the dataset that contains the text
  text_field: "prompt"

# PEFT (LoRA) configuration
peft_config:
  lora_alpha: 32
  lora_dropout: 0.1
  r: 128
  bias: "none"
  task_type: "CAUSAL_LM"
  target_modules: "all-linear"

# SFTTrainer-specific arguments
trainer_args:
  packing: false

# Logging configuration
logging:
  # Set to true to enable Weights & Biases logging
  use_wandb: false

# Hugging Face TrainingArguments
# See https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments
training_args:
  num_train_epochs: 5
  per_device_train_batch_size: 8
  gradient_accumulation_steps: 1
  optim: "adamw_torch"
  logging_steps: 25
  learning_rate: 0.0002  # 2e-4
  weight_decay: 0.001
  fp16: false
  bf16: true
  max_grad_norm: 1.0
  max_steps: -1  # -1 means "derive total steps from num_train_epochs"
  warmup_ratio: 0.05
  lr_scheduler_type: "constant"
  # evaluation_strategy: "epoch"
  save_strategy: "epoch"