---
# SFT (instruction-tuning) run configuration for a 24B model with LoRA adapters,
# starting from a merged CPT checkpoint. Reconstructed into valid block YAML
# from a whitespace-mangled source; section nesting marked NOTE(review) below
# could not be recovered with certainty and should be checked against the loader.

run:
  run_dir: "./runs/instruct_run_24b"
  seed: 42

# WandB integration for experiment tracking
# NOTE(review): placed at top level; the mangled source did not preserve whether
# this was nested under `run` — confirm against the config loader's schema.
wandb:
  enabled: true  # Set to true to enable wandb logging
  project: "sft-training"  # WandB project name
  entity: null  # WandB entity/team (optional)
  name: null  # Run name (optional, will auto-generate if null)
  tags: ["sft-lora", "24b-Devstral"]  # List of tags for the run (e.g., ["lora", "qlora", "experiment-1"])
  notes: null  # Run description/notes (optional)

model:
  # Local merged CPT checkpoint (a filesystem path, not an HF repo id).
  # (Previous comment said "Qwen2.5-Coder-14B", which contradicted this path.)
  repo_id: "./CPT/runs/cpt_run_v1/merged_24b_cpt_lora"
  revision: null  # Used only when repo_id is a HF repo (not a local path)
  base_local_dir: "base_model"
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: "auto"
  torch_dtype: "bfloat16"  # "float16" | "bfloat16" | "float32"

  # QLoRA (4-bit quantization; the bnb_* keys only take effect when use_4bit: true)
  use_4bit: false
  bnb_4bit_quant_type: "nf4"
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: "bfloat16"

  # optional: "flash_attention_2" | "sdpa" | null
  attn_implementation: null

data:
  train_jsonl: "../sft_dataset.jsonl"
  eval_jsonl: null
  eval_split_ratio: 0.1

  # Field names in your JSONL data
  instruction_field: "instruction"  # This will be the system prompt
  input_field: "input"  # This is the task description
  output_field: "output"  # This is the analysis + selection

  # Formatting options
  format_type: "custom"  # "chatml" | "alpaca" | "custom"

  # For chatml format
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format

    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent

    ## Rules
    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with ##OUTPUT, end with ##SELECT, terminate with ##

    ## Example
    ##TASK
    Add webhook subscription support

    ##OUTPUT
    The webhook system routes events via EventClass enum. Flow: webhook → EventClass → handler → processing. The EventClass enum (crates/common_enums/src/enums.rs::EventClass) must add Subscriptions variant because it defines event routing—without this, subscription events cannot be processed. The SubscriptionStatus impl (crates/common_enums/src/transformers.rs::SubscriptionStatus) must map to EventType because it converts status to events—without this, status changes don't trigger webhooks. These are coupled: EventClass routes to handlers that use SubscriptionStatus mappings.

    ##SELECT
    crates/common_enums/src/enums.rs::EventClass
    crates/common_enums/src/transformers.rs::SubscriptionStatus

  # For custom format (only used when format_type="custom")
  custom_template: "##INSTRUCTION\n{instruction}<|im_end|>\n##TASK\n{input}<|im_end|>\n##OUTPUT\n{output}<|im_end|>"

  max_length: 2048
  shuffle: true
  num_proc: 4

peft:
  enabled: true
  r: 8
  lora_alpha: 16  # conventionally 2*r
  lora_dropout: 0.05
  bias: "none"
  target_modules: "auto"

train:
  # max_steps: 10
  num_train_epochs: 6
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. PyYAML)
  # treat bare "1e-4" as a *string*, not a float.
  learning_rate: 1.0e-4
  weight_decay: 0.0
  warmup_ratio: 0.08
  lr_scheduler_type: "cosine"
  optim: "adamw_torch"  # Changed from paged_adamw_8bit (requires use_4bit=true)
  max_grad_norm: 0.8
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: "steps"
  save_steps: 500
  save_total_limit: 20
  evaluation_strategy: "steps"
  eval_steps: 100
  load_best_model_at_end: true  # requires save/eval strategies and steps to be compatible

  # Early stopping
  early_stopping:
    enabled: true
    patience: 3  # Number of evaluations with no improvement before stopping
    min_delta: 0.001  # Minimum change to qualify as improvement
    metric: "eval_loss"  # Metric to monitor
    mode: "min"  # "min" for loss, "max" for accuracy/etc.

  # NOTE(review): placed under `train`; nesting was ambiguous in the mangled source.
  resume_from_checkpoint: "auto"

merge:
  enabled: true
  merged_dtype: "float16"  # NOTE(review): training uses bfloat16 — confirm float16 export is intended
  max_shard_size: "2GB"
  output_dir: "./merged_24b_instruct_lora"