# config.yaml
# General settings for the training run
general:
  output_dir: "/leonardo_scratch/large/userexternal/apetruzz/ale_priv/SpecialIssue/results/v3/user_gemma_3_1b_it" # Directory to save the final model adapters
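# The sketches in the comments below assume this file is read with PyYAML
# into a dict named `cfg` (hypothetical; the actual training script is not
# part of this snapshot):
#
#   import yaml
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)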
# Model configuration
model:
  name: "/leonardo_scratch/large/userexternal/apetruzz/ale_priv/base_models/gemma-3-1b-it" # Local snapshot of the base model (a Hugging Face Hub ID would also work here)
  max_seq_length: 2048 # Maximum sequence length for the tokenizer and model
  trust_remote_code: true
  chat_template_file: "/leonardo_work/IscrC_SYMBREC/ale/UserSimTraining/data/chat_template.jinja"
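# A minimal sketch of consuming this section with the standard transformers
# API (an assumption about the surrounding script, not shown in this repo):
#
#   import torch
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   model = AutoModelForCausalLM.from_pretrained(
#       cfg["model"]["name"],
#       trust_remote_code=cfg["model"]["trust_remote_code"],
#       torch_dtype=torch.bfloat16,  # matches bf16: true below
#   )
#   tokenizer = AutoTokenizer.from_pretrained(cfg["model"]["name"])
#   with open(cfg["model"]["chat_template_file"]) as f:
#       tokenizer.chat_template = f.read()  # override the bundled chat template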
# Dataset configuration
dataset:
  name: "/leonardo_work/IscrC_SYMBREC/ale/UserSimTraining/data/all_processed_prompts_new.jsonl" # Dataset from Hugging Face Hub or local path
  text_field: "prompt" # The name of the column in the dataset that contains the text
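# Sketch of loading the dataset given these fields, assuming the Hugging Face
# `datasets` library (the value above is a local JSONL file):
#
#   from datasets import load_dataset
#   dataset = load_dataset("json", data_files=cfg["dataset"]["name"], split="train")
#   text_column = cfg["dataset"]["text_field"]  # "prompt" holds the training text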
# PEFT (LoRA) configuration
peft_config:
  lora_alpha: 32
  lora_dropout: 0.1
  r: 128
  bias: "none"
  task_type: "CAUSAL_LM"
  target_modules: "all-linear"
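# These keys map one-to-one onto peft.LoraConfig, so a consuming script could
# plausibly unpack them directly (a sketch; target_modules: "all-linear"
# requires a reasonably recent peft release):
#
#   from peft import LoraConfig
#   lora_config = LoraConfig(**cfg["peft_config"])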
# SFTTrainer-specific arguments
trainer_args:
  packing: false
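# `packing` is an SFTTrainer option in TRL: false means each example is
# tokenized and padded/truncated individually rather than concatenated into
# fixed-length blocks. A sketch using the objects from the other comments in
# this file (where exactly `packing` and `max_seq_length` are passed differs
# across TRL versions, e.g. directly vs. via SFTConfig):
#
#   from trl import SFTTrainer
#   trainer = SFTTrainer(model=model, args=training_args,
#                        train_dataset=dataset, peft_config=lora_config)
#   trainer.train()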
# Logging configuration
logging:
  use_wandb: false # Set to true to enable Weights & Biases logging
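# In a typical transformers/TRL setup this flag would be translated into the
# trainer's `report_to` argument (an assumption about the surrounding script):
#
#   cfg["training_args"]["report_to"] = "wandb" if cfg["logging"]["use_wandb"] else "none"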
# Hugging Face TrainingArguments
# See https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments
training_args:
  num_train_epochs: 5
  per_device_train_batch_size: 8
  gradient_accumulation_steps: 1
  optim: "adamw_torch"
  logging_steps: 25
  learning_rate: 0.0002 # 2e-4
  weight_decay: 0.001
  fp16: false
  bf16: true
  max_grad_norm: 1.0
  max_steps: -1 # -1 means the total step count is derived from num_train_epochs
  warmup_ratio: 0.05 # note: the plain "constant" scheduler below ignores warmup; use "constant_with_warmup" to apply it
  lr_scheduler_type: "constant"
  # evaluation_strategy: "epoch" # renamed to eval_strategy in newer transformers releases
  save_strategy: "epoch"
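# All keys in this section are valid transformers.TrainingArguments
# parameters, so a consuming script could plausibly unpack them as-is
# (a sketch, not the actual training script):
#
#   from transformers import TrainingArguments
#   training_args = TrainingArguments(
#       output_dir=cfg["general"]["output_dir"],
#       **cfg["training_args"],
#   )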