"""
YOFO Training Script.
This script fine-tunes a language model using the YOFO method.
It uses LoRA for efficient training on consumer GPUs.
Key features:
- Loads mapped YOFO data
- Uses YOFOTemplateBuilder for correct tokenization
- Trains with L_answer loss (focusing only on the 12 safety bits)
- Saves the LoRA adapter
"""
import os
import sys
import json

import torch
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForTokenClassification,
)
from peft import LoraConfig, get_peft_model, TaskType

# Add the repository root to the path so the src.* imports resolve when running from the repo root
sys.path.append(os.getcwd())
from src.data.template import YOFOTemplateBuilder
class YOFODataset(Dataset):
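    """Dataset of mapped YOFO examples stored as JSON Lines (one record per line).

    Each record is expected to contain at least the keys used in __getitem__ below:
    'prompt', 'response', and 'requirements'; the exact field contents come from the
    upstream data-mapping step.
    """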
    def __init__(self, data_path, builder):
        self.data = []
        with open(data_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # Skip blank lines (e.g. a trailing newline)
                self.data.append(json.loads(line))
        self.builder = builder
        print(f"Loaded {len(self.data)} examples from {data_path}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        # Build the YOFO input for one (prompt, response, requirements) triple
        yofo_input = self.builder.build_template(
            prompt=item['prompt'],
            response=item['response'],
            requirements=item['requirements']
        )
        # Return a dict compatible with the HuggingFace Trainer
        return {
            "input_ids": yofo_input.input_ids,
            "attention_mask": yofo_input.attention_mask,
            "labels": yofo_input.labels
        }
def train():
    # --- Configuration ---
    # Using a small, efficient model for demonstration.
    # Qwen2.5-1.5B-Instruct is capable and fits on a Colab T4 or a typical consumer GPU.
    # You can swap this for Qwen2-VL-2B if you specifically want the VLM from the paper.
    MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
    OUTPUT_DIR = "models/yofo_lora"
    BATCH_SIZE = 4          # Small batch size for consumer GPUs
    LEARNING_RATE = 2e-4
    EPOCHS = 3

    print(f"Initializing training with model: {MODEL_ID}")

    # 1. Load tokenizer & template builder
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    builder = YOFOTemplateBuilder(tokenizer)
    # 2. Load datasets
    train_dataset = YOFODataset("data/processed/train_yofo.jsonl", builder)
    val_dataset = YOFODataset("data/processed/val_yofo.jsonl", builder)
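
    # Optional sanity check (a sketch; assumes the dict returned by __getitem__ above):
    # inspect one example and confirm that only the answer positions carry real labels.
    # ex = train_dataset[0]
    # print("supervised label positions:", sum(1 for t in ex["labels"] if t != -100))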
    # 3. Load the base model
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    # 4. Configure LoRA
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=16,               # LoRA rank
        lora_alpha=32,
        lora_dropout=0.05,
        # All attention and MLP projection layers of the Qwen2 architecture
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
    # 5. Set up the Trainer
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=4,  # Effective batch size per device: 4 * 4 = 16
        learning_rate=LEARNING_RATE,
        weight_decay=0.01,
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        # Match mixed precision to the dtype the model was loaded in above;
        # enabling fp16 on a bfloat16 model can cause dtype/scaler errors.
        bf16=torch.cuda.is_bf16_supported(),
        fp16=not torch.cuda.is_bf16_supported(),
        report_to="none",               # Disable wandb for simplicity
        remove_unused_columns=False     # Important for custom datasets
    )
    # We need a data collator that pads variable-length examples in a batch.
    # The standard default_data_collator does not pad 'labels' with -100;
    # DataCollatorForTokenClassification pads labels with -100 by default,
    # so padded positions are ignored by the loss.
    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
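    # Illustration (sketch, assuming a right-padding tokenizer): a length-3 example
    # padded to the batch maximum of 5 comes out as
    #   input_ids      -> [t1,   t2,   t3,   PAD,  PAD ]
    #   attention_mask -> [1,    1,    1,    0,    0   ]
    #   labels         -> [-100, -100, b1,   -100, -100]
    # so padded positions never contribute to the loss.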
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
    )

    # 6. Train
    print("\n🚀 Starting training...")
    trainer.train()

    # 7. Save the LoRA adapter and tokenizer
    print(f"\n💾 Saving model to {OUTPUT_DIR}")
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
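
    # To reuse the adapter later (a sketch using the standard PEFT loading API):
    #   from peft import PeftModel
    #   base = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", trust_remote_code=True)
    #   model = PeftModel.from_pretrained(base, OUTPUT_DIR)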
if __name__ == "__main__":
    # Ensure the output directory exists
    os.makedirs("models", exist_ok=True)
    train()