"""
YOFO Training Script.

This script fine-tunes a language model using the YOFO method.
It uses LoRA for efficient training on consumer GPUs.

Key features:
- Loads mapped YOFO data
- Uses YOFOTemplateBuilder for correct tokenization
- Trains with L_answer loss (focusing only on the 12 safety bits)
- Saves the LoRA adapter
"""

import os
import sys
import json

import torch
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForTokenClassification,
)
from peft import LoraConfig, get_peft_model, TaskType

# Make the project root importable so that src.data.template resolves when
# the script is run from the repository root.
sys.path.append(os.getcwd())
from src.data.template import YOFOTemplateBuilder
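
# Each line of the mapped YOFO data is expected to be a JSON record with the
# fields read in YOFODataset.__getitem__ below (shape inferred from that code,
# values illustrative):
#   {"prompt": "...", "response": "...", "requirements": [...]}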


class YOFODataset(Dataset):
    """YOFO examples stored as JSON Lines, tokenized lazily per item."""

    def __init__(self, data_path, builder):
        self.data = []
        with open(data_path, 'r', encoding='utf-8') as f:
            for line in f:
                self.data.append(json.loads(line))
        self.builder = builder
        print(f"Loaded {len(self.data)} examples from {data_path}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]

        # The template builder handles tokenization and produces per-token
        # labels so that loss is computed only over the answer bits.
        yofo_input = self.builder.build_template(
            prompt=item['prompt'],
            response=item['response'],
            requirements=item['requirements'],
        )

        return {
            "input_ids": yofo_input.input_ids,
            "attention_mask": yofo_input.attention_mask,
            "labels": yofo_input.labels,
        }


def train():
    # Hyperparameters.
    MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
    OUTPUT_DIR = "models/yofo_lora"
    BATCH_SIZE = 4
    LEARNING_RATE = 2e-4
    EPOCHS = 3

    print(f"Initializing training with model: {MODEL_ID}")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # Fall back to EOS so that batches can be padded.
        tokenizer.pad_token = tokenizer.eos_token

    builder = YOFOTemplateBuilder(tokenizer)

    train_dataset = YOFODataset("data/processed/train_yofo.jsonl", builder)
    val_dataset = YOFODataset("data/processed/val_yofo.jsonl", builder)
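
    # Optional sanity check, assuming YOFOTemplateBuilder marks non-answer
    # positions with -100 (the ignore index of Hugging Face's causal-LM loss);
    # if so, only the answer bits contribute to training loss.
    sample = train_dataset[0]
    n_supervised = sum(1 for t in sample["labels"] if t != -100)
    print(f"Supervised (answer) tokens in first example: {n_supervised}")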

    # Prefer bfloat16 where the GPU supports it, otherwise fall back to float16.
    use_bf16 = torch.cuda.is_bf16_supported()
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16 if use_bf16 else torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    # LoRA adapters on all attention and MLP projection matrices.
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
    )

    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
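
    # With r=16 across these seven projections, the adapter is typically on
    # the order of 1% of the 1.5B base parameters (rough estimate; the exact
    # count is printed above).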

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        # Effective batch size per device: BATCH_SIZE * 4 = 16.
        gradient_accumulation_steps=4,
        learning_rate=LEARNING_RATE,
        weight_decay=0.01,
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        # Match mixed precision to the dtype the model was loaded in;
        # fp16=True on a bfloat16 model can raise dtype errors.
        bf16=use_bf16,
        fp16=not use_bf16,
        report_to="none",
        # Keep the custom input_ids/attention_mask/labels columns.
        remove_unused_columns=False,
    )
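
    # DataCollatorForTokenClassification pads input_ids and attention_mask via
    # the tokenizer and pads "labels" with -100 (the index the causal-LM loss
    # ignores), so the per-token YOFO labels survive batching intact.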
    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
    )

    print("\n🚀 Starting training...")
    trainer.train()

    print(f"\n💾 Saving model to {OUTPUT_DIR}")
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)


if __name__ == "__main__":
    os.makedirs("models", exist_ok=True)
    train()
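
# Run from the repository root so that sys.path.append(os.getcwd()) makes the
# src/ package importable, e.g.:
#   python scripts/train_yofo.py   # hypothetical path; adjust to this file's location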