#!/usr/bin/env python3
# /// script
# dependencies = [
# "trl>=0.12.0",
# "peft>=0.7.0",
# "transformers>=4.36.0",
# "accelerate>=0.24.0",
# "trackio",
# ]
# ///
"""
SFT training script for Qwen/Qwen2.5-0.5B model.
This script demonstrates:
- Trackio integration for real-time monitoring
- LoRA/PEFT for efficient training
- Proper Hub saving configuration
- Train/eval split for monitoring progress
- Optimized training parameters for small model testing
"""
import trackio
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
# Load the "train" split of the trl-lib/Capybara dataset from the Hub.
print("📦 Loading dataset...")
dataset = load_dataset("trl-lib/Capybara", split="train")
print(f"✅ Dataset loaded: {len(dataset)} examples")

# Hold out 10% of the examples for evaluation; the fixed seed keeps the
# split reproducible from one run to the next.
print("🔀 Creating train/eval split...")
dataset_split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]
print(f" Train: {len(train_dataset)} examples")
print(f" Eval: {len(eval_dataset)} examples")
# Training configuration: Hub publishing, optimisation schedule, logging,
# checkpointing, evaluation cadence and experiment tracking in one place.
print("⚙️ Configuring training parameters...")
config = SFTConfig(
    # CRITICAL: Hub settings - Save model to Hugging Face Hub
    output_dir="qwen-0.5b-sft-capybara",
    push_to_hub=True,
    hub_model_id="vgtomahawk/qwen-0.5b-sft-capybara",
    hub_strategy="every_save",  # Push checkpoints to Hub
    # Training parameters
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # Effective batch size = 4 * 4 = 16 per device
    learning_rate=2e-5,
    # Logging & checkpointing
    logging_steps=10,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,  # Keep only last 2 checkpoints
    # Evaluation (same cadence as checkpointing: every 100 steps)
    eval_strategy="steps",
    eval_steps=100,
    # Optimization
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    # Monitoring with Trackio
    # NOTE(review): `project` (and `eval_strategy` above) are presumably only
    # accepted by fairly recent transformers releases - confirm that the
    # minimum version pinned in the script header actually supports them.
    report_to="trackio",
    project="qwen-sft-demo",
    run_name="qwen-0.5b-baseline",
)
# LoRA configuration for efficient training: instead of updating the full
# model, adapters are attached to the modules listed in `target_modules`.
print("🔧 Setting up LoRA configuration...")
peft_config = LoraConfig(
    r=16,  # Adapter rank
    lora_alpha=32,  # Scaling factor (2 * r here)
    lora_dropout=0.05,
    bias="none",  # Leave bias terms untouched
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],  # Only the query/value projections
)
# Initialize trainer. The base model is passed by its Hub id, together with
# the train/eval datasets, the training arguments and the LoRA settings.
print("🎯 Initializing SFT trainer...")
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B",
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=config,
    peft_config=peft_config,
)
# Start training; the separator lines frame the trainer's own log output.
print("🚀 Starting training...")
print("=" * 60)
trainer.train()

# Push final model to Hub (explicit final upload after training finishes).
print("=" * 60)
print("💾 Pushing final model to Hub...")
trainer.push_to_hub()

# Complete. The model URL is derived from the configured repo id so the
# message cannot drift from where the model was actually pushed.
print("✅ Training complete!")
print(f"🔗 Model available at: https://huggingface.co/{config.hub_model_id}")
print("📊 View training metrics at: https://huggingface.co/spaces/vgtomahawk/trackio")