# training-scripts / train_qwen3_codeforces.py
# albertlieadrian's picture
# Upload train_qwen3_codeforces.py with huggingface_hub
# ed82e98 verified
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "trl>=0.12.0",
# "peft>=0.7.0",
# "transformers>=4.36.0",
# "accelerate>=0.24.0",
# "trackio",
# ]
# ///
import trackio
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
# --- Dataset loading --------------------------------------------------------
# Pull the open-r1/codeforces-cots train split and keep only the first 20
# rows so this demo run finishes quickly.
print("πŸ“¦ Loading dataset...")
full_dataset = load_dataset("open-r1/codeforces-cots", split="train")
dataset = full_dataset.select(range(20))  # 20-example demo subset
print(f"βœ… Dataset loaded: {len(dataset)} examples")
# Format the dataset - convert messages to text format for SFT
def format_for_sft(example):
    """Flatten a chat-style ``messages`` list into one plain-text string.

    Each system/user/assistant turn becomes a ``Role: content`` paragraph
    separated by blank lines; turns with any other role are silently
    skipped. Returns a dict with a single ``"text"`` key so ``Dataset.map``
    yields a text column suitable for SFT training.
    """
    role_labels = {"system": "System", "user": "User", "assistant": "Assistant"}
    parts = []
    for turn in example.get("messages", []):
        label = role_labels.get(turn.get("role", "unknown"))
        if label is not None:
            parts.append(f"{label}: {turn.get('content', '')}\n\n")
    # strip() drops the trailing blank-line separator after the last turn
    return {"text": "".join(parts).strip()}
# --- Prompt formatting -------------------------------------------------------
# Replace every original column with a single "text" column produced by
# format_for_sft, then preview the first 200 chars of the first example.
print("πŸ”„ Formatting dataset...")
dataset = dataset.map(format_for_sft, remove_columns=dataset.column_names)
preview = dataset[0]["text"][:200]
print(f" Formatted to text: {preview}...")
# --- Training configuration --------------------------------------------------
# Settings are grouped by concern (Hub publishing, optimization schedule,
# logging/checkpointing) and merged into one SFTConfig.
hub_settings = dict(
    output_dir="qwen3-0.6b-codeforces-sft",
    push_to_hub=True,  # CRITICAL: publish the result to the Hub
    hub_model_id="albertlieadrian/qwen3-0.6b-codeforces-sft",
    hub_strategy="every_save",
)
run_settings = dict(
    # Sized for the tiny 20-example demo dataset
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    max_length=2048,  # Code problems need longer context
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
)
logging_settings = dict(
    logging_steps=5,
    save_strategy="no",  # Skip saving for quick demo
    save_total_limit=0,
    # Trackio experiment monitoring
    report_to="trackio",
    project="qwen3-codeforces-demo",
    run_name="20-examples-demo",
)
config = SFTConfig(**hub_settings, **run_settings, **logging_settings)
# --- LoRA adapter configuration ----------------------------------------------
# Rank-16 adapters on the attention q/v projections keep the trainable
# parameter count small — efficient for the 0.6B base model.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
# --- Train, push, wrap up ------------------------------------------------------
def _text_column(example):
    """Formatting hook for SFTTrainer: return the prebuilt text column."""
    return example["text"]

print("🎯 Initializing trainer...")
trainer = SFTTrainer(
    model="Qwen/Qwen3-0.6B",
    args=config,
    train_dataset=dataset,
    formatting_func=_text_column,
    peft_config=peft_config,
)
print("πŸš€ Starting training...")
trainer.train()
print("πŸ’Ύ Pushing to Hub...")
trainer.push_to_hub()
trackio.finish()  # close the Trackio run so logged metrics are flushed
print("βœ… Complete! Model at: https://huggingface.co/albertlieadrian/qwen3-0.6b-codeforces-sft")