# File size: 2,844 Bytes
# Revision hashes shown by the file viewer: 2d67f40 66abccc 2d67f40 ed82e98 2d67f40
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "trl>=0.12.0",
# "peft>=0.7.0",
# "transformers>=4.36.0",
# "accelerate>=0.24.0",
# "trackio",
# ]
# ///
import trackio
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
# Load the dataset and keep only a small slice so the demo finishes quickly.
print("📦 Loading dataset...")
full_dataset = load_dataset("open-r1/codeforces-cots", split="train")
# Only the first 20 examples are used for this quick demo.
dataset = full_dataset.select(range(20))
# The status message must stay on a single line: the literal was previously
# broken in two by a mis-decoded byte inside the check-mark emoji.
print(f"✅ Dataset loaded: {len(dataset)} examples")
# Format the dataset - convert messages to text format for SFT
def format_for_sft(example):
    """Convert a chat-style example into a single text string for training.

    Every message whose role is ``system``, ``user`` or ``assistant`` becomes
    a ``"<Label>: <content>"`` paragraph; paragraphs are separated by a blank
    line. Messages with any other role are skipped.

    Args:
        example: Mapping that may carry a ``"messages"`` list of dicts with
            ``"role"`` and ``"content"`` keys. A missing or ``None``
            ``"messages"`` value is treated as an empty conversation.

    Returns:
        A dict ``{"text": ...}`` holding the joined conversation with
        leading and trailing whitespace removed.
    """
    role_labels = {"system": "System", "user": "User", "assistant": "Assistant"}
    # `or []` also covers a key that is present but explicitly None.
    messages = example.get("messages") or []
    paragraphs = []
    for msg in messages:
        label = role_labels.get(msg.get("role"))
        if label is None:
            # Unknown roles contribute nothing to the training text.
            continue
        content = msg.get("content")
        # A None content is treated like a missing one instead of being
        # rendered as the literal text "None".
        paragraphs.append(f"{label}: {'' if content is None else content}")
    return {"text": "\n\n".join(paragraphs).strip()}
# Collapse every example into a single "text" column, dropping all others.
# NOTE(review): the leading glyph in the message below looks like a
# mis-encoded emoji; the original character cannot be recovered from here.
print("๐ Formatting dataset...")
source_columns = dataset.column_names
dataset = dataset.map(format_for_sft, remove_columns=source_columns)
# Show the first 200 characters of the first formatted example as a preview.
first_text = dataset[0]["text"]
print(f" Formatted to text: {first_text[:200]}...")
# Training configuration for the quick demo run.
config = SFTConfig(
    # Output location and Hub publishing.
    output_dir="qwen3-0.6b-codeforces-sft",
    hub_model_id="albertlieadrian/qwen3-0.6b-codeforces-sft",
    push_to_hub=True,
    hub_strategy="every_save",
    # Checkpoint saving is skipped for the quick demo.
    save_strategy="no",
    save_total_limit=0,
    # Epochs, batch sizing and sequence length for the small dataset.
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    max_length=2048,  # code problems need longer context
    # Learning-rate schedule.
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Logging and experiment tracking.
    logging_steps=5,
    report_to="trackio",
    project="qwen3-codeforces-demo",
    run_name="20-examples-demo",
)
# LoRA adapter settings - an efficient way to fine-tune the 0.6B model.
lora_target_modules = ["q_proj", "v_proj"]
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
# Build the trainer from the model id, the formatted dataset and both configs.
# NOTE(review): the leading glyph in the message below looks like a
# mis-encoded emoji (probably a target symbol) - confirm before changing it.
print("๐ฏ Initializing trainer...")


def _text_of(example):
    """Return the example's already-formatted ``text`` field."""
    return example["text"]


trainer = SFTTrainer(
    model="Qwen/Qwen3-0.6B",
    args=config,
    train_dataset=dataset,
    peft_config=peft_config,
    formatting_func=_text_of,
)
# Run the fine-tuning loop.
# NOTE(review): the rocket glyph is inferred - only the first byte of the
# original emoji survived the mis-encoding; confirm against the upstream file.
print("🚀 Starting training...")
trainer.train()
# Upload the trained model to the Hub.
print("💾 Pushing to Hub...")
trainer.push_to_hub()
# Finish Trackio tracking
trackio.finish()
# The closing message must stay on a single line: the literal was previously
# broken in two by a mis-decoded byte inside the check-mark emoji.
print("✅ Complete! Model at: https://huggingface.co/albertlieadrian/qwen3-0.6b-codeforces-sft")