# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "trl>=0.12.0",
#     "peft>=0.7.0",
#     "transformers>=4.36.0",
#     "accelerate>=0.24.0",
#     "trackio",
# ]
# ///

import trackio
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig

# Load dataset - use only 20 examples for quick demo
print("📦 Loading dataset...")
full_dataset = load_dataset("open-r1/codeforces-cots", split="train")

# Take only first 20 examples for quick demo
dataset = full_dataset.select(range(20))
print(f"✅ Dataset loaded: {len(dataset)} examples")


# Format the dataset - convert messages to text format for SFT
def format_for_sft(example):
    """Convert messages to a single text format for training."""
    messages = example.get("messages", [])
    text = ""
    for msg in messages:
        role = msg.get("role", "unknown")
        content = msg.get("content", "")
        if role == "system":
            text += f"System: {content}\n\n"
        elif role == "user":
            text += f"User: {content}\n\n"
        elif role == "assistant":
            text += f"Assistant: {content}\n\n"
    return {"text": text.strip()}


print("🔄 Formatting dataset...")
dataset = dataset.map(format_for_sft, remove_columns=dataset.column_names)
print(f"   Formatted to text: {dataset[0]['text'][:200]}...")

# Training configuration
config = SFTConfig(
    # CRITICAL: Hub settings
    output_dir="qwen3-0.6b-codeforces-sft",
    push_to_hub=True,
    hub_model_id="albertlieadrian/qwen3-0.6b-codeforces-sft",
    hub_strategy="every_save",
    # Training parameters - optimized for small dataset demo
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    max_length=2048,  # Code problems need longer context
    # Logging & checkpointing
    logging_steps=5,
    save_strategy="no",  # Skip saving for quick demo
    save_total_limit=0,
    # Optimization
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    # Monitoring
    report_to="trackio",
    project="qwen3-codeforces-demo",
    run_name="20-examples-demo",
)

# LoRA configuration - efficient for 0.6B model
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)

# Initialize and train
print("🎯 Initializing trainer...")
trainer = SFTTrainer(
    model="Qwen/Qwen3-0.6B",
    train_dataset=dataset,
    formatting_func=lambda x: x["text"],
    args=config,
    peft_config=peft_config,
)

print("🚀 Starting training...")
trainer.train()

print("💾 Pushing to Hub...")
trainer.push_to_hub()

# Finish Trackio tracking
trackio.finish()

print("✅ Complete! Model at: https://huggingface.co/albertlieadrian/qwen3-0.6b-codeforces-sft")
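
# ---------------------------------------------------------------------------
# Usage notes (not part of the training flow):
#
# The `# /// script` header above is PEP 723 inline metadata, so the script's
# dependencies can be resolved automatically at launch (assuming `uv` is
# installed; the filename below is hypothetical):
#
#   uv run sft_demo.py
#
# Plain `python sft_demo.py` also works if trl, peft, transformers,
# accelerate, and trackio are already installed in the environment.
#
# A minimal sketch for loading the pushed LoRA adapter afterwards, assuming
# the final push succeeded and you have access to the Hub repo:
#
#   from peft import AutoPeftModelForCausalLM
#   from transformers import AutoTokenizer
#
#   repo = "albertlieadrian/qwen3-0.6b-codeforces-sft"
#   model = AutoPeftModelForCausalLM.from_pretrained(repo)  # base model + adapter
#   tokenizer = AutoTokenizer.from_pretrained(repo)
# ---------------------------------------------------------------------------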