# /// script
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "transformers", "datasets", "accelerate", "torch"]
# ///
#
# LoRA supervised fine-tuning of Qwen/Qwen3-0.6B on the Codeforces CoTs
# dataset. The script loads the dataset, holds out 5% for evaluation,
# trains with TRL's SFTTrainer, and pushes the result to the Hugging Face Hub.
#
# The three-line "# /// script" block above is inline script metadata; it must
# stay as separate comment lines for script runners to parse it.

from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig

# Load the Codeforces CoTs dataset (decontaminated version)
print("Loading dataset...")
dataset = load_dataset(
    "open-r1/codeforces-cots",
    "solutions_py_decontaminated",
    split="train",
)
print(f"Dataset size: {len(dataset)} examples")
print(f"Columns: {dataset.column_names}")

# Check first example to understand structure. The row is fetched once and
# reused instead of indexing the dataset again for every print.
first_example = dataset[0]
print(f"First example keys: {first_example.keys()}")
if "messages" in dataset.column_names:
    # Slicing already copes with lists shorter than two items, so no length
    # check is needed to show "at most the first two messages".
    print(f"Messages sample: {first_example['messages'][:2]}")

    # Train on the chat transcript only. SFTTrainer infers the dataset format
    # from the column names it sees, so dropping the unused columns keeps the
    # chat-format path unambiguous.
    # NOTE(review): assumes no other column is needed for training - confirm
    # against the dataset card.
    dataset = dataset.select_columns(["messages"])

# Create train/eval split (fixed seed so the held-out set is reproducible)
dataset_split = dataset.train_test_split(test_size=0.05, seed=42)
print(f"Train: {len(dataset_split['train'])}, Eval: {len(dataset_split['test'])}")

# LoRA config for efficient fine-tuning: adapters on the attention and MLP
# projection layers listed in target_modules.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    bias="none",
    task_type="CAUSAL_LM",
)

# Training config - using "messages" column for chat format
training_args = SFTConfig(
    # Output location and Hub upload
    output_dir="qwen3-0.6b-codeforces-sft",
    push_to_hub=True,
    hub_model_id="luiscosio/qwen3-0.6b-codeforces-sft",
    hub_strategy="every_save",
    # Schedule: 2 samples per device x 8 accumulation steps per optimizer step
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Evaluate and checkpoint on the same 200-step cadence
    eval_strategy="steps",
    eval_steps=200,
    save_strategy="steps",
    save_steps=200,
    save_total_limit=3,
    # Logging / experiment tracking
    logging_steps=10,
    report_to="trackio",
    run_name="qwen3-0.6b-codeforces-sft",
    # Precision and optimizer
    bf16=True,
    optim="adamw_torch_fused",
    max_grad_norm=1.0,
    # Sequence handling
    max_length=2048,
    dataset_text_field=None,  # Use messages format
)

# Initialize trainer (the base model is passed by its Hub id)
print("Initializing trainer...")
trainer = SFTTrainer(
    model="Qwen/Qwen3-0.6B",
    train_dataset=dataset_split["train"],
    eval_dataset=dataset_split["test"],
    peft_config=peft_config,
    args=training_args,
)

# Train
print("Starting training...")
trainer.train()

# Push final model to Hub
print("Pushing to Hub...")
trainer.push_to_hub()
print("Training complete!")