# /// script
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "transformers>=4.37.0", "datasets", "torch"]
# ///
"""LoRA fine-tuning smoke test: Qwen2.5-0.5B-Instruct on open-r1/codeforces-cots.

The script runs top to bottom as a single job:

1. Load the ``solutions`` config of ``open-r1/codeforces-cots`` and keep at
   most the first 500 examples.
2. Split that subset 90/10 (seed 42) into train and eval sets.
3. Train a LoRA adapter on the attention projections with TRL's ``SFTTrainer``,
   reporting metrics to trackio.
4. Push checkpoints and the final adapter to the Hugging Face Hub.
"""

import torch
import trackio  # noqa: F401  # not referenced directly; selected below via report_to="trackio"
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

# Single source of truth for the Hub repository the job pushes to and reports.
HUB_MODEL_ID = "nishant-research/sr-test-qwen-codeforces-ft"

# Upper bound on the number of examples used for this quick run.
MAX_EXAMPLES = 500

print("Loading dataset: open-r1/codeforces-cots...")
dataset = load_dataset("open-r1/codeforces-cots", "solutions", split="train")

# Take a subset for quick training (t4-small is memory-constrained)
print(f"Original dataset size: {len(dataset)}")
dataset = dataset.select(range(min(MAX_EXAMPLES, len(dataset))))
print(f"Using subset: {len(dataset)} examples")

# Create small eval split for monitoring
dataset_split = dataset.train_test_split(test_size=0.1, seed=42)
print(f"Train: {len(dataset_split['train'])}, Eval: {len(dataset_split['test'])}")

# Configure LoRA for efficient training
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Pick mixed precision from what the hardware reports instead of hard-coding
# bf16: requesting bf16 is not downgraded automatically by the training
# arguments, and T4-class GPUs have no native bfloat16. Use bf16 when the CUDA
# device supports it, fp16 on other CUDA devices, and plain fp32 without CUDA.
cuda_available = torch.cuda.is_available()
use_bf16 = cuda_available and torch.cuda.is_bf16_supported()
use_fp16 = cuda_available and not use_bf16

training_args = SFTConfig(
    output_dir="sr-test-qwen-codeforces-ft",
    # Training hyperparameters optimized for t4-small
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,  # Effective batch size = 8
    gradient_checkpointing=True,
    # Learning rate
    learning_rate=2e-4,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    # Logging and evaluation
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    # Optimizer and precision
    optim="adamw_torch",
    bf16=use_bf16,
    fp16=use_fp16,
    # Hub configuration
    push_to_hub=True,
    hub_model_id=HUB_MODEL_ID,
    hub_strategy="every_save",
    hub_private_repo=False,
    # Trackio monitoring
    report_to="trackio",
    project="qwen-codeforces-training",
    run_name="qwen2.5-0.5b-codeforces-ft-test",
)

print("Initializing trainer...")
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B-Instruct",
    train_dataset=dataset_split["train"],
    eval_dataset=dataset_split["test"],
    peft_config=lora_config,
    args=training_args,
)

print("Starting training...")
trainer.train()

print("Training complete! Pushing final model to Hub...")
trainer.push_to_hub()

print("✅ Training job completed successfully!")
print(f"Model saved to: https://huggingface.co/{HUB_MODEL_ID}")