kneeraj committed on
Commit
c91de98
·
verified ·
1 Parent(s): 175d640

Upload train_qwen3_codeforces.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_qwen3_codeforces.py +95 -0
train_qwen3_codeforces.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "transformers>=4.44.0", "datasets"]
# ///
#
# Supervised fine-tuning (SFT) of a small Qwen chat model on CodeForces
# competitive-programming solutions with editorials. Uses LoRA adapters for
# parameter-efficient training and pushes checkpoints to the Hugging Face Hub.
#
# NOTE(review): `max_seq_length` and `dataset_text_field` were renamed/removed
# in later trl releases; with the open-ended "trl>=0.12.0" pin above, a fresh
# install may reject these SFTConfig kwargs. Verify against the installed trl
# version or add an upper bound to the pin.

from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
import trackio  # imported eagerly so the run fails fast if the report_to backend is missing

# Load dataset with editorials for better instruction following.
dataset = load_dataset(
    "open-r1/codeforces-cots",
    name="solutions_w_editorials_decontaminated",
    split="train",
)

# Create a train/eval split (90/10); fixed seed keeps the split reproducible.
dataset_split = dataset.train_test_split(test_size=0.1, seed=42)

# LoRA configuration for efficient fine-tuning: train low-rank adapters on
# the attention and MLP projection matrices instead of the full model.
peft_config = LoraConfig(
    r=16,              # adapter rank
    lora_alpha=32,     # scaling factor (effective scale = alpha / r = 2.0)
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

# SFT training configuration.
training_args = SFTConfig(
    output_dir="qwen3-0.6b-codeforces-instruct",

    # Training hyperparameters
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # Effective batch size: 16
    gradient_checkpointing=True,    # trade compute for memory on long sequences

    # Learning rate and optimization
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    optim="paged_adamw_8bit",  # 8-bit paged AdamW (requires bitsandbytes) to cut optimizer memory

    # Evaluation and logging
    eval_strategy="steps",
    eval_steps=100,
    logging_steps=10,
    save_strategy="steps",
    save_steps=200,
    save_total_limit=3,  # keep only the 3 most recent checkpoints on disk

    # Hub integration (CRITICAL - saves model to Hub)
    push_to_hub=True,
    hub_model_id="kneeraj/qwen3-0.6b-codeforces-instruct",
    hub_strategy="every_save",  # upload every saved checkpoint, not just the final one
    hub_private_repo=False,

    # Trackio monitoring
    report_to="trackio",
    project="codeforces-finetuning",
    run_name="qwen3-0.6b-codeforces-sft",

    # Performance optimizations
    bf16=True,
    max_grad_norm=1.0,

    # Data processing
    max_seq_length=2048,            # CodeForces problems can be lengthy
    dataset_text_field="messages",  # Use chat format
    packing=False,                  # Don't pack for instruction following
)

# Initialize trainer. Passing a model id string lets SFTTrainer load the
# base model itself; LoRA adapters are attached via peft_config.
# NOTE(review): file/repo names say "qwen3-0.6b" but the base model below is
# Qwen2.5-0.5B — confirm this mismatch is intentional.
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # Using Qwen2.5-0.5B as base (Qwen3-0.6B may not be available)
    train_dataset=dataset_split["train"],
    eval_dataset=dataset_split["test"],
    peft_config=peft_config,
    args=training_args,
)

print("Starting training...")
print(f"Training samples: {len(dataset_split['train'])}")
print(f"Evaluation samples: {len(dataset_split['test'])}")

# Train the model (resumes/streams metrics to trackio via report_to above).
trainer.train()

# Final push to Hub.
print("Pushing final model to Hub...")
trainer.push_to_hub()

print("Training complete! Model saved to: kneeraj/qwen3-0.6b-codeforces-instruct")