papebaba committed
Commit e620ebc · verified · 1 Parent(s): 72c34a1

Upload train_qwen_codeforces.py with huggingface_hub
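For reference, an upload like this is usually done with the huggingface_hub client. A minimal sketch, assuming a destination repo id (the actual target repo is not shown on this page):

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from huggingface-cli login or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="train_qwen_codeforces.py",    # local script to upload
    path_in_repo="train_qwen_codeforces.py",       # destination path inside the repo
    repo_id="papebaba/qwen-codeforces-finetuned",  # assumed target repo, not confirmed by this commit
    commit_message="Upload train_qwen_codeforces.py with huggingface_hub",
)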

Files changed (1)
  1. train_qwen_codeforces.py +75 -0
train_qwen_codeforces.py ADDED
@@ -0,0 +1,75 @@
+ # /// script
+ # dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio>=0.1.0", "datasets>=2.0.0"]
+ # ///
+
+ from datasets import load_dataset
+ from peft import LoraConfig
+ from trl import SFTTrainer, SFTConfig
+ import trackio
+ import os
+
+ # Load dataset - 1000 examples for ~20 min training
+ dataset = load_dataset(
+     "open-r1/codeforces-cots",
+     "solutions_w_editorials_py_decontaminated",
+     split="train[:1000]"
+ )
+
+ print(f"📊 Training on {len(dataset)} examples for 3 epochs")
+
+ # Get username for hub model id
+ username = os.environ.get("HF_USERNAME", "papebaba")
+
+ # Configure LoRA for efficient training on T4 small
+ peft_config = LoraConfig(
+     r=8,
+     lora_alpha=16,
+     lora_dropout=0.05,
+     bias="none",
+     task_type="CAUSAL_LM",
+     target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]
+ )
+
+ # Configure trainer - optimized for T4 small
+ trainer = SFTTrainer(
+     model="Qwen/Qwen2.5-0.5B",
+     train_dataset=dataset,
+     # No eval dataset to save ~40% memory on T4 small
+     peft_config=peft_config,
+     args=SFTConfig(
+         output_dir="qwen-codeforces-finetuned",
+         num_train_epochs=3,
+         per_device_train_batch_size=1,
+         gradient_accumulation_steps=8,  # Effective batch size = 8
+         gradient_checkpointing=True,
+         learning_rate=2e-4,
+         lr_scheduler_type="cosine",
+         warmup_ratio=0.1,
+         logging_steps=10,
+         save_strategy="epoch",
+         save_total_limit=1,
+         # Hub configuration
+         push_to_hub=True,
+         hub_model_id=f"{username}/qwen-codeforces-finetuned",
+         hub_strategy="end",
+         hub_private_repo=False,
+         # Trackio monitoring
+         report_to="trackio",
+         run_name="qwen-codeforces-sft-1k",
+         # Optimization for T4 small
+         fp16=True,  # T4 (Turing) GPUs do not support bf16; use fp16 mixed precision instead
+         max_grad_norm=1.0,
+         optim="adamw_torch",
+         max_seq_length=512,
+     )
+ )
+
+ # Train the model
+ print("🚀 Starting training on T4 small...")
+ trainer.train()
+
+ # Final push to hub
+ print("📤 Pushing final model to Hub...")
+ trainer.push_to_hub()
+
+ print("✅ Training complete!")