HarshitShri026 committed on
Commit
12acaa5
·
1 Parent(s): 973cd6f
Files changed (2) hide show
  1. train/colab_train.py +9 -0
  2. train/train_grpo.py +7 -0
train/colab_train.py CHANGED
@@ -15,6 +15,7 @@ Instructions for Colab:
15
 
16
  import collections
17
  import random
 
18
  from datasets import Dataset
19
  import torch
20
  import numpy as np
@@ -37,6 +38,11 @@ env = AutomathreasonerEnv(url=HF_SPACE_URL)
37
  max_seq_length = 1024 # Fits well within Colab T4 16GB VRAM limit
38
  lora_rank = 16
39
 
 
 
 
 
 
40
  # 2. Load Model via Unsloth (optimized for Free Colab VRAM)
41
  print("Loading model via Unsloth...")
42
  model, tokenizer = FastLanguageModel.from_pretrained(
@@ -191,6 +197,9 @@ training_args = GRPOConfig(
191
 
192
  # 8-bit optimizer saves VRAM
193
  optim="adamw_8bit",
 
 
 
194
  )
195
 
196
  trainer = GRPOTrainer(
 
15
 
16
  import collections
17
  import random
18
+ import unsloth # Must be imported before trl/transformers/peft for patching.
19
  from datasets import Dataset
20
  import torch
21
  import numpy as np
 
38
  max_seq_length = 1024 # Fits well within Colab T4 16GB VRAM limit
39
  lora_rank = 16
40
 
41
+ # T4 (and many non-Ampere GPUs) do not support bf16; pick precision dynamically.
42
+ has_cuda = torch.cuda.is_available()
43
+ use_bf16 = has_cuda and torch.cuda.is_bf16_supported()
44
+ use_fp16 = has_cuda and not use_bf16
45
+
46
  # 2. Load Model via Unsloth (optimized for Free Colab VRAM)
47
  print("Loading model via Unsloth...")
48
  model, tokenizer = FastLanguageModel.from_pretrained(
 
197
 
198
  # 8-bit optimizer saves VRAM
199
  optim="adamw_8bit",
200
+ bf16=use_bf16,
201
+ fp16=use_fp16,
202
+ use_cpu=not has_cuda,
203
  )
204
 
205
  trainer = GRPOTrainer(
train/train_grpo.py CHANGED
@@ -1,5 +1,6 @@
1
  import random
2
  import collections
 
3
  import torch
4
  import numpy as np
5
  from datasets import Dataset
@@ -143,6 +144,9 @@ def run_ttrl(model, tokenizer, test_problem, env, steps=5):
143
  def main():
144
  max_seq_length = 1024
145
  lora_rank = 16
 
 
 
146
 
147
  # Load model via Unsloth
148
  model, tokenizer = FastLanguageModel.from_pretrained(
@@ -354,6 +358,9 @@ def main():
354
 
355
  # Optimizer
356
  optim="adamw_8bit", # Memory-efficient
 
 
 
357
  )
358
 
359
  trainer = GRPOTrainer(
 
1
  import random
2
  import collections
3
+ import unsloth # Must be imported before trl/transformers/peft for patching.
4
  import torch
5
  import numpy as np
6
  from datasets import Dataset
 
144
  def main():
145
  max_seq_length = 1024
146
  lora_rank = 16
147
+ has_cuda = torch.cuda.is_available()
148
+ use_bf16 = has_cuda and torch.cuda.is_bf16_supported()
149
+ use_fp16 = has_cuda and not use_bf16
150
 
151
  # Load model via Unsloth
152
  model, tokenizer = FastLanguageModel.from_pretrained(
 
358
 
359
  # Optimizer
360
  optim="adamw_8bit", # Memory-efficient
361
+ bf16=use_bf16,
362
+ fp16=use_fp16,
363
+ use_cpu=not has_cuda,
364
  )
365
 
366
  trainer = GRPOTrainer(