Spaces:

parth-1
/

MetaGuard-Train

Runtime error

parth-1 committed on Apr 26

Commit

c16c504

verified ·

1 Parent(s): 4ae43fc

Update grpo_train.py

Files changed (1) hide show

grpo_train.py CHANGED Viewed

@@ -300,24 +300,31 @@ def reward_environment(prompts, completions, task_id=None, setup_actions=None, *
 # MODEL
 # =========================
-USE_4BIT = not torch.cuda.is_available() or torch.cuda.get_device_properties(0).total_memory < 40 * 1024**3
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name="unsloth/Llama-3.1-8B-Instruct",
     load_in_4bit=USE_4BIT,
-    dtype = torch.bfloat16,
     max_seq_length=2048,
-    dtype=None,  # auto-detect bf16 on A100
 )
 model = FastLanguageModel.get_peft_model(
     model,
-    r=32,
     target_modules=[
         "q_proj", "k_proj", "v_proj", "o_proj",
         "gate_proj", "up_proj", "down_proj",
     ],
-    lora_alpha=64,
     lora_dropout=0,
     bias="none",
     use_gradient_checkpointing="unsloth",
@@ -336,16 +343,16 @@ trainer = GRPOTrainer(
     args=GRPOConfig(
         output_dir="outputs",
         learning_rate=2e-5,
-        num_train_epochs=3,
-        per_device_train_batch_size=2,
-        gradient_accumulation_steps=4,
-        num_generations=4,
         max_prompt_length=768,
         max_completion_length=128,
-        logging_steps=5,
-        warmup_ratio=0.1,
-        bf16=True,
-        fp16=False,
         report_to="none",
     ),
     train_dataset=dataset,

 # MODEL
 # =========================
+if torch.cuda.is_available():
+    _vram = torch.cuda.get_device_properties(0).total_memory
+    _name = torch.cuda.get_device_name(0)
+    print(f"GPU: {_name}  VRAM: {_vram / 1024**3:.1f} GB")
+else:
+    _vram = 0
+    _name = "CPU"
+USE_4BIT = _vram < 40 * 1024**3  # True for T4 (15 GB) and L4 (24 GB); False for A100 (80 GB)
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name="unsloth/Llama-3.1-8B-Instruct",
     load_in_4bit=USE_4BIT,
     max_seq_length=2048,
+    dtype=None,
 )
 model = FastLanguageModel.get_peft_model(
     model,
+    r=16 if USE_4BIT else 32,
     target_modules=[
         "q_proj", "k_proj", "v_proj", "o_proj",
         "gate_proj", "up_proj", "down_proj",
     ],
+    lora_alpha=32 if USE_4BIT else 64,
     lora_dropout=0,
     bias="none",
     use_gradient_checkpointing="unsloth",
     args=GRPOConfig(
         output_dir="outputs",
         learning_rate=2e-5,
+        num_train_epochs=1 if USE_4BIT else 3,
+        per_device_train_batch_size=1 if USE_4BIT else 2,
+        gradient_accumulation_steps=2 if USE_4BIT else 4,
+        num_generations=2 if USE_4BIT else 4,
         max_prompt_length=768,
         max_completion_length=128,
+        logging_steps=3 if USE_4BIT else 5,
+        warmup_steps=5 if USE_4BIT else 10,
+        bf16=not USE_4BIT,
+        fp16=USE_4BIT,
         report_to="none",
     ),
     train_dataset=dataset,