Prajwal782007 committed on
Commit
b0701ef
·
1 Parent(s): 32d5b8f

feat: implement GridMind-RL training pipeline with GRPO Colab notebook and Unsloth configuration script

Browse files
scripts/gridmind_grpo_colab.ipynb CHANGED
@@ -345,7 +345,10 @@
345
  "MODEL_NAME = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
346
  "gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"\n",
347
  "gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 if torch.cuda.is_available() else 0\n",
 
 
348
  "print(f\"Loading {MODEL_NAME} with 4-bit quantization on {gpu_name} ({gpu_total_gb:.2f} GB VRAM)...\")\n",
 
349
  "\n",
350
  "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
351
  "if tokenizer.pad_token is None:\n",
@@ -355,7 +358,7 @@
355
  "# 4-bit quantization for memory-efficient QLoRA training\n",
356
  "bnb_config = BitsAndBytesConfig(\n",
357
  " load_in_4bit=True,\n",
358
- " bnb_4bit_compute_dtype=torch.float16,\n",
359
  " bnb_4bit_quant_type=\"nf4\",\n",
360
  " bnb_4bit_use_double_quant=True,\n",
361
  ")\n",
@@ -363,6 +366,7 @@
363
  "model = AutoModelForCausalLM.from_pretrained(\n",
364
  " MODEL_NAME,\n",
365
  " quantization_config=bnb_config,\n",
 
366
  " device_map=\"auto\",\n",
367
  " trust_remote_code=True,\n",
368
  ")\n",
@@ -685,7 +689,9 @@
685
  " \"max_new_tokens\": 80,\n",
686
  " \"num_generations\": 4,\n",
687
  " \"learning_rate\": 5e-5,\n",
688
- " \"fp16\": True,\n",
 
 
689
  " \"logging_steps\": 1,\n",
690
  " \"save_steps\": 60,\n",
691
  " \"report_to\": \"none\",\n",
 
345
  "MODEL_NAME = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
346
  "gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"\n",
347
  "gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 if torch.cuda.is_available() else 0\n",
348
+ "use_bf16 = bool(torch.cuda.is_available() and torch.cuda.is_bf16_supported())\n",
349
+ "compute_dtype = torch.bfloat16 if use_bf16 else torch.float16\n",
350
  "print(f\"Loading {MODEL_NAME} with 4-bit quantization on {gpu_name} ({gpu_total_gb:.2f} GB VRAM)...\")\n",
351
+ "print(f\"Compute dtype: {compute_dtype}\")\n",
352
  "\n",
353
  "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
354
  "if tokenizer.pad_token is None:\n",
 
358
  "# 4-bit quantization for memory-efficient QLoRA training\n",
359
  "bnb_config = BitsAndBytesConfig(\n",
360
  " load_in_4bit=True,\n",
361
+ " bnb_4bit_compute_dtype=compute_dtype,\n",
362
  " bnb_4bit_quant_type=\"nf4\",\n",
363
  " bnb_4bit_use_double_quant=True,\n",
364
  ")\n",
 
366
  "model = AutoModelForCausalLM.from_pretrained(\n",
367
  " MODEL_NAME,\n",
368
  " quantization_config=bnb_config,\n",
369
+ " torch_dtype=compute_dtype,\n",
370
  " device_map=\"auto\",\n",
371
  " trust_remote_code=True,\n",
372
  ")\n",
 
689
  " \"max_new_tokens\": 80,\n",
690
  " \"num_generations\": 4,\n",
691
  " \"learning_rate\": 5e-5,\n",
692
+ " \"fp16\": not use_bf16,\n",
693
+ " \"bf16\": use_bf16,\n",
694
+ " \"max_grad_norm\": 0.0,\n",
695
  " \"logging_steps\": 1,\n",
696
  " \"save_steps\": 60,\n",
697
  " \"report_to\": \"none\",\n",
scripts/train_unsloth.py CHANGED
@@ -642,6 +642,7 @@ def main():
642
  print(f"🚀 Loading model: {args.model_name}")
643
  max_seq_length = 512
644
  lora_rank = 8
 
645
 
646
  model, tokenizer = FastLanguageModel.from_pretrained(
647
  model_name=args.model_name,
@@ -683,7 +684,9 @@ def main():
683
  "warmup_ratio": 0.1,
684
  "logging_steps": 5,
685
  "save_steps": 100,
686
- "fp16": True,
 
 
687
  "report_to": "none",
688
  "seed": 42,
689
  }
 
642
  print(f"🚀 Loading model: {args.model_name}")
643
  max_seq_length = 512
644
  lora_rank = 8
645
+ use_bf16 = bool(torch.cuda.is_available() and torch.cuda.is_bf16_supported())
646
 
647
  model, tokenizer = FastLanguageModel.from_pretrained(
648
  model_name=args.model_name,
 
684
  "warmup_ratio": 0.1,
685
  "logging_steps": 5,
686
  "save_steps": 100,
687
+ "fp16": not use_bf16,
688
+ "bf16": use_bf16,
689
+ "max_grad_norm": 0.0,
690
  "report_to": "none",
691
  "seed": 42,
692
  }