Spaces:

Prajwal782007
/

Gridmind

Running

App Files Community

Prajwal782007 committed on 23 days ago

Commit

b0701ef

1 Parent(s): 32d5b8f

feat: implement GridMind-RL training pipeline with GRPO Colab notebook and Unsloth configuration script

Browse files

Files changed (2) hide show

scripts/gridmind_grpo_colab.ipynb +8 -2
scripts/train_unsloth.py +4 -1

scripts/gridmind_grpo_colab.ipynb CHANGED Viewed

@@ -345,7 +345,10 @@
     "MODEL_NAME = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
     "gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"\n",
     "gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 if torch.cuda.is_available() else 0\n",
     "print(f\"Loading {MODEL_NAME} with 4-bit quantization on {gpu_name} ({gpu_total_gb:.2f} GB VRAM)...\")\n",
     "\n",
     "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
     "if tokenizer.pad_token is None:\n",
@@ -355,7 +358,7 @@
     "# 4-bit quantization for memory-efficient QLoRA training\n",
     "bnb_config = BitsAndBytesConfig(\n",
     "    load_in_4bit=True,\n",
-    "    bnb_4bit_compute_dtype=torch.float16,\n",
     "    bnb_4bit_quant_type=\"nf4\",\n",
     "    bnb_4bit_use_double_quant=True,\n",
     ")\n",
@@ -363,6 +366,7 @@
     "model = AutoModelForCausalLM.from_pretrained(\n",
     "    MODEL_NAME,\n",
     "    quantization_config=bnb_config,\n",
     "    device_map=\"auto\",\n",
     "    trust_remote_code=True,\n",
     ")\n",
@@ -685,7 +689,9 @@
     "    \"max_new_tokens\": 80,\n",
     "    \"num_generations\": 4,\n",
     "    \"learning_rate\": 5e-5,\n",
-    "    \"fp16\": True,\n",
     "    \"logging_steps\": 1,\n",
     "    \"save_steps\": 60,\n",
     "    \"report_to\": \"none\",\n",

     "MODEL_NAME = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
     "gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"\n",
     "gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 if torch.cuda.is_available() else 0\n",
+    "use_bf16 = bool(torch.cuda.is_available() and torch.cuda.is_bf16_supported())\n",
+    "compute_dtype = torch.bfloat16 if use_bf16 else torch.float16\n",
     "print(f\"Loading {MODEL_NAME} with 4-bit quantization on {gpu_name} ({gpu_total_gb:.2f} GB VRAM)...\")\n",
+    "print(f\"Compute dtype: {compute_dtype}\")\n",
     "\n",
     "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
     "if tokenizer.pad_token is None:\n",
     "# 4-bit quantization for memory-efficient QLoRA training\n",
     "bnb_config = BitsAndBytesConfig(\n",
     "    load_in_4bit=True,\n",
+    "    bnb_4bit_compute_dtype=compute_dtype,\n",
     "    bnb_4bit_quant_type=\"nf4\",\n",
     "    bnb_4bit_use_double_quant=True,\n",
     ")\n",
     "model = AutoModelForCausalLM.from_pretrained(\n",
     "    MODEL_NAME,\n",
     "    quantization_config=bnb_config,\n",
+    "    torch_dtype=compute_dtype,\n",
     "    device_map=\"auto\",\n",
     "    trust_remote_code=True,\n",
     ")\n",
     "    \"max_new_tokens\": 80,\n",
     "    \"num_generations\": 4,\n",
     "    \"learning_rate\": 5e-5,\n",
+    "    \"fp16\": not use_bf16,\n",
+    "    \"bf16\": use_bf16,\n",
+    "    \"max_grad_norm\": 0.0,\n",
     "    \"logging_steps\": 1,\n",
     "    \"save_steps\": 60,\n",
     "    \"report_to\": \"none\",\n",

scripts/train_unsloth.py CHANGED Viewed

@@ -642,6 +642,7 @@ def main():
     print(f"🚀 Loading model: {args.model_name}")
     max_seq_length = 512
     lora_rank = 8
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name=args.model_name,
@@ -683,7 +684,9 @@ def main():
         "warmup_ratio": 0.1,
         "logging_steps": 5,
         "save_steps": 100,
-        "fp16": True,
         "report_to": "none",
         "seed": 42,
     }

     print(f"🚀 Loading model: {args.model_name}")
     max_seq_length = 512
     lora_rank = 8
+    use_bf16 = bool(torch.cuda.is_available() and torch.cuda.is_bf16_supported())
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name=args.model_name,
         "warmup_ratio": 0.1,
         "logging_steps": 5,
         "save_steps": 100,
+        "fp16": not use_bf16,
+        "bf16": use_bf16,
+        "max_grad_norm": 0.0,
         "report_to": "none",
         "seed": 42,
     }