Spaces:
Running
Running
Commit · b0701ef
1 Parent(s): 32d5b8f
feat: implement GridMind-RL training pipeline with GRPO Colab notebook and Unsloth configuration script
Browse files
scripts/gridmind_grpo_colab.ipynb
CHANGED
|
@@ -345,7 +345,10 @@
|
|
| 345 |
"MODEL_NAME = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
|
| 346 |
"gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"\n",
|
| 347 |
"gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 if torch.cuda.is_available() else 0\n",
|
|
|
|
|
|
|
| 348 |
"print(f\"Loading {MODEL_NAME} with 4-bit quantization on {gpu_name} ({gpu_total_gb:.2f} GB VRAM)...\")\n",
|
|
|
|
| 349 |
"\n",
|
| 350 |
"tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
|
| 351 |
"if tokenizer.pad_token is None:\n",
|
|
@@ -355,7 +358,7 @@
|
|
| 355 |
"# 4-bit quantization for memory-efficient QLoRA training\n",
|
| 356 |
"bnb_config = BitsAndBytesConfig(\n",
|
| 357 |
" load_in_4bit=True,\n",
|
| 358 |
-
" bnb_4bit_compute_dtype=
|
| 359 |
" bnb_4bit_quant_type=\"nf4\",\n",
|
| 360 |
" bnb_4bit_use_double_quant=True,\n",
|
| 361 |
")\n",
|
|
@@ -363,6 +366,7 @@
|
|
| 363 |
"model = AutoModelForCausalLM.from_pretrained(\n",
|
| 364 |
" MODEL_NAME,\n",
|
| 365 |
" quantization_config=bnb_config,\n",
|
|
|
|
| 366 |
" device_map=\"auto\",\n",
|
| 367 |
" trust_remote_code=True,\n",
|
| 368 |
")\n",
|
|
@@ -685,7 +689,9 @@
|
|
| 685 |
" \"max_new_tokens\": 80,\n",
|
| 686 |
" \"num_generations\": 4,\n",
|
| 687 |
" \"learning_rate\": 5e-5,\n",
|
| 688 |
-
" \"fp16\":
|
|
|
|
|
|
|
| 689 |
" \"logging_steps\": 1,\n",
|
| 690 |
" \"save_steps\": 60,\n",
|
| 691 |
" \"report_to\": \"none\",\n",
|
|
|
|
| 345 |
"MODEL_NAME = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
|
| 346 |
"gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"\n",
|
| 347 |
"gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 if torch.cuda.is_available() else 0\n",
|
| 348 |
+
"use_bf16 = bool(torch.cuda.is_available() and torch.cuda.is_bf16_supported())\n",
|
| 349 |
+
"compute_dtype = torch.bfloat16 if use_bf16 else torch.float16\n",
|
| 350 |
"print(f\"Loading {MODEL_NAME} with 4-bit quantization on {gpu_name} ({gpu_total_gb:.2f} GB VRAM)...\")\n",
|
| 351 |
+
"print(f\"Compute dtype: {compute_dtype}\")\n",
|
| 352 |
"\n",
|
| 353 |
"tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
|
| 354 |
"if tokenizer.pad_token is None:\n",
|
|
|
|
| 358 |
"# 4-bit quantization for memory-efficient QLoRA training\n",
|
| 359 |
"bnb_config = BitsAndBytesConfig(\n",
|
| 360 |
" load_in_4bit=True,\n",
|
| 361 |
+
" bnb_4bit_compute_dtype=compute_dtype,\n",
|
| 362 |
" bnb_4bit_quant_type=\"nf4\",\n",
|
| 363 |
" bnb_4bit_use_double_quant=True,\n",
|
| 364 |
")\n",
|
|
|
|
| 366 |
"model = AutoModelForCausalLM.from_pretrained(\n",
|
| 367 |
" MODEL_NAME,\n",
|
| 368 |
" quantization_config=bnb_config,\n",
|
| 369 |
+
" torch_dtype=compute_dtype,\n",
|
| 370 |
" device_map=\"auto\",\n",
|
| 371 |
" trust_remote_code=True,\n",
|
| 372 |
")\n",
|
|
|
|
| 689 |
" \"max_new_tokens\": 80,\n",
|
| 690 |
" \"num_generations\": 4,\n",
|
| 691 |
" \"learning_rate\": 5e-5,\n",
|
| 692 |
+
" \"fp16\": not use_bf16,\n",
|
| 693 |
+
" \"bf16\": use_bf16,\n",
|
| 694 |
+
" \"max_grad_norm\": 0.0,\n",
|
| 695 |
" \"logging_steps\": 1,\n",
|
| 696 |
" \"save_steps\": 60,\n",
|
| 697 |
" \"report_to\": \"none\",\n",
|
scripts/train_unsloth.py
CHANGED
|
@@ -642,6 +642,7 @@ def main():
|
|
| 642 |
print(f"🚀 Loading model: {args.model_name}")
|
| 643 |
max_seq_length = 512
|
| 644 |
lora_rank = 8
|
|
|
|
| 645 |
|
| 646 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 647 |
model_name=args.model_name,
|
|
@@ -683,7 +684,9 @@ def main():
|
|
| 683 |
"warmup_ratio": 0.1,
|
| 684 |
"logging_steps": 5,
|
| 685 |
"save_steps": 100,
|
| 686 |
-
"fp16":
|
|
|
|
|
|
|
| 687 |
"report_to": "none",
|
| 688 |
"seed": 42,
|
| 689 |
}
|
|
|
|
| 642 |
print(f"🚀 Loading model: {args.model_name}")
|
| 643 |
max_seq_length = 512
|
| 644 |
lora_rank = 8
|
| 645 |
+
use_bf16 = bool(torch.cuda.is_available() and torch.cuda.is_bf16_supported())
|
| 646 |
|
| 647 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 648 |
model_name=args.model_name,
|
|
|
|
| 684 |
"warmup_ratio": 0.1,
|
| 685 |
"logging_steps": 5,
|
| 686 |
"save_steps": 100,
|
| 687 |
+
"fp16": not use_bf16,
|
| 688 |
+
"bf16": use_bf16,
|
| 689 |
+
"max_grad_norm": 0.0,
|
| 690 |
"report_to": "none",
|
| 691 |
"seed": 42,
|
| 692 |
}
|