Kartik Goyal committed on
Commit ·
d3424a0
1
Parent(s): 278024e
fixed docker
Browse files- grpo_train.py +1 -1
grpo_train.py
CHANGED
|
@@ -300,7 +300,7 @@ def reward_environment(prompts, completions, task_id=None, setup_actions=None, *
|
|
| 300 |
# MODEL
|
| 301 |
# =========================
|
| 302 |
|
| 303 |
-
USE_4BIT = not torch.cuda.is_available() or torch.cuda.get_device_properties(0).
|
| 304 |
|
| 305 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 306 |
model_name="unsloth/Llama-3.1-8B-Instruct",
|
|
|
|
| 300 |
# MODEL
|
| 301 |
# =========================
|
| 302 |
|
| 303 |
+
USE_4BIT = not torch.cuda.is_available() or torch.cuda.get_device_properties(0).total_memory < 40 * 1024**3
|
| 304 |
|
| 305 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 306 |
model_name="unsloth/Llama-3.1-8B-Instruct",
|