Kartik Goyal committed on
Commit
d3424a0
·
1 Parent(s): 278024e

fixed docker

Browse files
Files changed (1) hide show
  1. grpo_train.py +1 -1
grpo_train.py CHANGED
@@ -300,7 +300,7 @@ def reward_environment(prompts, completions, task_id=None, setup_actions=None, *
300
  # MODEL
301
  # =========================
302
 
303
- USE_4BIT = not torch.cuda.is_available() or torch.cuda.get_device_properties(0).total_mem < 40 * 1024**3
304
 
305
  model, tokenizer = FastLanguageModel.from_pretrained(
306
  model_name="unsloth/Llama-3.1-8B-Instruct",
 
300
  # MODEL
301
  # =========================
302
 
303
+ USE_4BIT = not torch.cuda.is_available() or torch.cuda.get_device_properties(0).total_memory < 40 * 1024**3
304
 
305
  model, tokenizer = FastLanguageModel.from_pretrained(
306
  model_name="unsloth/Llama-3.1-8B-Instruct",