Update README.md
README.md CHANGED
```diff
@@ -80,7 +80,7 @@ print ("ANSWER:\n\n", answer_only)
 from transformers import AutoModelForCausalLM, AutoTokenizer

 # Load reasoning model
-model_name='lamm-mit/
+model_name='lamm-mit/PRefLexOR_ORPO_DPO_EXO_REFLECT_10222024'
 model = AutoModelForCausalLM.from_pretrained(model_name,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2", device_map="auto", trust_remote_code=True,
@@ -89,10 +89,10 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
     use_fast=False,
 )
 # Load critic model
-
+model_name_critic = "meta-llama/Llama-3.2-3B-Instruct"

 critic_model = AutoModelForCausalLM.from_pretrained(
-
+    model_name_critic,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2",
     device_map="auto",
```
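Read together, the two hunks complete the truncated reasoning-model name and define the previously missing critic checkpoint, which was never passed to `from_pretrained` before this change. Assembled into one runnable snippet, the updated README code reads roughly as follows (a sketch: the `import torch` line and any keyword arguments after `device_map="auto"` in the critic call fall outside the hunks shown and are assumptions):

```python
import torch  # assumed: imported earlier in the README snippet
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load reasoning model (name completed by this commit)
model_name = 'lamm-mit/PRefLexOR_ORPO_DPO_EXO_REFLECT_10222024'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # requires the flash-attn package
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_fast=False,
)

# Load critic model (checkpoint name added by this commit and now
# actually passed as the first argument to from_pretrained)
model_name_critic = "meta-llama/Llama-3.2-3B-Instruct"
critic_model = AutoModelForCausalLM.from_pretrained(
    model_name_critic,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",  # any further kwargs are outside the hunk shown
)
```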