Update README.md
README.md CHANGED
```diff
@@ -80,7 +80,7 @@ print ("ANSWER:\n\n", answer_only)
 from transformers import AutoModelForCausalLM, AutoTokenizer

 # Load reasoning model
-model_name='lamm-mit/
+model_name='lamm-mit/PRefLexOR_ORPO_DPO_EXO_REFLECT_10222024'
 model = AutoModelForCausalLM.from_pretrained(model_name,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2", device_map="auto", trust_remote_code=True,
@@ -89,10 +89,10 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
     use_fast=False,
 )
 # Load critic model
-
+model_name_critic = "meta-llama/Llama-3.2-3B-Instruct"

 critic_model = AutoModelForCausalLM.from_pretrained(
-
+    model_name_critic,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2",
     device_map="auto",
```
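Read together, the two hunks complete the truncated reasoning-model name and define the previously missing critic checkpoint, which was never passed to `from_pretrained` before this change. Assembled into one runnable snippet, the updated README code reads roughly as follows (a sketch: the `import torch` line and any keyword arguments after `device_map="auto"` in the critic call fall outside the hunks shown and are assumptions):

```python
import torch  # assumed: imported earlier in the README snippet
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load reasoning model (name completed by this commit)
model_name = 'lamm-mit/PRefLexOR_ORPO_DPO_EXO_REFLECT_10222024'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # requires the flash-attn package
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_fast=False,
)

# Load critic model (checkpoint name added by this commit and now
# actually passed as the first argument to from_pretrained)
model_name_critic = "meta-llama/Llama-3.2-3B-Instruct"
critic_model = AutoModelForCausalLM.from_pretrained(
    model_name_critic,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",  # any further kwargs are outside the hunk shown
)
```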