Spaces:
Sleeping
Sleeping
Commit ·
51c46da
1
Parent(s): e26d074
update
Browse files
examples/tutorials/dpo/ultrafeedback-dpo/step_2_train_dpo_model_single_gpu.py
CHANGED
|
@@ -120,13 +120,11 @@ def main():
|
|
| 120 |
args.model_name,
|
| 121 |
cache_dir=args.model_cache_dir,
|
| 122 |
trust_remote_code=True,
|
| 123 |
-
dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 124 |
)
|
| 125 |
ref_model = AutoModelForCausalLM.from_pretrained(
|
| 126 |
args.model_name,
|
| 127 |
cache_dir=args.model_cache_dir,
|
| 128 |
trust_remote_code=True,
|
| 129 |
-
dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 130 |
)
|
| 131 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 132 |
args.model_name,
|
|
|
|
| 120 |
args.model_name,
|
| 121 |
cache_dir=args.model_cache_dir,
|
| 122 |
trust_remote_code=True,
|
|
|
|
| 123 |
)
|
| 124 |
ref_model = AutoModelForCausalLM.from_pretrained(
|
| 125 |
args.model_name,
|
| 126 |
cache_dir=args.model_cache_dir,
|
| 127 |
trust_remote_code=True,
|
|
|
|
| 128 |
)
|
| 129 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 130 |
args.model_name,
|
examples/tutorials/rlhf/gpt2_sst2_ppo/step_3_generation.py
CHANGED
|
@@ -33,7 +33,6 @@ def get_args():
|
|
| 33 |
parser = argparse.ArgumentParser()
|
| 34 |
parser.add_argument(
|
| 35 |
"--ppo_model_name",
|
| 36 |
-
# default=(project_path / "trained_models/gpt2-sst2-ppo-kl002"),
|
| 37 |
default=(project_path / "trained_models/gpt2-sst2-ppo-kl005"),
|
| 38 |
type=str
|
| 39 |
)
|
|
@@ -76,9 +75,9 @@ def main():
|
|
| 76 |
tokenized = tokenizer(
|
| 77 |
# "this",
|
| 78 |
# "this is ",
|
| 79 |
-
|
| 80 |
# "I am ",
|
| 81 |
-
"allow us ",
|
| 82 |
# "movie ",
|
| 83 |
# "this film ",
|
| 84 |
return_tensors="pt"
|
|
|
|
| 33 |
parser = argparse.ArgumentParser()
|
| 34 |
parser.add_argument(
|
| 35 |
"--ppo_model_name",
|
|
|
|
| 36 |
default=(project_path / "trained_models/gpt2-sst2-ppo-kl005"),
|
| 37 |
type=str
|
| 38 |
)
|
|
|
|
| 75 |
tokenized = tokenizer(
|
| 76 |
# "this",
|
| 77 |
# "this is ",
|
| 78 |
+
"it's ",
|
| 79 |
# "I am ",
|
| 80 |
+
# "allow us ",
|
| 81 |
# "movie ",
|
| 82 |
# "this film ",
|
| 83 |
return_tensors="pt"
|