miyuki2026 committed on
Commit
51c46da
·
1 Parent(s): e26d074
examples/tutorials/dpo/ultrafeedback-dpo/step_2_train_dpo_model_single_gpu.py CHANGED
@@ -120,13 +120,11 @@ def main():
120
  args.model_name,
121
  cache_dir=args.model_cache_dir,
122
  trust_remote_code=True,
123
- dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
124
  )
125
  ref_model = AutoModelForCausalLM.from_pretrained(
126
  args.model_name,
127
  cache_dir=args.model_cache_dir,
128
  trust_remote_code=True,
129
- dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
130
  )
131
  tokenizer = AutoTokenizer.from_pretrained(
132
  args.model_name,
 
120
  args.model_name,
121
  cache_dir=args.model_cache_dir,
122
  trust_remote_code=True,
 
123
  )
124
  ref_model = AutoModelForCausalLM.from_pretrained(
125
  args.model_name,
126
  cache_dir=args.model_cache_dir,
127
  trust_remote_code=True,
 
128
  )
129
  tokenizer = AutoTokenizer.from_pretrained(
130
  args.model_name,
examples/tutorials/rlhf/gpt2_sst2_ppo/step_3_generation.py CHANGED
@@ -33,7 +33,6 @@ def get_args():
33
  parser = argparse.ArgumentParser()
34
  parser.add_argument(
35
  "--ppo_model_name",
36
- # default=(project_path / "trained_models/gpt2-sst2-ppo-kl002"),
37
  default=(project_path / "trained_models/gpt2-sst2-ppo-kl005"),
38
  type=str
39
  )
@@ -76,9 +75,9 @@ def main():
76
  tokenized = tokenizer(
77
  # "this",
78
  # "this is ",
79
- # "it's ",
80
  # "I am ",
81
- "allow us ",
82
  # "movie ",
83
  # "this film ",
84
  return_tensors="pt"
 
33
  parser = argparse.ArgumentParser()
34
  parser.add_argument(
35
  "--ppo_model_name",
 
36
  default=(project_path / "trained_models/gpt2-sst2-ppo-kl005"),
37
  type=str
38
  )
 
75
  tokenized = tokenizer(
76
  # "this",
77
  # "this is ",
78
+ "it's ",
79
  # "I am ",
80
+ # "allow us ",
81
  # "movie ",
82
  # "this film ",
83
  return_tensors="pt"