kejian committed on
Commit
9cabb4f
·
1 Parent(s): 0b439d9

update model card README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -7
README.md CHANGED
@@ -82,7 +82,7 @@ The following hyperparameters were used during training:
82
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
83
  - lr_scheduler_type: linear
84
  - lr_scheduler_warmup_ratio: 0.01
85
- - training_steps: 45776
86
  - mixed_precision_training: Native AMP
87
 
88
  ### Framework versions
@@ -156,13 +156,28 @@ The following hyperparameters were used during training:
156
  'top_k': 0,
157
  'top_p': 0.9},
158
  'name': 'unconditional',
159
- 'num_samples': 512,
160
- 'prefix': '<|aligned|>'}],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  'scorer_config': {'device': 'cuda:0'}},
162
  'kl_gpt3_callback': {'force_call_on': [22888],
163
  'gpt3_kwargs': {'model_name': 'davinci'},
164
  'max_tokens': 64,
165
- 'num_samples': 4096,
166
  'prefix': '<|aligned|>',
167
  'should_insert_prefix': True},
168
  'model': {'from_scratch': True,
@@ -184,8 +199,8 @@ The following hyperparameters were used during training:
184
  'hub_strategy': 'all_checkpoints',
185
  'learning_rate': 0.0005,
186
  'logging_first_step': True,
187
- 'logging_steps': 1,
188
- 'num_tokens': 3000000000.0,
189
  'output_dir': 'training_output_2',
190
  'per_device_train_batch_size': 8,
191
  'push_to_hub': True,
@@ -197,4 +212,4 @@ The following hyperparameters were used during training:
197
  'weight_decay': 0.1}}
198
 
199
  # Wandb URL:
200
- https://wandb.ai/kejian/uncategorized/runs/1llp96zs
 
82
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
83
  - lr_scheduler_type: linear
84
  - lr_scheduler_warmup_ratio: 0.01
85
+ - training_steps: 42724
86
  - mixed_precision_training: Native AMP
87
 
88
  ### Framework versions
 
156
  'top_k': 0,
157
  'top_p': 0.9},
158
  'name': 'unconditional',
159
+ 'num_samples': 2560,
160
+ 'prefix': '<|aligned|>'},
161
+ {'generate_kwargs': {'bad_words_ids': [[50257],
162
+ [50258],
163
+ [50259],
164
+ [50260]],
165
+ 'do_sample': True,
166
+ 'max_length': 128,
167
+ 'min_length': 10,
168
+ 'temperature': 0.7,
169
+ 'top_k': 0,
170
+ 'top_p': 0.9},
171
+ 'name': 'challenging_rtp',
172
+ 'num_samples': 1024,
173
+ 'prefix': '<|aligned|>',
174
+ 'prompt_before_control': True,
175
+ 'prompts_path': 'resources/challenging_rtp.jsonl'}],
176
  'scorer_config': {'device': 'cuda:0'}},
177
  'kl_gpt3_callback': {'force_call_on': [22888],
178
  'gpt3_kwargs': {'model_name': 'davinci'},
179
  'max_tokens': 64,
180
+ 'num_samples': 1024,
181
  'prefix': '<|aligned|>',
182
  'should_insert_prefix': True},
183
  'model': {'from_scratch': True,
 
199
  'hub_strategy': 'all_checkpoints',
200
  'learning_rate': 0.0005,
201
  'logging_first_step': True,
202
+ 'logging_steps': 500,
203
+ 'num_tokens': 2800000000.0,
204
  'output_dir': 'training_output_2',
205
  'per_device_train_batch_size': 8,
206
  'push_to_hub': True,
 
212
  'weight_decay': 0.1}}
213
 
214
  # Wandb URL:
215
+ https://wandb.ai/kejian/uncategorized/runs/2296ywzg