update model card README.md
Browse files
README.md
CHANGED
|
@@ -99,9 +99,9 @@ The following hyperparameters were used during training:
|
|
| 99 |
'fine_prefix': '<|fine|>',
|
| 100 |
'misaligned_prefix': '<|misaligned|>',
|
| 101 |
'substandard_prefix': '<|substandard|>',
|
| 102 |
-
'threshold1': 0.
|
| 103 |
-
'threshold2': 0.
|
| 104 |
-
'threshold3': 0.
|
| 105 |
'threshold4': 0.9992},
|
| 106 |
'datasets': ['tomekkorbak/detoxify-pile-chunk3-0-50000',
|
| 107 |
'tomekkorbak/detoxify-pile-chunk3-50000-100000',
|
|
@@ -158,45 +158,6 @@ The following hyperparameters were used during training:
|
|
| 158 |
'name': 'unconditional',
|
| 159 |
'num_samples': 2560,
|
| 160 |
'prefix': '<|aligned|>'},
|
| 161 |
-
{'generate_kwargs': {'bad_words_ids': [[50257],
|
| 162 |
-
[50258],
|
| 163 |
-
[50259],
|
| 164 |
-
[50260]],
|
| 165 |
-
'do_sample': True,
|
| 166 |
-
'max_length': 128,
|
| 167 |
-
'min_length': 10,
|
| 168 |
-
'temperature': 0.7,
|
| 169 |
-
'top_k': 0,
|
| 170 |
-
'top_p': 0.9},
|
| 171 |
-
'name': 'unconditional-fine',
|
| 172 |
-
'num_samples': 512,
|
| 173 |
-
'prefix': '<|fine|>'},
|
| 174 |
-
{'generate_kwargs': {'bad_words_ids': [[50257],
|
| 175 |
-
[50258],
|
| 176 |
-
[50259],
|
| 177 |
-
[50260]],
|
| 178 |
-
'do_sample': True,
|
| 179 |
-
'max_length': 128,
|
| 180 |
-
'min_length': 10,
|
| 181 |
-
'temperature': 0.7,
|
| 182 |
-
'top_k': 0,
|
| 183 |
-
'top_p': 0.9},
|
| 184 |
-
'name': 'unconditional-substandard',
|
| 185 |
-
'num_samples': 512,
|
| 186 |
-
'prefix': '<|substandard|>'},
|
| 187 |
-
{'generate_kwargs': {'bad_words_ids': [[50257],
|
| 188 |
-
[50258],
|
| 189 |
-
[50259],
|
| 190 |
-
[50260]],
|
| 191 |
-
'do_sample': True,
|
| 192 |
-
'max_length': 128,
|
| 193 |
-
'min_length': 10,
|
| 194 |
-
'temperature': 0.7,
|
| 195 |
-
'top_k': 0,
|
| 196 |
-
'top_p': 0.9},
|
| 197 |
-
'name': 'unconditional-misaligned',
|
| 198 |
-
'num_samples': 512,
|
| 199 |
-
'prefix': '<|misaligned|>'},
|
| 200 |
{'generate_kwargs': {'bad_words_ids': [[50257],
|
| 201 |
[50258],
|
| 202 |
[50259],
|
|
@@ -216,7 +177,7 @@ The following hyperparameters were used during training:
|
|
| 216 |
'kl_gpt3_callback': {'force_call_on': [21362],
|
| 217 |
'gpt3_kwargs': {'model_name': 'davinci'},
|
| 218 |
'max_tokens': 64,
|
| 219 |
-
'num_samples':
|
| 220 |
'prefix': '<|aligned|>',
|
| 221 |
'should_insert_prefix': True},
|
| 222 |
'model': {'from_scratch': True,
|
|
@@ -240,7 +201,7 @@ The following hyperparameters were used during training:
|
|
| 240 |
'logging_first_step': True,
|
| 241 |
'logging_steps': 500,
|
| 242 |
'num_tokens': 2800000000.0,
|
| 243 |
-
'output_dir': '
|
| 244 |
'per_device_train_batch_size': 8,
|
| 245 |
'push_to_hub': True,
|
| 246 |
'remove_unused_columns': False,
|
|
@@ -251,4 +212,4 @@ The following hyperparameters were used during training:
|
|
| 251 |
'weight_decay': 0.1}}
|
| 252 |
|
| 253 |
# Wandb URL:
|
| 254 |
-
https://wandb.ai/kejian/uncategorized/runs/
|
|
|
|
| 99 |
'fine_prefix': '<|fine|>',
|
| 100 |
'misaligned_prefix': '<|misaligned|>',
|
| 101 |
'substandard_prefix': '<|substandard|>',
|
| 102 |
+
'threshold1': 0.0006038,
|
| 103 |
+
'threshold2': 0.0006638,
|
| 104 |
+
'threshold3': 0.00089704,
|
| 105 |
'threshold4': 0.9992},
|
| 106 |
'datasets': ['tomekkorbak/detoxify-pile-chunk3-0-50000',
|
| 107 |
'tomekkorbak/detoxify-pile-chunk3-50000-100000',
|
|
|
|
| 158 |
'name': 'unconditional',
|
| 159 |
'num_samples': 2560,
|
| 160 |
'prefix': '<|aligned|>'},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
{'generate_kwargs': {'bad_words_ids': [[50257],
|
| 162 |
[50258],
|
| 163 |
[50259],
|
|
|
|
| 177 |
'kl_gpt3_callback': {'force_call_on': [21362],
|
| 178 |
'gpt3_kwargs': {'model_name': 'davinci'},
|
| 179 |
'max_tokens': 64,
|
| 180 |
+
'num_samples': 2048,
|
| 181 |
'prefix': '<|aligned|>',
|
| 182 |
'should_insert_prefix': True},
|
| 183 |
'model': {'from_scratch': True,
|
|
|
|
| 201 |
'logging_first_step': True,
|
| 202 |
'logging_steps': 500,
|
| 203 |
'num_tokens': 2800000000.0,
|
| 204 |
+
'output_dir': 'training_output_2',
|
| 205 |
'per_device_train_batch_size': 8,
|
| 206 |
'push_to_hub': True,
|
| 207 |
'remove_unused_columns': False,
|
|
|
|
| 212 |
'weight_decay': 0.1}}
|
| 213 |
|
| 214 |
# Wandb URL:
|
| 215 |
+
https://wandb.ai/kejian/uncategorized/runs/3m4axm31
|