Commit
·
30575fe
1
Parent(s):
91e8d4a
Update README.md
Browse files
README.md
CHANGED
|
@@ -151,21 +151,20 @@ This was necessary due to the maximum input token limit accepted by the RoBERTa-
|
|
| 151 |
- **Training regime:** fp32
|
| 152 |
- **base_model_name_or_path:** roberta-base
|
| 153 |
- **max_tokens_length:** 512
|
| 154 |
-
- **weighted_loss** true
|
| 155 |
- **training_arguments:** TrainingArguments(
|
| 156 |
output_dir=results_dir,
|
| 157 |
num_train_epochs=5,
|
| 158 |
per_device_train_batch_size=8,
|
| 159 |
per_device_eval_batch_size=8,
|
| 160 |
gradient_accumulation_steps=1,
|
| 161 |
-
learning_rate=0.
|
| 162 |
lr_scheduler_type="linear",
|
| 163 |
optim="adamw_torch",
|
| 164 |
eval_accumulation_steps=1,
|
| 165 |
evaluation_strategy="steps",
|
| 166 |
-
eval_steps=0.
|
| 167 |
save_strategy="steps",
|
| 168 |
-
save_steps=0.
|
| 169 |
logging_strategy="steps",
|
| 170 |
logging_steps=1,
|
| 171 |
report_to="tensorboard",
|
|
@@ -173,7 +172,7 @@ This was necessary due to the maximum input token limit accepted by the RoBERTa-
|
|
| 173 |
do_eval=True,
|
| 174 |
max_grad_norm=0.3,
|
| 175 |
warmup_ratio=0.03,
|
| 176 |
-
group_by_length=True,
|
| 177 |
dataloader_drop_last=False,
|
| 178 |
fp16=False,
|
| 179 |
bf16=False
|
|
|
|
| 151 |
- **Training regime:** fp32
|
| 152 |
- **base_model_name_or_path:** roberta-base
|
| 153 |
- **max_tokens_length:** 512
|
|
|
|
| 154 |
- **training_arguments:** TrainingArguments(
|
| 155 |
output_dir=results_dir,
|
| 156 |
num_train_epochs=5,
|
| 157 |
per_device_train_batch_size=8,
|
| 158 |
per_device_eval_batch_size=8,
|
| 159 |
gradient_accumulation_steps=1,
|
| 160 |
+
learning_rate=0.00001,
|
| 161 |
lr_scheduler_type="linear",
|
| 162 |
optim="adamw_torch",
|
| 163 |
eval_accumulation_steps=1,
|
| 164 |
evaluation_strategy="steps",
|
| 165 |
+
eval_steps=0.2,
|
| 166 |
save_strategy="steps",
|
| 167 |
+
save_steps=0.2,
|
| 168 |
logging_strategy="steps",
|
| 169 |
logging_steps=1,
|
| 170 |
report_to="tensorboard",
|
|
|
|
| 172 |
do_eval=True,
|
| 173 |
max_grad_norm=0.3,
|
| 174 |
warmup_ratio=0.03,
|
| 175 |
+
#group_by_length=True,
|
| 176 |
dataloader_drop_last=False,
|
| 177 |
fp16=False,
|
| 178 |
bf16=False
|