mpnet-base-all-mqp-binary
Browse files
README.md
CHANGED
|
@@ -211,9 +211,10 @@ You can finetune this model on your own dataset.
|
|
| 211 |
- `eval_strategy`: steps
|
| 212 |
- `per_device_train_batch_size`: 16
|
| 213 |
- `per_device_eval_batch_size`: 16
|
| 214 |
-
- `num_train_epochs`: 10
|
| 215 |
- `warmup_ratio`: 0.1
|
| 216 |
- `fp16`: True
|
|
|
|
| 217 |
- `batch_sampler`: no_duplicates
|
| 218 |
|
| 219 |
#### All Hyperparameters
|
|
@@ -236,7 +237,7 @@ You can finetune this model on your own dataset.
|
|
| 236 |
- `adam_beta2`: 0.999
|
| 237 |
- `adam_epsilon`: 1e-08
|
| 238 |
- `max_grad_norm`: 1.0
|
| 239 |
-
- `num_train_epochs`: 10
|
| 240 |
- `max_steps`: -1
|
| 241 |
- `lr_scheduler_type`: linear
|
| 242 |
- `lr_scheduler_kwargs`: {}
|
|
@@ -297,7 +298,7 @@ You can finetune this model on your own dataset.
|
|
| 297 |
- `dataloader_persistent_workers`: False
|
| 298 |
- `skip_memory_metrics`: True
|
| 299 |
- `use_legacy_prediction_loop`: False
|
| 300 |
-
- `push_to_hub`: False
|
| 301 |
- `resume_from_checkpoint`: None
|
| 302 |
- `hub_model_id`: None
|
| 303 |
- `hub_strategy`: every_save
|
|
@@ -340,21 +341,7 @@ You can finetune this model on your own dataset.
|
|
| 340 |
### Training Logs
|
| 341 |
| Epoch | Step | Training Loss | Validation Loss |
|
| 342 |
|:------:|:----:|:-------------:|:---------------:|
|
| 343 |
-
| 0.6536 | 100 | 2.
|
| 344 |
-
| 1.3072 | 200 | 2.4602 | 2.8035 |
|
| 345 |
-
| 1.9608 | 300 | 0.9681 | 3.4420 |
|
| 346 |
-
| 2.6144 | 400 | 0.4578 | 4.1960 |
|
| 347 |
-
| 3.2680 | 500 | 0.1123 | 4.3254 |
|
| 348 |
-
| 3.9216 | 600 | 0.0155 | 4.8884 |
|
| 349 |
-
| 4.5752 | 700 | 0.0026 | 5.0455 |
|
| 350 |
-
| 5.2288 | 800 | 0.0022 | 5.0907 |
|
| 351 |
-
| 5.8824 | 900 | 0.0003 | 5.0952 |
|
| 352 |
-
| 6.5359 | 1000 | 0.0001 | 5.1793 |
|
| 353 |
-
| 7.1895 | 1100 | 0.0001 | 5.2393 |
|
| 354 |
-
| 7.8431 | 1200 | 0.0001 | 5.2619 |
|
| 355 |
-
| 8.4967 | 1300 | 0.0001 | 5.2712 |
|
| 356 |
-
| 9.1503 | 1400 | 0.0001 | 5.2953 |
|
| 357 |
-
| 9.8039 | 1500 | 0.0001 | 5.3024 |
|
| 358 |
|
| 359 |
|
| 360 |
### Framework Versions
|
|
|
|
| 211 |
- `eval_strategy`: steps
|
| 212 |
- `per_device_train_batch_size`: 16
|
| 213 |
- `per_device_eval_batch_size`: 16
|
| 214 |
+
- `num_train_epochs`: 1
|
| 215 |
- `warmup_ratio`: 0.1
|
| 216 |
- `fp16`: True
|
| 217 |
+
- `push_to_hub`: True
|
| 218 |
- `batch_sampler`: no_duplicates
|
| 219 |
|
| 220 |
#### All Hyperparameters
|
|
|
|
| 237 |
- `adam_beta2`: 0.999
|
| 238 |
- `adam_epsilon`: 1e-08
|
| 239 |
- `max_grad_norm`: 1.0
|
| 240 |
+
- `num_train_epochs`: 1
|
| 241 |
- `max_steps`: -1
|
| 242 |
- `lr_scheduler_type`: linear
|
| 243 |
- `lr_scheduler_kwargs`: {}
|
|
|
|
| 298 |
- `dataloader_persistent_workers`: False
|
| 299 |
- `skip_memory_metrics`: True
|
| 300 |
- `use_legacy_prediction_loop`: False
|
| 301 |
+
- `push_to_hub`: True
|
| 302 |
- `resume_from_checkpoint`: None
|
| 303 |
- `hub_model_id`: None
|
| 304 |
- `hub_strategy`: every_save
|
|
|
|
| 341 |
### Training Logs
|
| 342 |
| Epoch | Step | Training Loss | Validation Loss |
|
| 343 |
|:------:|:----:|:-------------:|:---------------:|
|
| 344 |
+
| 0.6536 | 100 | 2.7722 | 2.8215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
|
| 347 |
### Framework Versions
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 437967672
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06c9fc017185df78269a8c8628dd77893dca26eed157fbe3923e5e455085ecd2
|
| 3 |
size 437967672
|
runs/Feb06_08-23-57_90c6fdffb148/events.out.tfevents.1738830343.90c6fdffb148.17069.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ab46833711dbb594ff00deef2d2cbfd942305b4f6d6d553c7c6aa7fd94971de
|
| 3 |
+
size 5254
|