Push model using huggingface_hub.
Browse files
- README.md +19 -6
- model.safetensors +1 -1
- model_head.pkl +1 -1
- tokenizer.json +1 -3
README.md
CHANGED
|
@@ -109,12 +109,13 @@ preds = model("Have a good day!")
|
|
| 109 |
| 1 | 116 |
|
| 110 |
|
| 111 |
### Training Hyperparameters
|
| 112 |
-
- batch_size: (
|
| 113 |
-
- num_epochs: (
|
| 114 |
- max_steps: -1
|
| 115 |
- sampling_strategy: oversampling
|
| 116 |
-
-
|
| 117 |
-
-
|
|
|
|
| 118 |
- loss: CosineSimilarityLoss
|
| 119 |
- distance_metric: cosine_distance
|
| 120 |
- margin: 0.25
|
|
@@ -123,9 +124,8 @@ preds = model("Have a good day!")
|
|
| 123 |
- warmup_proportion: 0.1
|
| 124 |
- l2_weight: 0.01
|
| 125 |
- seed: 42
|
| 126 |
-
- evaluation_strategy: epoch
|
| 127 |
- eval_max_steps: -1
|
| 128 |
-
- load_best_model_at_end:
|
| 129 |
|
| 130 |
### Training Results
|
| 131 |
| Epoch | Step | Training Loss | Validation Loss |
|
|
@@ -133,6 +133,19 @@ preds = model("Have a good day!")
|
|
| 133 |
| 0.0009 | 1 | 0.3528 | - |
|
| 134 |
| 1.0 | 1068 | 0.0252 | 0.0729 |
|
| 135 |
| 2.0 | 2136 | 0.0001 | 0.0544 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
### Framework Versions
|
| 138 |
- Python: 3.10.12
|
|
|
|
| 109 |
| 1 | 116 |
|
| 110 |
|
| 111 |
### Training Hyperparameters
|
| 112 |
+
- batch_size: (16, 16)
|
| 113 |
+
- num_epochs: (1, 1)
|
| 114 |
- max_steps: -1
|
| 115 |
- sampling_strategy: oversampling
|
| 116 |
+
- num_iterations: 20
|
| 117 |
+
- body_learning_rate: (2e-05, 2e-05)
|
| 118 |
+
- head_learning_rate: 2e-05
|
| 119 |
- loss: CosineSimilarityLoss
|
| 120 |
- distance_metric: cosine_distance
|
| 121 |
- margin: 0.25
|
|
|
|
| 124 |
- warmup_proportion: 0.1
|
| 125 |
- l2_weight: 0.01
|
| 126 |
- seed: 42
|
|
|
|
| 127 |
- eval_max_steps: -1
|
| 128 |
+
- load_best_model_at_end: False
|
| 129 |
|
| 130 |
### Training Results
|
| 131 |
| Epoch | Step | Training Loss | Validation Loss |
|
|
|
|
| 133 |
| 0.0009 | 1 | 0.3528 | - |
|
| 134 |
| 1.0 | 1068 | 0.0252 | 0.0729 |
|
| 135 |
| 2.0 | 2136 | 0.0001 | 0.0544 |
|
| 136 |
+
| 0.0015 | 1 | 0.0 | - |
|
| 137 |
+
| 0.0772 | 50 | 0.001 | - |
|
| 138 |
+
| 0.1543 | 100 | 0.0 | - |
|
| 139 |
+
| 0.2315 | 150 | 0.0 | - |
|
| 140 |
+
| 0.3086 | 200 | 0.0 | - |
|
| 141 |
+
| 0.3858 | 250 | 0.0015 | - |
|
| 142 |
+
| 0.4630 | 300 | 0.001 | - |
|
| 143 |
+
| 0.5401 | 350 | 0.0 | - |
|
| 144 |
+
| 0.6173 | 400 | 0.0 | - |
|
| 145 |
+
| 0.6944 | 450 | 0.0 | - |
|
| 146 |
+
| 0.7716 | 500 | 0.0 | - |
|
| 147 |
+
| 0.8488 | 550 | 0.0 | - |
|
| 148 |
+
| 0.9259 | 600 | 0.0 | - |
|
| 149 |
|
| 150 |
### Framework Versions
|
| 151 |
- Python: 3.10.12
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98453640
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54f3c99a0285cbcae6e28b4efd74e0bc16c3464208d28b24982dadb693780bd3
|
| 3 |
size 98453640
|
model_head.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4761c7f1047edcbea61dab5f490686b765a055af9c42336c6b54f555de2a4da1
|
| 3 |
size 5611
|
tokenizer.json
CHANGED
|
@@ -7,9 +7,7 @@
|
|
| 7 |
"stride": 0
|
| 8 |
},
|
| 9 |
"padding": {
|
| 10 |
-
"strategy": {
|
| 11 |
-
"Fixed": 512
|
| 12 |
-
},
|
| 13 |
"direction": "Right",
|
| 14 |
"pad_to_multiple_of": null,
|
| 15 |
"pad_id": 0,
|
|
|
|
| 7 |
"stride": 0
|
| 8 |
},
|
| 9 |
"padding": {
|
| 10 |
+
"strategy": "BatchLongest",
|
|
|
|
|
|
|
| 11 |
"direction": "Right",
|
| 12 |
"pad_to_multiple_of": null,
|
| 13 |
"pad_id": 0,
|