nickprock committed on
Commit
bc5a75f
·
verified ·
1 Parent(s): 33b0a67

Upgrade to V5: Matryoshka Loss with Asymmetric Weights [1.0, 0.4, 0.2, 0.2]. Minimized truncation tax at 768d.

Browse files
Files changed (3) hide show
  1. README.md +97 -130
  2. model.safetensors +1 -1
  3. tokenizer_config.json +1 -1
README.md CHANGED
@@ -147,49 +147,49 @@ model-index:
147
  type: retrieval-768d
148
  metrics:
149
  - type: cosine_accuracy@1
150
- value: 0.7652938824470212
151
  name: Cosine Accuracy@1
152
  - type: cosine_accuracy@3
153
- value: 0.9052379048380648
154
  name: Cosine Accuracy@3
155
  - type: cosine_accuracy@5
156
- value: 0.9384246301479409
157
  name: Cosine Accuracy@5
158
  - type: cosine_accuracy@10
159
- value: 0.9616153538584566
160
  name: Cosine Accuracy@10
161
  - type: cosine_precision@1
162
- value: 0.7652938824470212
163
  name: Cosine Precision@1
164
  - type: cosine_precision@3
165
- value: 0.30174596827935496
166
  name: Cosine Precision@3
167
  - type: cosine_precision@5
168
- value: 0.18768492602958814
169
  name: Cosine Precision@5
170
  - type: cosine_precision@10
171
- value: 0.09616153538584567
172
  name: Cosine Precision@10
173
  - type: cosine_recall@1
174
- value: 0.7652938824470212
175
  name: Cosine Recall@1
176
  - type: cosine_recall@3
177
- value: 0.9052379048380648
178
  name: Cosine Recall@3
179
  - type: cosine_recall@5
180
- value: 0.9384246301479409
181
  name: Cosine Recall@5
182
  - type: cosine_recall@10
183
- value: 0.9616153538584566
184
  name: Cosine Recall@10
185
  - type: cosine_ndcg@10
186
- value: 0.8709655191496608
187
  name: Cosine Ndcg@10
188
  - type: cosine_mrr@10
189
- value: 0.8411203772459269
190
  name: Cosine Mrr@10
191
  - type: cosine_map@100
192
- value: 0.8426922800593073
193
  name: Cosine Map@100
194
  - task:
195
  type: information-retrieval
@@ -199,49 +199,49 @@ model-index:
199
  type: retrieval-128d
200
  metrics:
201
  - type: cosine_accuracy@1
202
- value: 0.7153138744502199
203
  name: Cosine Accuracy@1
204
  - type: cosine_accuracy@3
205
- value: 0.8692522990803678
206
  name: Cosine Accuracy@3
207
  - type: cosine_accuracy@5
208
- value: 0.9016393442622951
209
  name: Cosine Accuracy@5
210
  - type: cosine_accuracy@10
211
- value: 0.9412235105957617
212
  name: Cosine Accuracy@10
213
  - type: cosine_precision@1
214
- value: 0.7153138744502199
215
  name: Cosine Precision@1
216
  - type: cosine_precision@3
217
- value: 0.2897507663601226
218
  name: Cosine Precision@3
219
  - type: cosine_precision@5
220
- value: 0.18032786885245902
221
  name: Cosine Precision@5
222
  - type: cosine_precision@10
223
- value: 0.09412235105957616
224
  name: Cosine Precision@10
225
  - type: cosine_recall@1
226
- value: 0.7153138744502199
227
  name: Cosine Recall@1
228
  - type: cosine_recall@3
229
- value: 0.8692522990803678
230
  name: Cosine Recall@3
231
  - type: cosine_recall@5
232
- value: 0.9016393442622951
233
  name: Cosine Recall@5
234
  - type: cosine_recall@10
235
- value: 0.9412235105957617
236
  name: Cosine Recall@10
237
  - type: cosine_ndcg@10
238
- value: 0.833440978290933
239
  name: Cosine Ndcg@10
240
  - type: cosine_mrr@10
241
- value: 0.7984015917442536
242
  name: Cosine Mrr@10
243
  - type: cosine_map@100
244
- value: 0.8004509036706192
245
  name: Cosine Map@100
246
  - task:
247
  type: semantic-similarity
@@ -251,10 +251,10 @@ model-index:
251
  type: sts-dev
252
  metrics:
253
  - type: pearson_cosine
254
- value: 0.8576299147217115
255
  name: Pearson Cosine
256
  - type: spearman_cosine
257
- value: 0.8550707245646871
258
  name: Spearman Cosine
259
  ---
260
 
@@ -320,9 +320,9 @@ print(embeddings.shape)
320
  # Get the similarity scores for the embeddings
321
  similarities = model.similarity(embeddings, embeddings)
322
  print(similarities)
323
- # tensor([[ 1.0000, 0.3790, 0.0013],
324
- # [ 0.3790, 1.0000, -0.0093],
325
- # [ 0.0013, -0.0093, 1.0000]])
326
  ```
327
 
328
  <!--
@@ -363,23 +363,23 @@ You can finetune this model on your own dataset.
363
  }
364
  ```
365
 
366
- | Metric | Value |
367
- |:--------------------|:----------|
368
- | cosine_accuracy@1 | 0.7653 |
369
- | cosine_accuracy@3 | 0.9052 |
370
- | cosine_accuracy@5 | 0.9384 |
371
- | cosine_accuracy@10 | 0.9616 |
372
- | cosine_precision@1 | 0.7653 |
373
- | cosine_precision@3 | 0.3017 |
374
- | cosine_precision@5 | 0.1877 |
375
- | cosine_precision@10 | 0.0962 |
376
- | cosine_recall@1 | 0.7653 |
377
- | cosine_recall@3 | 0.9052 |
378
- | cosine_recall@5 | 0.9384 |
379
- | cosine_recall@10 | 0.9616 |
380
- | **cosine_ndcg@10** | **0.871** |
381
- | cosine_mrr@10 | 0.8411 |
382
- | cosine_map@100 | 0.8427 |
383
 
384
  #### Information Retrieval
385
 
@@ -393,21 +393,21 @@ You can finetune this model on your own dataset.
393
 
394
  | Metric | Value |
395
  |:--------------------|:-----------|
396
- | cosine_accuracy@1 | 0.7153 |
397
- | cosine_accuracy@3 | 0.8693 |
398
- | cosine_accuracy@5 | 0.9016 |
399
- | cosine_accuracy@10 | 0.9412 |
400
- | cosine_precision@1 | 0.7153 |
401
- | cosine_precision@3 | 0.2898 |
402
- | cosine_precision@5 | 0.1803 |
403
- | cosine_precision@10 | 0.0941 |
404
- | cosine_recall@1 | 0.7153 |
405
- | cosine_recall@3 | 0.8693 |
406
- | cosine_recall@5 | 0.9016 |
407
- | cosine_recall@10 | 0.9412 |
408
- | **cosine_ndcg@10** | **0.8334** |
409
- | cosine_mrr@10 | 0.7984 |
410
- | cosine_map@100 | 0.8005 |
411
 
412
  #### Semantic Similarity
413
 
@@ -416,8 +416,8 @@ You can finetune this model on your own dataset.
416
 
417
  | Metric | Value |
418
  |:--------------------|:-----------|
419
- | pearson_cosine | 0.8576 |
420
- | **spearman_cosine** | **0.8551** |
421
 
422
  <!--
423
  ## Bias, Risks and Limitations
@@ -700,67 +700,34 @@ You can finetune this model on your own dataset.
700
  </details>
701
 
702
  ### Training Logs
703
- | Epoch | Step | Training Loss | task retrieval loss | task sts loss | retrieval-768d_cosine_ndcg@10 | retrieval-128d_cosine_ndcg@10 | sts-dev_spearman_cosine |
704
- |:----------:|:-------:|:-------------:|:-------------------:|:-------------:|:-----------------------------:|:-----------------------------:|:-----------------------:|
705
- | 0.0702 | 50 | 7.7958 | - | - | - | - | - |
706
- | 0.1404 | 100 | 4.5273 | - | - | - | - | - |
707
- | 0.2107 | 150 | 8.7004 | - | - | - | - | - |
708
- | 0.2809 | 200 | 5.6620 | - | - | - | - | - |
709
- | **0.3511** | **250** | **7.3535** | **0.0642** | **18.3673** | **0.871** | **0.8334** | **0.854** |
710
- | 0.4213 | 300 | 6.3456 | - | - | - | - | - |
711
- | 0.4916 | 350 | 6.5450 | - | - | - | - | - |
712
- | 0.5618 | 400 | 8.1323 | - | - | - | - | - |
713
- | 0.6320 | 450 | 6.1999 | - | - | - | - | - |
714
- | 0.7022 | 500 | 5.9058 | 0.0577 | 18.5753 | 0.8682 | 0.8372 | 0.8538 |
715
- | 0.7725 | 550 | 6.4255 | - | - | - | - | - |
716
- | 0.8427 | 600 | 6.7009 | - | - | - | - | - |
717
- | 0.9129 | 650 | 6.3682 | - | - | - | - | - |
718
- | 0.9831 | 700 | 7.1500 | - | - | - | - | - |
719
- | 1.0534 | 750 | 6.7907 | 0.0550 | 18.5580 | 0.8681 | 0.8333 | 0.8552 |
720
- | 1.1236 | 800 | 5.2997 | - | - | - | - | - |
721
- | 1.1938 | 850 | 6.0822 | - | - | - | - | - |
722
- | 1.2640 | 900 | 6.5435 | - | - | - | - | - |
723
- | 1.3343 | 950 | 7.0916 | - | - | - | - | - |
724
- | 1.4045 | 1000 | 5.9986 | 0.0554 | 18.7416 | 0.8671 | 0.8354 | 0.8550 |
725
- | 1.4747 | 1050 | 5.5105 | - | - | - | - | - |
726
- | 1.5449 | 1100 | 7.5051 | - | - | - | - | - |
727
- | 1.6152 | 1150 | 7.0109 | - | - | - | - | - |
728
- | 1.6854 | 1200 | 5.2789 | - | - | - | - | - |
729
- | 1.7556 | 1250 | 6.0140 | 0.0540 | 18.7505 | 0.8692 | 0.8387 | 0.8549 |
730
- | 1.8258 | 1300 | 6.2669 | - | - | - | - | - |
731
- | 1.8961 | 1350 | 6.2215 | - | - | - | - | - |
732
- | 1.9663 | 1400 | 7.6712 | - | - | - | - | - |
733
- | 2.0365 | 1450 | 6.1573 | - | - | - | - | - |
734
- | 2.1067 | 1500 | 6.4583 | 0.0542 | 18.8299 | 0.8653 | 0.8359 | 0.8555 |
735
- | 2.1770 | 1550 | 7.1814 | - | - | - | - | - |
736
- | 2.2472 | 1600 | 5.9135 | - | - | - | - | - |
737
- | 2.3174 | 1650 | 6.2025 | - | - | - | - | - |
738
- | 2.3876 | 1700 | 4.9456 | - | - | - | - | - |
739
- | 2.4579 | 1750 | 6.1588 | 0.0547 | 18.9144 | 0.8650 | 0.8357 | 0.8551 |
740
- | 2.5281 | 1800 | 7.6150 | - | - | - | - | - |
741
- | 2.5983 | 1850 | 6.2019 | - | - | - | - | - |
742
- | 2.6685 | 1900 | 5.9106 | - | - | - | - | - |
743
- | 2.7388 | 1950 | 5.4257 | - | - | - | - | - |
744
- | 2.8090 | 2000 | 5.6597 | 0.0523 | 19.0004 | 0.8657 | 0.8361 | 0.8546 |
745
- | 2.8792 | 2050 | 5.9472 | - | - | - | - | - |
746
- | 2.9494 | 2100 | 5.6624 | - | - | - | - | - |
747
- | 3.0197 | 2150 | 7.7736 | - | - | - | - | - |
748
- | 3.0899 | 2200 | 6.6527 | - | - | - | - | - |
749
- | 3.1601 | 2250 | 5.9107 | 0.0531 | 18.9516 | 0.8664 | 0.8373 | 0.8551 |
750
- | 3.2303 | 2300 | 6.1335 | - | - | - | - | - |
751
- | 3.3006 | 2350 | 5.4157 | - | - | - | - | - |
752
- | 3.3708 | 2400 | 7.3402 | - | - | - | - | - |
753
- | 3.4410 | 2450 | 4.6722 | - | - | - | - | - |
754
- | 3.5112 | 2500 | 7.1186 | 0.0530 | 18.9883 | 0.8652 | 0.8356 | 0.8551 |
755
- | 3.5815 | 2550 | 6.3746 | - | - | - | - | - |
756
- | 3.6517 | 2600 | 3.9370 | - | - | - | - | - |
757
- | 3.7219 | 2650 | 8.1087 | - | - | - | - | - |
758
- | 3.7921 | 2700 | 4.8976 | - | - | - | - | - |
759
- | 3.8624 | 2750 | 6.1367 | 0.0527 | 19.0004 | 0.8657 | 0.8372 | 0.8551 |
760
- | 3.9326 | 2800 | 6.6133 | - | - | - | - | - |
761
- | -1 | -1 | - | - | - | 0.8710 | 0.8334 | - |
762
-
763
- * The bold row denotes the saved checkpoint.
764
 
765
  ### Framework Versions
766
  - Python: 3.10.19
 
147
  type: retrieval-768d
148
  metrics:
149
  - type: cosine_accuracy@1
150
+ value: 0.7644942023190724
151
  name: Cosine Accuracy@1
152
  - type: cosine_accuracy@3
153
+ value: 0.9048380647740903
154
  name: Cosine Accuracy@3
155
  - type: cosine_accuracy@5
156
+ value: 0.9356257497001199
157
  name: Cosine Accuracy@5
158
  - type: cosine_accuracy@10
159
+ value: 0.9584166333466614
160
  name: Cosine Accuracy@10
161
  - type: cosine_precision@1
162
+ value: 0.7644942023190724
163
  name: Cosine Precision@1
164
  - type: cosine_precision@3
165
+ value: 0.3016126882580301
166
  name: Cosine Precision@3
167
  - type: cosine_precision@5
168
+ value: 0.187125149940024
169
  name: Cosine Precision@5
170
  - type: cosine_precision@10
171
+ value: 0.09584166333466614
172
  name: Cosine Precision@10
173
  - type: cosine_recall@1
174
+ value: 0.7644942023190724
175
  name: Cosine Recall@1
176
  - type: cosine_recall@3
177
+ value: 0.9048380647740903
178
  name: Cosine Recall@3
179
  - type: cosine_recall@5
180
+ value: 0.9356257497001199
181
  name: Cosine Recall@5
182
  - type: cosine_recall@10
183
+ value: 0.9584166333466614
184
  name: Cosine Recall@10
185
  - type: cosine_ndcg@10
186
+ value: 0.8691915140008164
187
  name: Cosine Ndcg@10
188
  - type: cosine_mrr@10
189
+ value: 0.8397337890240723
190
  name: Cosine Mrr@10
191
  - type: cosine_map@100
192
+ value: 0.8415948069155025
193
  name: Cosine Map@100
194
  - task:
195
  type: information-retrieval
 
199
  type: retrieval-128d
200
  metrics:
201
  - type: cosine_accuracy@1
202
+ value: 0.728108756497401
203
  name: Cosine Accuracy@1
204
  - type: cosine_accuracy@3
205
+ value: 0.8704518192722911
206
  name: Cosine Accuracy@3
207
  - type: cosine_accuracy@5
208
+ value: 0.90843662534986
209
  name: Cosine Accuracy@5
210
  - type: cosine_accuracy@10
211
+ value: 0.9384246301479409
212
  name: Cosine Accuracy@10
213
  - type: cosine_precision@1
214
+ value: 0.728108756497401
215
  name: Cosine Precision@1
216
  - type: cosine_precision@3
217
+ value: 0.29015060642409707
218
  name: Cosine Precision@3
219
  - type: cosine_precision@5
220
+ value: 0.181687325069972
221
  name: Cosine Precision@5
222
  - type: cosine_precision@10
223
+ value: 0.09384246301479407
224
  name: Cosine Precision@10
225
  - type: cosine_recall@1
226
+ value: 0.728108756497401
227
  name: Cosine Recall@1
228
  - type: cosine_recall@3
229
+ value: 0.8704518192722911
230
  name: Cosine Recall@3
231
  - type: cosine_recall@5
232
+ value: 0.90843662534986
233
  name: Cosine Recall@5
234
  - type: cosine_recall@10
235
+ value: 0.9384246301479409
236
  name: Cosine Recall@10
237
  - type: cosine_ndcg@10
238
+ value: 0.8386806267277799
239
  name: Cosine Ndcg@10
240
  - type: cosine_mrr@10
241
+ value: 0.8061331023146291
242
  name: Cosine Mrr@10
243
  - type: cosine_map@100
244
+ value: 0.8085397150284467
245
  name: Cosine Map@100
246
  - task:
247
  type: semantic-similarity
 
251
  type: sts-dev
252
  metrics:
253
  - type: pearson_cosine
254
+ value: 0.8573759140495629
255
  name: Pearson Cosine
256
  - type: spearman_cosine
257
+ value: 0.8548722155310733
258
  name: Spearman Cosine
259
  ---
260
 
 
320
  # Get the similarity scores for the embeddings
321
  similarities = model.similarity(embeddings, embeddings)
322
  print(similarities)
323
+ # tensor([[ 1.0000, 0.3644, 0.0120],
324
+ # [ 0.3644, 1.0000, -0.0001],
325
+ # [ 0.0120, -0.0001, 1.0000]])
326
  ```
327
 
328
  <!--
 
363
  }
364
  ```
365
 
366
+ | Metric | Value |
367
+ |:--------------------|:-----------|
368
+ | cosine_accuracy@1 | 0.7645 |
369
+ | cosine_accuracy@3 | 0.9048 |
370
+ | cosine_accuracy@5 | 0.9356 |
371
+ | cosine_accuracy@10 | 0.9584 |
372
+ | cosine_precision@1 | 0.7645 |
373
+ | cosine_precision@3 | 0.3016 |
374
+ | cosine_precision@5 | 0.1871 |
375
+ | cosine_precision@10 | 0.0958 |
376
+ | cosine_recall@1 | 0.7645 |
377
+ | cosine_recall@3 | 0.9048 |
378
+ | cosine_recall@5 | 0.9356 |
379
+ | cosine_recall@10 | 0.9584 |
380
+ | **cosine_ndcg@10** | **0.8692** |
381
+ | cosine_mrr@10 | 0.8397 |
382
+ | cosine_map@100 | 0.8416 |
383
 
384
  #### Information Retrieval
385
 
 
393
 
394
  | Metric | Value |
395
  |:--------------------|:-----------|
396
+ | cosine_accuracy@1 | 0.7281 |
397
+ | cosine_accuracy@3 | 0.8705 |
398
+ | cosine_accuracy@5 | 0.9084 |
399
+ | cosine_accuracy@10 | 0.9384 |
400
+ | cosine_precision@1 | 0.7281 |
401
+ | cosine_precision@3 | 0.2902 |
402
+ | cosine_precision@5 | 0.1817 |
403
+ | cosine_precision@10 | 0.0938 |
404
+ | cosine_recall@1 | 0.7281 |
405
+ | cosine_recall@3 | 0.8705 |
406
+ | cosine_recall@5 | 0.9084 |
407
+ | cosine_recall@10 | 0.9384 |
408
+ | **cosine_ndcg@10** | **0.8387** |
409
+ | cosine_mrr@10 | 0.8061 |
410
+ | cosine_map@100 | 0.8085 |
411
 
412
  #### Semantic Similarity
413
 
 
416
 
417
  | Metric | Value |
418
  |:--------------------|:-----------|
419
+ | pearson_cosine | 0.8574 |
420
+ | **spearman_cosine** | **0.8549** |
421
 
422
  <!--
423
  ## Bias, Risks and Limitations
 
700
  </details>
701
 
702
  ### Training Logs
703
+ | Epoch | Step | Training Loss | task retrieval loss | task sts loss | retrieval-768d_cosine_ndcg@10 | retrieval-128d_cosine_ndcg@10 | sts-dev_spearman_cosine |
704
+ |:------:|:----:|:-------------:|:-------------------:|:-------------:|:-----------------------------:|:-----------------------------:|:-----------------------:|
705
+ | 0.0702 | 50 | 7.7958 | - | - | - | - | - |
706
+ | 0.1404 | 100 | 4.5273 | - | - | - | - | - |
707
+ | 0.2107 | 150 | 8.7004 | - | - | - | - | - |
708
+ | 0.2809 | 200 | 5.6620 | - | - | - | - | - |
709
+ | 0.3511 | 250 | 7.3535 | 0.0642 | 18.3673 | 0.8710 | 0.8334 | 0.8540 |
710
+ | 0.4213 | 300 | 6.3456 | - | - | - | - | - |
711
+ | 0.4916 | 350 | 6.5450 | - | - | - | - | - |
712
+ | 0.5618 | 400 | 8.1323 | - | - | - | - | - |
713
+ | 0.6320 | 450 | 6.1999 | - | - | - | - | - |
714
+ | 0.7022 | 500 | 5.9058 | 0.0577 | 18.5753 | 0.8682 | 0.8372 | 0.8538 |
715
+ | 0.7725 | 550 | 6.4255 | - | - | - | - | - |
716
+ | 0.8427 | 600 | 6.7009 | - | - | - | - | - |
717
+ | 0.9129 | 650 | 6.3682 | - | - | - | - | - |
718
+ | 0.9831 | 700 | 7.1500 | - | - | - | - | - |
719
+ | 1.0534 | 750 | 6.7907 | 0.0550 | 18.5580 | 0.8681 | 0.8333 | 0.8552 |
720
+ | 1.1236 | 800 | 5.2997 | - | - | - | - | - |
721
+ | 1.1938 | 850 | 6.0822 | - | - | - | - | - |
722
+ | 1.2640 | 900 | 6.5435 | - | - | - | - | - |
723
+ | 1.3343 | 950 | 7.0916 | - | - | - | - | - |
724
+ | 1.4045 | 1000 | 5.9986 | 0.0554 | 18.7416 | 0.8671 | 0.8354 | 0.8550 |
725
+ | 1.4747 | 1050 | 5.5105 | - | - | - | - | - |
726
+ | 1.5449 | 1100 | 7.5051 | - | - | - | - | - |
727
+ | 1.6152 | 1150 | 7.0109 | - | - | - | - | - |
728
+ | 1.6854 | 1200 | 5.2789 | - | - | - | - | - |
729
+ | 1.7556 | 1250 | 6.0140 | 0.0540 | 18.7505 | 0.8692 | 0.8387 | 0.8549 |
730
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
731
 
732
  ### Framework Versions
733
  - Python: 3.10.19
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a0d90423b353949906bb4ce52f1da3786de0b1a65e94a37bc626f127153798a
3
  size 270316376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe23c0df28a1ed4660cd91498b784c5fd79d6616fceab324377387e2a83403d
3
  size 270316376
tokenizer_config.json CHANGED
@@ -5,7 +5,7 @@
5
  "do_basic_tokenize": true,
6
  "do_lower_case": true,
7
  "full_tokenizer_file": null,
8
- "is_local": false,
9
  "mask_token": "[MASK]",
10
  "max_len": 512,
11
  "max_length": 512,
 
5
  "do_basic_tokenize": true,
6
  "do_lower_case": true,
7
  "full_tokenizer_file": null,
8
+ "is_local": true,
9
  "mask_token": "[MASK]",
10
  "max_len": 512,
11
  "max_length": 512,