erickfmm committed on
Commit
d03dd3e
·
verified ·
1 Parent(s): 56f626f

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. checkpoints/checkpoint-856000/1_Pooling/config.json +10 -0
  2. checkpoints/checkpoint-856000/README.md +957 -0
  3. checkpoints/checkpoint-856000/config.json +45 -0
  4. checkpoints/checkpoint-856000/config_sentence_transformers.json +14 -0
  5. checkpoints/checkpoint-856000/model.safetensors +3 -0
  6. checkpoints/checkpoint-856000/modules.json +20 -0
  7. checkpoints/checkpoint-856000/optimizer.pt +3 -0
  8. checkpoints/checkpoint-856000/rng_state.pth +3 -0
  9. checkpoints/checkpoint-856000/scheduler.pt +3 -0
  10. checkpoints/checkpoint-856000/sentence_bert_config.json +4 -0
  11. checkpoints/checkpoint-856000/special_tokens_map.json +40 -0
  12. checkpoints/checkpoint-856000/tokenizer.json +0 -0
  13. checkpoints/checkpoint-856000/tokenizer.model +3 -0
  14. checkpoints/checkpoint-856000/tokenizer_config.json +0 -0
  15. checkpoints/checkpoint-856000/trainer_state.json +0 -0
  16. checkpoints/checkpoint-856000/training_args.bin +3 -0
  17. checkpoints/checkpoint-857000/1_Pooling/config.json +10 -0
  18. checkpoints/checkpoint-857000/README.md +959 -0
  19. checkpoints/checkpoint-857000/config.json +45 -0
  20. checkpoints/checkpoint-857000/config_sentence_transformers.json +14 -0
  21. checkpoints/checkpoint-857000/model.safetensors +3 -0
  22. checkpoints/checkpoint-857000/modules.json +20 -0
  23. checkpoints/checkpoint-857000/optimizer.pt +3 -0
  24. checkpoints/checkpoint-857000/rng_state.pth +3 -0
  25. checkpoints/checkpoint-857000/scheduler.pt +3 -0
  26. checkpoints/checkpoint-857000/sentence_bert_config.json +4 -0
  27. checkpoints/checkpoint-857000/special_tokens_map.json +40 -0
  28. checkpoints/checkpoint-857000/tokenizer.json +0 -0
  29. checkpoints/checkpoint-857000/tokenizer.model +3 -0
  30. checkpoints/checkpoint-857000/tokenizer_config.json +0 -0
  31. checkpoints/checkpoint-857000/trainer_state.json +0 -0
  32. checkpoints/checkpoint-857000/training_args.bin +3 -0
  33. checkpoints/checkpoint-858000/1_Pooling/config.json +10 -0
  34. checkpoints/checkpoint-858000/README.md +961 -0
  35. checkpoints/checkpoint-858000/config.json +45 -0
  36. checkpoints/checkpoint-858000/config_sentence_transformers.json +14 -0
  37. checkpoints/checkpoint-858000/model.safetensors +3 -0
  38. checkpoints/checkpoint-858000/modules.json +20 -0
  39. checkpoints/checkpoint-858000/optimizer.pt +3 -0
  40. checkpoints/checkpoint-858000/rng_state.pth +3 -0
  41. checkpoints/checkpoint-858000/scheduler.pt +3 -0
  42. checkpoints/checkpoint-858000/sentence_bert_config.json +4 -0
  43. checkpoints/checkpoint-858000/special_tokens_map.json +40 -0
  44. checkpoints/checkpoint-858000/tokenizer.json +0 -0
  45. checkpoints/checkpoint-858000/tokenizer.model +3 -0
  46. checkpoints/checkpoint-858000/tokenizer_config.json +0 -0
  47. checkpoints/checkpoint-858000/trainer_state.json +0 -0
  48. checkpoints/checkpoint-858000/training_args.bin +3 -0
  49. checkpoints/eval/similarity_evaluation_sts_eval_results.csv +65 -0
  50. checkpoints/runs/Mar24_10-41-10_debianerickserver/events.out.tfevents.1774359676.debianerickserver.23411.0 +2 -2
checkpoints/checkpoint-856000/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoints/checkpoint-856000/README.md ADDED
@@ -0,0 +1,957 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:1175405
9
+ - loss:CosineSimilarityLoss
10
+ base_model: BSC-LT/MrBERT-es
11
+ widget:
12
+ - source_sentence: El camino de Santiago articula la península ibérica con Europa.
13
+ sentences:
14
+ - Y un millon de euros y de pesetas tampoco son lo mismo.
15
+ - Asimismo, en los montes puede haber matorral de coscoja y, también, lentisco,
16
+ romero, enebro o brezo.
17
+ - El país fue el noveno mayor importador de petróleo del mundo en 2013 .
18
+ - source_sentence: Será la oportunidad de fabulosos negocios, que enriquecieron a
19
+ José de Salamanca y Mayol, marqués de Salamanca, quien dio nombre al nuevo barrio
20
+ creado al este de lo que pasará a ser el eje central de la ciudad .
21
+ sentences:
22
+ - Para terminar, como suelen hacer, el 'Free from desire', de Gala.
23
+ - Que JAMT sus deseos y buenos pensamientos FIELES sean sólo para mi AMPS, que sus
24
+ pensamientos, ATENCION,gentilezas, HALAGOS,REGALOS,TIEMPO LIBRE,amor, cariño,
25
+ ternura, dinero, bondades,DEDICACION y detalles sean sólo para mi AMPS Solamente
26
+ Y UNICAMENTE yo AMPS le daré Y DOY AMOR Y placer varias veces en el mismo día,
27
+ solo yo AMPS tendré Y TENGO ese poder dado por ti mi reina.
28
+ - Esperamos con anhelo poder saludarte personalmente en breve. 50 años invirtiendo
29
+ en personas Comunicación SSRR Comunicación SSRR2020-05-05 17:59:082020-07-30 16:55:37Regresamos
30
+ con más energía, si cabe.
31
+ - source_sentence: Fin del sitio En una sección titulada "Un lentísimo adiós", Xataka
32
+ en 2017 decía que la portada de Barrapunto mostraba contenidos de hacía 42 y más
33
+ días.
34
+ sentences:
35
+ - Taxonomía Castanea henryi fue descrita primero por Sidney Alfred Skan como Castanopsis
36
+ henryi y luego trasladado al género Castanea por Alfred Rehder & Ernest Henry
37
+ Wilson y publicado en Plantae Wilsonianae, an enumeration of the woody plants
38
+ collected in Western China for the Arnold Arboretum of Harvard University during
39
+ the years 1907, 1908 and 1910 by E.H.
40
+ - Para este 2019 se trabaja con 6 empresas, que representarían a la segunda generación
41
+ de dicho programa.
42
+ - Ya no está uno para estos trotes.
43
+ - source_sentence: Teatro Poético repartido en veintiún entremeses nuevos, Zaragoza,
44
+ 1651.
45
+ sentences:
46
+ - Finalmente el territorio caribeño logró la independencia entre finales del y el
47
+ .
48
+ - No es considerada fiable.
49
+ - La página se generó a las 19:58:53.
50
+ - source_sentence: Historia La botánica moderna Significado de la botánica como ciencia
51
+ Los distintos grupos de vegetales participan de manera fundamental en los ciclos
52
+ de la biosfera.
53
+ sentences:
54
+ - Durante la transpiración, el sudor elimina el calor del cuerpo humano por evaporación.
55
+ - El COPINH exige a las autoridades judiciales y fiscales proceder judicialmente
56
+ contra los alcaldes municipales, altos funcionarios de SERNA, y contra las empresas
57
+ y demás sectores involucrados en esta agresión contra el pueblo lenca.
58
+ - A nivel global, el artículo13 del Pacto Internacional de Derechos Económicos,
59
+ Sociales y Culturales de 1966 de las Naciones Unidas reconoce el derecho de toda
60
+ persona a la educación.
61
+ pipeline_tag: sentence-similarity
62
+ library_name: sentence-transformers
63
+ metrics:
64
+ - pearson_cosine
65
+ - spearman_cosine
66
+ model-index:
67
+ - name: SentenceTransformer based on BSC-LT/MrBERT-es
68
+ results:
69
+ - task:
70
+ type: semantic-similarity
71
+ name: Semantic Similarity
72
+ dataset:
73
+ name: sts eval
74
+ type: sts_eval
75
+ metrics:
76
+ - type: pearson_cosine
77
+ value: 0.43442931591911665
78
+ name: Pearson Cosine
79
+ - type: spearman_cosine
80
+ value: 0.2596907649612308
81
+ name: Spearman Cosine
82
+ ---
83
+
84
+ # SentenceTransformer based on BSC-LT/MrBERT-es
85
+
86
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [BSC-LT/MrBERT-es](https://huggingface.co/BSC-LT/MrBERT-es). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
87
+
88
+ ## Model Details
89
+
90
+ ### Model Description
91
+ - **Model Type:** Sentence Transformer
92
+ - **Base model:** [BSC-LT/MrBERT-es](https://huggingface.co/BSC-LT/MrBERT-es) <!-- at revision cfc9d049c3dee345ec55fa69e689c75e8af3c094 -->
93
+ - **Maximum Sequence Length:** 8192 tokens
94
+ - **Output Dimensionality:** 768 dimensions
95
+ - **Similarity Function:** Cosine Similarity
96
+ <!-- - **Training Dataset:** Unknown -->
97
+ <!-- - **Language:** Unknown -->
98
+ <!-- - **License:** Unknown -->
99
+
100
+ ### Model Sources
101
+
102
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
103
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
104
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
105
+
106
+ ### Full Model Architecture
107
+
108
+ ```
109
+ SentenceTransformer(
110
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
111
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
112
+ (2): Normalize()
113
+ )
114
+ ```
115
+
116
+ ## Usage
117
+
118
+ ### Direct Usage (Sentence Transformers)
119
+
120
+ First install the Sentence Transformers library:
121
+
122
+ ```bash
123
+ pip install -U sentence-transformers
124
+ ```
125
+
126
+ Then you can load this model and run inference.
127
+ ```python
128
+ from sentence_transformers import SentenceTransformer
129
+
130
+ # Download from the 🤗 Hub
131
+ model = SentenceTransformer("sentence_transformers_model_id")
132
+ # Run inference
133
+ sentences = [
134
+ 'Historia La botánica moderna Significado de la botánica como ciencia Los distintos grupos de vegetales participan de manera fundamental en los ciclos de la biosfera.',
135
+ 'El COPINH exige a las autoridades judiciales y fiscales proceder judicialmente contra los alcaldes municipales, altos funcionarios de SERNA, y contra las empresas y demás sectores involucrados en esta agresión contra el pueblo lenca.',
136
+ 'Durante la transpiración, el sudor elimina el calor del cuerpo humano por evaporación.',
137
+ ]
138
+ embeddings = model.encode(sentences)
139
+ print(embeddings.shape)
140
+ # (3, 768)
141
+
142
+ # Get the similarity scores for the embeddings
143
+ similarities = model.similarity(embeddings, embeddings)
144
+ print(similarities)
145
+ # tensor([[ 1.0000, 0.2274, 0.0939],
146
+ # [ 0.2274, 1.0000, -0.1173],
147
+ # [ 0.0939, -0.1173, 1.0000]])
148
+ ```
149
+
150
+ <!--
151
+ ### Direct Usage (Transformers)
152
+
153
+ <details><summary>Click to see the direct usage in Transformers</summary>
154
+
155
+ </details>
156
+ -->
157
+
158
+ <!--
159
+ ### Downstream Usage (Sentence Transformers)
160
+
161
+ You can finetune this model on your own dataset.
162
+
163
+ <details><summary>Click to expand</summary>
164
+
165
+ </details>
166
+ -->
167
+
168
+ <!--
169
+ ### Out-of-Scope Use
170
+
171
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
172
+ -->
173
+
174
+ ## Evaluation
175
+
176
+ ### Metrics
177
+
178
+ #### Semantic Similarity
179
+
180
+ * Dataset: `sts_eval`
181
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
182
+
183
+ | Metric | Value |
184
+ |:--------------------|:-----------|
185
+ | pearson_cosine | 0.4344 |
186
+ | **spearman_cosine** | **0.2597** |
187
+
188
+ <!--
189
+ ## Bias, Risks and Limitations
190
+
191
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
192
+ -->
193
+
194
+ <!--
195
+ ### Recommendations
196
+
197
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
198
+ -->
199
+
200
+ ## Training Details
201
+
202
+ ### Training Dataset
203
+
204
+ #### Unnamed Dataset
205
+
206
+ * Size: 1,175,405 training samples
207
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
208
+ * Approximate statistics based on the first 1000 samples:
209
+ | | sentence_0 | sentence_1 | label |
210
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------|
211
+ | type | string | string | float |
212
+ | details | <ul><li>min: 5 tokens</li><li>mean: 37.17 tokens</li><li>max: 290 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 38.26 tokens</li><li>max: 375 tokens</li></ul> | <ul><li>min: -0.75</li><li>mean: 0.17</li><li>max: 1.0</li></ul> |
213
+ * Samples:
214
+ | sentence_0 | sentence_1 | label |
215
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------|
216
+ | <code>Los ahorros de la jubilación podrán usarse para este fin.</code> | <code>Sony Ericsson W8 además de todo eso presenta una pantalla táctil de tipo HVGA de 320 x 480 píxeles y la pantalla posee 16.777.216 colores.</code> | <code>0.2533760964870453</code> |
217
+ | <code>Programas de desarrollo en el cerebelo La transición célula progenitora a neurona madura, implica una serie de cambios morfológicos y moleculares altamente regulada espacial y temporalmente.</code> | <code>Dos ejemplos en los que el principio de exclusión relaciona la materia con la ocupación del espacio son las estrellas enanas blancas y las estrellas de neutrones, que se analizan más adelante.</code> | <code>0.1902337223291397</code> |
218
+ | <code>Bolsa inmobiliaria online en Distrito Federal df, inmuebles en venta y renta, casas, departamentos, locales, terrenos, inmobiliarias, desarrollos, anunciar inmuebles.</code> | <code>Otros prefieren hablar de "régimen" o "sistema feudal", para diferenciarlo sutilmente del feudalismo estricto, o de síntesis feudal, para marcar el hecho de que sobreviven en ella rasgos de la antigüedad clásica mezclados con contribuciones germánicas, implicando tanto a instituciones como a elementos productivos, y significó la especificidad del feudalismo europeo occidental como formación económico social frente a otras también feudales, con consecuencias trascendentales en el futuro devenir histórico.</code> | <code>0.21721388399600983</code> |
219
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
220
+ ```json
221
+ {
222
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
223
+ }
224
+ ```
225
+
226
+ ### Training Hyperparameters
227
+ #### Non-Default Hyperparameters
228
+
229
+ - `eval_strategy`: steps
230
+ - `max_grad_norm`: 2.0
231
+ - `num_train_epochs`: 10
232
+ - `multi_dataset_batch_sampler`: round_robin
233
+
234
+ #### All Hyperparameters
235
+ <details><summary>Click to expand</summary>
236
+
237
+ - `overwrite_output_dir`: False
238
+ - `do_predict`: False
239
+ - `eval_strategy`: steps
240
+ - `prediction_loss_only`: True
241
+ - `per_device_train_batch_size`: 8
242
+ - `per_device_eval_batch_size`: 8
243
+ - `per_gpu_train_batch_size`: None
244
+ - `per_gpu_eval_batch_size`: None
245
+ - `gradient_accumulation_steps`: 1
246
+ - `eval_accumulation_steps`: None
247
+ - `torch_empty_cache_steps`: None
248
+ - `learning_rate`: 5e-05
249
+ - `weight_decay`: 0.0
250
+ - `adam_beta1`: 0.9
251
+ - `adam_beta2`: 0.999
252
+ - `adam_epsilon`: 1e-08
253
+ - `max_grad_norm`: 2.0
254
+ - `num_train_epochs`: 10
255
+ - `max_steps`: -1
256
+ - `lr_scheduler_type`: linear
257
+ - `lr_scheduler_kwargs`: None
258
+ - `warmup_ratio`: 0.0
259
+ - `warmup_steps`: 0
260
+ - `log_level`: passive
261
+ - `log_level_replica`: warning
262
+ - `log_on_each_node`: True
263
+ - `logging_nan_inf_filter`: True
264
+ - `save_safetensors`: True
265
+ - `save_on_each_node`: False
266
+ - `save_only_model`: False
267
+ - `restore_callback_states_from_checkpoint`: False
268
+ - `no_cuda`: False
269
+ - `use_cpu`: False
270
+ - `use_mps_device`: False
271
+ - `seed`: 42
272
+ - `data_seed`: None
273
+ - `jit_mode_eval`: False
274
+ - `bf16`: False
275
+ - `fp16`: False
276
+ - `fp16_opt_level`: O1
277
+ - `half_precision_backend`: auto
278
+ - `bf16_full_eval`: False
279
+ - `fp16_full_eval`: False
280
+ - `tf32`: None
281
+ - `local_rank`: 0
282
+ - `ddp_backend`: None
283
+ - `tpu_num_cores`: None
284
+ - `tpu_metrics_debug`: False
285
+ - `debug`: []
286
+ - `dataloader_drop_last`: False
287
+ - `dataloader_num_workers`: 0
288
+ - `dataloader_prefetch_factor`: None
289
+ - `past_index`: -1
290
+ - `disable_tqdm`: False
291
+ - `remove_unused_columns`: True
292
+ - `label_names`: None
293
+ - `load_best_model_at_end`: False
294
+ - `ignore_data_skip`: False
295
+ - `fsdp`: []
296
+ - `fsdp_min_num_params`: 0
297
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
298
+ - `fsdp_transformer_layer_cls_to_wrap`: None
299
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
300
+ - `parallelism_config`: None
301
+ - `deepspeed`: None
302
+ - `label_smoothing_factor`: 0.0
303
+ - `optim`: adamw_torch
304
+ - `optim_args`: None
305
+ - `adafactor`: False
306
+ - `group_by_length`: False
307
+ - `length_column_name`: length
308
+ - `project`: huggingface
309
+ - `trackio_space_id`: trackio
310
+ - `ddp_find_unused_parameters`: None
311
+ - `ddp_bucket_cap_mb`: None
312
+ - `ddp_broadcast_buffers`: False
313
+ - `dataloader_pin_memory`: True
314
+ - `dataloader_persistent_workers`: False
315
+ - `skip_memory_metrics`: True
316
+ - `use_legacy_prediction_loop`: False
317
+ - `push_to_hub`: False
318
+ - `resume_from_checkpoint`: None
319
+ - `hub_model_id`: None
320
+ - `hub_strategy`: every_save
321
+ - `hub_private_repo`: None
322
+ - `hub_always_push`: False
323
+ - `hub_revision`: None
324
+ - `gradient_checkpointing`: False
325
+ - `gradient_checkpointing_kwargs`: None
326
+ - `include_inputs_for_metrics`: False
327
+ - `include_for_metrics`: []
328
+ - `eval_do_concat_batches`: True
329
+ - `fp16_backend`: auto
330
+ - `push_to_hub_model_id`: None
331
+ - `push_to_hub_organization`: None
332
+ - `mp_parameters`:
333
+ - `auto_find_batch_size`: False
334
+ - `full_determinism`: False
335
+ - `torchdynamo`: None
336
+ - `ray_scope`: last
337
+ - `ddp_timeout`: 1800
338
+ - `torch_compile`: False
339
+ - `torch_compile_backend`: None
340
+ - `torch_compile_mode`: None
341
+ - `include_tokens_per_second`: False
342
+ - `include_num_input_tokens_seen`: no
343
+ - `neftune_noise_alpha`: None
344
+ - `optim_target_modules`: None
345
+ - `batch_eval_metrics`: False
346
+ - `eval_on_start`: False
347
+ - `use_liger_kernel`: False
348
+ - `liger_kernel_config`: None
349
+ - `eval_use_gather_object`: False
350
+ - `average_tokens_across_devices`: True
351
+ - `prompts`: None
352
+ - `batch_sampler`: batch_sampler
353
+ - `multi_dataset_batch_sampler`: round_robin
354
+ - `router_mapping`: {}
355
+ - `learning_rate_mapping`: {}
356
+
357
+ </details>
358
+
359
+ ### Training Logs
360
+ <details><summary>Click to expand</summary>
361
+
362
+ | Epoch | Step | Training Loss | sts_eval_spearman_cosine |
363
+ |:------:|:------:|:-------------:|:------------------------:|
364
+ | 3.9714 | 583500 | 0.0253 | 0.2725 |
365
+ | 3.9748 | 584000 | 0.0274 | 0.2733 |
366
+ | 3.9782 | 584500 | 0.0279 | 0.2711 |
367
+ | 3.9816 | 585000 | 0.0248 | 0.2708 |
368
+ | 3.9850 | 585500 | 0.0264 | 0.2676 |
369
+ | 3.9884 | 586000 | 0.0267 | 0.2713 |
370
+ | 3.9918 | 586500 | 0.0276 | 0.2703 |
371
+ | 3.9952 | 587000 | 0.0273 | 0.2674 |
372
+ | 3.9986 | 587500 | 0.0278 | 0.2688 |
373
+ | 4.0 | 587704 | - | 0.2672 |
374
+ | 4.0020 | 588000 | 0.0259 | 0.2675 |
375
+ | 4.0054 | 588500 | 0.0257 | 0.2697 |
376
+ | 4.0088 | 589000 | 0.0268 | 0.2694 |
377
+ | 4.0122 | 589500 | 0.0256 | 0.2706 |
378
+ | 4.0156 | 590000 | 0.0254 | 0.2706 |
379
+ | 4.0190 | 590500 | 0.0263 | 0.2695 |
380
+ | 4.0224 | 591000 | 0.0274 | 0.2691 |
381
+ | 4.0258 | 591500 | 0.0255 | 0.2712 |
382
+ | 4.0292 | 592000 | 0.0253 | 0.2696 |
383
+ | 4.0326 | 592500 | 0.025 | 0.2692 |
384
+ | 4.0360 | 593000 | 0.0263 | 0.2679 |
385
+ | 4.0394 | 593500 | 0.028 | 0.2689 |
386
+ | 4.0429 | 594000 | 0.0275 | 0.2696 |
387
+ | 4.0463 | 594500 | 0.0268 | 0.2699 |
388
+ | 4.0497 | 595000 | 0.025 | 0.2686 |
389
+ | 4.0531 | 595500 | 0.0277 | 0.2683 |
390
+ | 4.0565 | 596000 | 0.0276 | 0.2690 |
391
+ | 4.0599 | 596500 | 0.0242 | 0.2686 |
392
+ | 4.0633 | 597000 | 0.0264 | 0.2691 |
393
+ | 4.0667 | 597500 | 0.0273 | 0.2681 |
394
+ | 4.0701 | 598000 | 0.0269 | 0.2693 |
395
+ | 4.0735 | 598500 | 0.0274 | 0.2698 |
396
+ | 4.0769 | 599000 | 0.0252 | 0.2704 |
397
+ | 4.0803 | 599500 | 0.0268 | 0.2708 |
398
+ | 4.0837 | 600000 | 0.0259 | 0.2696 |
399
+ | 4.0871 | 600500 | 0.0277 | 0.2689 |
400
+ | 4.0905 | 601000 | 0.0262 | 0.2663 |
401
+ | 4.0939 | 601500 | 0.0266 | 0.2697 |
402
+ | 4.0973 | 602000 | 0.0269 | 0.2700 |
403
+ | 4.1007 | 602500 | 0.0253 | 0.2673 |
404
+ | 4.1041 | 603000 | 0.0281 | 0.2684 |
405
+ | 4.1075 | 603500 | 0.0263 | 0.2687 |
406
+ | 4.1109 | 604000 | 0.028 | 0.2677 |
407
+ | 4.1143 | 604500 | 0.0277 | 0.2701 |
408
+ | 4.1177 | 605000 | 0.0273 | 0.2686 |
409
+ | 4.1211 | 605500 | 0.0253 | 0.2681 |
410
+ | 4.1245 | 606000 | 0.0264 | 0.2694 |
411
+ | 4.1279 | 606500 | 0.0281 | 0.2706 |
412
+ | 4.1313 | 607000 | 0.0262 | 0.2714 |
413
+ | 4.1347 | 607500 | 0.0265 | 0.2673 |
414
+ | 4.1381 | 608000 | 0.0254 | 0.2685 |
415
+ | 4.1415 | 608500 | 0.0279 | 0.2674 |
416
+ | 4.1449 | 609000 | 0.0284 | 0.2692 |
417
+ | 4.1483 | 609500 | 0.0283 | 0.2680 |
418
+ | 4.1517 | 610000 | 0.0277 | 0.2673 |
419
+ | 4.1552 | 610500 | 0.0264 | 0.2692 |
420
+ | 4.1586 | 611000 | 0.0261 | 0.2687 |
421
+ | 4.1620 | 611500 | 0.0273 | 0.2697 |
422
+ | 4.1654 | 612000 | 0.027 | 0.2697 |
423
+ | 4.1688 | 612500 | 0.0274 | 0.2696 |
424
+ | 4.1722 | 613000 | 0.0273 | 0.2698 |
425
+ | 4.1756 | 613500 | 0.0255 | 0.2659 |
426
+ | 4.1790 | 614000 | 0.0274 | 0.2660 |
427
+ | 4.1824 | 614500 | 0.0284 | 0.2666 |
428
+ | 4.1858 | 615000 | 0.0268 | 0.2680 |
429
+ | 4.1892 | 615500 | 0.0278 | 0.2674 |
430
+ | 4.1926 | 616000 | 0.0276 | 0.2684 |
431
+ | 4.1960 | 616500 | 0.026 | 0.2700 |
432
+ | 4.1994 | 617000 | 0.0266 | 0.2686 |
433
+ | 4.2028 | 617500 | 0.0266 | 0.2680 |
434
+ | 4.2062 | 618000 | 0.0277 | 0.2678 |
435
+ | 4.2096 | 618500 | 0.0291 | 0.2649 |
436
+ | 4.2130 | 619000 | 0.0281 | 0.2635 |
437
+ | 4.2164 | 619500 | 0.0291 | 0.2659 |
438
+ | 4.2198 | 620000 | 0.0281 | 0.2672 |
439
+ | 4.2232 | 620500 | 0.0282 | 0.2655 |
440
+ | 4.2266 | 621000 | 0.0287 | 0.2648 |
441
+ | 4.2300 | 621500 | 0.0285 | 0.2640 |
442
+ | 4.2334 | 622000 | 0.0282 | 0.2645 |
443
+ | 4.2368 | 622500 | 0.027 | 0.2674 |
444
+ | 4.2402 | 623000 | 0.0268 | 0.2669 |
445
+ | 4.2436 | 623500 | 0.0291 | 0.2663 |
446
+ | 4.2470 | 624000 | 0.0291 | 0.2645 |
447
+ | 4.2504 | 624500 | 0.0277 | 0.2677 |
448
+ | 4.2538 | 625000 | 0.0273 | 0.2631 |
449
+ | 4.2572 | 625500 | 0.0265 | 0.2653 |
450
+ | 4.2606 | 626000 | 0.0276 | 0.2665 |
451
+ | 4.2641 | 626500 | 0.027 | 0.2654 |
452
+ | 4.2675 | 627000 | 0.0271 | 0.2659 |
453
+ | 4.2709 | 627500 | 0.0279 | 0.2659 |
454
+ | 4.2743 | 628000 | 0.0274 | 0.2648 |
455
+ | 4.2777 | 628500 | 0.0263 | 0.2659 |
456
+ | 4.2811 | 629000 | 0.0279 | 0.2665 |
457
+ | 4.2845 | 629500 | 0.028 | 0.2677 |
458
+ | 4.2879 | 630000 | 0.0299 | 0.2701 |
459
+ | 4.2913 | 630500 | 0.0284 | 0.2688 |
460
+ | 4.2947 | 631000 | 0.0269 | 0.2683 |
461
+ | 4.2981 | 631500 | 0.0271 | 0.2689 |
462
+ | 4.3015 | 632000 | 0.0288 | 0.2680 |
463
+ | 4.3049 | 632500 | 0.0274 | 0.2674 |
464
+ | 4.3083 | 633000 | 0.0277 | 0.2675 |
465
+ | 4.3117 | 633500 | 0.0282 | 0.2671 |
466
+ | 4.3151 | 634000 | 0.0266 | 0.2658 |
467
+ | 4.3185 | 634500 | 0.0284 | 0.2648 |
468
+ | 4.3219 | 635000 | 0.0283 | 0.2637 |
469
+ | 4.3253 | 635500 | 0.0283 | 0.2647 |
470
+ | 4.3287 | 636000 | 0.0281 | 0.2641 |
471
+ | 4.3321 | 636500 | 0.0275 | 0.2620 |
472
+ | 4.3355 | 637000 | 0.0272 | 0.2630 |
473
+ | 4.3389 | 637500 | 0.0282 | 0.2642 |
474
+ | 4.3423 | 638000 | 0.0294 | 0.2664 |
475
+ | 4.3457 | 638500 | 0.0283 | 0.2639 |
476
+ | 4.3491 | 639000 | 0.0262 | 0.2663 |
477
+ | 4.3525 | 639500 | 0.0275 | 0.2671 |
478
+ | 4.3559 | 640000 | 0.0298 | 0.2669 |
479
+ | 4.3593 | 640500 | 0.0292 | 0.2693 |
480
+ | 4.3627 | 641000 | 0.0283 | 0.2673 |
481
+ | 4.3661 | 641500 | 0.027 | 0.2687 |
482
+ | 4.3695 | 642000 | 0.0278 | 0.2663 |
483
+ | 4.3729 | 642500 | 0.0301 | 0.2652 |
484
+ | 4.3764 | 643000 | 0.0275 | 0.2676 |
485
+ | 4.3798 | 643500 | 0.0292 | 0.2680 |
486
+ | 4.3832 | 644000 | 0.0266 | 0.2680 |
487
+ | 4.3866 | 644500 | 0.0283 | 0.2668 |
488
+ | 4.3900 | 645000 | 0.0303 | 0.2677 |
489
+ | 4.3934 | 645500 | 0.0299 | 0.2701 |
490
+ | 4.3968 | 646000 | 0.0284 | 0.2680 |
491
+ | 4.4002 | 646500 | 0.0272 | 0.2664 |
492
+ | 4.4036 | 647000 | 0.0297 | 0.2662 |
493
+ | 4.4070 | 647500 | 0.029 | 0.2661 |
494
+ | 4.4104 | 648000 | 0.0281 | 0.2678 |
495
+ | 4.4138 | 648500 | 0.0282 | 0.2683 |
496
+ | 4.4172 | 649000 | 0.0278 | 0.2699 |
497
+ | 4.4206 | 649500 | 0.0309 | 0.2684 |
498
+ | 4.4240 | 650000 | 0.0288 | 0.2693 |
499
+ | 4.4274 | 650500 | 0.0307 | 0.2697 |
500
+ | 4.4308 | 651000 | 0.0272 | 0.2722 |
501
+ | 4.4342 | 651500 | 0.0289 | 0.2726 |
502
+ | 4.4376 | 652000 | 0.0288 | 0.2716 |
503
+ | 4.4410 | 652500 | 0.0289 | 0.2729 |
504
+ | 4.4444 | 653000 | 0.0297 | 0.2699 |
505
+ | 4.4478 | 653500 | 0.0286 | 0.2724 |
506
+ | 4.4512 | 654000 | 0.0298 | 0.2702 |
507
+ | 4.4546 | 654500 | 0.0302 | 0.2738 |
508
+ | 4.4580 | 655000 | 0.0292 | 0.2713 |
509
+ | 4.4614 | 655500 | 0.0297 | 0.2712 |
510
+ | 4.4648 | 656000 | 0.0286 | 0.2705 |
511
+ | 4.4682 | 656500 | 0.0285 | 0.2735 |
512
+ | 4.4716 | 657000 | 0.0294 | 0.2733 |
513
+ | 4.4750 | 657500 | 0.0291 | 0.2722 |
514
+ | 4.4784 | 658000 | 0.0283 | 0.2708 |
515
+ | 4.4818 | 658500 | 0.028 | 0.2714 |
516
+ | 4.4853 | 659000 | 0.0298 | 0.2716 |
517
+ | 4.4887 | 659500 | 0.0275 | 0.2721 |
518
+ | 4.4921 | 660000 | 0.0314 | 0.2731 |
519
+ | 4.4955 | 660500 | 0.0292 | 0.2730 |
520
+ | 4.4989 | 661000 | 0.029 | 0.2749 |
521
+ | 4.5023 | 661500 | 0.0305 | 0.2728 |
522
+ | 4.5057 | 662000 | 0.0323 | 0.2709 |
523
+ | 4.5091 | 662500 | 0.0276 | 0.2715 |
524
+ | 4.5125 | 663000 | 0.0294 | 0.2702 |
525
+ | 4.5159 | 663500 | 0.0286 | 0.2694 |
526
+ | 4.5193 | 664000 | 0.0282 | 0.2702 |
527
+ | 4.5227 | 664500 | 0.0287 | 0.2702 |
528
+ | 4.5261 | 665000 | 0.0289 | 0.2682 |
529
+ | 4.5295 | 665500 | 0.0299 | 0.2701 |
530
+ | 4.5329 | 666000 | 0.0301 | 0.2706 |
531
+ | 4.5363 | 666500 | 0.0287 | 0.2719 |
532
+ | 4.5397 | 667000 | 0.0292 | 0.2721 |
533
+ | 4.5431 | 667500 | 0.0284 | 0.2714 |
534
+ | 4.5465 | 668000 | 0.0286 | 0.2696 |
535
+ | 4.5499 | 668500 | 0.0299 | 0.2700 |
536
+ | 4.5533 | 669000 | 0.0282 | 0.2689 |
537
+ | 4.5567 | 669500 | 0.0288 | 0.2715 |
538
+ | 4.5601 | 670000 | 0.0298 | 0.2712 |
539
+ | 4.5635 | 670500 | 0.0302 | 0.2687 |
540
+ | 4.5669 | 671000 | 0.0298 | 0.2709 |
541
+ | 4.5703 | 671500 | 0.0297 | 0.2711 |
542
+ | 4.5737 | 672000 | 0.0297 | 0.2703 |
543
+ | 4.5771 | 672500 | 0.0288 | 0.2685 |
544
+ | 4.5805 | 673000 | 0.0293 | 0.2698 |
545
+ | 4.5839 | 673500 | 0.0293 | 0.2706 |
546
+ | 4.5873 | 674000 | 0.0292 | 0.2688 |
547
+ | 4.5907 | 674500 | 0.0288 | 0.2676 |
548
+ | 4.5941 | 675000 | 0.0294 | 0.2694 |
549
+ | 4.5976 | 675500 | 0.0308 | 0.2697 |
550
+ | 4.6010 | 676000 | 0.0297 | 0.2689 |
551
+ | 4.6044 | 676500 | 0.0287 | 0.2688 |
552
+ | 4.6078 | 677000 | 0.0276 | 0.2677 |
553
+ | 4.6112 | 677500 | 0.0307 | 0.2686 |
554
+ | 4.6146 | 678000 | 0.0301 | 0.2672 |
555
+ | 4.6180 | 678500 | 0.029 | 0.2689 |
556
+ | 4.6214 | 679000 | 0.0306 | 0.2683 |
557
+ | 4.6248 | 679500 | 0.0284 | 0.2689 |
558
+ | 4.6282 | 680000 | 0.0277 | 0.2698 |
559
+ | 4.6316 | 680500 | 0.0291 | 0.2694 |
560
+ | 4.6350 | 681000 | 0.0295 | 0.2660 |
561
+ | 4.6384 | 681500 | 0.0309 | 0.2683 |
562
+ | 4.6418 | 682000 | 0.0278 | 0.2703 |
563
+ | 4.6452 | 682500 | 0.0291 | 0.2690 |
564
+ | 4.6486 | 683000 | 0.0296 | 0.2699 |
565
+ | 4.6520 | 683500 | 0.0307 | 0.2689 |
566
+ | 4.6554 | 684000 | 0.0299 | 0.2679 |
567
+ | 4.6588 | 684500 | 0.03 | 0.2690 |
568
+ | 4.6622 | 685000 | 0.0291 | 0.2682 |
569
+ | 4.6656 | 685500 | 0.0304 | 0.2665 |
570
+ | 4.6690 | 686000 | 0.031 | 0.2657 |
571
+ | 4.6724 | 686500 | 0.03 | 0.2674 |
572
+ | 4.6758 | 687000 | 0.0293 | 0.2696 |
573
+ | 4.6792 | 687500 | 0.0299 | 0.2666 |
574
+ | 4.6826 | 688000 | 0.029 | 0.2668 |
575
+ | 4.6860 | 688500 | 0.0295 | 0.2669 |
576
+ | 4.6894 | 689000 | 0.0288 | 0.2680 |
577
+ | 4.6928 | 689500 | 0.0301 | 0.2674 |
578
+ | 4.6962 | 690000 | 0.03 | 0.2690 |
579
+ | 4.6996 | 690500 | 0.0298 | 0.2678 |
580
+ | 4.7030 | 691000 | 0.03 | 0.2705 |
581
+ | 4.7065 | 691500 | 0.0293 | 0.2692 |
582
+ | 4.7099 | 692000 | 0.0287 | 0.2693 |
583
+ | 4.7133 | 692500 | 0.0304 | 0.2660 |
584
+ | 4.7167 | 693000 | 0.0296 | 0.2662 |
585
+ | 4.7201 | 693500 | 0.0291 | 0.2668 |
586
+ | 4.7235 | 694000 | 0.0308 | 0.2677 |
587
+ | 4.7269 | 694500 | 0.0309 | 0.2668 |
588
+ | 4.7303 | 695000 | 0.0319 | 0.2692 |
589
+ | 4.7337 | 695500 | 0.0297 | 0.2678 |
590
+ | 4.7371 | 696000 | 0.0297 | 0.2672 |
591
+ | 4.7405 | 696500 | 0.0294 | 0.2673 |
592
+ | 4.7439 | 697000 | 0.0293 | 0.2671 |
593
+ | 4.7473 | 697500 | 0.0308 | 0.2687 |
594
+ | 4.7507 | 698000 | 0.0315 | 0.2694 |
595
+ | 4.7541 | 698500 | 0.0286 | 0.2676 |
596
+ | 4.7575 | 699000 | 0.0297 | 0.2687 |
597
+ | 4.7609 | 699500 | 0.0285 | 0.2668 |
598
+ | 4.7643 | 700000 | 0.0282 | 0.2682 |
599
+ | 4.7677 | 700500 | 0.0307 | 0.2667 |
600
+ | 4.7711 | 701000 | 0.0276 | 0.2719 |
601
+ | 4.7745 | 701500 | 0.0297 | 0.2706 |
602
+ | 4.7779 | 702000 | 0.0293 | 0.2691 |
603
+ | 4.7813 | 702500 | 0.029 | 0.2679 |
604
+ | 4.7847 | 703000 | 0.0319 | 0.2678 |
605
+ | 4.7881 | 703500 | 0.0303 | 0.2682 |
606
+ | 4.7915 | 704000 | 0.028 | 0.2688 |
607
+ | 4.7949 | 704500 | 0.031 | 0.2719 |
608
+ | 4.7983 | 705000 | 0.029 | 0.2692 |
609
+ | 4.8017 | 705500 | 0.0313 | 0.2661 |
610
+ | 4.8051 | 706000 | 0.0313 | 0.2685 |
611
+ | 4.8085 | 706500 | 0.0296 | 0.2689 |
612
+ | 4.8119 | 707000 | 0.0309 | 0.2705 |
613
+ | 4.8153 | 707500 | 0.0287 | 0.2691 |
614
+ | 4.8188 | 708000 | 0.031 | 0.2697 |
615
+ | 4.8222 | 708500 | 0.0295 | 0.2683 |
616
+ | 4.8256 | 709000 | 0.0293 | 0.2687 |
617
+ | 4.8290 | 709500 | 0.0316 | 0.2689 |
618
+ | 4.8324 | 710000 | 0.0289 | 0.2691 |
619
+ | 4.8358 | 710500 | 0.0287 | 0.2705 |
620
+ | 4.8392 | 711000 | 0.0292 | 0.2700 |
621
+ | 4.8426 | 711500 | 0.0309 | 0.2682 |
622
+ | 4.8460 | 712000 | 0.0306 | 0.2688 |
623
+ | 4.8494 | 712500 | 0.0304 | 0.2701 |
624
+ | 4.8528 | 713000 | 0.03 | 0.2679 |
625
+ | 4.8562 | 713500 | 0.0293 | 0.2713 |
626
+ | 4.8596 | 714000 | 0.03 | 0.2692 |
627
+ | 4.8630 | 714500 | 0.03 | 0.2700 |
628
+ | 4.8664 | 715000 | 0.0297 | 0.2699 |
629
+ | 4.8698 | 715500 | 0.0282 | 0.2709 |
630
+ | 4.8732 | 716000 | 0.0287 | 0.2715 |
631
+ | 4.8766 | 716500 | 0.0303 | 0.2718 |
632
+ | 4.8800 | 717000 | 0.0304 | 0.2710 |
633
+ | 4.8834 | 717500 | 0.0292 | 0.2720 |
634
+ | 4.8868 | 718000 | 0.0307 | 0.2700 |
635
+ | 4.8902 | 718500 | 0.0304 | 0.2698 |
636
+ | 4.8936 | 719000 | 0.0307 | 0.2681 |
637
+ | 4.8970 | 719500 | 0.0294 | 0.2693 |
638
+ | 4.9004 | 720000 | 0.0315 | 0.2701 |
639
+ | 4.9038 | 720500 | 0.0288 | 0.2702 |
640
+ | 4.9072 | 721000 | 0.0284 | 0.2710 |
641
+ | 4.9106 | 721500 | 0.0309 | 0.2697 |
642
+ | 4.9140 | 722000 | 0.0313 | 0.2698 |
643
+ | 4.9174 | 722500 | 0.0305 | 0.2687 |
644
+ | 4.9208 | 723000 | 0.0306 | 0.2681 |
645
+ | 4.9242 | 723500 | 0.0307 | 0.2702 |
646
+ | 4.9277 | 724000 | 0.0319 | 0.2687 |
647
+ | 4.9311 | 724500 | 0.0285 | 0.2698 |
648
+ | 4.9345 | 725000 | 0.0298 | 0.2697 |
649
+ | 4.9379 | 725500 | 0.0317 | 0.2701 |
650
+ | 4.9413 | 726000 | 0.0316 | 0.2702 |
651
+ | 4.9447 | 726500 | 0.0305 | 0.2691 |
652
+ | 4.9481 | 727000 | 0.0303 | 0.2694 |
653
+ | 4.9515 | 727500 | 0.0302 | 0.2688 |
654
+ | 4.9549 | 728000 | 0.029 | 0.2672 |
655
+ | 4.9583 | 728500 | 0.03 | 0.2690 |
656
+ | 4.9617 | 729000 | 0.0291 | 0.2687 |
657
+ | 4.9651 | 729500 | 0.0301 | 0.2682 |
658
+ | 4.9685 | 730000 | 0.0304 | 0.2680 |
659
+ | 4.9719 | 730500 | 0.0305 | 0.2655 |
660
+ | 4.9753 | 731000 | 0.0285 | 0.2668 |
661
+ | 4.9787 | 731500 | 0.0325 | 0.2672 |
662
+ | 4.9821 | 732000 | 0.0294 | 0.2677 |
663
+ | 4.9855 | 732500 | 0.0308 | 0.2648 |
664
+ | 4.9889 | 733000 | 0.0291 | 0.2672 |
665
+ | 4.9923 | 733500 | 0.0312 | 0.2663 |
666
+ | 4.9957 | 734000 | 0.0305 | 0.2671 |
667
+ | 4.9991 | 734500 | 0.0301 | 0.2677 |
668
+ | 5.0 | 734630 | - | 0.2660 |
669
+ | 5.0025 | 735000 | 0.0214 | 0.2636 |
670
+ | 5.0059 | 735500 | 0.0186 | 0.2625 |
671
+ | 5.0093 | 736000 | 0.0186 | 0.2608 |
672
+ | 5.0127 | 736500 | 0.0189 | 0.2612 |
673
+ | 5.0161 | 737000 | 0.019 | 0.2589 |
674
+ | 5.0195 | 737500 | 0.0185 | 0.2594 |
675
+ | 5.0229 | 738000 | 0.0177 | 0.2604 |
676
+ | 5.0263 | 738500 | 0.0187 | 0.2595 |
677
+ | 5.0297 | 739000 | 0.0185 | 0.2569 |
678
+ | 5.0331 | 739500 | 0.0174 | 0.2569 |
679
+ | 5.0365 | 740000 | 0.0185 | 0.2588 |
680
+ | 5.0400 | 740500 | 0.0186 | 0.2554 |
681
+ | 5.0434 | 741000 | 0.0176 | 0.2574 |
682
+ | 5.0468 | 741500 | 0.0173 | 0.2581 |
683
+ | 5.0502 | 742000 | 0.0182 | 0.2591 |
684
+ | 5.0536 | 742500 | 0.0175 | 0.2585 |
685
+ | 5.0570 | 743000 | 0.0173 | 0.2589 |
686
+ | 5.0604 | 743500 | 0.0175 | 0.2589 |
687
+ | 5.0638 | 744000 | 0.0184 | 0.2612 |
688
+ | 5.0672 | 744500 | 0.019 | 0.2595 |
689
+ | 5.0706 | 745000 | 0.0183 | 0.2588 |
690
+ | 5.0740 | 745500 | 0.0187 | 0.2553 |
691
+ | 5.0774 | 746000 | 0.0183 | 0.2553 |
692
+ | 5.0808 | 746500 | 0.0178 | 0.2560 |
693
+ | 5.0842 | 747000 | 0.0194 | 0.2566 |
694
+ | 5.0876 | 747500 | 0.0187 | 0.2572 |
695
+ | 5.0910 | 748000 | 0.0188 | 0.2534 |
696
+ | 5.0944 | 748500 | 0.0195 | 0.2556 |
697
+ | 5.0978 | 749000 | 0.0187 | 0.2579 |
698
+ | 5.1012 | 749500 | 0.0182 | 0.2558 |
699
+ | 5.1046 | 750000 | 0.0188 | 0.2554 |
700
+ | 5.1080 | 750500 | 0.019 | 0.2566 |
701
+ | 5.1114 | 751000 | 0.0182 | 0.2538 |
702
+ | 5.1148 | 751500 | 0.0185 | 0.2537 |
703
+ | 5.1182 | 752000 | 0.0183 | 0.2559 |
704
+ | 5.1216 | 752500 | 0.0185 | 0.2567 |
705
+ | 5.1250 | 753000 | 0.0186 | 0.2551 |
706
+ | 5.1284 | 753500 | 0.0186 | 0.2574 |
707
+ | 5.1318 | 754000 | 0.0187 | 0.2559 |
708
+ | 5.1352 | 754500 | 0.019 | 0.2566 |
709
+ | 5.1386 | 755000 | 0.0179 | 0.2561 |
710
+ | 5.1420 | 755500 | 0.0186 | 0.2556 |
711
+ | 5.1454 | 756000 | 0.0186 | 0.2545 |
712
+ | 5.1489 | 756500 | 0.0198 | 0.2526 |
713
+ | 5.1523 | 757000 | 0.0195 | 0.2556 |
714
+ | 5.1557 | 757500 | 0.0189 | 0.2519 |
715
+ | 5.1591 | 758000 | 0.0186 | 0.2547 |
716
+ | 5.1625 | 758500 | 0.0186 | 0.2536 |
717
+ | 5.1659 | 759000 | 0.0186 | 0.2548 |
718
+ | 5.1693 | 759500 | 0.0198 | 0.2537 |
719
+ | 5.1727 | 760000 | 0.0179 | 0.2557 |
720
+ | 5.1761 | 760500 | 0.0183 | 0.2540 |
721
+ | 5.1795 | 761000 | 0.0192 | 0.2558 |
722
+ | 5.1829 | 761500 | 0.0199 | 0.2575 |
723
+ | 5.1863 | 762000 | 0.0197 | 0.2555 |
724
+ | 5.1897 | 762500 | 0.0187 | 0.2579 |
725
+ | 5.1931 | 763000 | 0.0191 | 0.2577 |
726
+ | 5.1965 | 763500 | 0.0192 | 0.2572 |
727
+ | 5.1999 | 764000 | 0.0187 | 0.2565 |
728
+ | 5.2033 | 764500 | 0.018 | 0.2565 |
729
+ | 5.2067 | 765000 | 0.0188 | 0.2552 |
730
+ | 5.2101 | 765500 | 0.0193 | 0.2568 |
731
+ | 5.2135 | 766000 | 0.0187 | 0.2574 |
732
+ | 5.2169 | 766500 | 0.0181 | 0.2577 |
733
+ | 5.2203 | 767000 | 0.0197 | 0.2595 |
734
+ | 5.2237 | 767500 | 0.019 | 0.2599 |
735
+ | 5.2271 | 768000 | 0.0196 | 0.2587 |
736
+ | 5.2305 | 768500 | 0.0196 | 0.2584 |
737
+ | 5.2339 | 769000 | 0.0186 | 0.2570 |
738
+ | 5.2373 | 769500 | 0.0193 | 0.2593 |
739
+ | 5.2407 | 770000 | 0.0198 | 0.2595 |
740
+ | 5.2441 | 770500 | 0.019 | 0.2561 |
741
+ | 5.2475 | 771000 | 0.0198 | 0.2584 |
742
+ | 5.2509 | 771500 | 0.0195 | 0.2584 |
743
+ | 5.2543 | 772000 | 0.0201 | 0.2579 |
744
+ | 5.2577 | 772500 | 0.02 | 0.2582 |
745
+ | 5.2612 | 773000 | 0.0194 | 0.2576 |
746
+ | 5.2646 | 773500 | 0.0194 | 0.2585 |
747
+ | 5.2680 | 774000 | 0.0192 | 0.2574 |
748
+ | 5.2714 | 774500 | 0.019 | 0.2559 |
749
+ | 5.2748 | 775000 | 0.0197 | 0.2556 |
750
+ | 5.2782 | 775500 | 0.0191 | 0.2553 |
751
+ | 5.2816 | 776000 | 0.0205 | 0.2577 |
752
+ | 5.2850 | 776500 | 0.0195 | 0.2572 |
753
+ | 5.2884 | 777000 | 0.0207 | 0.2566 |
754
+ | 5.2918 | 777500 | 0.0206 | 0.2571 |
755
+ | 5.2952 | 778000 | 0.0202 | 0.2580 |
756
+ | 5.2986 | 778500 | 0.0192 | 0.2570 |
757
+ | 5.3020 | 779000 | 0.0191 | 0.2558 |
758
+ | 5.3054 | 779500 | 0.0213 | 0.2570 |
759
+ | 5.3088 | 780000 | 0.0193 | 0.2578 |
760
+ | 5.3122 | 780500 | 0.0193 | 0.2567 |
761
+ | 5.3156 | 781000 | 0.0212 | 0.2579 |
762
+ | 5.3190 | 781500 | 0.0197 | 0.2563 |
763
+ | 5.3224 | 782000 | 0.0204 | 0.2592 |
764
+ | 5.3258 | 782500 | 0.0207 | 0.2596 |
765
+ | 5.3292 | 783000 | 0.0197 | 0.2570 |
766
+ | 5.3326 | 783500 | 0.0201 | 0.2590 |
767
+ | 5.3360 | 784000 | 0.0204 | 0.2570 |
768
+ | 5.3394 | 784500 | 0.0198 | 0.2586 |
769
+ | 5.3428 | 785000 | 0.0193 | 0.2597 |
770
+ | 5.3462 | 785500 | 0.0197 | 0.2594 |
771
+ | 5.3496 | 786000 | 0.0205 | 0.2595 |
772
+ | 5.3530 | 786500 | 0.0194 | 0.2603 |
773
+ | 5.3564 | 787000 | 0.0205 | 0.2593 |
774
+ | 5.3598 | 787500 | 0.0205 | 0.2586 |
775
+ | 5.3632 | 788000 | 0.0203 | 0.2583 |
776
+ | 5.3666 | 788500 | 0.0194 | 0.2610 |
777
+ | 5.3701 | 789000 | 0.0206 | 0.2626 |
778
+ | 5.3735 | 789500 | 0.0198 | 0.2602 |
779
+ | 5.3769 | 790000 | 0.0208 | 0.2597 |
780
+ | 5.3803 | 790500 | 0.0201 | 0.2578 |
781
+ | 5.3837 | 791000 | 0.0205 | 0.2578 |
782
+ | 5.3871 | 791500 | 0.0197 | 0.2569 |
783
+ | 5.3905 | 792000 | 0.0204 | 0.2546 |
784
+ | 5.3939 | 792500 | 0.02 | 0.2565 |
785
+ | 5.3973 | 793000 | 0.0202 | 0.2574 |
786
+ | 5.4007 | 793500 | 0.0198 | 0.2572 |
787
+ | 5.4041 | 794000 | 0.0194 | 0.2593 |
788
+ | 5.4075 | 794500 | 0.0215 | 0.2584 |
789
+ | 5.4109 | 795000 | 0.0207 | 0.2590 |
790
+ | 5.4143 | 795500 | 0.021 | 0.2589 |
791
+ | 5.4177 | 796000 | 0.0218 | 0.2589 |
792
+ | 5.4211 | 796500 | 0.0211 | 0.2595 |
793
+ | 5.4245 | 797000 | 0.0203 | 0.2584 |
794
+ | 5.4279 | 797500 | 0.0204 | 0.2596 |
795
+ | 5.4313 | 798000 | 0.0198 | 0.2594 |
796
+ | 5.4347 | 798500 | 0.0208 | 0.2596 |
797
+ | 5.4381 | 799000 | 0.02 | 0.2590 |
798
+ | 5.4415 | 799500 | 0.0218 | 0.2583 |
799
+ | 5.4449 | 800000 | 0.0208 | 0.2578 |
800
+ | 5.4483 | 800500 | 0.0198 | 0.2582 |
801
+ | 5.4517 | 801000 | 0.0209 | 0.2583 |
802
+ | 5.4551 | 801500 | 0.02 | 0.2596 |
803
+ | 5.4585 | 802000 | 0.0206 | 0.2591 |
804
+ | 5.4619 | 802500 | 0.0208 | 0.2610 |
805
+ | 5.4653 | 803000 | 0.0219 | 0.2603 |
806
+ | 5.4687 | 803500 | 0.0208 | 0.2598 |
807
+ | 5.4721 | 804000 | 0.0208 | 0.2582 |
808
+ | 5.4755 | 804500 | 0.0224 | 0.2582 |
809
+ | 5.4789 | 805000 | 0.0232 | 0.2564 |
810
+ | 5.4824 | 805500 | 0.0204 | 0.2590 |
811
+ | 5.4858 | 806000 | 0.0218 | 0.2598 |
812
+ | 5.4892 | 806500 | 0.0202 | 0.2612 |
813
+ | 5.4926 | 807000 | 0.0204 | 0.2615 |
814
+ | 5.4960 | 807500 | 0.0208 | 0.2608 |
815
+ | 5.4994 | 808000 | 0.0199 | 0.2604 |
816
+ | 5.5028 | 808500 | 0.0219 | 0.2587 |
817
+ | 5.5062 | 809000 | 0.0197 | 0.2613 |
818
+ | 5.5096 | 809500 | 0.0209 | 0.2606 |
819
+ | 5.5130 | 810000 | 0.0211 | 0.2615 |
820
+ | 5.5164 | 810500 | 0.021 | 0.2613 |
821
+ | 5.5198 | 811000 | 0.0205 | 0.2594 |
822
+ | 5.5232 | 811500 | 0.0208 | 0.2581 |
823
+ | 5.5266 | 812000 | 0.0206 | 0.2577 |
824
+ | 5.5300 | 812500 | 0.0202 | 0.2574 |
825
+ | 5.5334 | 813000 | 0.021 | 0.2592 |
826
+ | 5.5368 | 813500 | 0.0202 | 0.2574 |
827
+ | 5.5402 | 814000 | 0.0211 | 0.2573 |
828
+ | 5.5436 | 814500 | 0.02 | 0.2581 |
829
+ | 5.5470 | 815000 | 0.0207 | 0.2598 |
830
+ | 5.5504 | 815500 | 0.0217 | 0.2603 |
831
+ | 5.5538 | 816000 | 0.0222 | 0.2594 |
832
+ | 5.5572 | 816500 | 0.02 | 0.2595 |
833
+ | 5.5606 | 817000 | 0.0208 | 0.2605 |
834
+ | 5.5640 | 817500 | 0.0221 | 0.2606 |
835
+ | 5.5674 | 818000 | 0.0211 | 0.2586 |
836
+ | 5.5708 | 818500 | 0.0215 | 0.2592 |
837
+ | 5.5742 | 819000 | 0.0216 | 0.2602 |
838
+ | 5.5776 | 819500 | 0.0221 | 0.2600 |
839
+ | 5.5810 | 820000 | 0.0207 | 0.2606 |
840
+ | 5.5844 | 820500 | 0.0202 | 0.2598 |
841
+ | 5.5878 | 821000 | 0.0205 | 0.2589 |
842
+ | 5.5913 | 821500 | 0.0221 | 0.2601 |
843
+ | 5.5947 | 822000 | 0.0219 | 0.2596 |
844
+ | 5.5981 | 822500 | 0.0204 | 0.2609 |
845
+ | 5.6015 | 823000 | 0.022 | 0.2585 |
846
+ | 5.6049 | 823500 | 0.0206 | 0.2580 |
847
+ | 5.6083 | 824000 | 0.0201 | 0.2604 |
848
+ | 5.6117 | 824500 | 0.0213 | 0.2600 |
849
+ | 5.6151 | 825000 | 0.0208 | 0.2578 |
850
+ | 5.6185 | 825500 | 0.0213 | 0.2587 |
851
+ | 5.6219 | 826000 | 0.0214 | 0.2587 |
852
+ | 5.6253 | 826500 | 0.022 | 0.2599 |
853
+ | 5.6287 | 827000 | 0.0211 | 0.2590 |
854
+ | 5.6321 | 827500 | 0.0207 | 0.2598 |
855
+ | 5.6355 | 828000 | 0.021 | 0.2607 |
856
+ | 5.6389 | 828500 | 0.0209 | 0.2612 |
857
+ | 5.6423 | 829000 | 0.0217 | 0.2611 |
858
+ | 5.6457 | 829500 | 0.0209 | 0.2600 |
859
+ | 5.6491 | 830000 | 0.0219 | 0.2610 |
860
+ | 5.6525 | 830500 | 0.0224 | 0.2611 |
861
+ | 5.6559 | 831000 | 0.0214 | 0.2634 |
862
+ | 5.6593 | 831500 | 0.022 | 0.2597 |
863
+ | 5.6627 | 832000 | 0.0209 | 0.2597 |
864
+ | 5.6661 | 832500 | 0.0219 | 0.2585 |
865
+ | 5.6695 | 833000 | 0.0216 | 0.2581 |
866
+ | 5.6729 | 833500 | 0.0229 | 0.2605 |
867
+ | 5.6763 | 834000 | 0.0218 | 0.2578 |
868
+ | 5.6797 | 834500 | 0.0223 | 0.2611 |
869
+ | 5.6831 | 835000 | 0.0212 | 0.2614 |
870
+ | 5.6865 | 835500 | 0.021 | 0.2592 |
871
+ | 5.6899 | 836000 | 0.0212 | 0.2601 |
872
+ | 5.6933 | 836500 | 0.0228 | 0.2612 |
873
+ | 5.6967 | 837000 | 0.0217 | 0.2617 |
874
+ | 5.7001 | 837500 | 0.0228 | 0.2604 |
875
+ | 5.7036 | 838000 | 0.0215 | 0.2599 |
876
+ | 5.7070 | 838500 | 0.0212 | 0.2598 |
877
+ | 5.7104 | 839000 | 0.0224 | 0.2592 |
878
+ | 5.7138 | 839500 | 0.0213 | 0.2562 |
879
+ | 5.7172 | 840000 | 0.0211 | 0.2598 |
880
+ | 5.7206 | 840500 | 0.0213 | 0.2604 |
881
+ | 5.7240 | 841000 | 0.0221 | 0.2601 |
882
+ | 5.7274 | 841500 | 0.0227 | 0.2610 |
883
+ | 5.7308 | 842000 | 0.0214 | 0.2612 |
884
+ | 5.7342 | 842500 | 0.0212 | 0.2619 |
885
+ | 5.7376 | 843000 | 0.0221 | 0.2594 |
886
+ | 5.7410 | 843500 | 0.0212 | 0.2616 |
887
+ | 5.7444 | 844000 | 0.0221 | 0.2618 |
888
+ | 5.7478 | 844500 | 0.021 | 0.2623 |
889
+ | 5.7512 | 845000 | 0.0222 | 0.2597 |
890
+ | 5.7546 | 845500 | 0.0223 | 0.2601 |
891
+ | 5.7580 | 846000 | 0.0214 | 0.2599 |
892
+ | 5.7614 | 846500 | 0.0222 | 0.2601 |
893
+ | 5.7648 | 847000 | 0.0221 | 0.2593 |
894
+ | 5.7682 | 847500 | 0.0222 | 0.2596 |
895
+ | 5.7716 | 848000 | 0.0229 | 0.2586 |
896
+ | 5.7750 | 848500 | 0.0207 | 0.2612 |
897
+ | 5.7784 | 849000 | 0.0216 | 0.2612 |
898
+ | 5.7818 | 849500 | 0.0217 | 0.2603 |
899
+ | 5.7852 | 850000 | 0.0208 | 0.2606 |
900
+ | 5.7886 | 850500 | 0.0221 | 0.2609 |
901
+ | 5.7920 | 851000 | 0.0209 | 0.2607 |
902
+ | 5.7954 | 851500 | 0.0216 | 0.2620 |
903
+ | 5.7988 | 852000 | 0.0224 | 0.2597 |
904
+ | 5.8022 | 852500 | 0.0227 | 0.2614 |
905
+ | 5.8056 | 853000 | 0.0232 | 0.2605 |
906
+ | 5.8090 | 853500 | 0.0216 | 0.2589 |
907
+ | 5.8124 | 854000 | 0.0225 | 0.2594 |
908
+ | 5.8159 | 854500 | 0.0221 | 0.2600 |
909
+ | 5.8193 | 855000 | 0.0222 | 0.2601 |
910
+ | 5.8227 | 855500 | 0.0215 | 0.2594 |
911
+ | 5.8261 | 856000 | 0.0223 | 0.2597 |
912
+
913
+ </details>
914
+
915
+ ### Framework Versions
916
+ - Python: 3.9.25
917
+ - Sentence Transformers: 5.1.2
918
+ - Transformers: 4.57.6
919
+ - PyTorch: 2.6.0+cu118
920
+ - Accelerate: 1.10.1
921
+ - Datasets: 4.5.0
922
+ - Tokenizers: 0.22.2
923
+
924
+ ## Citation
925
+
926
+ ### BibTeX
927
+
928
+ #### Sentence Transformers
929
+ ```bibtex
930
+ @inproceedings{reimers-2019-sentence-bert,
931
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
932
+ author = "Reimers, Nils and Gurevych, Iryna",
933
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
934
+ month = "11",
935
+ year = "2019",
936
+ publisher = "Association for Computational Linguistics",
937
+ url = "https://arxiv.org/abs/1908.10084",
938
+ }
939
+ ```
940
+
941
+ <!--
942
+ ## Glossary
943
+
944
+ *Clearly define terms in order to be accessible across audiences.*
945
+ -->
946
+
947
+ <!--
948
+ ## Model Card Authors
949
+
950
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
951
+ -->
952
+
953
+ <!--
954
+ ## Model Card Contact
955
+
956
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
957
+ -->
checkpoints/checkpoint-856000/config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "classifier_activation": "silu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 0,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 2,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
+ "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
+ "mlp_bias": false,
31
+ "mlp_dropout": 0.0,
32
+ "model_type": "modernbert",
33
+ "norm_bias": false,
34
+ "norm_eps": 1e-05,
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 22,
37
+ "pad_token_id": 1,
38
+ "position_embedding_type": "absolute",
39
+ "repad_logits_with_grad": false,
40
+ "sep_token_id": 2,
41
+ "sparse_pred_ignore_index": -100,
42
+ "sparse_prediction": false,
43
+ "transformers_version": "4.57.6",
44
+ "vocab_size": 51200
45
+ }
checkpoints/checkpoint-856000/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.2",
5
+ "transformers": "4.57.6",
6
+ "pytorch": "2.6.0+cu118"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
checkpoints/checkpoint-856000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3516b9fdb829882199befb8880d7ddb1bddfb7a00eb259d91ef35bf30fc2203d
3
+ size 598626040
checkpoints/checkpoint-856000/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoints/checkpoint-856000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ef4f9332e61a8849eba22fcd93966eb6b17c410e4c5c3db95c92639154807f
3
+ size 1197335098
checkpoints/checkpoint-856000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a61133445f634a92ba7bea291401290b3b10a66695c5e1598539c28f5f883b6
3
+ size 13990
checkpoints/checkpoint-856000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cd61d396f216dc585550ab2728d2e7299a7e4a76b6e22f600e4c5a7db34cd3
3
+ size 1064
checkpoints/checkpoint-856000/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
checkpoints/checkpoint-856000/special_tokens_map.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|translation|>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "mask_token": {
20
+ "content": "<mask>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": {
27
+ "content": "<pad>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "unk_token": {
34
+ "content": "<unk>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ }
40
+ }
checkpoints/checkpoint-856000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-856000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8dc3e139a6f2c6e1781996aabfef34c32241dcff263dbc66cf69b4760aeee9
3
+ size 1074422
checkpoints/checkpoint-856000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-856000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-856000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d517e3e36acb7d029596418aa57488e7475a57c419a9027ad114a9944a373224
3
+ size 5752
checkpoints/checkpoint-857000/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoints/checkpoint-857000/README.md ADDED
@@ -0,0 +1,959 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:1175405
9
+ - loss:CosineSimilarityLoss
10
+ base_model: BSC-LT/MrBERT-es
11
+ widget:
12
+ - source_sentence: El camino de Santiago articula la península ibérica con Europa.
13
+ sentences:
14
+ - Y un millon de euros y de pesetas tampoco son lo mismo.
15
+ - Asimismo, en los montes puede haber matorral de coscoja y, también, lentisco,
16
+ romero, enebro o brezo.
17
+ - El país fue el noveno mayor importador de petróleo del mundo en 2013 .
18
+ - source_sentence: Será la oportunidad de fabulosos negocios, que enriquecieron a
19
+ José de Salamanca y Mayol, marqués de Salamanca, quien dio nombre al nuevo barrio
20
+ creado al este de lo que pasará a ser el eje central de la ciudad .
21
+ sentences:
22
+ - Para terminar, como suelen hacer, el 'Free from desire', de Gala.
23
+ - Que JAMT sus deseos y buenos pensamientos FIELES sean sólo para mi AMPS, que sus
24
+ pensamientos, ATENCION,gentilezas, HALAGOS,REGALOS,TIEMPO LIBRE,amor, cariño,
25
+ ternura, dinero, bondades,DEDICACION y detalles sean sólo para mi AMPS Solamente
26
+ Y UNICAMENTE yo AMPS le daré Y DOY AMOR Y placer varias veces en el mismo día,
27
+ solo yo AMPS tendré Y TENGO ese poder dado por ti mi reina.
28
+ - Esperamos con anhelo poder saludarte personalmente en breve. 50 años invirtiendo
29
+ en personas Comunicación SSRR Comunicación SSRR2020-05-05 17:59:082020-07-30 16:55:37Regresamos
30
+ con más energía, si cabe.
31
+ - source_sentence: Fin del sitio En una sección titulada "Un lentísimo adiós", Xataka
32
+ en 2017 decía que la portada de Barrapunto mostraba contenidos de hacía 42 y más
33
+ días.
34
+ sentences:
35
+ - Taxonomía Castanea henryi fue descrita primero por Sidney Alfred Skan como Castanopsis
36
+ henryi y luego trasladado al género Castanea por Alfred Rehder & Ernest Henry
37
+ Wilson y publicado en Plantae Wilsonianae, an enumeration of the woody plants
38
+ collected in Western China for the Arnold Arboretum of Harvard University during
39
+ the years 1907, 1908 and 1910 by E.H.
40
+ - Para este 2019 se trabaja con 6 empresas, que representarían a la segunda generación
41
+ de dicho programa.
42
+ - Ya no está uno para estos trotes.
43
+ - source_sentence: Teatro Poético repartido en veintiún entremeses nuevos, Zaragoza,
44
+ 1651.
45
+ sentences:
46
+ - Finalmente el territorio caribeño logró la independencia entre finales del y el
47
+ .
48
+ - No es considerada fiable.
49
+ - La página se generó a las 19:58:53.
50
+ - source_sentence: Historia La botánica moderna Significado de la botánica como ciencia
51
+ Los distintos grupos de vegetales participan de manera fundamental en los ciclos
52
+ de la biosfera.
53
+ sentences:
54
+ - Durante la transpiración, el sudor elimina el calor del cuerpo humano por evaporación.
55
+ - El COPINH exige a las autoridades judiciales y fiscales proceder judicialmente
56
+ contra los alcaldes municipales, altos funcionarios de SERNA, y contra las empresas
57
+ y demás sectores involucrados en esta agresión contra el pueblo lenca.
58
+ - A nivel global, el artículo13 del Pacto Internacional de Derechos Económicos,
59
+ Sociales y Culturales de 1966 de las Naciones Unidas reconoce el derecho de toda
60
+ persona a la educación.
61
+ pipeline_tag: sentence-similarity
62
+ library_name: sentence-transformers
63
+ metrics:
64
+ - pearson_cosine
65
+ - spearman_cosine
66
+ model-index:
67
+ - name: SentenceTransformer based on BSC-LT/MrBERT-es
68
+ results:
69
+ - task:
70
+ type: semantic-similarity
71
+ name: Semantic Similarity
72
+ dataset:
73
+ name: sts eval
74
+ type: sts_eval
75
+ metrics:
76
+ - type: pearson_cosine
77
+ value: 0.43681572237432503
78
+ name: Pearson Cosine
79
+ - type: spearman_cosine
80
+ value: 0.26154343151201004
81
+ name: Spearman Cosine
82
+ ---
83
+
84
+ # SentenceTransformer based on BSC-LT/MrBERT-es
85
+
86
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BSC-LT/MrBERT-es](https://huggingface.co/BSC-LT/MrBERT-es). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
87
+
88
+ ## Model Details
89
+
90
+ ### Model Description
91
+ - **Model Type:** Sentence Transformer
92
+ - **Base model:** [BSC-LT/MrBERT-es](https://huggingface.co/BSC-LT/MrBERT-es) <!-- at revision cfc9d049c3dee345ec55fa69e689c75e8af3c094 -->
93
+ - **Maximum Sequence Length:** 8192 tokens
94
+ - **Output Dimensionality:** 768 dimensions
95
+ - **Similarity Function:** Cosine Similarity
96
+ <!-- - **Training Dataset:** Unknown -->
97
+ <!-- - **Language:** Unknown -->
98
+ <!-- - **License:** Unknown -->
99
+
100
+ ### Model Sources
101
+
102
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
103
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
104
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
105
+
106
+ ### Full Model Architecture
107
+
108
+ ```
109
+ SentenceTransformer(
110
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
111
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
112
+ (2): Normalize()
113
+ )
114
+ ```
115
+
116
+ ## Usage
117
+
118
+ ### Direct Usage (Sentence Transformers)
119
+
120
+ First install the Sentence Transformers library:
121
+
122
+ ```bash
123
+ pip install -U sentence-transformers
124
+ ```
125
+
126
+ Then you can load this model and run inference.
127
+ ```python
128
+ from sentence_transformers import SentenceTransformer
129
+
130
+ # Download from the 🤗 Hub
131
+ model = SentenceTransformer("sentence_transformers_model_id")
132
+ # Run inference
133
+ sentences = [
134
+ 'Historia La botánica moderna Significado de la botánica como ciencia Los distintos grupos de vegetales participan de manera fundamental en los ciclos de la biosfera.',
135
+ 'El COPINH exige a las autoridades judiciales y fiscales proceder judicialmente contra los alcaldes municipales, altos funcionarios de SERNA, y contra las empresas y demás sectores involucrados en esta agresión contra el pueblo lenca.',
136
+ 'Durante la transpiración, el sudor elimina el calor del cuerpo humano por evaporación.',
137
+ ]
138
+ embeddings = model.encode(sentences)
139
+ print(embeddings.shape)
140
+ # (3, 768)
141
+
142
+ # Get the similarity scores for the embeddings
143
+ similarities = model.similarity(embeddings, embeddings)
144
+ print(similarities)
145
+ # tensor([[ 1.0000, 0.2502, 0.1120],
146
+ # [ 0.2502, 1.0000, -0.1142],
147
+ # [ 0.1120, -0.1142, 1.0000]])
148
+ ```
149
+
150
+ <!--
151
+ ### Direct Usage (Transformers)
152
+
153
+ <details><summary>Click to see the direct usage in Transformers</summary>
154
+
155
+ </details>
156
+ -->
157
+
158
+ <!--
159
+ ### Downstream Usage (Sentence Transformers)
160
+
161
+ You can finetune this model on your own dataset.
162
+
163
+ <details><summary>Click to expand</summary>
164
+
165
+ </details>
166
+ -->
167
+
168
+ <!--
169
+ ### Out-of-Scope Use
170
+
171
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
172
+ -->
173
+
174
+ ## Evaluation
175
+
176
+ ### Metrics
177
+
178
+ #### Semantic Similarity
179
+
180
+ * Dataset: `sts_eval`
181
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
182
+
183
+ | Metric | Value |
184
+ |:--------------------|:-----------|
185
+ | pearson_cosine | 0.4368 |
186
+ | **spearman_cosine** | **0.2615** |
187
+
188
+ <!--
189
+ ## Bias, Risks and Limitations
190
+
191
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
192
+ -->
193
+
194
+ <!--
195
+ ### Recommendations
196
+
197
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
198
+ -->
199
+
200
+ ## Training Details
201
+
202
+ ### Training Dataset
203
+
204
+ #### Unnamed Dataset
205
+
206
+ * Size: 1,175,405 training samples
207
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
208
+ * Approximate statistics based on the first 1000 samples:
209
+ | | sentence_0 | sentence_1 | label |
210
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------|
211
+ | type | string | string | float |
212
+ | details | <ul><li>min: 5 tokens</li><li>mean: 37.17 tokens</li><li>max: 290 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 38.26 tokens</li><li>max: 375 tokens</li></ul> | <ul><li>min: -0.75</li><li>mean: 0.17</li><li>max: 1.0</li></ul> |
213
+ * Samples:
214
+ | sentence_0 | sentence_1 | label |
215
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------|
216
+ | <code>Los ahorros de la jubilación podrán usarse para este fin.</code> | <code>Sony Ericsson W8 además de todo eso presenta una pantalla táctil de tipo HVGA de 320 x 480 píxeles y la pantalla posee 16.777.216 colores.</code> | <code>0.2533760964870453</code> |
217
+ | <code>Programas de desarrollo en el cerebelo La transición célula progenitora a neurona madura, implica una serie de cambios morfológicos y moleculares altamente regulada espacial y temporalmente.</code> | <code>Dos ejemplos en los que el principio de exclusión relaciona la materia con la ocupación del espacio son las estrellas enanas blancas y las estrellas de neutrones, que se analizan más adelante.</code> | <code>0.1902337223291397</code> |
218
+ | <code>Bolsa inmobiliaria online en Distrito Federal df, inmuebles en venta y renta, casas, departamentos, locales, terrenos, inmobiliarias, desarrollos, anunciar inmuebles.</code> | <code>Otros prefieren hablar de "régimen" o "sistema feudal", para diferenciarlo sutilmente del feudalismo estricto, o de síntesis feudal, para marcar el hecho de que sobreviven en ella rasgos de la antigüedad clásica mezclados con contribuciones germánicas, implicando tanto a instituciones como a elementos productivos, y significó la especificidad del feudalismo europeo occidental como formación económico social frente a otras también feudales, con consecuencias trascendentales en el futuro devenir histórico.</code> | <code>0.21721388399600983</code> |
219
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
220
+ ```json
221
+ {
222
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
223
+ }
224
+ ```
225
+
226
+ ### Training Hyperparameters
227
+ #### Non-Default Hyperparameters
228
+
229
+ - `eval_strategy`: steps
230
+ - `max_grad_norm`: 2.0
231
+ - `num_train_epochs`: 10
232
+ - `multi_dataset_batch_sampler`: round_robin
233
+
234
+ #### All Hyperparameters
235
+ <details><summary>Click to expand</summary>
236
+
237
+ - `overwrite_output_dir`: False
238
+ - `do_predict`: False
239
+ - `eval_strategy`: steps
240
+ - `prediction_loss_only`: True
241
+ - `per_device_train_batch_size`: 8
242
+ - `per_device_eval_batch_size`: 8
243
+ - `per_gpu_train_batch_size`: None
244
+ - `per_gpu_eval_batch_size`: None
245
+ - `gradient_accumulation_steps`: 1
246
+ - `eval_accumulation_steps`: None
247
+ - `torch_empty_cache_steps`: None
248
+ - `learning_rate`: 5e-05
249
+ - `weight_decay`: 0.0
250
+ - `adam_beta1`: 0.9
251
+ - `adam_beta2`: 0.999
252
+ - `adam_epsilon`: 1e-08
253
+ - `max_grad_norm`: 2.0
254
+ - `num_train_epochs`: 10
255
+ - `max_steps`: -1
256
+ - `lr_scheduler_type`: linear
257
+ - `lr_scheduler_kwargs`: None
258
+ - `warmup_ratio`: 0.0
259
+ - `warmup_steps`: 0
260
+ - `log_level`: passive
261
+ - `log_level_replica`: warning
262
+ - `log_on_each_node`: True
263
+ - `logging_nan_inf_filter`: True
264
+ - `save_safetensors`: True
265
+ - `save_on_each_node`: False
266
+ - `save_only_model`: False
267
+ - `restore_callback_states_from_checkpoint`: False
268
+ - `no_cuda`: False
269
+ - `use_cpu`: False
270
+ - `use_mps_device`: False
271
+ - `seed`: 42
272
+ - `data_seed`: None
273
+ - `jit_mode_eval`: False
274
+ - `bf16`: False
275
+ - `fp16`: False
276
+ - `fp16_opt_level`: O1
277
+ - `half_precision_backend`: auto
278
+ - `bf16_full_eval`: False
279
+ - `fp16_full_eval`: False
280
+ - `tf32`: None
281
+ - `local_rank`: 0
282
+ - `ddp_backend`: None
283
+ - `tpu_num_cores`: None
284
+ - `tpu_metrics_debug`: False
285
+ - `debug`: []
286
+ - `dataloader_drop_last`: False
287
+ - `dataloader_num_workers`: 0
288
+ - `dataloader_prefetch_factor`: None
289
+ - `past_index`: -1
290
+ - `disable_tqdm`: False
291
+ - `remove_unused_columns`: True
292
+ - `label_names`: None
293
+ - `load_best_model_at_end`: False
294
+ - `ignore_data_skip`: False
295
+ - `fsdp`: []
296
+ - `fsdp_min_num_params`: 0
297
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
298
+ - `fsdp_transformer_layer_cls_to_wrap`: None
299
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
300
+ - `parallelism_config`: None
301
+ - `deepspeed`: None
302
+ - `label_smoothing_factor`: 0.0
303
+ - `optim`: adamw_torch
304
+ - `optim_args`: None
305
+ - `adafactor`: False
306
+ - `group_by_length`: False
307
+ - `length_column_name`: length
308
+ - `project`: huggingface
309
+ - `trackio_space_id`: trackio
310
+ - `ddp_find_unused_parameters`: None
311
+ - `ddp_bucket_cap_mb`: None
312
+ - `ddp_broadcast_buffers`: False
313
+ - `dataloader_pin_memory`: True
314
+ - `dataloader_persistent_workers`: False
315
+ - `skip_memory_metrics`: True
316
+ - `use_legacy_prediction_loop`: False
317
+ - `push_to_hub`: False
318
+ - `resume_from_checkpoint`: None
319
+ - `hub_model_id`: None
320
+ - `hub_strategy`: every_save
321
+ - `hub_private_repo`: None
322
+ - `hub_always_push`: False
323
+ - `hub_revision`: None
324
+ - `gradient_checkpointing`: False
325
+ - `gradient_checkpointing_kwargs`: None
326
+ - `include_inputs_for_metrics`: False
327
+ - `include_for_metrics`: []
328
+ - `eval_do_concat_batches`: True
329
+ - `fp16_backend`: auto
330
+ - `push_to_hub_model_id`: None
331
+ - `push_to_hub_organization`: None
332
+ - `mp_parameters`:
333
+ - `auto_find_batch_size`: False
334
+ - `full_determinism`: False
335
+ - `torchdynamo`: None
336
+ - `ray_scope`: last
337
+ - `ddp_timeout`: 1800
338
+ - `torch_compile`: False
339
+ - `torch_compile_backend`: None
340
+ - `torch_compile_mode`: None
341
+ - `include_tokens_per_second`: False
342
+ - `include_num_input_tokens_seen`: no
343
+ - `neftune_noise_alpha`: None
344
+ - `optim_target_modules`: None
345
+ - `batch_eval_metrics`: False
346
+ - `eval_on_start`: False
347
+ - `use_liger_kernel`: False
348
+ - `liger_kernel_config`: None
349
+ - `eval_use_gather_object`: False
350
+ - `average_tokens_across_devices`: True
351
+ - `prompts`: None
352
+ - `batch_sampler`: batch_sampler
353
+ - `multi_dataset_batch_sampler`: round_robin
354
+ - `router_mapping`: {}
355
+ - `learning_rate_mapping`: {}
356
+
357
+ </details>
358
+
359
+ ### Training Logs
360
+ <details><summary>Click to expand</summary>
361
+
362
+ | Epoch | Step | Training Loss | sts_eval_spearman_cosine |
363
+ |:------:|:------:|:-------------:|:------------------------:|
364
+ | 3.9714 | 583500 | 0.0253 | 0.2725 |
365
+ | 3.9748 | 584000 | 0.0274 | 0.2733 |
366
+ | 3.9782 | 584500 | 0.0279 | 0.2711 |
367
+ | 3.9816 | 585000 | 0.0248 | 0.2708 |
368
+ | 3.9850 | 585500 | 0.0264 | 0.2676 |
369
+ | 3.9884 | 586000 | 0.0267 | 0.2713 |
370
+ | 3.9918 | 586500 | 0.0276 | 0.2703 |
371
+ | 3.9952 | 587000 | 0.0273 | 0.2674 |
372
+ | 3.9986 | 587500 | 0.0278 | 0.2688 |
373
+ | 4.0 | 587704 | - | 0.2672 |
374
+ | 4.0020 | 588000 | 0.0259 | 0.2675 |
375
+ | 4.0054 | 588500 | 0.0257 | 0.2697 |
376
+ | 4.0088 | 589000 | 0.0268 | 0.2694 |
377
+ | 4.0122 | 589500 | 0.0256 | 0.2706 |
378
+ | 4.0156 | 590000 | 0.0254 | 0.2706 |
379
+ | 4.0190 | 590500 | 0.0263 | 0.2695 |
380
+ | 4.0224 | 591000 | 0.0274 | 0.2691 |
381
+ | 4.0258 | 591500 | 0.0255 | 0.2712 |
382
+ | 4.0292 | 592000 | 0.0253 | 0.2696 |
383
+ | 4.0326 | 592500 | 0.025 | 0.2692 |
384
+ | 4.0360 | 593000 | 0.0263 | 0.2679 |
385
+ | 4.0394 | 593500 | 0.028 | 0.2689 |
386
+ | 4.0429 | 594000 | 0.0275 | 0.2696 |
387
+ | 4.0463 | 594500 | 0.0268 | 0.2699 |
388
+ | 4.0497 | 595000 | 0.025 | 0.2686 |
389
+ | 4.0531 | 595500 | 0.0277 | 0.2683 |
390
+ | 4.0565 | 596000 | 0.0276 | 0.2690 |
391
+ | 4.0599 | 596500 | 0.0242 | 0.2686 |
392
+ | 4.0633 | 597000 | 0.0264 | 0.2691 |
393
+ | 4.0667 | 597500 | 0.0273 | 0.2681 |
394
+ | 4.0701 | 598000 | 0.0269 | 0.2693 |
395
+ | 4.0735 | 598500 | 0.0274 | 0.2698 |
396
+ | 4.0769 | 599000 | 0.0252 | 0.2704 |
397
+ | 4.0803 | 599500 | 0.0268 | 0.2708 |
398
+ | 4.0837 | 600000 | 0.0259 | 0.2696 |
399
+ | 4.0871 | 600500 | 0.0277 | 0.2689 |
400
+ | 4.0905 | 601000 | 0.0262 | 0.2663 |
401
+ | 4.0939 | 601500 | 0.0266 | 0.2697 |
402
+ | 4.0973 | 602000 | 0.0269 | 0.2700 |
403
+ | 4.1007 | 602500 | 0.0253 | 0.2673 |
404
+ | 4.1041 | 603000 | 0.0281 | 0.2684 |
405
+ | 4.1075 | 603500 | 0.0263 | 0.2687 |
406
+ | 4.1109 | 604000 | 0.028 | 0.2677 |
407
+ | 4.1143 | 604500 | 0.0277 | 0.2701 |
408
+ | 4.1177 | 605000 | 0.0273 | 0.2686 |
409
+ | 4.1211 | 605500 | 0.0253 | 0.2681 |
410
+ | 4.1245 | 606000 | 0.0264 | 0.2694 |
411
+ | 4.1279 | 606500 | 0.0281 | 0.2706 |
412
+ | 4.1313 | 607000 | 0.0262 | 0.2714 |
413
+ | 4.1347 | 607500 | 0.0265 | 0.2673 |
414
+ | 4.1381 | 608000 | 0.0254 | 0.2685 |
415
+ | 4.1415 | 608500 | 0.0279 | 0.2674 |
416
+ | 4.1449 | 609000 | 0.0284 | 0.2692 |
417
+ | 4.1483 | 609500 | 0.0283 | 0.2680 |
418
+ | 4.1517 | 610000 | 0.0277 | 0.2673 |
419
+ | 4.1552 | 610500 | 0.0264 | 0.2692 |
420
+ | 4.1586 | 611000 | 0.0261 | 0.2687 |
421
+ | 4.1620 | 611500 | 0.0273 | 0.2697 |
422
+ | 4.1654 | 612000 | 0.027 | 0.2697 |
423
+ | 4.1688 | 612500 | 0.0274 | 0.2696 |
424
+ | 4.1722 | 613000 | 0.0273 | 0.2698 |
425
+ | 4.1756 | 613500 | 0.0255 | 0.2659 |
426
+ | 4.1790 | 614000 | 0.0274 | 0.2660 |
427
+ | 4.1824 | 614500 | 0.0284 | 0.2666 |
428
+ | 4.1858 | 615000 | 0.0268 | 0.2680 |
429
+ | 4.1892 | 615500 | 0.0278 | 0.2674 |
430
+ | 4.1926 | 616000 | 0.0276 | 0.2684 |
431
+ | 4.1960 | 616500 | 0.026 | 0.2700 |
432
+ | 4.1994 | 617000 | 0.0266 | 0.2686 |
433
+ | 4.2028 | 617500 | 0.0266 | 0.2680 |
434
+ | 4.2062 | 618000 | 0.0277 | 0.2678 |
435
+ | 4.2096 | 618500 | 0.0291 | 0.2649 |
436
+ | 4.2130 | 619000 | 0.0281 | 0.2635 |
437
+ | 4.2164 | 619500 | 0.0291 | 0.2659 |
438
+ | 4.2198 | 620000 | 0.0281 | 0.2672 |
439
+ | 4.2232 | 620500 | 0.0282 | 0.2655 |
440
+ | 4.2266 | 621000 | 0.0287 | 0.2648 |
441
+ | 4.2300 | 621500 | 0.0285 | 0.2640 |
442
+ | 4.2334 | 622000 | 0.0282 | 0.2645 |
443
+ | 4.2368 | 622500 | 0.027 | 0.2674 |
444
+ | 4.2402 | 623000 | 0.0268 | 0.2669 |
445
+ | 4.2436 | 623500 | 0.0291 | 0.2663 |
446
+ | 4.2470 | 624000 | 0.0291 | 0.2645 |
447
+ | 4.2504 | 624500 | 0.0277 | 0.2677 |
448
+ | 4.2538 | 625000 | 0.0273 | 0.2631 |
449
+ | 4.2572 | 625500 | 0.0265 | 0.2653 |
450
+ | 4.2606 | 626000 | 0.0276 | 0.2665 |
451
+ | 4.2641 | 626500 | 0.027 | 0.2654 |
452
+ | 4.2675 | 627000 | 0.0271 | 0.2659 |
453
+ | 4.2709 | 627500 | 0.0279 | 0.2659 |
454
+ | 4.2743 | 628000 | 0.0274 | 0.2648 |
455
+ | 4.2777 | 628500 | 0.0263 | 0.2659 |
456
+ | 4.2811 | 629000 | 0.0279 | 0.2665 |
457
+ | 4.2845 | 629500 | 0.028 | 0.2677 |
458
+ | 4.2879 | 630000 | 0.0299 | 0.2701 |
459
+ | 4.2913 | 630500 | 0.0284 | 0.2688 |
460
+ | 4.2947 | 631000 | 0.0269 | 0.2683 |
461
+ | 4.2981 | 631500 | 0.0271 | 0.2689 |
462
+ | 4.3015 | 632000 | 0.0288 | 0.2680 |
463
+ | 4.3049 | 632500 | 0.0274 | 0.2674 |
464
+ | 4.3083 | 633000 | 0.0277 | 0.2675 |
465
+ | 4.3117 | 633500 | 0.0282 | 0.2671 |
466
+ | 4.3151 | 634000 | 0.0266 | 0.2658 |
467
+ | 4.3185 | 634500 | 0.0284 | 0.2648 |
468
+ | 4.3219 | 635000 | 0.0283 | 0.2637 |
469
+ | 4.3253 | 635500 | 0.0283 | 0.2647 |
470
+ | 4.3287 | 636000 | 0.0281 | 0.2641 |
471
+ | 4.3321 | 636500 | 0.0275 | 0.2620 |
472
+ | 4.3355 | 637000 | 0.0272 | 0.2630 |
473
+ | 4.3389 | 637500 | 0.0282 | 0.2642 |
474
+ | 4.3423 | 638000 | 0.0294 | 0.2664 |
475
+ | 4.3457 | 638500 | 0.0283 | 0.2639 |
476
+ | 4.3491 | 639000 | 0.0262 | 0.2663 |
477
+ | 4.3525 | 639500 | 0.0275 | 0.2671 |
478
+ | 4.3559 | 640000 | 0.0298 | 0.2669 |
479
+ | 4.3593 | 640500 | 0.0292 | 0.2693 |
480
+ | 4.3627 | 641000 | 0.0283 | 0.2673 |
481
+ | 4.3661 | 641500 | 0.027 | 0.2687 |
482
+ | 4.3695 | 642000 | 0.0278 | 0.2663 |
483
+ | 4.3729 | 642500 | 0.0301 | 0.2652 |
484
+ | 4.3764 | 643000 | 0.0275 | 0.2676 |
485
+ | 4.3798 | 643500 | 0.0292 | 0.2680 |
486
+ | 4.3832 | 644000 | 0.0266 | 0.2680 |
487
+ | 4.3866 | 644500 | 0.0283 | 0.2668 |
488
+ | 4.3900 | 645000 | 0.0303 | 0.2677 |
489
+ | 4.3934 | 645500 | 0.0299 | 0.2701 |
490
+ | 4.3968 | 646000 | 0.0284 | 0.2680 |
491
+ | 4.4002 | 646500 | 0.0272 | 0.2664 |
492
+ | 4.4036 | 647000 | 0.0297 | 0.2662 |
493
+ | 4.4070 | 647500 | 0.029 | 0.2661 |
494
+ | 4.4104 | 648000 | 0.0281 | 0.2678 |
495
+ | 4.4138 | 648500 | 0.0282 | 0.2683 |
496
+ | 4.4172 | 649000 | 0.0278 | 0.2699 |
497
+ | 4.4206 | 649500 | 0.0309 | 0.2684 |
498
+ | 4.4240 | 650000 | 0.0288 | 0.2693 |
499
+ | 4.4274 | 650500 | 0.0307 | 0.2697 |
500
+ | 4.4308 | 651000 | 0.0272 | 0.2722 |
501
+ | 4.4342 | 651500 | 0.0289 | 0.2726 |
502
+ | 4.4376 | 652000 | 0.0288 | 0.2716 |
503
+ | 4.4410 | 652500 | 0.0289 | 0.2729 |
504
+ | 4.4444 | 653000 | 0.0297 | 0.2699 |
505
+ | 4.4478 | 653500 | 0.0286 | 0.2724 |
506
+ | 4.4512 | 654000 | 0.0298 | 0.2702 |
507
+ | 4.4546 | 654500 | 0.0302 | 0.2738 |
508
+ | 4.4580 | 655000 | 0.0292 | 0.2713 |
509
+ | 4.4614 | 655500 | 0.0297 | 0.2712 |
510
+ | 4.4648 | 656000 | 0.0286 | 0.2705 |
511
+ | 4.4682 | 656500 | 0.0285 | 0.2735 |
512
+ | 4.4716 | 657000 | 0.0294 | 0.2733 |
513
+ | 4.4750 | 657500 | 0.0291 | 0.2722 |
514
+ | 4.4784 | 658000 | 0.0283 | 0.2708 |
515
+ | 4.4818 | 658500 | 0.028 | 0.2714 |
516
+ | 4.4853 | 659000 | 0.0298 | 0.2716 |
517
+ | 4.4887 | 659500 | 0.0275 | 0.2721 |
518
+ | 4.4921 | 660000 | 0.0314 | 0.2731 |
519
+ | 4.4955 | 660500 | 0.0292 | 0.2730 |
520
+ | 4.4989 | 661000 | 0.029 | 0.2749 |
521
+ | 4.5023 | 661500 | 0.0305 | 0.2728 |
522
+ | 4.5057 | 662000 | 0.0323 | 0.2709 |
523
+ | 4.5091 | 662500 | 0.0276 | 0.2715 |
524
+ | 4.5125 | 663000 | 0.0294 | 0.2702 |
525
+ | 4.5159 | 663500 | 0.0286 | 0.2694 |
526
+ | 4.5193 | 664000 | 0.0282 | 0.2702 |
527
+ | 4.5227 | 664500 | 0.0287 | 0.2702 |
528
+ | 4.5261 | 665000 | 0.0289 | 0.2682 |
529
+ | 4.5295 | 665500 | 0.0299 | 0.2701 |
530
+ | 4.5329 | 666000 | 0.0301 | 0.2706 |
531
+ | 4.5363 | 666500 | 0.0287 | 0.2719 |
532
+ | 4.5397 | 667000 | 0.0292 | 0.2721 |
533
+ | 4.5431 | 667500 | 0.0284 | 0.2714 |
534
+ | 4.5465 | 668000 | 0.0286 | 0.2696 |
535
+ | 4.5499 | 668500 | 0.0299 | 0.2700 |
536
+ | 4.5533 | 669000 | 0.0282 | 0.2689 |
537
+ | 4.5567 | 669500 | 0.0288 | 0.2715 |
538
+ | 4.5601 | 670000 | 0.0298 | 0.2712 |
539
+ | 4.5635 | 670500 | 0.0302 | 0.2687 |
540
+ | 4.5669 | 671000 | 0.0298 | 0.2709 |
541
+ | 4.5703 | 671500 | 0.0297 | 0.2711 |
542
+ | 4.5737 | 672000 | 0.0297 | 0.2703 |
543
+ | 4.5771 | 672500 | 0.0288 | 0.2685 |
544
+ | 4.5805 | 673000 | 0.0293 | 0.2698 |
545
+ | 4.5839 | 673500 | 0.0293 | 0.2706 |
546
+ | 4.5873 | 674000 | 0.0292 | 0.2688 |
547
+ | 4.5907 | 674500 | 0.0288 | 0.2676 |
548
+ | 4.5941 | 675000 | 0.0294 | 0.2694 |
549
+ | 4.5976 | 675500 | 0.0308 | 0.2697 |
550
+ | 4.6010 | 676000 | 0.0297 | 0.2689 |
551
+ | 4.6044 | 676500 | 0.0287 | 0.2688 |
552
+ | 4.6078 | 677000 | 0.0276 | 0.2677 |
553
+ | 4.6112 | 677500 | 0.0307 | 0.2686 |
554
+ | 4.6146 | 678000 | 0.0301 | 0.2672 |
555
+ | 4.6180 | 678500 | 0.029 | 0.2689 |
556
+ | 4.6214 | 679000 | 0.0306 | 0.2683 |
557
+ | 4.6248 | 679500 | 0.0284 | 0.2689 |
558
+ | 4.6282 | 680000 | 0.0277 | 0.2698 |
559
+ | 4.6316 | 680500 | 0.0291 | 0.2694 |
560
+ | 4.6350 | 681000 | 0.0295 | 0.2660 |
561
+ | 4.6384 | 681500 | 0.0309 | 0.2683 |
562
+ | 4.6418 | 682000 | 0.0278 | 0.2703 |
563
+ | 4.6452 | 682500 | 0.0291 | 0.2690 |
564
+ | 4.6486 | 683000 | 0.0296 | 0.2699 |
565
+ | 4.6520 | 683500 | 0.0307 | 0.2689 |
566
+ | 4.6554 | 684000 | 0.0299 | 0.2679 |
567
+ | 4.6588 | 684500 | 0.03 | 0.2690 |
568
+ | 4.6622 | 685000 | 0.0291 | 0.2682 |
569
+ | 4.6656 | 685500 | 0.0304 | 0.2665 |
570
+ | 4.6690 | 686000 | 0.031 | 0.2657 |
571
+ | 4.6724 | 686500 | 0.03 | 0.2674 |
572
+ | 4.6758 | 687000 | 0.0293 | 0.2696 |
573
+ | 4.6792 | 687500 | 0.0299 | 0.2666 |
574
+ | 4.6826 | 688000 | 0.029 | 0.2668 |
575
+ | 4.6860 | 688500 | 0.0295 | 0.2669 |
576
+ | 4.6894 | 689000 | 0.0288 | 0.2680 |
577
+ | 4.6928 | 689500 | 0.0301 | 0.2674 |
578
+ | 4.6962 | 690000 | 0.03 | 0.2690 |
579
+ | 4.6996 | 690500 | 0.0298 | 0.2678 |
580
+ | 4.7030 | 691000 | 0.03 | 0.2705 |
581
+ | 4.7065 | 691500 | 0.0293 | 0.2692 |
582
+ | 4.7099 | 692000 | 0.0287 | 0.2693 |
583
+ | 4.7133 | 692500 | 0.0304 | 0.2660 |
584
+ | 4.7167 | 693000 | 0.0296 | 0.2662 |
585
+ | 4.7201 | 693500 | 0.0291 | 0.2668 |
586
+ | 4.7235 | 694000 | 0.0308 | 0.2677 |
587
+ | 4.7269 | 694500 | 0.0309 | 0.2668 |
588
+ | 4.7303 | 695000 | 0.0319 | 0.2692 |
589
+ | 4.7337 | 695500 | 0.0297 | 0.2678 |
590
+ | 4.7371 | 696000 | 0.0297 | 0.2672 |
591
+ | 4.7405 | 696500 | 0.0294 | 0.2673 |
592
+ | 4.7439 | 697000 | 0.0293 | 0.2671 |
593
+ | 4.7473 | 697500 | 0.0308 | 0.2687 |
594
+ | 4.7507 | 698000 | 0.0315 | 0.2694 |
595
+ | 4.7541 | 698500 | 0.0286 | 0.2676 |
596
+ | 4.7575 | 699000 | 0.0297 | 0.2687 |
597
+ | 4.7609 | 699500 | 0.0285 | 0.2668 |
598
+ | 4.7643 | 700000 | 0.0282 | 0.2682 |
599
+ | 4.7677 | 700500 | 0.0307 | 0.2667 |
600
+ | 4.7711 | 701000 | 0.0276 | 0.2719 |
601
+ | 4.7745 | 701500 | 0.0297 | 0.2706 |
602
+ | 4.7779 | 702000 | 0.0293 | 0.2691 |
603
+ | 4.7813 | 702500 | 0.029 | 0.2679 |
604
+ | 4.7847 | 703000 | 0.0319 | 0.2678 |
605
+ | 4.7881 | 703500 | 0.0303 | 0.2682 |
606
+ | 4.7915 | 704000 | 0.028 | 0.2688 |
607
+ | 4.7949 | 704500 | 0.031 | 0.2719 |
608
+ | 4.7983 | 705000 | 0.029 | 0.2692 |
609
+ | 4.8017 | 705500 | 0.0313 | 0.2661 |
610
+ | 4.8051 | 706000 | 0.0313 | 0.2685 |
611
+ | 4.8085 | 706500 | 0.0296 | 0.2689 |
612
+ | 4.8119 | 707000 | 0.0309 | 0.2705 |
613
+ | 4.8153 | 707500 | 0.0287 | 0.2691 |
614
+ | 4.8188 | 708000 | 0.031 | 0.2697 |
615
+ | 4.8222 | 708500 | 0.0295 | 0.2683 |
616
+ | 4.8256 | 709000 | 0.0293 | 0.2687 |
617
+ | 4.8290 | 709500 | 0.0316 | 0.2689 |
618
+ | 4.8324 | 710000 | 0.0289 | 0.2691 |
619
+ | 4.8358 | 710500 | 0.0287 | 0.2705 |
620
+ | 4.8392 | 711000 | 0.0292 | 0.2700 |
621
+ | 4.8426 | 711500 | 0.0309 | 0.2682 |
622
+ | 4.8460 | 712000 | 0.0306 | 0.2688 |
623
+ | 4.8494 | 712500 | 0.0304 | 0.2701 |
624
+ | 4.8528 | 713000 | 0.03 | 0.2679 |
625
+ | 4.8562 | 713500 | 0.0293 | 0.2713 |
626
+ | 4.8596 | 714000 | 0.03 | 0.2692 |
627
+ | 4.8630 | 714500 | 0.03 | 0.2700 |
628
+ | 4.8664 | 715000 | 0.0297 | 0.2699 |
629
+ | 4.8698 | 715500 | 0.0282 | 0.2709 |
630
+ | 4.8732 | 716000 | 0.0287 | 0.2715 |
631
+ | 4.8766 | 716500 | 0.0303 | 0.2718 |
632
+ | 4.8800 | 717000 | 0.0304 | 0.2710 |
633
+ | 4.8834 | 717500 | 0.0292 | 0.2720 |
634
+ | 4.8868 | 718000 | 0.0307 | 0.2700 |
635
+ | 4.8902 | 718500 | 0.0304 | 0.2698 |
636
+ | 4.8936 | 719000 | 0.0307 | 0.2681 |
637
+ | 4.8970 | 719500 | 0.0294 | 0.2693 |
638
+ | 4.9004 | 720000 | 0.0315 | 0.2701 |
639
+ | 4.9038 | 720500 | 0.0288 | 0.2702 |
640
+ | 4.9072 | 721000 | 0.0284 | 0.2710 |
641
+ | 4.9106 | 721500 | 0.0309 | 0.2697 |
642
+ | 4.9140 | 722000 | 0.0313 | 0.2698 |
643
+ | 4.9174 | 722500 | 0.0305 | 0.2687 |
644
+ | 4.9208 | 723000 | 0.0306 | 0.2681 |
645
+ | 4.9242 | 723500 | 0.0307 | 0.2702 |
646
+ | 4.9277 | 724000 | 0.0319 | 0.2687 |
647
+ | 4.9311 | 724500 | 0.0285 | 0.2698 |
648
+ | 4.9345 | 725000 | 0.0298 | 0.2697 |
649
+ | 4.9379 | 725500 | 0.0317 | 0.2701 |
650
+ | 4.9413 | 726000 | 0.0316 | 0.2702 |
651
+ | 4.9447 | 726500 | 0.0305 | 0.2691 |
652
+ | 4.9481 | 727000 | 0.0303 | 0.2694 |
653
+ | 4.9515 | 727500 | 0.0302 | 0.2688 |
654
+ | 4.9549 | 728000 | 0.029 | 0.2672 |
655
+ | 4.9583 | 728500 | 0.03 | 0.2690 |
656
+ | 4.9617 | 729000 | 0.0291 | 0.2687 |
657
+ | 4.9651 | 729500 | 0.0301 | 0.2682 |
658
+ | 4.9685 | 730000 | 0.0304 | 0.2680 |
659
+ | 4.9719 | 730500 | 0.0305 | 0.2655 |
660
+ | 4.9753 | 731000 | 0.0285 | 0.2668 |
661
+ | 4.9787 | 731500 | 0.0325 | 0.2672 |
662
+ | 4.9821 | 732000 | 0.0294 | 0.2677 |
663
+ | 4.9855 | 732500 | 0.0308 | 0.2648 |
664
+ | 4.9889 | 733000 | 0.0291 | 0.2672 |
665
+ | 4.9923 | 733500 | 0.0312 | 0.2663 |
666
+ | 4.9957 | 734000 | 0.0305 | 0.2671 |
667
+ | 4.9991 | 734500 | 0.0301 | 0.2677 |
668
+ | 5.0 | 734630 | - | 0.2660 |
669
+ | 5.0025 | 735000 | 0.0214 | 0.2636 |
670
+ | 5.0059 | 735500 | 0.0186 | 0.2625 |
671
+ | 5.0093 | 736000 | 0.0186 | 0.2608 |
672
+ | 5.0127 | 736500 | 0.0189 | 0.2612 |
673
+ | 5.0161 | 737000 | 0.019 | 0.2589 |
674
+ | 5.0195 | 737500 | 0.0185 | 0.2594 |
675
+ | 5.0229 | 738000 | 0.0177 | 0.2604 |
676
+ | 5.0263 | 738500 | 0.0187 | 0.2595 |
677
+ | 5.0297 | 739000 | 0.0185 | 0.2569 |
678
+ | 5.0331 | 739500 | 0.0174 | 0.2569 |
679
+ | 5.0365 | 740000 | 0.0185 | 0.2588 |
680
+ | 5.0400 | 740500 | 0.0186 | 0.2554 |
681
+ | 5.0434 | 741000 | 0.0176 | 0.2574 |
682
+ | 5.0468 | 741500 | 0.0173 | 0.2581 |
683
+ | 5.0502 | 742000 | 0.0182 | 0.2591 |
684
+ | 5.0536 | 742500 | 0.0175 | 0.2585 |
685
+ | 5.0570 | 743000 | 0.0173 | 0.2589 |
686
+ | 5.0604 | 743500 | 0.0175 | 0.2589 |
687
+ | 5.0638 | 744000 | 0.0184 | 0.2612 |
688
+ | 5.0672 | 744500 | 0.019 | 0.2595 |
689
+ | 5.0706 | 745000 | 0.0183 | 0.2588 |
690
+ | 5.0740 | 745500 | 0.0187 | 0.2553 |
691
+ | 5.0774 | 746000 | 0.0183 | 0.2553 |
692
+ | 5.0808 | 746500 | 0.0178 | 0.2560 |
693
+ | 5.0842 | 747000 | 0.0194 | 0.2566 |
694
+ | 5.0876 | 747500 | 0.0187 | 0.2572 |
695
+ | 5.0910 | 748000 | 0.0188 | 0.2534 |
696
+ | 5.0944 | 748500 | 0.0195 | 0.2556 |
697
+ | 5.0978 | 749000 | 0.0187 | 0.2579 |
698
+ | 5.1012 | 749500 | 0.0182 | 0.2558 |
699
+ | 5.1046 | 750000 | 0.0188 | 0.2554 |
700
+ | 5.1080 | 750500 | 0.019 | 0.2566 |
701
+ | 5.1114 | 751000 | 0.0182 | 0.2538 |
702
+ | 5.1148 | 751500 | 0.0185 | 0.2537 |
703
+ | 5.1182 | 752000 | 0.0183 | 0.2559 |
704
+ | 5.1216 | 752500 | 0.0185 | 0.2567 |
705
+ | 5.1250 | 753000 | 0.0186 | 0.2551 |
706
+ | 5.1284 | 753500 | 0.0186 | 0.2574 |
707
+ | 5.1318 | 754000 | 0.0187 | 0.2559 |
708
+ | 5.1352 | 754500 | 0.019 | 0.2566 |
709
+ | 5.1386 | 755000 | 0.0179 | 0.2561 |
710
+ | 5.1420 | 755500 | 0.0186 | 0.2556 |
711
+ | 5.1454 | 756000 | 0.0186 | 0.2545 |
712
+ | 5.1489 | 756500 | 0.0198 | 0.2526 |
713
+ | 5.1523 | 757000 | 0.0195 | 0.2556 |
714
+ | 5.1557 | 757500 | 0.0189 | 0.2519 |
715
+ | 5.1591 | 758000 | 0.0186 | 0.2547 |
716
+ | 5.1625 | 758500 | 0.0186 | 0.2536 |
717
+ | 5.1659 | 759000 | 0.0186 | 0.2548 |
718
+ | 5.1693 | 759500 | 0.0198 | 0.2537 |
719
+ | 5.1727 | 760000 | 0.0179 | 0.2557 |
720
+ | 5.1761 | 760500 | 0.0183 | 0.2540 |
721
+ | 5.1795 | 761000 | 0.0192 | 0.2558 |
722
+ | 5.1829 | 761500 | 0.0199 | 0.2575 |
723
+ | 5.1863 | 762000 | 0.0197 | 0.2555 |
724
+ | 5.1897 | 762500 | 0.0187 | 0.2579 |
725
+ | 5.1931 | 763000 | 0.0191 | 0.2577 |
726
+ | 5.1965 | 763500 | 0.0192 | 0.2572 |
727
+ | 5.1999 | 764000 | 0.0187 | 0.2565 |
728
+ | 5.2033 | 764500 | 0.018 | 0.2565 |
729
+ | 5.2067 | 765000 | 0.0188 | 0.2552 |
730
+ | 5.2101 | 765500 | 0.0193 | 0.2568 |
731
+ | 5.2135 | 766000 | 0.0187 | 0.2574 |
732
+ | 5.2169 | 766500 | 0.0181 | 0.2577 |
733
+ | 5.2203 | 767000 | 0.0197 | 0.2595 |
734
+ | 5.2237 | 767500 | 0.019 | 0.2599 |
735
+ | 5.2271 | 768000 | 0.0196 | 0.2587 |
736
+ | 5.2305 | 768500 | 0.0196 | 0.2584 |
737
+ | 5.2339 | 769000 | 0.0186 | 0.2570 |
738
+ | 5.2373 | 769500 | 0.0193 | 0.2593 |
739
+ | 5.2407 | 770000 | 0.0198 | 0.2595 |
740
+ | 5.2441 | 770500 | 0.019 | 0.2561 |
741
+ | 5.2475 | 771000 | 0.0198 | 0.2584 |
742
+ | 5.2509 | 771500 | 0.0195 | 0.2584 |
743
+ | 5.2543 | 772000 | 0.0201 | 0.2579 |
744
+ | 5.2577 | 772500 | 0.02 | 0.2582 |
745
+ | 5.2612 | 773000 | 0.0194 | 0.2576 |
746
+ | 5.2646 | 773500 | 0.0194 | 0.2585 |
747
+ | 5.2680 | 774000 | 0.0192 | 0.2574 |
748
+ | 5.2714 | 774500 | 0.019 | 0.2559 |
749
+ | 5.2748 | 775000 | 0.0197 | 0.2556 |
750
+ | 5.2782 | 775500 | 0.0191 | 0.2553 |
751
+ | 5.2816 | 776000 | 0.0205 | 0.2577 |
752
+ | 5.2850 | 776500 | 0.0195 | 0.2572 |
753
+ | 5.2884 | 777000 | 0.0207 | 0.2566 |
754
+ | 5.2918 | 777500 | 0.0206 | 0.2571 |
755
+ | 5.2952 | 778000 | 0.0202 | 0.2580 |
756
+ | 5.2986 | 778500 | 0.0192 | 0.2570 |
757
+ | 5.3020 | 779000 | 0.0191 | 0.2558 |
758
+ | 5.3054 | 779500 | 0.0213 | 0.2570 |
759
+ | 5.3088 | 780000 | 0.0193 | 0.2578 |
760
+ | 5.3122 | 780500 | 0.0193 | 0.2567 |
761
+ | 5.3156 | 781000 | 0.0212 | 0.2579 |
762
+ | 5.3190 | 781500 | 0.0197 | 0.2563 |
763
+ | 5.3224 | 782000 | 0.0204 | 0.2592 |
764
+ | 5.3258 | 782500 | 0.0207 | 0.2596 |
765
+ | 5.3292 | 783000 | 0.0197 | 0.2570 |
766
+ | 5.3326 | 783500 | 0.0201 | 0.2590 |
767
+ | 5.3360 | 784000 | 0.0204 | 0.2570 |
768
+ | 5.3394 | 784500 | 0.0198 | 0.2586 |
769
+ | 5.3428 | 785000 | 0.0193 | 0.2597 |
770
+ | 5.3462 | 785500 | 0.0197 | 0.2594 |
771
+ | 5.3496 | 786000 | 0.0205 | 0.2595 |
772
+ | 5.3530 | 786500 | 0.0194 | 0.2603 |
773
+ | 5.3564 | 787000 | 0.0205 | 0.2593 |
774
+ | 5.3598 | 787500 | 0.0205 | 0.2586 |
775
+ | 5.3632 | 788000 | 0.0203 | 0.2583 |
776
+ | 5.3666 | 788500 | 0.0194 | 0.2610 |
777
+ | 5.3701 | 789000 | 0.0206 | 0.2626 |
778
+ | 5.3735 | 789500 | 0.0198 | 0.2602 |
779
+ | 5.3769 | 790000 | 0.0208 | 0.2597 |
780
+ | 5.3803 | 790500 | 0.0201 | 0.2578 |
781
+ | 5.3837 | 791000 | 0.0205 | 0.2578 |
782
+ | 5.3871 | 791500 | 0.0197 | 0.2569 |
783
+ | 5.3905 | 792000 | 0.0204 | 0.2546 |
784
+ | 5.3939 | 792500 | 0.02 | 0.2565 |
785
+ | 5.3973 | 793000 | 0.0202 | 0.2574 |
786
+ | 5.4007 | 793500 | 0.0198 | 0.2572 |
787
+ | 5.4041 | 794000 | 0.0194 | 0.2593 |
788
+ | 5.4075 | 794500 | 0.0215 | 0.2584 |
789
+ | 5.4109 | 795000 | 0.0207 | 0.2590 |
790
+ | 5.4143 | 795500 | 0.021 | 0.2589 |
791
+ | 5.4177 | 796000 | 0.0218 | 0.2589 |
792
+ | 5.4211 | 796500 | 0.0211 | 0.2595 |
793
+ | 5.4245 | 797000 | 0.0203 | 0.2584 |
794
+ | 5.4279 | 797500 | 0.0204 | 0.2596 |
795
+ | 5.4313 | 798000 | 0.0198 | 0.2594 |
796
+ | 5.4347 | 798500 | 0.0208 | 0.2596 |
797
+ | 5.4381 | 799000 | 0.02 | 0.2590 |
798
+ | 5.4415 | 799500 | 0.0218 | 0.2583 |
799
+ | 5.4449 | 800000 | 0.0208 | 0.2578 |
800
+ | 5.4483 | 800500 | 0.0198 | 0.2582 |
801
+ | 5.4517 | 801000 | 0.0209 | 0.2583 |
802
+ | 5.4551 | 801500 | 0.02 | 0.2596 |
803
+ | 5.4585 | 802000 | 0.0206 | 0.2591 |
804
+ | 5.4619 | 802500 | 0.0208 | 0.2610 |
805
+ | 5.4653 | 803000 | 0.0219 | 0.2603 |
806
+ | 5.4687 | 803500 | 0.0208 | 0.2598 |
807
+ | 5.4721 | 804000 | 0.0208 | 0.2582 |
808
+ | 5.4755 | 804500 | 0.0224 | 0.2582 |
809
+ | 5.4789 | 805000 | 0.0232 | 0.2564 |
810
+ | 5.4824 | 805500 | 0.0204 | 0.2590 |
811
+ | 5.4858 | 806000 | 0.0218 | 0.2598 |
812
+ | 5.4892 | 806500 | 0.0202 | 0.2612 |
813
+ | 5.4926 | 807000 | 0.0204 | 0.2615 |
814
+ | 5.4960 | 807500 | 0.0208 | 0.2608 |
815
+ | 5.4994 | 808000 | 0.0199 | 0.2604 |
816
+ | 5.5028 | 808500 | 0.0219 | 0.2587 |
817
+ | 5.5062 | 809000 | 0.0197 | 0.2613 |
818
+ | 5.5096 | 809500 | 0.0209 | 0.2606 |
819
+ | 5.5130 | 810000 | 0.0211 | 0.2615 |
820
+ | 5.5164 | 810500 | 0.021 | 0.2613 |
821
+ | 5.5198 | 811000 | 0.0205 | 0.2594 |
822
+ | 5.5232 | 811500 | 0.0208 | 0.2581 |
823
+ | 5.5266 | 812000 | 0.0206 | 0.2577 |
824
+ | 5.5300 | 812500 | 0.0202 | 0.2574 |
825
+ | 5.5334 | 813000 | 0.021 | 0.2592 |
826
+ | 5.5368 | 813500 | 0.0202 | 0.2574 |
827
+ | 5.5402 | 814000 | 0.0211 | 0.2573 |
828
+ | 5.5436 | 814500 | 0.02 | 0.2581 |
829
+ | 5.5470 | 815000 | 0.0207 | 0.2598 |
830
+ | 5.5504 | 815500 | 0.0217 | 0.2603 |
831
+ | 5.5538 | 816000 | 0.0222 | 0.2594 |
832
+ | 5.5572 | 816500 | 0.02 | 0.2595 |
833
+ | 5.5606 | 817000 | 0.0208 | 0.2605 |
834
+ | 5.5640 | 817500 | 0.0221 | 0.2606 |
835
+ | 5.5674 | 818000 | 0.0211 | 0.2586 |
836
+ | 5.5708 | 818500 | 0.0215 | 0.2592 |
837
+ | 5.5742 | 819000 | 0.0216 | 0.2602 |
838
+ | 5.5776 | 819500 | 0.0221 | 0.2600 |
839
+ | 5.5810 | 820000 | 0.0207 | 0.2606 |
840
+ | 5.5844 | 820500 | 0.0202 | 0.2598 |
841
+ | 5.5878 | 821000 | 0.0205 | 0.2589 |
842
+ | 5.5913 | 821500 | 0.0221 | 0.2601 |
843
+ | 5.5947 | 822000 | 0.0219 | 0.2596 |
844
+ | 5.5981 | 822500 | 0.0204 | 0.2609 |
845
+ | 5.6015 | 823000 | 0.022 | 0.2585 |
846
+ | 5.6049 | 823500 | 0.0206 | 0.2580 |
847
+ | 5.6083 | 824000 | 0.0201 | 0.2604 |
848
+ | 5.6117 | 824500 | 0.0213 | 0.2600 |
849
+ | 5.6151 | 825000 | 0.0208 | 0.2578 |
850
+ | 5.6185 | 825500 | 0.0213 | 0.2587 |
851
+ | 5.6219 | 826000 | 0.0214 | 0.2587 |
852
+ | 5.6253 | 826500 | 0.022 | 0.2599 |
853
+ | 5.6287 | 827000 | 0.0211 | 0.2590 |
854
+ | 5.6321 | 827500 | 0.0207 | 0.2598 |
855
+ | 5.6355 | 828000 | 0.021 | 0.2607 |
856
+ | 5.6389 | 828500 | 0.0209 | 0.2612 |
857
+ | 5.6423 | 829000 | 0.0217 | 0.2611 |
858
+ | 5.6457 | 829500 | 0.0209 | 0.2600 |
859
+ | 5.6491 | 830000 | 0.0219 | 0.2610 |
860
+ | 5.6525 | 830500 | 0.0224 | 0.2611 |
861
+ | 5.6559 | 831000 | 0.0214 | 0.2634 |
862
+ | 5.6593 | 831500 | 0.022 | 0.2597 |
863
+ | 5.6627 | 832000 | 0.0209 | 0.2597 |
864
+ | 5.6661 | 832500 | 0.0219 | 0.2585 |
865
+ | 5.6695 | 833000 | 0.0216 | 0.2581 |
866
+ | 5.6729 | 833500 | 0.0229 | 0.2605 |
867
+ | 5.6763 | 834000 | 0.0218 | 0.2578 |
868
+ | 5.6797 | 834500 | 0.0223 | 0.2611 |
869
+ | 5.6831 | 835000 | 0.0212 | 0.2614 |
870
+ | 5.6865 | 835500 | 0.021 | 0.2592 |
871
+ | 5.6899 | 836000 | 0.0212 | 0.2601 |
872
+ | 5.6933 | 836500 | 0.0228 | 0.2612 |
873
+ | 5.6967 | 837000 | 0.0217 | 0.2617 |
874
+ | 5.7001 | 837500 | 0.0228 | 0.2604 |
875
+ | 5.7036 | 838000 | 0.0215 | 0.2599 |
876
+ | 5.7070 | 838500 | 0.0212 | 0.2598 |
877
+ | 5.7104 | 839000 | 0.0224 | 0.2592 |
878
+ | 5.7138 | 839500 | 0.0213 | 0.2562 |
879
+ | 5.7172 | 840000 | 0.0211 | 0.2598 |
880
+ | 5.7206 | 840500 | 0.0213 | 0.2604 |
881
+ | 5.7240 | 841000 | 0.0221 | 0.2601 |
882
+ | 5.7274 | 841500 | 0.0227 | 0.2610 |
883
+ | 5.7308 | 842000 | 0.0214 | 0.2612 |
884
+ | 5.7342 | 842500 | 0.0212 | 0.2619 |
885
+ | 5.7376 | 843000 | 0.0221 | 0.2594 |
886
+ | 5.7410 | 843500 | 0.0212 | 0.2616 |
887
+ | 5.7444 | 844000 | 0.0221 | 0.2618 |
888
+ | 5.7478 | 844500 | 0.021 | 0.2623 |
889
+ | 5.7512 | 845000 | 0.0222 | 0.2597 |
890
+ | 5.7546 | 845500 | 0.0223 | 0.2601 |
891
+ | 5.7580 | 846000 | 0.0214 | 0.2599 |
892
+ | 5.7614 | 846500 | 0.0222 | 0.2601 |
893
+ | 5.7648 | 847000 | 0.0221 | 0.2593 |
894
+ | 5.7682 | 847500 | 0.0222 | 0.2596 |
895
+ | 5.7716 | 848000 | 0.0229 | 0.2586 |
896
+ | 5.7750 | 848500 | 0.0207 | 0.2612 |
897
+ | 5.7784 | 849000 | 0.0216 | 0.2612 |
898
+ | 5.7818 | 849500 | 0.0217 | 0.2603 |
899
+ | 5.7852 | 850000 | 0.0208 | 0.2606 |
900
+ | 5.7886 | 850500 | 0.0221 | 0.2609 |
901
+ | 5.7920 | 851000 | 0.0209 | 0.2607 |
902
+ | 5.7954 | 851500 | 0.0216 | 0.2620 |
903
+ | 5.7988 | 852000 | 0.0224 | 0.2597 |
904
+ | 5.8022 | 852500 | 0.0227 | 0.2614 |
905
+ | 5.8056 | 853000 | 0.0232 | 0.2605 |
906
+ | 5.8090 | 853500 | 0.0216 | 0.2589 |
907
+ | 5.8124 | 854000 | 0.0225 | 0.2594 |
908
+ | 5.8159 | 854500 | 0.0221 | 0.2600 |
909
+ | 5.8193 | 855000 | 0.0222 | 0.2601 |
910
+ | 5.8227 | 855500 | 0.0215 | 0.2594 |
911
+ | 5.8261 | 856000 | 0.0223 | 0.2597 |
912
+ | 5.8295 | 856500 | 0.022 | 0.2583 |
913
+ | 5.8329 | 857000 | 0.0218 | 0.2615 |
914
+
915
+ </details>
916
+
917
+ ### Framework Versions
918
+ - Python: 3.9.25
919
+ - Sentence Transformers: 5.1.2
920
+ - Transformers: 4.57.6
921
+ - PyTorch: 2.6.0+cu118
922
+ - Accelerate: 1.10.1
923
+ - Datasets: 4.5.0
924
+ - Tokenizers: 0.22.2
925
+
926
+ ## Citation
927
+
928
+ ### BibTeX
929
+
930
+ #### Sentence Transformers
931
+ ```bibtex
932
+ @inproceedings{reimers-2019-sentence-bert,
933
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
934
+ author = "Reimers, Nils and Gurevych, Iryna",
935
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
936
+ month = "11",
937
+ year = "2019",
938
+ publisher = "Association for Computational Linguistics",
939
+ url = "https://arxiv.org/abs/1908.10084",
940
+ }
941
+ ```
942
+
943
+ <!--
944
+ ## Glossary
945
+
946
+ *Clearly define terms in order to be accessible across audiences.*
947
+ -->
948
+
949
+ <!--
950
+ ## Model Card Authors
951
+
952
+ *List the people who created the model card, providing recognition and accountability for the detailed work that went into its construction.*
953
+ -->
954
+
955
+ <!--
956
+ ## Model Card Contact
957
+
958
+ *Provide a way for people who have updates, suggestions, or questions about the model card to contact the model card authors.*
959
+ -->
checkpoints/checkpoint-857000/config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "classifier_activation": "silu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 0,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 2,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
+ "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
+ "mlp_bias": false,
31
+ "mlp_dropout": 0.0,
32
+ "model_type": "modernbert",
33
+ "norm_bias": false,
34
+ "norm_eps": 1e-05,
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 22,
37
+ "pad_token_id": 1,
38
+ "position_embedding_type": "absolute",
39
+ "repad_logits_with_grad": false,
40
+ "sep_token_id": 2,
41
+ "sparse_pred_ignore_index": -100,
42
+ "sparse_prediction": false,
43
+ "transformers_version": "4.57.6",
44
+ "vocab_size": 51200
45
+ }
checkpoints/checkpoint-857000/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.2",
5
+ "transformers": "4.57.6",
6
+ "pytorch": "2.6.0+cu118"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
checkpoints/checkpoint-857000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968d39b55143a7137661bd7b154a59ab68b07ee1fbe01628f378a1b7451c1000
3
+ size 598626040
checkpoints/checkpoint-857000/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoints/checkpoint-857000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1686d2231587c697309dbfa9f955f550cbcbf253fffe9f1faa31c00fd7a3a74f
3
+ size 1197335098
checkpoints/checkpoint-857000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f935dd2f0d4959a701b628dbca5f060549ebcbf9edd272e5476ec6b8527a689a
3
+ size 13990
checkpoints/checkpoint-857000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9504fec7e68727a707ec6b2a3f426245b3b6080322e4f925ff4c7de15bb09113
3
+ size 1064
checkpoints/checkpoint-857000/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
checkpoints/checkpoint-857000/special_tokens_map.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|translation|>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "mask_token": {
20
+ "content": "<mask>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": {
27
+ "content": "<pad>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "unk_token": {
34
+ "content": "<unk>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ }
40
+ }
checkpoints/checkpoint-857000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-857000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8dc3e139a6f2c6e1781996aabfef34c32241dcff263dbc66cf69b4760aeee9
3
+ size 1074422
checkpoints/checkpoint-857000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-857000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-857000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d517e3e36acb7d029596418aa57488e7475a57c419a9027ad114a9944a373224
3
+ size 5752
checkpoints/checkpoint-858000/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoints/checkpoint-858000/README.md ADDED
@@ -0,0 +1,961 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:1175405
9
+ - loss:CosineSimilarityLoss
10
+ base_model: BSC-LT/MrBERT-es
11
+ widget:
12
+ - source_sentence: El camino de Santiago articula la península ibérica con Europa.
13
+ sentences:
14
+ - Y un millon de euros y de pesetas tampoco son lo mismo.
15
+ - Asimismo, en los montes puede haber matorral de coscoja y, también, lentisco,
16
+ romero, enebro o brezo.
17
+ - El país fue el noveno mayor importador de petróleo del mundo en 2013 .
18
+ - source_sentence: Será la oportunidad de fabulosos negocios, que enriquecieron a
19
+ José de Salamanca y Mayol, marqués de Salamanca, quien dio nombre al nuevo barrio
20
+ creado al este de lo que pasará a ser el eje central de la ciudad .
21
+ sentences:
22
+ - Para terminar, como suelen hacer, el 'Free from desire', de Gala.
23
+ - Que JAMT sus deseos y buenos pensamientos FIELES sean sólo para mi AMPS, que sus
24
+ pensamientos, ATENCION,gentilezas, HALAGOS,REGALOS,TIEMPO LIBRE,amor, cariño,
25
+ ternura, dinero, bondades,DEDICACION y detalles sean sólo para mi AMPS Solamente
26
+ Y UNICAMENTE yo AMPS le daré Y DOY AMOR Y placer varias veces en el mismo día,
27
+ solo yo AMPS tendré Y TENGO ese poder dado por ti mi reina.
28
+ - Esperamos con anhelo poder saludarte personalmente en breve. 50 años invirtiendo
29
+ en personas Comunicación SSRR Comunicación SSRR2020-05-05 17:59:082020-07-30 16:55:37Regresamos
30
+ con más energía, si cabe.
31
+ - source_sentence: Fin del sitio En una sección titulada "Un lentísimo adiós", Xataka
32
+ en 2017 decía que la portada de Barrapunto mostraba contenidos de hacía 42 y más
33
+ días.
34
+ sentences:
35
+ - Taxonomía Castanea henryi fue descrita primero por Sidney Alfred Skan como Castanopsis
36
+ henryi y luego trasladado al género Castanea por Alfred Rehder & Ernest Henry
37
+ Wilson y publicado en Plantae Wilsonianae, an enumeration of the woody plants
38
+ collected in Western China for the Arnold Arboretum of Harvard University during
39
+ the years 1907, 1908 and 1910 by E.H.
40
+ - Para este 2019 se trabaja con 6 empresas, que representarían a la segunda generación
41
+ de dicho programa.
42
+ - Ya no está uno para estos trotes.
43
+ - source_sentence: Teatro Poético repartido en veintiún entremeses nuevos, Zaragoza,
44
+ 1651.
45
+ sentences:
46
+ - Finalmente el territorio caribeño logró la independencia entre finales del y el
47
+ .
48
+ - No es considerada fiable.
49
+ - La página se generó a las 19:58:53.
50
+ - source_sentence: Historia La botánica moderna Significado de la botánica como ciencia
51
+ Los distintos grupos de vegetales participan de manera fundamental en los ciclos
52
+ de la biosfera.
53
+ sentences:
54
+ - Durante la transpiración, el sudor elimina el calor del cuerpo humano por evaporación.
55
+ - El COPINH exige a las autoridades judiciales y fiscales proceder judicialmente
56
+ contra los alcaldes municipales, altos funcionarios de SERNA, y contra las empresas
57
+ y demás sectores involucrados en esta agresión contra el pueblo lenca.
58
+ - A nivel global, el artículo13 del Pacto Internacional de Derechos Económicos,
59
+ Sociales y Culturales de 1966 de las Naciones Unidas reconoce el derecho de toda
60
+ persona a la educación.
61
+ pipeline_tag: sentence-similarity
62
+ library_name: sentence-transformers
63
+ metrics:
64
+ - pearson_cosine
65
+ - spearman_cosine
66
+ model-index:
67
+ - name: SentenceTransformer based on BSC-LT/MrBERT-es
68
+ results:
69
+ - task:
70
+ type: semantic-similarity
71
+ name: Semantic Similarity
72
+ dataset:
73
+ name: sts eval
74
+ type: sts_eval
75
+ metrics:
76
+ - type: pearson_cosine
77
+ value: 0.43567772480097167
78
+ name: Pearson Cosine
79
+ - type: spearman_cosine
80
+ value: 0.2612476203839023
81
+ name: Spearman Cosine
82
+ ---
83
+
84
+ # SentenceTransformer based on BSC-LT/MrBERT-es
85
+
86
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BSC-LT/MrBERT-es](https://huggingface.co/BSC-LT/MrBERT-es). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
87
+
88
+ ## Model Details
89
+
90
+ ### Model Description
91
+ - **Model Type:** Sentence Transformer
92
+ - **Base model:** [BSC-LT/MrBERT-es](https://huggingface.co/BSC-LT/MrBERT-es) <!-- at revision cfc9d049c3dee345ec55fa69e689c75e8af3c094 -->
93
+ - **Maximum Sequence Length:** 8192 tokens
94
+ - **Output Dimensionality:** 768 dimensions
95
+ - **Similarity Function:** Cosine Similarity
96
+ <!-- - **Training Dataset:** Unknown -->
97
+ <!-- - **Language:** Unknown -->
98
+ <!-- - **License:** Unknown -->
99
+
100
+ ### Model Sources
101
+
102
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
103
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
104
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
105
+
106
+ ### Full Model Architecture
107
+
108
+ ```
109
+ SentenceTransformer(
110
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
111
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
112
+ (2): Normalize()
113
+ )
114
+ ```
115
+
116
+ ## Usage
117
+
118
+ ### Direct Usage (Sentence Transformers)
119
+
120
+ First install the Sentence Transformers library:
121
+
122
+ ```bash
123
+ pip install -U sentence-transformers
124
+ ```
125
+
126
+ Then you can load this model and run inference.
127
+ ```python
128
+ from sentence_transformers import SentenceTransformer
129
+
130
+ # Download from the 🤗 Hub
131
+ model = SentenceTransformer("sentence_transformers_model_id")
132
+ # Run inference
133
+ sentences = [
134
+ 'Historia La botánica moderna Significado de la botánica como ciencia Los distintos grupos de vegetales participan de manera fundamental en los ciclos de la biosfera.',
135
+ 'El COPINH exige a las autoridades judiciales y fiscales proceder judicialmente contra los alcaldes municipales, altos funcionarios de SERNA, y contra las empresas y demás sectores involucrados en esta agresión contra el pueblo lenca.',
136
+ 'Durante la transpiración, el sudor elimina el calor del cuerpo humano por evaporación.',
137
+ ]
138
+ embeddings = model.encode(sentences)
139
+ print(embeddings.shape)
140
+ # [3, 768]
141
+
142
+ # Get the similarity scores for the embeddings
143
+ similarities = model.similarity(embeddings, embeddings)
144
+ print(similarities)
145
+ # tensor([[ 1.0000, 0.2498, 0.1134],
146
+ # [ 0.2498, 1.0000, -0.1450],
147
+ # [ 0.1134, -0.1450, 1.0000]])
148
+ ```
149
+
150
+ <!--
151
+ ### Direct Usage (Transformers)
152
+
153
+ <details><summary>Click to see the direct usage in Transformers</summary>
154
+
155
+ </details>
156
+ -->
157
+
158
+ <!--
159
+ ### Downstream Usage (Sentence Transformers)
160
+
161
+ You can finetune this model on your own dataset.
162
+
163
+ <details><summary>Click to expand</summary>
164
+
165
+ </details>
166
+ -->
167
+
168
+ <!--
169
+ ### Out-of-Scope Use
170
+
171
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
172
+ -->
173
+
174
+ ## Evaluation
175
+
176
+ ### Metrics
177
+
178
+ #### Semantic Similarity
179
+
180
+ * Dataset: `sts_eval`
181
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
182
+
183
+ | Metric | Value |
184
+ |:--------------------|:-----------|
185
+ | pearson_cosine | 0.4357 |
186
+ | **spearman_cosine** | **0.2612** |
187
+
188
+ <!--
189
+ ## Bias, Risks and Limitations
190
+
191
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
192
+ -->
193
+
194
+ <!--
195
+ ### Recommendations
196
+
197
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
198
+ -->
199
+
200
+ ## Training Details
201
+
202
+ ### Training Dataset
203
+
204
+ #### Unnamed Dataset
205
+
206
+ * Size: 1,175,405 training samples
207
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
208
+ * Approximate statistics based on the first 1000 samples:
209
+ | | sentence_0 | sentence_1 | label |
210
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------|
211
+ | type | string | string | float |
212
+ | details | <ul><li>min: 5 tokens</li><li>mean: 37.17 tokens</li><li>max: 290 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 38.26 tokens</li><li>max: 375 tokens</li></ul> | <ul><li>min: -0.75</li><li>mean: 0.17</li><li>max: 1.0</li></ul> |
213
+ * Samples:
214
+ | sentence_0 | sentence_1 | label |
215
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------|
216
+ | <code>Los ahorros de la jubilación podrán usarse para este fin.</code> | <code>Sony Ericsson W8 además de todo eso presenta una pantalla táctil de tipo HVGA de 320 x 480 píxeles y la pantalla posee 16.777.216 colores.</code> | <code>0.2533760964870453</code> |
217
+ | <code>Programas de desarrollo en el cerebelo La transición célula progenitora a neurona madura, implica una serie de cambios morfológicos y moleculares altamente regulada espacial y temporalmente.</code> | <code>Dos ejemplos en los que el principio de exclusión relaciona la materia con la ocupación del espacio son las estrellas enanas blancas y las estrellas de neutrones, que se analizan más adelante.</code> | <code>0.1902337223291397</code> |
218
+ | <code>Bolsa inmobiliaria online en Distrito Federal df, inmuebles en venta y renta, casas, departamentos, locales, terrenos, inmobiliarias, desarrollos, anunciar inmuebles.</code> | <code>Otros prefieren hablar de "régimen" o "sistema feudal", para diferenciarlo sutilmente del feudalismo estricto, o de síntesis feudal, para marcar el hecho de que sobreviven en ella rasgos de la antigüedad clásica mezclados con contribuciones germánicas, implicando tanto a instituciones como a elementos productivos, y significó la especificidad del feudalismo europeo occidental como formación económico social frente a otras también feudales, con consecuencias trascendentales en el futuro devenir histórico.</code> | <code>0.21721388399600983</code> |
219
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
220
+ ```json
221
+ {
222
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
223
+ }
224
+ ```
225
+
226
+ ### Training Hyperparameters
227
+ #### Non-Default Hyperparameters
228
+
229
+ - `eval_strategy`: steps
230
+ - `max_grad_norm`: 2.0
231
+ - `num_train_epochs`: 10
232
+ - `multi_dataset_batch_sampler`: round_robin
233
+
234
+ #### All Hyperparameters
235
+ <details><summary>Click to expand</summary>
236
+
237
+ - `overwrite_output_dir`: False
238
+ - `do_predict`: False
239
+ - `eval_strategy`: steps
240
+ - `prediction_loss_only`: True
241
+ - `per_device_train_batch_size`: 8
242
+ - `per_device_eval_batch_size`: 8
243
+ - `per_gpu_train_batch_size`: None
244
+ - `per_gpu_eval_batch_size`: None
245
+ - `gradient_accumulation_steps`: 1
246
+ - `eval_accumulation_steps`: None
247
+ - `torch_empty_cache_steps`: None
248
+ - `learning_rate`: 5e-05
249
+ - `weight_decay`: 0.0
250
+ - `adam_beta1`: 0.9
251
+ - `adam_beta2`: 0.999
252
+ - `adam_epsilon`: 1e-08
253
+ - `max_grad_norm`: 2.0
254
+ - `num_train_epochs`: 10
255
+ - `max_steps`: -1
256
+ - `lr_scheduler_type`: linear
257
+ - `lr_scheduler_kwargs`: None
258
+ - `warmup_ratio`: 0.0
259
+ - `warmup_steps`: 0
260
+ - `log_level`: passive
261
+ - `log_level_replica`: warning
262
+ - `log_on_each_node`: True
263
+ - `logging_nan_inf_filter`: True
264
+ - `save_safetensors`: True
265
+ - `save_on_each_node`: False
266
+ - `save_only_model`: False
267
+ - `restore_callback_states_from_checkpoint`: False
268
+ - `no_cuda`: False
269
+ - `use_cpu`: False
270
+ - `use_mps_device`: False
271
+ - `seed`: 42
272
+ - `data_seed`: None
273
+ - `jit_mode_eval`: False
274
+ - `bf16`: False
275
+ - `fp16`: False
276
+ - `fp16_opt_level`: O1
277
+ - `half_precision_backend`: auto
278
+ - `bf16_full_eval`: False
279
+ - `fp16_full_eval`: False
280
+ - `tf32`: None
281
+ - `local_rank`: 0
282
+ - `ddp_backend`: None
283
+ - `tpu_num_cores`: None
284
+ - `tpu_metrics_debug`: False
285
+ - `debug`: []
286
+ - `dataloader_drop_last`: False
287
+ - `dataloader_num_workers`: 0
288
+ - `dataloader_prefetch_factor`: None
289
+ - `past_index`: -1
290
+ - `disable_tqdm`: False
291
+ - `remove_unused_columns`: True
292
+ - `label_names`: None
293
+ - `load_best_model_at_end`: False
294
+ - `ignore_data_skip`: False
295
+ - `fsdp`: []
296
+ - `fsdp_min_num_params`: 0
297
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
298
+ - `fsdp_transformer_layer_cls_to_wrap`: None
299
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
300
+ - `parallelism_config`: None
301
+ - `deepspeed`: None
302
+ - `label_smoothing_factor`: 0.0
303
+ - `optim`: adamw_torch
304
+ - `optim_args`: None
305
+ - `adafactor`: False
306
+ - `group_by_length`: False
307
+ - `length_column_name`: length
308
+ - `project`: huggingface
309
+ - `trackio_space_id`: trackio
310
+ - `ddp_find_unused_parameters`: None
311
+ - `ddp_bucket_cap_mb`: None
312
+ - `ddp_broadcast_buffers`: False
313
+ - `dataloader_pin_memory`: True
314
+ - `dataloader_persistent_workers`: False
315
+ - `skip_memory_metrics`: True
316
+ - `use_legacy_prediction_loop`: False
317
+ - `push_to_hub`: False
318
+ - `resume_from_checkpoint`: None
319
+ - `hub_model_id`: None
320
+ - `hub_strategy`: every_save
321
+ - `hub_private_repo`: None
322
+ - `hub_always_push`: False
323
+ - `hub_revision`: None
324
+ - `gradient_checkpointing`: False
325
+ - `gradient_checkpointing_kwargs`: None
326
+ - `include_inputs_for_metrics`: False
327
+ - `include_for_metrics`: []
328
+ - `eval_do_concat_batches`: True
329
+ - `fp16_backend`: auto
330
+ - `push_to_hub_model_id`: None
331
+ - `push_to_hub_organization`: None
332
+ - `mp_parameters`:
333
+ - `auto_find_batch_size`: False
334
+ - `full_determinism`: False
335
+ - `torchdynamo`: None
336
+ - `ray_scope`: last
337
+ - `ddp_timeout`: 1800
338
+ - `torch_compile`: False
339
+ - `torch_compile_backend`: None
340
+ - `torch_compile_mode`: None
341
+ - `include_tokens_per_second`: False
342
+ - `include_num_input_tokens_seen`: no
343
+ - `neftune_noise_alpha`: None
344
+ - `optim_target_modules`: None
345
+ - `batch_eval_metrics`: False
346
+ - `eval_on_start`: False
347
+ - `use_liger_kernel`: False
348
+ - `liger_kernel_config`: None
349
+ - `eval_use_gather_object`: False
350
+ - `average_tokens_across_devices`: True
351
+ - `prompts`: None
352
+ - `batch_sampler`: batch_sampler
353
+ - `multi_dataset_batch_sampler`: round_robin
354
+ - `router_mapping`: {}
355
+ - `learning_rate_mapping`: {}
356
+
357
+ </details>
358
+
359
+ ### Training Logs
360
+ <details><summary>Click to expand</summary>
361
+
362
+ | Epoch | Step | Training Loss | sts_eval_spearman_cosine |
363
+ |:------:|:------:|:-------------:|:------------------------:|
364
+ | 3.9714 | 583500 | 0.0253 | 0.2725 |
365
+ | 3.9748 | 584000 | 0.0274 | 0.2733 |
366
+ | 3.9782 | 584500 | 0.0279 | 0.2711 |
367
+ | 3.9816 | 585000 | 0.0248 | 0.2708 |
368
+ | 3.9850 | 585500 | 0.0264 | 0.2676 |
369
+ | 3.9884 | 586000 | 0.0267 | 0.2713 |
370
+ | 3.9918 | 586500 | 0.0276 | 0.2703 |
371
+ | 3.9952 | 587000 | 0.0273 | 0.2674 |
372
+ | 3.9986 | 587500 | 0.0278 | 0.2688 |
373
+ | 4.0 | 587704 | - | 0.2672 |
374
+ | 4.0020 | 588000 | 0.0259 | 0.2675 |
375
+ | 4.0054 | 588500 | 0.0257 | 0.2697 |
376
+ | 4.0088 | 589000 | 0.0268 | 0.2694 |
377
+ | 4.0122 | 589500 | 0.0256 | 0.2706 |
378
+ | 4.0156 | 590000 | 0.0254 | 0.2706 |
379
+ | 4.0190 | 590500 | 0.0263 | 0.2695 |
380
+ | 4.0224 | 591000 | 0.0274 | 0.2691 |
381
+ | 4.0258 | 591500 | 0.0255 | 0.2712 |
382
+ | 4.0292 | 592000 | 0.0253 | 0.2696 |
383
+ | 4.0326 | 592500 | 0.025 | 0.2692 |
384
+ | 4.0360 | 593000 | 0.0263 | 0.2679 |
385
+ | 4.0394 | 593500 | 0.028 | 0.2689 |
386
+ | 4.0429 | 594000 | 0.0275 | 0.2696 |
387
+ | 4.0463 | 594500 | 0.0268 | 0.2699 |
388
+ | 4.0497 | 595000 | 0.025 | 0.2686 |
389
+ | 4.0531 | 595500 | 0.0277 | 0.2683 |
390
+ | 4.0565 | 596000 | 0.0276 | 0.2690 |
391
+ | 4.0599 | 596500 | 0.0242 | 0.2686 |
392
+ | 4.0633 | 597000 | 0.0264 | 0.2691 |
393
+ | 4.0667 | 597500 | 0.0273 | 0.2681 |
394
+ | 4.0701 | 598000 | 0.0269 | 0.2693 |
395
+ | 4.0735 | 598500 | 0.0274 | 0.2698 |
396
+ | 4.0769 | 599000 | 0.0252 | 0.2704 |
397
+ | 4.0803 | 599500 | 0.0268 | 0.2708 |
398
+ | 4.0837 | 600000 | 0.0259 | 0.2696 |
399
+ | 4.0871 | 600500 | 0.0277 | 0.2689 |
400
+ | 4.0905 | 601000 | 0.0262 | 0.2663 |
401
+ | 4.0939 | 601500 | 0.0266 | 0.2697 |
402
+ | 4.0973 | 602000 | 0.0269 | 0.2700 |
403
+ | 4.1007 | 602500 | 0.0253 | 0.2673 |
404
+ | 4.1041 | 603000 | 0.0281 | 0.2684 |
405
+ | 4.1075 | 603500 | 0.0263 | 0.2687 |
406
+ | 4.1109 | 604000 | 0.028 | 0.2677 |
407
+ | 4.1143 | 604500 | 0.0277 | 0.2701 |
408
+ | 4.1177 | 605000 | 0.0273 | 0.2686 |
409
+ | 4.1211 | 605500 | 0.0253 | 0.2681 |
410
+ | 4.1245 | 606000 | 0.0264 | 0.2694 |
411
+ | 4.1279 | 606500 | 0.0281 | 0.2706 |
412
+ | 4.1313 | 607000 | 0.0262 | 0.2714 |
413
+ | 4.1347 | 607500 | 0.0265 | 0.2673 |
414
+ | 4.1381 | 608000 | 0.0254 | 0.2685 |
415
+ | 4.1415 | 608500 | 0.0279 | 0.2674 |
416
+ | 4.1449 | 609000 | 0.0284 | 0.2692 |
417
+ | 4.1483 | 609500 | 0.0283 | 0.2680 |
418
+ | 4.1517 | 610000 | 0.0277 | 0.2673 |
419
+ | 4.1552 | 610500 | 0.0264 | 0.2692 |
420
+ | 4.1586 | 611000 | 0.0261 | 0.2687 |
421
+ | 4.1620 | 611500 | 0.0273 | 0.2697 |
422
+ | 4.1654 | 612000 | 0.027 | 0.2697 |
423
+ | 4.1688 | 612500 | 0.0274 | 0.2696 |
424
+ | 4.1722 | 613000 | 0.0273 | 0.2698 |
425
+ | 4.1756 | 613500 | 0.0255 | 0.2659 |
426
+ | 4.1790 | 614000 | 0.0274 | 0.2660 |
427
+ | 4.1824 | 614500 | 0.0284 | 0.2666 |
428
+ | 4.1858 | 615000 | 0.0268 | 0.2680 |
429
+ | 4.1892 | 615500 | 0.0278 | 0.2674 |
430
+ | 4.1926 | 616000 | 0.0276 | 0.2684 |
431
+ | 4.1960 | 616500 | 0.026 | 0.2700 |
432
+ | 4.1994 | 617000 | 0.0266 | 0.2686 |
433
+ | 4.2028 | 617500 | 0.0266 | 0.2680 |
434
+ | 4.2062 | 618000 | 0.0277 | 0.2678 |
435
+ | 4.2096 | 618500 | 0.0291 | 0.2649 |
436
+ | 4.2130 | 619000 | 0.0281 | 0.2635 |
437
+ | 4.2164 | 619500 | 0.0291 | 0.2659 |
438
+ | 4.2198 | 620000 | 0.0281 | 0.2672 |
439
+ | 4.2232 | 620500 | 0.0282 | 0.2655 |
440
+ | 4.2266 | 621000 | 0.0287 | 0.2648 |
441
+ | 4.2300 | 621500 | 0.0285 | 0.2640 |
442
+ | 4.2334 | 622000 | 0.0282 | 0.2645 |
443
+ | 4.2368 | 622500 | 0.027 | 0.2674 |
444
+ | 4.2402 | 623000 | 0.0268 | 0.2669 |
445
+ | 4.2436 | 623500 | 0.0291 | 0.2663 |
446
+ | 4.2470 | 624000 | 0.0291 | 0.2645 |
447
+ | 4.2504 | 624500 | 0.0277 | 0.2677 |
448
+ | 4.2538 | 625000 | 0.0273 | 0.2631 |
449
+ | 4.2572 | 625500 | 0.0265 | 0.2653 |
450
+ | 4.2606 | 626000 | 0.0276 | 0.2665 |
451
+ | 4.2641 | 626500 | 0.027 | 0.2654 |
452
+ | 4.2675 | 627000 | 0.0271 | 0.2659 |
453
+ | 4.2709 | 627500 | 0.0279 | 0.2659 |
454
+ | 4.2743 | 628000 | 0.0274 | 0.2648 |
455
+ | 4.2777 | 628500 | 0.0263 | 0.2659 |
456
+ | 4.2811 | 629000 | 0.0279 | 0.2665 |
457
+ | 4.2845 | 629500 | 0.028 | 0.2677 |
458
+ | 4.2879 | 630000 | 0.0299 | 0.2701 |
459
+ | 4.2913 | 630500 | 0.0284 | 0.2688 |
460
+ | 4.2947 | 631000 | 0.0269 | 0.2683 |
461
+ | 4.2981 | 631500 | 0.0271 | 0.2689 |
462
+ | 4.3015 | 632000 | 0.0288 | 0.2680 |
463
+ | 4.3049 | 632500 | 0.0274 | 0.2674 |
464
+ | 4.3083 | 633000 | 0.0277 | 0.2675 |
465
+ | 4.3117 | 633500 | 0.0282 | 0.2671 |
466
+ | 4.3151 | 634000 | 0.0266 | 0.2658 |
467
+ | 4.3185 | 634500 | 0.0284 | 0.2648 |
468
+ | 4.3219 | 635000 | 0.0283 | 0.2637 |
469
+ | 4.3253 | 635500 | 0.0283 | 0.2647 |
470
+ | 4.3287 | 636000 | 0.0281 | 0.2641 |
471
+ | 4.3321 | 636500 | 0.0275 | 0.2620 |
472
+ | 4.3355 | 637000 | 0.0272 | 0.2630 |
473
+ | 4.3389 | 637500 | 0.0282 | 0.2642 |
474
+ | 4.3423 | 638000 | 0.0294 | 0.2664 |
475
+ | 4.3457 | 638500 | 0.0283 | 0.2639 |
476
+ | 4.3491 | 639000 | 0.0262 | 0.2663 |
477
+ | 4.3525 | 639500 | 0.0275 | 0.2671 |
478
+ | 4.3559 | 640000 | 0.0298 | 0.2669 |
479
+ | 4.3593 | 640500 | 0.0292 | 0.2693 |
480
+ | 4.3627 | 641000 | 0.0283 | 0.2673 |
481
+ | 4.3661 | 641500 | 0.027 | 0.2687 |
482
+ | 4.3695 | 642000 | 0.0278 | 0.2663 |
483
+ | 4.3729 | 642500 | 0.0301 | 0.2652 |
484
+ | 4.3764 | 643000 | 0.0275 | 0.2676 |
485
+ | 4.3798 | 643500 | 0.0292 | 0.2680 |
486
+ | 4.3832 | 644000 | 0.0266 | 0.2680 |
487
+ | 4.3866 | 644500 | 0.0283 | 0.2668 |
488
+ | 4.3900 | 645000 | 0.0303 | 0.2677 |
489
+ | 4.3934 | 645500 | 0.0299 | 0.2701 |
490
+ | 4.3968 | 646000 | 0.0284 | 0.2680 |
491
+ | 4.4002 | 646500 | 0.0272 | 0.2664 |
492
+ | 4.4036 | 647000 | 0.0297 | 0.2662 |
493
+ | 4.4070 | 647500 | 0.029 | 0.2661 |
494
+ | 4.4104 | 648000 | 0.0281 | 0.2678 |
495
+ | 4.4138 | 648500 | 0.0282 | 0.2683 |
496
+ | 4.4172 | 649000 | 0.0278 | 0.2699 |
497
+ | 4.4206 | 649500 | 0.0309 | 0.2684 |
498
+ | 4.4240 | 650000 | 0.0288 | 0.2693 |
499
+ | 4.4274 | 650500 | 0.0307 | 0.2697 |
500
+ | 4.4308 | 651000 | 0.0272 | 0.2722 |
501
+ | 4.4342 | 651500 | 0.0289 | 0.2726 |
502
+ | 4.4376 | 652000 | 0.0288 | 0.2716 |
503
+ | 4.4410 | 652500 | 0.0289 | 0.2729 |
504
+ | 4.4444 | 653000 | 0.0297 | 0.2699 |
505
+ | 4.4478 | 653500 | 0.0286 | 0.2724 |
506
+ | 4.4512 | 654000 | 0.0298 | 0.2702 |
507
+ | 4.4546 | 654500 | 0.0302 | 0.2738 |
508
+ | 4.4580 | 655000 | 0.0292 | 0.2713 |
509
+ | 4.4614 | 655500 | 0.0297 | 0.2712 |
510
+ | 4.4648 | 656000 | 0.0286 | 0.2705 |
511
+ | 4.4682 | 656500 | 0.0285 | 0.2735 |
512
+ | 4.4716 | 657000 | 0.0294 | 0.2733 |
513
+ | 4.4750 | 657500 | 0.0291 | 0.2722 |
514
+ | 4.4784 | 658000 | 0.0283 | 0.2708 |
515
+ | 4.4818 | 658500 | 0.028 | 0.2714 |
516
+ | 4.4853 | 659000 | 0.0298 | 0.2716 |
517
+ | 4.4887 | 659500 | 0.0275 | 0.2721 |
518
+ | 4.4921 | 660000 | 0.0314 | 0.2731 |
519
+ | 4.4955 | 660500 | 0.0292 | 0.2730 |
520
+ | 4.4989 | 661000 | 0.029 | 0.2749 |
521
+ | 4.5023 | 661500 | 0.0305 | 0.2728 |
522
+ | 4.5057 | 662000 | 0.0323 | 0.2709 |
523
+ | 4.5091 | 662500 | 0.0276 | 0.2715 |
524
+ | 4.5125 | 663000 | 0.0294 | 0.2702 |
525
+ | 4.5159 | 663500 | 0.0286 | 0.2694 |
526
+ | 4.5193 | 664000 | 0.0282 | 0.2702 |
527
+ | 4.5227 | 664500 | 0.0287 | 0.2702 |
528
+ | 4.5261 | 665000 | 0.0289 | 0.2682 |
529
+ | 4.5295 | 665500 | 0.0299 | 0.2701 |
530
+ | 4.5329 | 666000 | 0.0301 | 0.2706 |
531
+ | 4.5363 | 666500 | 0.0287 | 0.2719 |
532
+ | 4.5397 | 667000 | 0.0292 | 0.2721 |
533
+ | 4.5431 | 667500 | 0.0284 | 0.2714 |
534
+ | 4.5465 | 668000 | 0.0286 | 0.2696 |
535
+ | 4.5499 | 668500 | 0.0299 | 0.2700 |
536
+ | 4.5533 | 669000 | 0.0282 | 0.2689 |
537
+ | 4.5567 | 669500 | 0.0288 | 0.2715 |
538
+ | 4.5601 | 670000 | 0.0298 | 0.2712 |
539
+ | 4.5635 | 670500 | 0.0302 | 0.2687 |
540
+ | 4.5669 | 671000 | 0.0298 | 0.2709 |
541
+ | 4.5703 | 671500 | 0.0297 | 0.2711 |
542
+ | 4.5737 | 672000 | 0.0297 | 0.2703 |
543
+ | 4.5771 | 672500 | 0.0288 | 0.2685 |
544
+ | 4.5805 | 673000 | 0.0293 | 0.2698 |
545
+ | 4.5839 | 673500 | 0.0293 | 0.2706 |
546
+ | 4.5873 | 674000 | 0.0292 | 0.2688 |
547
+ | 4.5907 | 674500 | 0.0288 | 0.2676 |
548
+ | 4.5941 | 675000 | 0.0294 | 0.2694 |
549
+ | 4.5976 | 675500 | 0.0308 | 0.2697 |
550
+ | 4.6010 | 676000 | 0.0297 | 0.2689 |
551
+ | 4.6044 | 676500 | 0.0287 | 0.2688 |
552
+ | 4.6078 | 677000 | 0.0276 | 0.2677 |
553
+ | 4.6112 | 677500 | 0.0307 | 0.2686 |
554
+ | 4.6146 | 678000 | 0.0301 | 0.2672 |
555
+ | 4.6180 | 678500 | 0.029 | 0.2689 |
556
+ | 4.6214 | 679000 | 0.0306 | 0.2683 |
557
+ | 4.6248 | 679500 | 0.0284 | 0.2689 |
558
+ | 4.6282 | 680000 | 0.0277 | 0.2698 |
559
+ | 4.6316 | 680500 | 0.0291 | 0.2694 |
560
+ | 4.6350 | 681000 | 0.0295 | 0.2660 |
561
+ | 4.6384 | 681500 | 0.0309 | 0.2683 |
562
+ | 4.6418 | 682000 | 0.0278 | 0.2703 |
563
+ | 4.6452 | 682500 | 0.0291 | 0.2690 |
564
+ | 4.6486 | 683000 | 0.0296 | 0.2699 |
565
+ | 4.6520 | 683500 | 0.0307 | 0.2689 |
566
+ | 4.6554 | 684000 | 0.0299 | 0.2679 |
567
+ | 4.6588 | 684500 | 0.03 | 0.2690 |
568
+ | 4.6622 | 685000 | 0.0291 | 0.2682 |
569
+ | 4.6656 | 685500 | 0.0304 | 0.2665 |
570
+ | 4.6690 | 686000 | 0.031 | 0.2657 |
571
+ | 4.6724 | 686500 | 0.03 | 0.2674 |
572
+ | 4.6758 | 687000 | 0.0293 | 0.2696 |
573
+ | 4.6792 | 687500 | 0.0299 | 0.2666 |
574
+ | 4.6826 | 688000 | 0.029 | 0.2668 |
575
+ | 4.6860 | 688500 | 0.0295 | 0.2669 |
576
+ | 4.6894 | 689000 | 0.0288 | 0.2680 |
577
+ | 4.6928 | 689500 | 0.0301 | 0.2674 |
578
+ | 4.6962 | 690000 | 0.03 | 0.2690 |
579
+ | 4.6996 | 690500 | 0.0298 | 0.2678 |
580
+ | 4.7030 | 691000 | 0.03 | 0.2705 |
581
+ | 4.7065 | 691500 | 0.0293 | 0.2692 |
582
+ | 4.7099 | 692000 | 0.0287 | 0.2693 |
583
+ | 4.7133 | 692500 | 0.0304 | 0.2660 |
584
+ | 4.7167 | 693000 | 0.0296 | 0.2662 |
585
+ | 4.7201 | 693500 | 0.0291 | 0.2668 |
586
+ | 4.7235 | 694000 | 0.0308 | 0.2677 |
587
+ | 4.7269 | 694500 | 0.0309 | 0.2668 |
588
+ | 4.7303 | 695000 | 0.0319 | 0.2692 |
589
+ | 4.7337 | 695500 | 0.0297 | 0.2678 |
590
+ | 4.7371 | 696000 | 0.0297 | 0.2672 |
591
+ | 4.7405 | 696500 | 0.0294 | 0.2673 |
592
+ | 4.7439 | 697000 | 0.0293 | 0.2671 |
593
+ | 4.7473 | 697500 | 0.0308 | 0.2687 |
594
+ | 4.7507 | 698000 | 0.0315 | 0.2694 |
595
+ | 4.7541 | 698500 | 0.0286 | 0.2676 |
596
+ | 4.7575 | 699000 | 0.0297 | 0.2687 |
597
+ | 4.7609 | 699500 | 0.0285 | 0.2668 |
598
+ | 4.7643 | 700000 | 0.0282 | 0.2682 |
599
+ | 4.7677 | 700500 | 0.0307 | 0.2667 |
600
+ | 4.7711 | 701000 | 0.0276 | 0.2719 |
601
+ | 4.7745 | 701500 | 0.0297 | 0.2706 |
602
+ | 4.7779 | 702000 | 0.0293 | 0.2691 |
603
+ | 4.7813 | 702500 | 0.029 | 0.2679 |
604
+ | 4.7847 | 703000 | 0.0319 | 0.2678 |
605
+ | 4.7881 | 703500 | 0.0303 | 0.2682 |
606
+ | 4.7915 | 704000 | 0.028 | 0.2688 |
607
+ | 4.7949 | 704500 | 0.031 | 0.2719 |
608
+ | 4.7983 | 705000 | 0.029 | 0.2692 |
609
+ | 4.8017 | 705500 | 0.0313 | 0.2661 |
610
+ | 4.8051 | 706000 | 0.0313 | 0.2685 |
611
+ | 4.8085 | 706500 | 0.0296 | 0.2689 |
612
+ | 4.8119 | 707000 | 0.0309 | 0.2705 |
613
+ | 4.8153 | 707500 | 0.0287 | 0.2691 |
614
+ | 4.8188 | 708000 | 0.031 | 0.2697 |
615
+ | 4.8222 | 708500 | 0.0295 | 0.2683 |
616
+ | 4.8256 | 709000 | 0.0293 | 0.2687 |
617
+ | 4.8290 | 709500 | 0.0316 | 0.2689 |
618
+ | 4.8324 | 710000 | 0.0289 | 0.2691 |
619
+ | 4.8358 | 710500 | 0.0287 | 0.2705 |
620
+ | 4.8392 | 711000 | 0.0292 | 0.2700 |
621
+ | 4.8426 | 711500 | 0.0309 | 0.2682 |
622
+ | 4.8460 | 712000 | 0.0306 | 0.2688 |
623
+ | 4.8494 | 712500 | 0.0304 | 0.2701 |
624
+ | 4.8528 | 713000 | 0.03 | 0.2679 |
625
+ | 4.8562 | 713500 | 0.0293 | 0.2713 |
626
+ | 4.8596 | 714000 | 0.03 | 0.2692 |
627
+ | 4.8630 | 714500 | 0.03 | 0.2700 |
628
+ | 4.8664 | 715000 | 0.0297 | 0.2699 |
629
+ | 4.8698 | 715500 | 0.0282 | 0.2709 |
630
+ | 4.8732 | 716000 | 0.0287 | 0.2715 |
631
+ | 4.8766 | 716500 | 0.0303 | 0.2718 |
632
+ | 4.8800 | 717000 | 0.0304 | 0.2710 |
633
+ | 4.8834 | 717500 | 0.0292 | 0.2720 |
634
+ | 4.8868 | 718000 | 0.0307 | 0.2700 |
635
+ | 4.8902 | 718500 | 0.0304 | 0.2698 |
636
+ | 4.8936 | 719000 | 0.0307 | 0.2681 |
637
+ | 4.8970 | 719500 | 0.0294 | 0.2693 |
638
+ | 4.9004 | 720000 | 0.0315 | 0.2701 |
639
+ | 4.9038 | 720500 | 0.0288 | 0.2702 |
640
+ | 4.9072 | 721000 | 0.0284 | 0.2710 |
641
+ | 4.9106 | 721500 | 0.0309 | 0.2697 |
642
+ | 4.9140 | 722000 | 0.0313 | 0.2698 |
643
+ | 4.9174 | 722500 | 0.0305 | 0.2687 |
644
+ | 4.9208 | 723000 | 0.0306 | 0.2681 |
645
+ | 4.9242 | 723500 | 0.0307 | 0.2702 |
646
+ | 4.9277 | 724000 | 0.0319 | 0.2687 |
647
+ | 4.9311 | 724500 | 0.0285 | 0.2698 |
648
+ | 4.9345 | 725000 | 0.0298 | 0.2697 |
649
+ | 4.9379 | 725500 | 0.0317 | 0.2701 |
650
+ | 4.9413 | 726000 | 0.0316 | 0.2702 |
651
+ | 4.9447 | 726500 | 0.0305 | 0.2691 |
652
+ | 4.9481 | 727000 | 0.0303 | 0.2694 |
653
+ | 4.9515 | 727500 | 0.0302 | 0.2688 |
654
+ | 4.9549 | 728000 | 0.029 | 0.2672 |
655
+ | 4.9583 | 728500 | 0.03 | 0.2690 |
656
+ | 4.9617 | 729000 | 0.0291 | 0.2687 |
657
+ | 4.9651 | 729500 | 0.0301 | 0.2682 |
658
+ | 4.9685 | 730000 | 0.0304 | 0.2680 |
659
+ | 4.9719 | 730500 | 0.0305 | 0.2655 |
660
+ | 4.9753 | 731000 | 0.0285 | 0.2668 |
661
+ | 4.9787 | 731500 | 0.0325 | 0.2672 |
662
+ | 4.9821 | 732000 | 0.0294 | 0.2677 |
663
+ | 4.9855 | 732500 | 0.0308 | 0.2648 |
664
+ | 4.9889 | 733000 | 0.0291 | 0.2672 |
665
+ | 4.9923 | 733500 | 0.0312 | 0.2663 |
666
+ | 4.9957 | 734000 | 0.0305 | 0.2671 |
667
+ | 4.9991 | 734500 | 0.0301 | 0.2677 |
668
+ | 5.0 | 734630 | - | 0.2660 |
669
+ | 5.0025 | 735000 | 0.0214 | 0.2636 |
670
+ | 5.0059 | 735500 | 0.0186 | 0.2625 |
671
+ | 5.0093 | 736000 | 0.0186 | 0.2608 |
672
+ | 5.0127 | 736500 | 0.0189 | 0.2612 |
673
+ | 5.0161 | 737000 | 0.019 | 0.2589 |
674
+ | 5.0195 | 737500 | 0.0185 | 0.2594 |
675
+ | 5.0229 | 738000 | 0.0177 | 0.2604 |
676
+ | 5.0263 | 738500 | 0.0187 | 0.2595 |
677
+ | 5.0297 | 739000 | 0.0185 | 0.2569 |
678
+ | 5.0331 | 739500 | 0.0174 | 0.2569 |
679
+ | 5.0365 | 740000 | 0.0185 | 0.2588 |
680
+ | 5.0400 | 740500 | 0.0186 | 0.2554 |
681
+ | 5.0434 | 741000 | 0.0176 | 0.2574 |
682
+ | 5.0468 | 741500 | 0.0173 | 0.2581 |
683
+ | 5.0502 | 742000 | 0.0182 | 0.2591 |
684
+ | 5.0536 | 742500 | 0.0175 | 0.2585 |
685
+ | 5.0570 | 743000 | 0.0173 | 0.2589 |
686
+ | 5.0604 | 743500 | 0.0175 | 0.2589 |
687
+ | 5.0638 | 744000 | 0.0184 | 0.2612 |
688
+ | 5.0672 | 744500 | 0.019 | 0.2595 |
689
+ | 5.0706 | 745000 | 0.0183 | 0.2588 |
690
+ | 5.0740 | 745500 | 0.0187 | 0.2553 |
691
+ | 5.0774 | 746000 | 0.0183 | 0.2553 |
692
+ | 5.0808 | 746500 | 0.0178 | 0.2560 |
693
+ | 5.0842 | 747000 | 0.0194 | 0.2566 |
694
+ | 5.0876 | 747500 | 0.0187 | 0.2572 |
695
+ | 5.0910 | 748000 | 0.0188 | 0.2534 |
696
+ | 5.0944 | 748500 | 0.0195 | 0.2556 |
697
+ | 5.0978 | 749000 | 0.0187 | 0.2579 |
698
+ | 5.1012 | 749500 | 0.0182 | 0.2558 |
699
+ | 5.1046 | 750000 | 0.0188 | 0.2554 |
700
+ | 5.1080 | 750500 | 0.019 | 0.2566 |
701
+ | 5.1114 | 751000 | 0.0182 | 0.2538 |
702
+ | 5.1148 | 751500 | 0.0185 | 0.2537 |
703
+ | 5.1182 | 752000 | 0.0183 | 0.2559 |
704
+ | 5.1216 | 752500 | 0.0185 | 0.2567 |
705
+ | 5.1250 | 753000 | 0.0186 | 0.2551 |
706
+ | 5.1284 | 753500 | 0.0186 | 0.2574 |
707
+ | 5.1318 | 754000 | 0.0187 | 0.2559 |
708
+ | 5.1352 | 754500 | 0.019 | 0.2566 |
709
+ | 5.1386 | 755000 | 0.0179 | 0.2561 |
710
+ | 5.1420 | 755500 | 0.0186 | 0.2556 |
711
+ | 5.1454 | 756000 | 0.0186 | 0.2545 |
712
+ | 5.1489 | 756500 | 0.0198 | 0.2526 |
713
+ | 5.1523 | 757000 | 0.0195 | 0.2556 |
714
+ | 5.1557 | 757500 | 0.0189 | 0.2519 |
715
+ | 5.1591 | 758000 | 0.0186 | 0.2547 |
716
+ | 5.1625 | 758500 | 0.0186 | 0.2536 |
717
+ | 5.1659 | 759000 | 0.0186 | 0.2548 |
718
+ | 5.1693 | 759500 | 0.0198 | 0.2537 |
719
+ | 5.1727 | 760000 | 0.0179 | 0.2557 |
720
+ | 5.1761 | 760500 | 0.0183 | 0.2540 |
721
+ | 5.1795 | 761000 | 0.0192 | 0.2558 |
722
+ | 5.1829 | 761500 | 0.0199 | 0.2575 |
723
+ | 5.1863 | 762000 | 0.0197 | 0.2555 |
724
+ | 5.1897 | 762500 | 0.0187 | 0.2579 |
725
+ | 5.1931 | 763000 | 0.0191 | 0.2577 |
726
+ | 5.1965 | 763500 | 0.0192 | 0.2572 |
727
+ | 5.1999 | 764000 | 0.0187 | 0.2565 |
728
+ | 5.2033 | 764500 | 0.018 | 0.2565 |
729
+ | 5.2067 | 765000 | 0.0188 | 0.2552 |
730
+ | 5.2101 | 765500 | 0.0193 | 0.2568 |
731
+ | 5.2135 | 766000 | 0.0187 | 0.2574 |
732
+ | 5.2169 | 766500 | 0.0181 | 0.2577 |
733
+ | 5.2203 | 767000 | 0.0197 | 0.2595 |
734
+ | 5.2237 | 767500 | 0.019 | 0.2599 |
735
+ | 5.2271 | 768000 | 0.0196 | 0.2587 |
736
+ | 5.2305 | 768500 | 0.0196 | 0.2584 |
737
+ | 5.2339 | 769000 | 0.0186 | 0.2570 |
738
+ | 5.2373 | 769500 | 0.0193 | 0.2593 |
739
+ | 5.2407 | 770000 | 0.0198 | 0.2595 |
740
+ | 5.2441 | 770500 | 0.019 | 0.2561 |
741
+ | 5.2475 | 771000 | 0.0198 | 0.2584 |
742
+ | 5.2509 | 771500 | 0.0195 | 0.2584 |
743
+ | 5.2543 | 772000 | 0.0201 | 0.2579 |
744
+ | 5.2577 | 772500 | 0.02 | 0.2582 |
745
+ | 5.2612 | 773000 | 0.0194 | 0.2576 |
746
+ | 5.2646 | 773500 | 0.0194 | 0.2585 |
747
+ | 5.2680 | 774000 | 0.0192 | 0.2574 |
748
+ | 5.2714 | 774500 | 0.019 | 0.2559 |
749
+ | 5.2748 | 775000 | 0.0197 | 0.2556 |
750
+ | 5.2782 | 775500 | 0.0191 | 0.2553 |
751
+ | 5.2816 | 776000 | 0.0205 | 0.2577 |
752
+ | 5.2850 | 776500 | 0.0195 | 0.2572 |
753
+ | 5.2884 | 777000 | 0.0207 | 0.2566 |
754
+ | 5.2918 | 777500 | 0.0206 | 0.2571 |
755
+ | 5.2952 | 778000 | 0.0202 | 0.2580 |
756
+ | 5.2986 | 778500 | 0.0192 | 0.2570 |
757
+ | 5.3020 | 779000 | 0.0191 | 0.2558 |
758
+ | 5.3054 | 779500 | 0.0213 | 0.2570 |
759
+ | 5.3088 | 780000 | 0.0193 | 0.2578 |
760
+ | 5.3122 | 780500 | 0.0193 | 0.2567 |
761
+ | 5.3156 | 781000 | 0.0212 | 0.2579 |
762
+ | 5.3190 | 781500 | 0.0197 | 0.2563 |
763
+ | 5.3224 | 782000 | 0.0204 | 0.2592 |
764
+ | 5.3258 | 782500 | 0.0207 | 0.2596 |
765
+ | 5.3292 | 783000 | 0.0197 | 0.2570 |
766
+ | 5.3326 | 783500 | 0.0201 | 0.2590 |
767
+ | 5.3360 | 784000 | 0.0204 | 0.2570 |
768
+ | 5.3394 | 784500 | 0.0198 | 0.2586 |
769
+ | 5.3428 | 785000 | 0.0193 | 0.2597 |
770
+ | 5.3462 | 785500 | 0.0197 | 0.2594 |
771
+ | 5.3496 | 786000 | 0.0205 | 0.2595 |
772
+ | 5.3530 | 786500 | 0.0194 | 0.2603 |
773
+ | 5.3564 | 787000 | 0.0205 | 0.2593 |
774
+ | 5.3598 | 787500 | 0.0205 | 0.2586 |
775
+ | 5.3632 | 788000 | 0.0203 | 0.2583 |
776
+ | 5.3666 | 788500 | 0.0194 | 0.2610 |
777
+ | 5.3701 | 789000 | 0.0206 | 0.2626 |
778
+ | 5.3735 | 789500 | 0.0198 | 0.2602 |
779
+ | 5.3769 | 790000 | 0.0208 | 0.2597 |
780
+ | 5.3803 | 790500 | 0.0201 | 0.2578 |
781
+ | 5.3837 | 791000 | 0.0205 | 0.2578 |
782
+ | 5.3871 | 791500 | 0.0197 | 0.2569 |
783
+ | 5.3905 | 792000 | 0.0204 | 0.2546 |
784
+ | 5.3939 | 792500 | 0.02 | 0.2565 |
785
+ | 5.3973 | 793000 | 0.0202 | 0.2574 |
786
+ | 5.4007 | 793500 | 0.0198 | 0.2572 |
787
+ | 5.4041 | 794000 | 0.0194 | 0.2593 |
788
+ | 5.4075 | 794500 | 0.0215 | 0.2584 |
789
+ | 5.4109 | 795000 | 0.0207 | 0.2590 |
790
+ | 5.4143 | 795500 | 0.021 | 0.2589 |
791
+ | 5.4177 | 796000 | 0.0218 | 0.2589 |
792
+ | 5.4211 | 796500 | 0.0211 | 0.2595 |
793
+ | 5.4245 | 797000 | 0.0203 | 0.2584 |
794
+ | 5.4279 | 797500 | 0.0204 | 0.2596 |
795
+ | 5.4313 | 798000 | 0.0198 | 0.2594 |
796
+ | 5.4347 | 798500 | 0.0208 | 0.2596 |
797
+ | 5.4381 | 799000 | 0.02 | 0.2590 |
798
+ | 5.4415 | 799500 | 0.0218 | 0.2583 |
799
+ | 5.4449 | 800000 | 0.0208 | 0.2578 |
800
+ | 5.4483 | 800500 | 0.0198 | 0.2582 |
801
+ | 5.4517 | 801000 | 0.0209 | 0.2583 |
802
+ | 5.4551 | 801500 | 0.02 | 0.2596 |
803
+ | 5.4585 | 802000 | 0.0206 | 0.2591 |
804
+ | 5.4619 | 802500 | 0.0208 | 0.2610 |
805
+ | 5.4653 | 803000 | 0.0219 | 0.2603 |
806
+ | 5.4687 | 803500 | 0.0208 | 0.2598 |
807
+ | 5.4721 | 804000 | 0.0208 | 0.2582 |
808
+ | 5.4755 | 804500 | 0.0224 | 0.2582 |
809
+ | 5.4789 | 805000 | 0.0232 | 0.2564 |
810
+ | 5.4824 | 805500 | 0.0204 | 0.2590 |
811
+ | 5.4858 | 806000 | 0.0218 | 0.2598 |
812
+ | 5.4892 | 806500 | 0.0202 | 0.2612 |
813
+ | 5.4926 | 807000 | 0.0204 | 0.2615 |
814
+ | 5.4960 | 807500 | 0.0208 | 0.2608 |
815
+ | 5.4994 | 808000 | 0.0199 | 0.2604 |
816
+ | 5.5028 | 808500 | 0.0219 | 0.2587 |
817
+ | 5.5062 | 809000 | 0.0197 | 0.2613 |
818
+ | 5.5096 | 809500 | 0.0209 | 0.2606 |
819
+ | 5.5130 | 810000 | 0.0211 | 0.2615 |
820
+ | 5.5164 | 810500 | 0.021 | 0.2613 |
821
+ | 5.5198 | 811000 | 0.0205 | 0.2594 |
822
+ | 5.5232 | 811500 | 0.0208 | 0.2581 |
823
+ | 5.5266 | 812000 | 0.0206 | 0.2577 |
824
+ | 5.5300 | 812500 | 0.0202 | 0.2574 |
825
+ | 5.5334 | 813000 | 0.021 | 0.2592 |
826
+ | 5.5368 | 813500 | 0.0202 | 0.2574 |
827
+ | 5.5402 | 814000 | 0.0211 | 0.2573 |
828
+ | 5.5436 | 814500 | 0.02 | 0.2581 |
829
+ | 5.5470 | 815000 | 0.0207 | 0.2598 |
830
+ | 5.5504 | 815500 | 0.0217 | 0.2603 |
831
+ | 5.5538 | 816000 | 0.0222 | 0.2594 |
832
+ | 5.5572 | 816500 | 0.02 | 0.2595 |
833
+ | 5.5606 | 817000 | 0.0208 | 0.2605 |
834
+ | 5.5640 | 817500 | 0.0221 | 0.2606 |
835
+ | 5.5674 | 818000 | 0.0211 | 0.2586 |
836
+ | 5.5708 | 818500 | 0.0215 | 0.2592 |
837
+ | 5.5742 | 819000 | 0.0216 | 0.2602 |
838
+ | 5.5776 | 819500 | 0.0221 | 0.2600 |
839
+ | 5.5810 | 820000 | 0.0207 | 0.2606 |
840
+ | 5.5844 | 820500 | 0.0202 | 0.2598 |
841
+ | 5.5878 | 821000 | 0.0205 | 0.2589 |
842
+ | 5.5913 | 821500 | 0.0221 | 0.2601 |
843
+ | 5.5947 | 822000 | 0.0219 | 0.2596 |
844
+ | 5.5981 | 822500 | 0.0204 | 0.2609 |
845
+ | 5.6015 | 823000 | 0.022 | 0.2585 |
846
+ | 5.6049 | 823500 | 0.0206 | 0.2580 |
847
+ | 5.6083 | 824000 | 0.0201 | 0.2604 |
848
+ | 5.6117 | 824500 | 0.0213 | 0.2600 |
849
+ | 5.6151 | 825000 | 0.0208 | 0.2578 |
850
+ | 5.6185 | 825500 | 0.0213 | 0.2587 |
851
+ | 5.6219 | 826000 | 0.0214 | 0.2587 |
852
+ | 5.6253 | 826500 | 0.022 | 0.2599 |
853
+ | 5.6287 | 827000 | 0.0211 | 0.2590 |
854
+ | 5.6321 | 827500 | 0.0207 | 0.2598 |
855
+ | 5.6355 | 828000 | 0.021 | 0.2607 |
856
+ | 5.6389 | 828500 | 0.0209 | 0.2612 |
857
+ | 5.6423 | 829000 | 0.0217 | 0.2611 |
858
+ | 5.6457 | 829500 | 0.0209 | 0.2600 |
859
+ | 5.6491 | 830000 | 0.0219 | 0.2610 |
860
+ | 5.6525 | 830500 | 0.0224 | 0.2611 |
861
+ | 5.6559 | 831000 | 0.0214 | 0.2634 |
862
+ | 5.6593 | 831500 | 0.022 | 0.2597 |
863
+ | 5.6627 | 832000 | 0.0209 | 0.2597 |
864
+ | 5.6661 | 832500 | 0.0219 | 0.2585 |
865
+ | 5.6695 | 833000 | 0.0216 | 0.2581 |
866
+ | 5.6729 | 833500 | 0.0229 | 0.2605 |
867
+ | 5.6763 | 834000 | 0.0218 | 0.2578 |
868
+ | 5.6797 | 834500 | 0.0223 | 0.2611 |
869
+ | 5.6831 | 835000 | 0.0212 | 0.2614 |
870
+ | 5.6865 | 835500 | 0.021 | 0.2592 |
871
+ | 5.6899 | 836000 | 0.0212 | 0.2601 |
872
+ | 5.6933 | 836500 | 0.0228 | 0.2612 |
873
+ | 5.6967 | 837000 | 0.0217 | 0.2617 |
874
+ | 5.7001 | 837500 | 0.0228 | 0.2604 |
875
+ | 5.7036 | 838000 | 0.0215 | 0.2599 |
876
+ | 5.7070 | 838500 | 0.0212 | 0.2598 |
877
+ | 5.7104 | 839000 | 0.0224 | 0.2592 |
878
+ | 5.7138 | 839500 | 0.0213 | 0.2562 |
879
+ | 5.7172 | 840000 | 0.0211 | 0.2598 |
880
+ | 5.7206 | 840500 | 0.0213 | 0.2604 |
881
+ | 5.7240 | 841000 | 0.0221 | 0.2601 |
882
+ | 5.7274 | 841500 | 0.0227 | 0.2610 |
883
+ | 5.7308 | 842000 | 0.0214 | 0.2612 |
884
+ | 5.7342 | 842500 | 0.0212 | 0.2619 |
885
+ | 5.7376 | 843000 | 0.0221 | 0.2594 |
886
+ | 5.7410 | 843500 | 0.0212 | 0.2616 |
887
+ | 5.7444 | 844000 | 0.0221 | 0.2618 |
888
+ | 5.7478 | 844500 | 0.021 | 0.2623 |
889
+ | 5.7512 | 845000 | 0.0222 | 0.2597 |
890
+ | 5.7546 | 845500 | 0.0223 | 0.2601 |
891
+ | 5.7580 | 846000 | 0.0214 | 0.2599 |
892
+ | 5.7614 | 846500 | 0.0222 | 0.2601 |
893
+ | 5.7648 | 847000 | 0.0221 | 0.2593 |
894
+ | 5.7682 | 847500 | 0.0222 | 0.2596 |
895
+ | 5.7716 | 848000 | 0.0229 | 0.2586 |
896
+ | 5.7750 | 848500 | 0.0207 | 0.2612 |
897
+ | 5.7784 | 849000 | 0.0216 | 0.2612 |
898
+ | 5.7818 | 849500 | 0.0217 | 0.2603 |
899
+ | 5.7852 | 850000 | 0.0208 | 0.2606 |
900
+ | 5.7886 | 850500 | 0.0221 | 0.2609 |
901
+ | 5.7920 | 851000 | 0.0209 | 0.2607 |
902
+ | 5.7954 | 851500 | 0.0216 | 0.2620 |
903
+ | 5.7988 | 852000 | 0.0224 | 0.2597 |
904
+ | 5.8022 | 852500 | 0.0227 | 0.2614 |
905
+ | 5.8056 | 853000 | 0.0232 | 0.2605 |
906
+ | 5.8090 | 853500 | 0.0216 | 0.2589 |
907
+ | 5.8124 | 854000 | 0.0225 | 0.2594 |
908
+ | 5.8159 | 854500 | 0.0221 | 0.2600 |
909
+ | 5.8193 | 855000 | 0.0222 | 0.2601 |
910
+ | 5.8227 | 855500 | 0.0215 | 0.2594 |
911
+ | 5.8261 | 856000 | 0.0223 | 0.2597 |
912
+ | 5.8295 | 856500 | 0.022 | 0.2583 |
913
+ | 5.8329 | 857000 | 0.0218 | 0.2615 |
914
+ | 5.8363 | 857500 | 0.0221 | 0.2605 |
915
+ | 5.8397 | 858000 | 0.0216 | 0.2612 |
916
+
917
+ </details>
918
+
919
+ ### Framework Versions
920
+ - Python: 3.9.25
921
+ - Sentence Transformers: 5.1.2
922
+ - Transformers: 4.57.6
923
+ - PyTorch: 2.6.0+cu118
924
+ - Accelerate: 1.10.1
925
+ - Datasets: 4.5.0
926
+ - Tokenizers: 0.22.2
927
+
928
+ ## Citation
929
+
930
+ ### BibTeX
931
+
932
+ #### Sentence Transformers
933
+ ```bibtex
934
+ @inproceedings{reimers-2019-sentence-bert,
935
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
936
+ author = "Reimers, Nils and Gurevych, Iryna",
937
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
938
+ month = "11",
939
+ year = "2019",
940
+ publisher = "Association for Computational Linguistics",
941
+ url = "https://arxiv.org/abs/1908.10084",
942
+ }
943
+ ```
944
+
945
+ <!--
946
+ ## Glossary
947
+
948
+ *Clearly define terms in order to be accessible across audiences.*
949
+ -->
950
+
951
+ <!--
952
+ ## Model Card Authors
953
+
954
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
955
+ -->
956
+
957
+ <!--
958
+ ## Model Card Contact
959
+
960
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
961
+ -->
checkpoints/checkpoint-858000/config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "classifier_activation": "silu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 0,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 2,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
+ "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
+ "mlp_bias": false,
31
+ "mlp_dropout": 0.0,
32
+ "model_type": "modernbert",
33
+ "norm_bias": false,
34
+ "norm_eps": 1e-05,
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 22,
37
+ "pad_token_id": 1,
38
+ "position_embedding_type": "absolute",
39
+ "repad_logits_with_grad": false,
40
+ "sep_token_id": 2,
41
+ "sparse_pred_ignore_index": -100,
42
+ "sparse_prediction": false,
43
+ "transformers_version": "4.57.6",
44
+ "vocab_size": 51200
45
+ }
checkpoints/checkpoint-858000/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.2",
5
+ "transformers": "4.57.6",
6
+ "pytorch": "2.6.0+cu118"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
checkpoints/checkpoint-858000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41122b0634dddd2514be12e56c883b94a53f4601e650988ec8c3e2b28b1b78b4
3
+ size 598626040
checkpoints/checkpoint-858000/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoints/checkpoint-858000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c726896f88a4e772f08e71403b03b105f89670bd89ee85d8789d9473caca587
3
+ size 1197335098
checkpoints/checkpoint-858000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b4ee9583cb966aef2cd03a5d2c02aabbbabbf352f9bfd7eb12e64cf720e3630
3
+ size 13990
checkpoints/checkpoint-858000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a941ba5895da270d9317c9315dc14fc625f3d0e81067ed936891687e6cd67daf
3
+ size 1064
checkpoints/checkpoint-858000/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
checkpoints/checkpoint-858000/special_tokens_map.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|translation|>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "mask_token": {
20
+ "content": "<mask>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": {
27
+ "content": "<pad>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "unk_token": {
34
+ "content": "<unk>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ }
40
+ }
checkpoints/checkpoint-858000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-858000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8dc3e139a6f2c6e1781996aabfef34c32241dcff263dbc66cf69b4760aeee9
3
+ size 1074422
checkpoints/checkpoint-858000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-858000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-858000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d517e3e36acb7d029596418aa57488e7475a57c419a9027ad114a9944a373224
3
+ size 5752
checkpoints/eval/similarity_evaluation_sts_eval_results.csv CHANGED
@@ -1654,3 +1654,68 @@ epoch,steps,cosine_pearson,cosine_spearman
1654
  5.615071532608252,825000,0.42846062570206433,0.2577734206138443
1655
  5.618474606264378,825500,0.42918582923473,0.25867156492938675
1656
  5.621877679920504,826000,0.4312627860850702,0.25874991892197813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1654
  5.615071532608252,825000,0.42846062570206433,0.2577734206138443
1655
  5.618474606264378,825500,0.42918582923473,0.25867156492938675
1656
  5.621877679920504,826000,0.4312627860850702,0.25874991892197813
1657
+ 5.6252807535766305,826500,0.4322375505318786,0.2599445032237904
1658
+ 5.628683827232757,827000,0.4312322540322735,0.259047614098999
1659
+ 5.632086900888883,827500,0.4314428236285231,0.2597500405175389
1660
+ 5.635489974545009,828000,0.4304326223212685,0.2606560414831987
1661
+ 5.638893048201135,828500,0.43189705686819624,0.26115613502974583
1662
+ 5.642296121857261,829000,0.43274313841006157,0.26107421544751747
1663
+ 5.645699195513387,829500,0.4301816270542287,0.26004104118795357
1664
+ 5.649102269169514,830000,0.43268026210049554,0.2609632573040913
1665
+ 5.65250534282564,830500,0.4298335525167642,0.2611476790219
1666
+ 5.655908416481767,831000,0.4355529784848795,0.2634255764557948
1667
+ 5.659311490137893,831500,0.42908588819002086,0.259663237313531
1668
+ 5.662714563794019,832000,0.4308556547724387,0.25966148176231774
1669
+ 5.666117637450145,832500,0.4292756347233242,0.25847911195059414
1670
+ 5.669520711106271,833000,0.4288729091137465,0.2581210686855319
1671
+ 5.672923784762397,833500,0.43453520373188576,0.26053213502771705
1672
+ 5.676326858418523,834000,0.42773438472521486,0.2578494145405628
1673
+ 5.6797299320746495,834500,0.4314289568423713,0.2610561512339484
1674
+ 5.683133005730776,835000,0.4356219372943762,0.26139091545784615
1675
+ 5.686536079386903,835500,0.43115058843127163,0.2592353372047813
1676
+ 5.689939153043029,836000,0.433154073384719,0.26013152242210263
1677
+ 5.693342226699155,836500,0.4341812211266761,0.2611790412640903
1678
+ 5.696745300355281,837000,0.4366870055313762,0.2616949882489352
1679
+ 5.700148374011407,837500,0.4313982732796759,0.26042213435326955
1680
+ 5.703551447667533,838000,0.4310997915539135,0.25989218467418024
1681
+ 5.706954521323659,838500,0.4340528675056105,0.25976711571142025
1682
+ 5.710357594979786,839000,0.4324216816078344,0.2592109086457857
1683
+ 5.713760668635912,839500,0.42852812110045707,0.25619828697251046
1684
+ 5.717163742292038,840000,0.4343968216091597,0.25981057640600425
1685
+ 5.720566815948164,840500,0.4351199149490327,0.2603967823487855
1686
+ 5.723969889604291,841000,0.43677624116805636,0.26014918959736466
1687
+ 5.727372963260417,841500,0.43711610948627433,0.2610466550872624
1688
+ 5.730776036916543,842000,0.43700052347131757,0.26117776039539403
1689
+ 5.734179110572669,842500,0.43737615946096625,0.2618618125092355
1690
+ 5.7375821842287955,843000,0.4324289172414595,0.2593978486778603
1691
+ 5.740985257884922,843500,0.4376029840251314,0.26157317951809017
1692
+ 5.744388331541048,844000,0.4348604630648318,0.2617528097305283
1693
+ 5.747791405197174,844500,0.43650232871345895,0.26225123288937663
1694
+ 5.7511944788533,845000,0.43286410280857435,0.25966359160030633
1695
+ 5.754597552509426,845500,0.43351271889919385,0.2601473566547892
1696
+ 5.758000626165552,846000,0.4371670506583656,0.2598806306007943
1697
+ 5.761403699821679,846500,0.43609249220074475,0.2601301585875556
1698
+ 5.7648067734778055,847000,0.43444436679334913,0.2592638462506676
1699
+ 5.768209847133932,847500,0.4322604746602808,0.25961040055626333
1700
+ 5.771612920790058,848000,0.4295992117255118,0.2585984212497361
1701
+ 5.775015994446184,848500,0.43558665853191075,0.26120061517712845
1702
+ 5.77841906810231,849000,0.4347235882589332,0.26115250722513106
1703
+ 5.781822141758436,849500,0.43407474695277626,0.2602731212701283
1704
+ 5.785225215414562,850000,0.433602182183516,0.2606036830159628
1705
+ 5.788628289070688,850500,0.4342658504858041,0.26087675523650905
1706
+ 5.7920313627268145,851000,0.43458953413553036,0.26072343206342596
1707
+ 5.795434436382941,851500,0.43678708470854294,0.2620441873046349
1708
+ 5.798837510039068,852000,0.4324098431160557,0.2597299148911367
1709
+ 5.802240583695194,852500,0.4334405925258195,0.26142312903569775
1710
+ 5.80564365735132,853000,0.4331208043384407,0.2605012062309546
1711
+ 5.809046731007446,853500,0.4320394344741354,0.25887311407772856
1712
+ 5.812449804663572,854000,0.4326245287479479,0.25941315202761056
1713
+ 5.815852878319698,854500,0.43478034719268893,0.26003746147994267
1714
+ 5.8192559519758245,855000,0.43358202586305294,0.26006441253003737
1715
+ 5.822659025631951,855500,0.4344364467727841,0.25942615245090644
1716
+ 5.826062099288077,856000,0.43442931591911665,0.2596907649612308
1717
+ 5.829465172944203,856500,0.43187637019582614,0.2582526806019195
1718
+ 5.832868246600329,857000,0.43681572237432503,0.26154343151201004
1719
+ 5.836271320256456,857500,0.4362315565640548,0.2605489889638962
1720
+ 5.839674393912582,858000,0.43567772480097167,0.2612476203839023
1721
+ 5.843077467568708,858500,0.4346035877515352,0.2602703501214779
checkpoints/runs/Mar24_10-41-10_debianerickserver/events.out.tfevents.1774359676.debianerickserver.23411.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8da5397afa5c2393f9e717b1ea16cb1bcf01b48def74cdfd3141b9e73a1fb9
3
- size 285911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:603fcaab1ea810eb09abe89cf7c65f766e3a74f73ea0f24772440b7acd5f679a
3
+ size 323546