Upload folder using huggingface_hub
Browse files
- 1_Pooling/config.json +10 -0
- README.md +566 -0
- checkpoint-1500/1_Pooling/config.json +10 -0
- checkpoint-1500/README.md +558 -0
- checkpoint-1500/config.json +23 -0
- checkpoint-1500/config_sentence_transformers.json +14 -0
- checkpoint-1500/model.safetensors +3 -0
- checkpoint-1500/modules.json +20 -0
- checkpoint-1500/optimizer.pt +3 -0
- checkpoint-1500/rng_state.pth +3 -0
- checkpoint-1500/scaler.pt +3 -0
- checkpoint-1500/scheduler.pt +3 -0
- checkpoint-1500/sentence_bert_config.json +4 -0
- checkpoint-1500/special_tokens_map.json +51 -0
- checkpoint-1500/tokenizer.json +0 -0
- checkpoint-1500/tokenizer_config.json +73 -0
- checkpoint-1500/trainer_state.json +215 -0
- checkpoint-1500/training_args.bin +3 -0
- checkpoint-1500/vocab.txt +0 -0
- checkpoint-2206/1_Pooling/config.json +10 -0
- checkpoint-2206/README.md +565 -0
- checkpoint-2206/config.json +23 -0
- checkpoint-2206/config_sentence_transformers.json +14 -0
- checkpoint-2206/model.safetensors +3 -0
- checkpoint-2206/modules.json +20 -0
- checkpoint-2206/optimizer.pt +3 -0
- checkpoint-2206/rng_state.pth +3 -0
- checkpoint-2206/scaler.pt +3 -0
- checkpoint-2206/scheduler.pt +3 -0
- checkpoint-2206/sentence_bert_config.json +4 -0
- checkpoint-2206/special_tokens_map.json +51 -0
- checkpoint-2206/tokenizer.json +0 -0
- checkpoint-2206/tokenizer_config.json +73 -0
- checkpoint-2206/trainer_state.json +287 -0
- checkpoint-2206/training_args.bin +3 -0
- checkpoint-2206/vocab.txt +0 -0
- config.json +23 -0
- config_sentence_transformers.json +14 -0
- eval/Information-Retrieval_evaluation_enarm-ir_results.csv +7 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +73 -0
- training_info.json +27 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:35280
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: sentence-transformers/all-mpnet-base-v2
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: ¿Cuál es la utilidad de la valoración ecográfica del cérvix?
|
| 13 |
+
sentences:
|
| 14 |
+
- Papel del rituximab en pacientes con linfoma no Hodgkin En algunos pacientes con
|
| 15 |
+
linfoma No Hodgkin existe sobreexpresión del antígeno CD20. En estos pacientes
|
| 16 |
+
se deberá utilizar un anticuerpo anti CD20 (rituximab), el cual ha demostrado
|
| 17 |
+
tener toxicidad limitada y mayor supervivencia.
|
| 18 |
+
- '¿Cuál es la característica del material de sutura absorbible de origen sintético?
|
| 19 |
+
En general se absorbe más despacio que el catgut y se hace por hidrólisis lenta
|
| 20 |
+
y fagocitosis, es prácticamente inerte pero sí produce reacción tisular con invasión
|
| 21 |
+
de macrófagos. En términos generales pierde 50% de su fuerza de tensión en menos
|
| 22 |
+
de 25 días. El ácido poliglicólico pierde su fuerza de tensión en 15 días y permanece
|
| 23 |
+
en los tejidos durante alrededor de 80 días; la poliglactina 910 permanece en
|
| 24 |
+
los tejidos durante '
|
| 25 |
+
- ¿Cuál es la utilidad de la valoración ecográfica del cérvix? No está indicada
|
| 26 |
+
en todas las mujeres embarazadas sino con mayor precisión sólo en aquellas con
|
| 27 |
+
mayor riesgo de parto pretérmino o bien aquellas con sospecha de cérvix acortado.
|
| 28 |
+
La medición de la longitud cervical, si es superior a 30 mm, tiene un alto valor
|
| 29 |
+
predictivo negativo. La presencia de fenómeno de embudo o insinuación de membranas
|
| 30 |
+
en el canal cervical se asocia con elevada probabilidad de parto pretérmino.
|
| 31 |
+
- source_sentence: Inmunodeficiencia congénita más común, en la cual los pacientes
|
| 32 |
+
presentan infecciones sinopulmonares y gastrointestinales
|
| 33 |
+
sentences:
|
| 34 |
+
- Causas de miocardiopatía dilatada Alcoholismo, beri beri, cocaína, infección por
|
| 35 |
+
Coxsackie B, enfermedad de Chagas, doxorrubicina, embarazo
|
| 36 |
+
- Mecanismo de acción de los barbitúricos Facilitan la acción de los canales GABA,
|
| 37 |
+
al aumentar la duración de la apertura del canal de cloro, con lo cual disminuye
|
| 38 |
+
la despolarización
|
| 39 |
+
- Deficiencia selectiva de IgA Inmunodeficiencia congénita más común, en la cual
|
| 40 |
+
los pacientes presentan infecciones sinopulmonares y gastrointestinales. El tratamiento
|
| 41 |
+
es con antibióticos y la administración de inmunoglobulinas se contraindica.
|
| 42 |
+
- source_sentence: Bloqueo de los receptores a serotonina 5-HT2, alfa adrenérgicos,
|
| 43 |
+
H1 histaminérgicos, y dopaminérgicos
|
| 44 |
+
sentences:
|
| 45 |
+
- Etiología de la anemia sideroblástica • Causas hereditarias. Éstas ocurren ya
|
| 46 |
+
sea por un defecto en la sintasa de ácido aminolevulínico o una anormalidad en
|
| 47 |
+
el metabolismo de la vitamina B6. • Causas adquiridas. Ocurren por fármacos como
|
| 48 |
+
el cloranfenicol, la isoniazida y el alcohol. La intoxicación por hierro puede
|
| 49 |
+
ocasionar anemia sideroblástica, así como los síndromes mielodisplásicos y la
|
| 50 |
+
anemia refractaria (estos dos últimos puede progresar a leucemia mieloide aguda
|
| 51 |
+
en un pequeño porcentaje de paciente
|
| 52 |
+
- Tipo de fractura craneal más frecuente ocasionada durante el trabajo de parto
|
| 53 |
+
Fractura craneal lineal
|
| 54 |
+
- Mecanismo de acción de los antipsicóticos atípicos Bloqueo de los receptores a
|
| 55 |
+
serotonina 5-HT2, alfa adrenérgicos, H1 histaminérgicos, y dopaminérgicos. Incluyen
|
| 56 |
+
clozapina, olanzapina, risperidona, aripiprazol, quetiapina, ziprasidona.
|
| 57 |
+
- source_sentence: 'Pediatría: ¿Cómo son las crisis convulsivas febriles?'
|
| 58 |
+
sentences:
|
| 59 |
+
- Utilidad del cultivo de citomegalovirus de muestra de orina en caso de sospecha
|
| 60 |
+
de infección congénita Si es negativo se excluye la posibilidad de infección congénita
|
| 61 |
+
por citomegalovirus
|
| 62 |
+
- ¿Qué es la displasia broncopulmonar (DBP)? Es una enfermedad pulmonar crónica
|
| 63 |
+
caracterizada por dependencia de oxígeno por un periodo mayor a 28 días. Se produce
|
| 64 |
+
como consecuencia de la exposición del pulmón inmaduro del prematuro a noxas ambientales
|
| 65 |
+
( oxígeno, infecciones, barotrauma, volutrauma).
|
| 66 |
+
- ¿Cómo son las crisis convulsivas febriles? Pueden ser simples (90% de los casos)
|
| 67 |
+
con convulsiones generalizadas mayormente clónicas, de duración menor a 15 minutos,
|
| 68 |
+
simétricas y ocurren en pacientes dentro del grupo de edad de riesgo, sin recurrencia
|
| 69 |
+
en 24 h. También pueden ser complejas o atípicas (menos de 10%), las cuales son
|
| 70 |
+
focales o generalizadas, de duración mayor a quince minutos, múltiples o recurrentes
|
| 71 |
+
en 24 h.
|
| 72 |
+
- source_sentence: Complicaciones de la anorexia nerviosa
|
| 73 |
+
sentences:
|
| 74 |
+
- Complicaciones de la anorexia nerviosa • Prolapso de la válvula mitral. • Arritmias. •
|
| 75 |
+
Hipotensión. • Bradicardia. • Amenorrea (ausencia de menstruación en tres ciclos
|
| 76 |
+
consecutivos). • Nefrolitiasis. • Osteoporosis. • Fracturas múltiples por estrés. •
|
| 77 |
+
Pancitopenia. • Anomalías tiroideas. • Mortalidad por complicaciones o suicidio
|
| 78 |
+
>10%.
|
| 79 |
+
- Hallazgo clásico del Pityrosporum orbiculare (pitiriasis versicolor) en la preparación
|
| 80 |
+
de KOH Espagueti a la boloñesa (hifas + esporas)
|
| 81 |
+
- Indicación de cateterismo en pacientes con angina inestable o infarto miocárdico
|
| 82 |
+
sin elevación del segmento ST Pacientes con puntal TIMI ≥ 3 Pacientes con dolor
|
| 83 |
+
torácico refractario a tratamiento Pacientes con elevación de troponinas Pacientes
|
| 84 |
+
con depresión del segmento ST > 1 mm
|
| 85 |
+
pipeline_tag: sentence-similarity
|
| 86 |
+
library_name: sentence-transformers
|
| 87 |
+
metrics:
|
| 88 |
+
- cosine_accuracy@1
|
| 89 |
+
- cosine_accuracy@3
|
| 90 |
+
- cosine_accuracy@5
|
| 91 |
+
- cosine_accuracy@10
|
| 92 |
+
- cosine_precision@1
|
| 93 |
+
- cosine_precision@3
|
| 94 |
+
- cosine_precision@5
|
| 95 |
+
- cosine_precision@10
|
| 96 |
+
- cosine_recall@1
|
| 97 |
+
- cosine_recall@3
|
| 98 |
+
- cosine_recall@5
|
| 99 |
+
- cosine_recall@10
|
| 100 |
+
- cosine_ndcg@10
|
| 101 |
+
- cosine_mrr@10
|
| 102 |
+
- cosine_map@100
|
| 103 |
+
model-index:
|
| 104 |
+
- name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
|
| 105 |
+
results:
|
| 106 |
+
- task:
|
| 107 |
+
type: information-retrieval
|
| 108 |
+
name: Information Retrieval
|
| 109 |
+
dataset:
|
| 110 |
+
name: enarm ir
|
| 111 |
+
type: enarm-ir
|
| 112 |
+
metrics:
|
| 113 |
+
- type: cosine_accuracy@1
|
| 114 |
+
value: 0.9112244897959184
|
| 115 |
+
name: Cosine Accuracy@1
|
| 116 |
+
- type: cosine_accuracy@3
|
| 117 |
+
value: 0.999234693877551
|
| 118 |
+
name: Cosine Accuracy@3
|
| 119 |
+
- type: cosine_accuracy@5
|
| 120 |
+
value: 1.0
|
| 121 |
+
name: Cosine Accuracy@5
|
| 122 |
+
- type: cosine_accuracy@10
|
| 123 |
+
value: 1.0
|
| 124 |
+
name: Cosine Accuracy@10
|
| 125 |
+
- type: cosine_precision@1
|
| 126 |
+
value: 0.9112244897959184
|
| 127 |
+
name: Cosine Precision@1
|
| 128 |
+
- type: cosine_precision@3
|
| 129 |
+
value: 0.33307823129251696
|
| 130 |
+
name: Cosine Precision@3
|
| 131 |
+
- type: cosine_precision@5
|
| 132 |
+
value: 0.20000000000000007
|
| 133 |
+
name: Cosine Precision@5
|
| 134 |
+
- type: cosine_precision@10
|
| 135 |
+
value: 0.10000000000000003
|
| 136 |
+
name: Cosine Precision@10
|
| 137 |
+
- type: cosine_recall@1
|
| 138 |
+
value: 0.9112244897959184
|
| 139 |
+
name: Cosine Recall@1
|
| 140 |
+
- type: cosine_recall@3
|
| 141 |
+
value: 0.999234693877551
|
| 142 |
+
name: Cosine Recall@3
|
| 143 |
+
- type: cosine_recall@5
|
| 144 |
+
value: 1.0
|
| 145 |
+
name: Cosine Recall@5
|
| 146 |
+
- type: cosine_recall@10
|
| 147 |
+
value: 1.0
|
| 148 |
+
name: Cosine Recall@10
|
| 149 |
+
- type: cosine_ndcg@10
|
| 150 |
+
value: 0.9666147393128501
|
| 151 |
+
name: Cosine Ndcg@10
|
| 152 |
+
- type: cosine_mrr@10
|
| 153 |
+
value: 0.954825680272109
|
| 154 |
+
name: Cosine Mrr@10
|
| 155 |
+
- type: cosine_map@100
|
| 156 |
+
value: 0.9548256802721089
|
| 157 |
+
name: Cosine Map@100
|
| 158 |
+
---
|
| 159 |
+
|
| 160 |
+
# SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
|
| 161 |
+
|
| 162 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on the `json` dataset (35,280 anchor–positive pairs of Spanish-language medical text). It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 163 |
+
|
| 164 |
+
## Model Details
|
| 165 |
+
|
| 166 |
+
### Model Description
|
| 167 |
+
- **Model Type:** Sentence Transformer
|
| 168 |
+
- **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision e8c3b32edf5434bc2275fc9bab85f82640a19130 -->
|
| 169 |
+
- **Maximum Sequence Length:** 384 tokens
|
| 170 |
+
- **Output Dimensionality:** 768 dimensions
|
| 171 |
+
- **Similarity Function:** Cosine Similarity
|
| 172 |
+
- **Training Dataset:**
|
| 173 |
+
- json
|
| 174 |
+
<!-- - **Language:** Unknown -->
|
| 175 |
+
<!-- - **License:** Unknown -->
|
| 176 |
+
|
| 177 |
+
### Model Sources
|
| 178 |
+
|
| 179 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 180 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 181 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 182 |
+
|
| 183 |
+
### Full Model Architecture
|
| 184 |
+
|
| 185 |
+
```
|
| 186 |
+
SentenceTransformer(
|
| 187 |
+
(0): Transformer({'max_seq_length': 384, 'do_lower_case': False, 'architecture': 'MPNetModel'})
|
| 188 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 189 |
+
(2): Normalize()
|
| 190 |
+
)
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
## Usage
|
| 194 |
+
|
| 195 |
+
### Direct Usage (Sentence Transformers)
|
| 196 |
+
|
| 197 |
+
First install the Sentence Transformers library:
|
| 198 |
+
|
| 199 |
+
```bash
|
| 200 |
+
pip install -U sentence-transformers
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
Then you can load this model and run inference.
|
| 204 |
+
```python
|
| 205 |
+
from sentence_transformers import SentenceTransformer
|
| 206 |
+
|
| 207 |
+
# Download from the 🤗 Hub
|
| 208 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 209 |
+
# Run inference
|
| 210 |
+
sentences = [
|
| 211 |
+
'Complicaciones de la anorexia nerviosa',
|
| 212 |
+
'Complicaciones de la anorexia nerviosa • Prolapso de la válvula mitral. • Arritmias. • Hipotensión. • Bradicardia. • Amenorrea (ausencia de menstruación en tres ciclos consecutivos). • Nefrolitiasis. • Osteoporosis. • Fracturas múltiples por estrés. • Pancitopenia. • Anomalías tiroideas. • Mortalidad por complicaciones o suicidio >10%.',
|
| 213 |
+
'Indicación de cateterismo en pacientes con angina inestable o infarto miocárdico sin elevación del segmento ST Pacientes con puntal TIMI ≥ 3 Pacientes con dolor torácico refractario a tratamiento Pacientes con elevación de troponinas Pacientes con depresión del segmento ST > 1 mm',
|
| 214 |
+
]
|
| 215 |
+
embeddings = model.encode(sentences)
|
| 216 |
+
print(embeddings.shape)
|
| 217 |
+
# (3, 768)
|
| 218 |
+
|
| 219 |
+
# Get the similarity scores for the embeddings
|
| 220 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 221 |
+
print(similarities)
|
| 222 |
+
# tensor([[1.0000, 0.7534, 0.0072],
|
| 223 |
+
# [0.7534, 1.0000, 0.0091],
|
| 224 |
+
# [0.0072, 0.0091, 1.0000]])
|
| 225 |
+
```
|
| 226 |
+
|
| 227 |
+
<!--
|
| 228 |
+
### Direct Usage (Transformers)
|
| 229 |
+
|
| 230 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 231 |
+
|
| 232 |
+
</details>
|
| 233 |
+
-->
|
| 234 |
+
|
| 235 |
+
<!--
|
| 236 |
+
### Downstream Usage (Sentence Transformers)
|
| 237 |
+
|
| 238 |
+
You can finetune this model on your own dataset.
|
| 239 |
+
|
| 240 |
+
<details><summary>Click to expand</summary>
|
| 241 |
+
|
| 242 |
+
</details>
|
| 243 |
+
-->
|
| 244 |
+
|
| 245 |
+
<!--
|
| 246 |
+
### Out-of-Scope Use
|
| 247 |
+
|
| 248 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 249 |
+
-->
|
| 250 |
+
|
| 251 |
+
## Evaluation
|
| 252 |
+
|
| 253 |
+
### Metrics
|
| 254 |
+
|
| 255 |
+
#### Information Retrieval
|
| 256 |
+
|
| 257 |
+
* Dataset: `enarm-ir`
|
| 258 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
| 259 |
+
|
| 260 |
+
| Metric | Value |
|
| 261 |
+
|:--------------------|:-----------|
|
| 262 |
+
| cosine_accuracy@1 | 0.9112 |
|
| 263 |
+
| cosine_accuracy@3 | 0.9992 |
|
| 264 |
+
| cosine_accuracy@5 | 1.0 |
|
| 265 |
+
| cosine_accuracy@10 | 1.0 |
|
| 266 |
+
| cosine_precision@1 | 0.9112 |
|
| 267 |
+
| cosine_precision@3 | 0.3331 |
|
| 268 |
+
| cosine_precision@5 | 0.2 |
|
| 269 |
+
| cosine_precision@10 | 0.1 |
|
| 270 |
+
| cosine_recall@1 | 0.9112 |
|
| 271 |
+
| cosine_recall@3 | 0.9992 |
|
| 272 |
+
| cosine_recall@5 | 1.0 |
|
| 273 |
+
| cosine_recall@10 | 1.0 |
|
| 274 |
+
| **cosine_ndcg@10** | **0.9666** |
|
| 275 |
+
| cosine_mrr@10 | 0.9548 |
|
| 276 |
+
| cosine_map@100 | 0.9548 |
|
| 277 |
+
|
| 278 |
+
<!--
|
| 279 |
+
## Bias, Risks and Limitations
|
| 280 |
+
|
| 281 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 282 |
+
-->
|
| 283 |
+
|
| 284 |
+
<!--
|
| 285 |
+
### Recommendations
|
| 286 |
+
|
| 287 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 288 |
+
-->
|
| 289 |
+
|
| 290 |
+
## Training Details
|
| 291 |
+
|
| 292 |
+
### Training Dataset
|
| 293 |
+
|
| 294 |
+
#### json
|
| 295 |
+
|
| 296 |
+
* Dataset: json
|
| 297 |
+
* Size: 35,280 training samples
|
| 298 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
| 299 |
+
* Approximate statistics based on the first 1000 samples:
|
| 300 |
+
| | anchor | positive |
|
| 301 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 302 |
+
| type | string | string |
|
| 303 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 30.93 tokens</li><li>max: 150 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 93.86 tokens</li><li>max: 197 tokens</li></ul> |
|
| 304 |
+
* Samples:
|
| 305 |
+
| anchor | positive |
|
| 306 |
+
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 307 |
+
| <code>• Principalmente por deficiencia dietética (p</code> | <code>Etiología de deficiencia de ácido fólico • Principalmente por deficiencia dietética (p.ej., en vegetarianos y alcohólicos) • Embarazo • Pérdidas cutáneas (p. ej., eczema o psoriasis) • Incremento de las pérdidas por diálisis • Anticonvulsivantes (fenitoína)</code> |
|
| 308 |
+
| <code>46,XX (en algunos casos 46,XY), lo cual ocurre por fertilización de un óvulo vacío por dos espermatozoides</code> | <code>Mola completa, cariotipo 46,XX (en algunos casos 46,XY), lo cual ocurre por fertilización de un óvulo vacío por dos espermatozoides. Por otro lado, la mola parcial tiene un cariotipo 69,XXY, lo cual ocurre por la fertilización de un óvulo por dos espermatozoides.</code> |
|
| 309 |
+
| <code>La enfermedad tiene dos picos de edad de los 20 a los 40 años y de los 60 a 80 años de edad, con mayor frecuencia en la etapa productiva y con una edad promedio de presentación a los 30 años</code> | <code>¿Cuál es la edad de presentación de la EII y la población más afectada? La enfermedad tiene dos picos de edad de los 20 a los 40 años y de los 60 a 80 años de edad, con mayor frecuencia en la etapa productiva y con una edad promedio de presentación a los 30 años. Se presenta más a menudo en países industrializados y zonas urbanas, con mayor prevalencia e incidencia en los países del norte de América y Europa; sin embargo, los judíos ashkenazí son los más afectados.</code> |
|
| 310 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 311 |
+
```json
|
| 312 |
+
{
|
| 313 |
+
"scale": 20.0,
|
| 314 |
+
"similarity_fct": "cos_sim",
|
| 315 |
+
"gather_across_devices": false
|
| 316 |
+
}
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
### Evaluation Dataset
|
| 320 |
+
|
| 321 |
+
#### json
|
| 322 |
+
|
| 323 |
+
* Dataset: json
|
| 324 |
+
* Size: 3,920 evaluation samples
|
| 325 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
| 326 |
+
* Approximate statistics based on the first 1000 samples:
|
| 327 |
+
| | anchor | positive |
|
| 328 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 329 |
+
| type | string | string |
|
| 330 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 29.98 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 89.54 tokens</li><li>max: 197 tokens</li></ul> |
|
| 331 |
+
* Samples:
|
| 332 |
+
| anchor | positive |
|
| 333 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 334 |
+
| <code>Focal (afecta a un único músculo o grupo muscular): blefaroespasmo, distonía oromandibular, distonía espasmódica o distonía, tortícolis o distonia cervical y espasmo del escribiente</code> | <code>¿Cuáles son los tipos de distonías? Focal (afecta a un único músculo o grupo muscular): blefaroespasmo, distonía oromandibular, distonía espasmódica o distonía, tortícolis o distonia cervical y espasmo del escribiente.Segmentaria (afecta a varios músculos de dos o más regiones contiguas): craneal (musculatura craneal y cervical y síndrome de Meige: blefaroespasmo con distonía oromandibular), braquiocervical (afectación de la musculatura de cuello y miembros superiores), axial (musculatura de cuello y tronco</code> |
|
| 335 |
+
| <code>¿Cuál es el diagnóstico diferencial de la cefalea en racimos?</code> | <code>¿Cuál es el diagnóstico diferencial de la cefalea en racimos? Incluye cefaleas primarias, como la cefalea hemicránea paroxística benigna y la neuralgia del trigérnino, al igual que cefaleas secundarias, como el sindrome de Tolosa-Hunt y la cefalea por arteritis temporal.</code> |
|
| 336 |
+
| <code>Tratamiento general de la dermatitis atópica</code> | <code>Tratamiento general de la dermatitis atópica Esteroides tópicos (no sistémicos), terapia PUVA, inmunomoduladores.</code> |
|
| 337 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 338 |
+
```json
|
| 339 |
+
{
|
| 340 |
+
"scale": 20.0,
|
| 341 |
+
"similarity_fct": "cos_sim",
|
| 342 |
+
"gather_across_devices": false
|
| 343 |
+
}
|
| 344 |
+
```
|
| 345 |
+
|
| 346 |
+
### Training Hyperparameters
|
| 347 |
+
#### Non-Default Hyperparameters
|
| 348 |
+
|
| 349 |
+
- `eval_strategy`: steps
|
| 350 |
+
- `per_device_train_batch_size`: 32
|
| 351 |
+
- `per_device_eval_batch_size`: 32
|
| 352 |
+
- `learning_rate`: 2e-05
|
| 353 |
+
- `num_train_epochs`: 2
|
| 354 |
+
- `warmup_ratio`: 0.1
|
| 355 |
+
- `fp16`: True
|
| 356 |
+
- `load_best_model_at_end`: True
|
| 357 |
+
|
| 358 |
+
#### All Hyperparameters
|
| 359 |
+
<details><summary>Click to expand</summary>
|
| 360 |
+
|
| 361 |
+
- `overwrite_output_dir`: False
|
| 362 |
+
- `do_predict`: False
|
| 363 |
+
- `eval_strategy`: steps
|
| 364 |
+
- `prediction_loss_only`: True
|
| 365 |
+
- `per_device_train_batch_size`: 32
|
| 366 |
+
- `per_device_eval_batch_size`: 32
|
| 367 |
+
- `per_gpu_train_batch_size`: None
|
| 368 |
+
- `per_gpu_eval_batch_size`: None
|
| 369 |
+
- `gradient_accumulation_steps`: 1
|
| 370 |
+
- `eval_accumulation_steps`: None
|
| 371 |
+
- `torch_empty_cache_steps`: None
|
| 372 |
+
- `learning_rate`: 2e-05
|
| 373 |
+
- `weight_decay`: 0.0
|
| 374 |
+
- `adam_beta1`: 0.9
|
| 375 |
+
- `adam_beta2`: 0.999
|
| 376 |
+
- `adam_epsilon`: 1e-08
|
| 377 |
+
- `max_grad_norm`: 1.0
|
| 378 |
+
- `num_train_epochs`: 2
|
| 379 |
+
- `max_steps`: -1
|
| 380 |
+
- `lr_scheduler_type`: linear
|
| 381 |
+
- `lr_scheduler_kwargs`: {}
|
| 382 |
+
- `warmup_ratio`: 0.1
|
| 383 |
+
- `warmup_steps`: 0
|
| 384 |
+
- `log_level`: passive
|
| 385 |
+
- `log_level_replica`: warning
|
| 386 |
+
- `log_on_each_node`: True
|
| 387 |
+
- `logging_nan_inf_filter`: True
|
| 388 |
+
- `save_safetensors`: True
|
| 389 |
+
- `save_on_each_node`: False
|
| 390 |
+
- `save_only_model`: False
|
| 391 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 392 |
+
- `no_cuda`: False
|
| 393 |
+
- `use_cpu`: False
|
| 394 |
+
- `use_mps_device`: False
|
| 395 |
+
- `seed`: 42
|
| 396 |
+
- `data_seed`: None
|
| 397 |
+
- `jit_mode_eval`: False
|
| 398 |
+
- `bf16`: False
|
| 399 |
+
- `fp16`: True
|
| 400 |
+
- `fp16_opt_level`: O1
|
| 401 |
+
- `half_precision_backend`: auto
|
| 402 |
+
- `bf16_full_eval`: False
|
| 403 |
+
- `fp16_full_eval`: False
|
| 404 |
+
- `tf32`: None
|
| 405 |
+
- `local_rank`: 0
|
| 406 |
+
- `ddp_backend`: None
|
| 407 |
+
- `tpu_num_cores`: None
|
| 408 |
+
- `tpu_metrics_debug`: False
|
| 409 |
+
- `debug`: []
|
| 410 |
+
- `dataloader_drop_last`: False
|
| 411 |
+
- `dataloader_num_workers`: 0
|
| 412 |
+
- `dataloader_prefetch_factor`: None
|
| 413 |
+
- `past_index`: -1
|
| 414 |
+
- `disable_tqdm`: False
|
| 415 |
+
- `remove_unused_columns`: True
|
| 416 |
+
- `label_names`: None
|
| 417 |
+
- `load_best_model_at_end`: True
|
| 418 |
+
- `ignore_data_skip`: False
|
| 419 |
+
- `fsdp`: []
|
| 420 |
+
- `fsdp_min_num_params`: 0
|
| 421 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 422 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 423 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 424 |
+
- `parallelism_config`: None
|
| 425 |
+
- `deepspeed`: None
|
| 426 |
+
- `label_smoothing_factor`: 0.0
|
| 427 |
+
- `optim`: adamw_torch
|
| 428 |
+
- `optim_args`: None
|
| 429 |
+
- `adafactor`: False
|
| 430 |
+
- `group_by_length`: False
|
| 431 |
+
- `length_column_name`: length
|
| 432 |
+
- `project`: huggingface
|
| 433 |
+
- `trackio_space_id`: trackio
|
| 434 |
+
- `ddp_find_unused_parameters`: None
|
| 435 |
+
- `ddp_bucket_cap_mb`: None
|
| 436 |
+
- `ddp_broadcast_buffers`: False
|
| 437 |
+
- `dataloader_pin_memory`: True
|
| 438 |
+
- `dataloader_persistent_workers`: False
|
| 439 |
+
- `skip_memory_metrics`: True
|
| 440 |
+
- `use_legacy_prediction_loop`: False
|
| 441 |
+
- `push_to_hub`: False
|
| 442 |
+
- `resume_from_checkpoint`: None
|
| 443 |
+
- `hub_model_id`: None
|
| 444 |
+
- `hub_strategy`: every_save
|
| 445 |
+
- `hub_private_repo`: None
|
| 446 |
+
- `hub_always_push`: False
|
| 447 |
+
- `hub_revision`: None
|
| 448 |
+
- `gradient_checkpointing`: False
|
| 449 |
+
- `gradient_checkpointing_kwargs`: None
|
| 450 |
+
- `include_inputs_for_metrics`: False
|
| 451 |
+
- `include_for_metrics`: []
|
| 452 |
+
- `eval_do_concat_batches`: True
|
| 453 |
+
- `fp16_backend`: auto
|
| 454 |
+
- `push_to_hub_model_id`: None
|
| 455 |
+
- `push_to_hub_organization`: None
|
| 456 |
+
- `mp_parameters`:
|
| 457 |
+
- `auto_find_batch_size`: False
|
| 458 |
+
- `full_determinism`: False
|
| 459 |
+
- `torchdynamo`: None
|
| 460 |
+
- `ray_scope`: last
|
| 461 |
+
- `ddp_timeout`: 1800
|
| 462 |
+
- `torch_compile`: False
|
| 463 |
+
- `torch_compile_backend`: None
|
| 464 |
+
- `torch_compile_mode`: None
|
| 465 |
+
- `include_tokens_per_second`: False
|
| 466 |
+
- `include_num_input_tokens_seen`: no
|
| 467 |
+
- `neftune_noise_alpha`: None
|
| 468 |
+
- `optim_target_modules`: None
|
| 469 |
+
- `batch_eval_metrics`: False
|
| 470 |
+
- `eval_on_start`: False
|
| 471 |
+
- `use_liger_kernel`: False
|
| 472 |
+
- `liger_kernel_config`: None
|
| 473 |
+
- `eval_use_gather_object`: False
|
| 474 |
+
- `average_tokens_across_devices`: True
|
| 475 |
+
- `prompts`: None
|
| 476 |
+
- `batch_sampler`: batch_sampler
|
| 477 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 478 |
+
- `router_mapping`: {}
|
| 479 |
+
- `learning_rate_mapping`: {}
|
| 480 |
+
|
| 481 |
+
</details>
|
| 482 |
+
|
| 483 |
+
### Training Logs
|
| 484 |
+
| Epoch | Step | Training Loss | Validation Loss | enarm-ir_cosine_ndcg@10 |
|
| 485 |
+
|:----------:|:--------:|:-------------:|:---------------:|:-----------------------:|
|
| 486 |
+
| 0.0009 | 1 | 0.3939 | - | - |
|
| 487 |
+
| 0.0907 | 100 | 0.1209 | - | - |
|
| 488 |
+
| 0.1813 | 200 | 0.014 | - | - |
|
| 489 |
+
| 0.2720 | 300 | 0.0057 | - | - |
|
| 490 |
+
| 0.3626 | 400 | 0.0047 | - | - |
|
| 491 |
+
| 0.4533 | 500 | 0.0048 | 0.0027 | 0.9629 |
|
| 492 |
+
| 0.5440 | 600 | 0.0038 | - | - |
|
| 493 |
+
| 0.6346 | 700 | 0.0041 | - | - |
|
| 494 |
+
| 0.7253 | 800 | 0.005 | - | - |
|
| 495 |
+
| 0.8160 | 900 | 0.003 | - | - |
|
| 496 |
+
| 0.9066 | 1000 | 0.0019 | 0.0023 | 0.9644 |
|
| 497 |
+
| 0.9973 | 1100 | 0.0021 | - | - |
|
| 498 |
+
| 1.0879 | 1200 | 0.0025 | - | - |
|
| 499 |
+
| 1.1786 | 1300 | 0.0025 | - | - |
|
| 500 |
+
| 1.2693 | 1400 | 0.0025 | - | - |
|
| 501 |
+
| **1.3599** | **1500** | **0.0031** | **0.0021** | **0.9682** |
|
| 502 |
+
| 1.4506 | 1600 | 0.0025 | - | - |
|
| 503 |
+
| 1.5413 | 1700 | 0.0016 | - | - |
|
| 504 |
+
| 1.6319 | 1800 | 0.003 | - | - |
|
| 505 |
+
| 1.7226 | 1900 | 0.0024 | - | - |
|
| 506 |
+
| 1.8132 | 2000 | 0.0024 | 0.0020 | 0.9666 |
|
| 507 |
+
| 1.9039 | 2100 | 0.0037 | - | - |
|
| 508 |
+
| 1.9946 | 2200 | 0.0013 | - | - |
|
| 509 |
+
|
| 510 |
+
* The bold row denotes the saved checkpoint.
|
| 511 |
+
|
| 512 |
+
### Framework Versions
|
| 513 |
+
- Python: 3.11.13
|
| 514 |
+
- Sentence Transformers: 5.1.1
|
| 515 |
+
- Transformers: 4.57.1
|
| 516 |
+
- PyTorch: 2.6.0+cu124
|
| 517 |
+
- Accelerate: 1.11.0
|
| 518 |
+
- Datasets: 4.0.0
|
| 519 |
+
- Tokenizers: 0.22.1
|
| 520 |
+
|
| 521 |
+
## Citation
|
| 522 |
+
|
| 523 |
+
### BibTeX
|
| 524 |
+
|
| 525 |
+
#### Sentence Transformers
|
| 526 |
+
```bibtex
|
| 527 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 528 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 529 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 530 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 531 |
+
month = "11",
|
| 532 |
+
year = "2019",
|
| 533 |
+
publisher = "Association for Computational Linguistics",
|
| 534 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 535 |
+
}
|
| 536 |
+
```
|
| 537 |
+
|
| 538 |
+
#### MultipleNegativesRankingLoss
|
| 539 |
+
```bibtex
|
| 540 |
+
@misc{henderson2017efficient,
|
| 541 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 542 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 543 |
+
year={2017},
|
| 544 |
+
eprint={1705.00652},
|
| 545 |
+
archivePrefix={arXiv},
|
| 546 |
+
primaryClass={cs.CL}
|
| 547 |
+
}
|
| 548 |
+
```
|
| 549 |
+
|
| 550 |
+
<!--
|
| 551 |
+
## Glossary
|
| 552 |
+
|
| 553 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 554 |
+
-->
|
| 555 |
+
|
| 556 |
+
<!--
|
| 557 |
+
## Model Card Authors
|
| 558 |
+
|
| 559 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 560 |
+
-->
|
| 561 |
+
|
| 562 |
+
<!--
|
| 563 |
+
## Model Card Contact
|
| 564 |
+
|
| 565 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 566 |
+
-->
|
checkpoint-1500/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
checkpoint-1500/README.md
ADDED
|
@@ -0,0 +1,558 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:35280
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: sentence-transformers/all-mpnet-base-v2
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: ¿Cuál es la utilidad de la valoración ecográfica del cérvix?
|
| 13 |
+
sentences:
|
| 14 |
+
- Papel del rituximab en pacientes con linfoma no Hodgkin En algunos pacientes con
|
| 15 |
+
linfoma No Hodgkin existe sobreexpresión del antígeno CD20. En estos pacientes
|
| 16 |
+
se deberá utilizar un anticuerpo anti CD20 (rituximab), el cual ha demostrado
|
| 17 |
+
tener toxicidad limitada y mayor supervivencia.
|
| 18 |
+
- '¿Cuál es la característica del material de sutura absorbible de origen sintético?
|
| 19 |
+
En general se absorbe más despacio que el catgut y se hace por hidrólisis lenta
|
| 20 |
+
y fagocitosis, es prácticamente inerte pero sí produce reacción tisular con invasión
|
| 21 |
+
de macrófagos. En términos generales pierde 50% de su fuerza de tensión en menos
|
| 22 |
+
de 25 días. El ácido poliglicólico pierde su fuerza de tensión en 15 días y permanece
|
| 23 |
+
en los tejidos durante alrededor de 80 días; la poliglactina 910 permanece en
|
| 24 |
+
los tejidos durante '
|
| 25 |
+
- ¿Cuál es la utilidad de la valoración ecográfica del cérvix? No está indicada
|
| 26 |
+
en todas las mujeres embarazadas sino con mayor precisión sólo en aquellas con
|
| 27 |
+
mayor riesgo de parto pretérmino o bien aquellas con sospecha de cérvix acortado.
|
| 28 |
+
La medición de la longitud cervical, si es superior a 30 mm, tiene un alto valor
|
| 29 |
+
predictivo negativo. La presencia de fenómeno de embudo o insinuación de membranas
|
| 30 |
+
en el canal cervical se asocia con elevada probabilidad de parto pretérmino.
|
| 31 |
+
- source_sentence: Inmunodeficiencia congénita más común, en la cual los pacientes
|
| 32 |
+
presentan infecciones sinopulmonares y gastrointestinales
|
| 33 |
+
sentences:
|
| 34 |
+
- Causas de miocardiopatía dilatada Alcoholismo, beri beri, cocaína, infección por
|
| 35 |
+
Coxsackie B, enfermedad de Chagas, doxorrubicina, embarazo
|
| 36 |
+
- Mecanismo de acción de los barbitúricos Facilitan la acción de los canales GABA,
|
| 37 |
+
al aumentar la duración de la apertura del canal de cloro, con lo cual disminuye
|
| 38 |
+
la despolarización
|
| 39 |
+
- Deficiencia selectiva de IgA Inmunodeficiencia congénita más común, en la cual
|
| 40 |
+
los pacientes presentan infecciones sinopulmonares y gastrointestinales. El tratamiento
|
| 41 |
+
es con antibióticos y la administración de inmunoglobulinas se contraindica.
|
| 42 |
+
- source_sentence: Bloqueo de los receptores a serotonina 5-HT2, alfa adrenérgicos,
|
| 43 |
+
H1 histaminérgicos, y dopaminérgicos
|
| 44 |
+
sentences:
|
| 45 |
+
- Etiología de la anemia sideroblástica • Causas hereditarias. Éstas ocurren ya
|
| 46 |
+
sea por un defecto en la sintasa de ácido aminolevulínico o una anormalidad en
|
| 47 |
+
el metabolismo de la vitamina B6. • Causas adquiridas. Ocurren por fármacos como
|
| 48 |
+
el cloranfenicol, la isoniazida y el alcohol. La intoxicación por hierro puede
|
| 49 |
+
ocasionar anemia sideroblástica, así como los síndromes mielodisplásicos y la
|
| 50 |
+
anemia refractaria (estos dos últimos puede progresar a leucemia mieloide aguda
|
| 51 |
+
en un pequeño porcentaje de paciente
|
| 52 |
+
- Tipo de fractura craneal más frecuente ocasionada durante el trabajo de parto
|
| 53 |
+
Fractura craneal lineal
|
| 54 |
+
- Mecanismo de acción de los antipsicóticos atípicos Bloqueo de los receptores a
|
| 55 |
+
serotonina 5-HT2, alfa adrenérgicos, H1 histaminérgicos, y dopaminérgicos. Incluyen
|
| 56 |
+
clozapina, olanzapina, risperidona, aripiprazol, quetiapina, ziprasidona.
|
| 57 |
+
- source_sentence: 'Pediatría: ¿Cómo son las crisis convulsivas febriles?'
|
| 58 |
+
sentences:
|
| 59 |
+
- Utilidad del cultivo de citomegalovirus de muestra de orina en caso de sospecha
|
| 60 |
+
de infección congénita Si es negativo se excluye la posibilidad de infección congénita
|
| 61 |
+
por citomegalovirus
|
| 62 |
+
- ¿Qué es la displasia broncopulmonar (DBP)? Es una enfermedad pulmonar crónica
|
| 63 |
+
caracterizada por dependencia de oxígeno por un periodo mayor a 28 días. Se produce
|
| 64 |
+
como consecuencia de la exposición del pulmón inmaduro del prematuro a noxas ambientales
|
| 65 |
+
( oxígeno, infecciones, barotrauma, volutrauma).
|
| 66 |
+
- ¿Cómo son las crisis convulsivas febriles? Pueden ser simples (90% de los casos)
|
| 67 |
+
con convulsiones generalizadas mayormente clónicas, de duración menor a 15 minutos,
|
| 68 |
+
simétricas y ocurren en pacientes dentro del grupo de edad de riesgo, sin recurrencia
|
| 69 |
+
en 24 h. También pueden ser complejas o atípicas (menos de 10%), las cuales son
|
| 70 |
+
focales o generalizadas, de duración mayor a quince minutos, múltiples o recurrentes
|
| 71 |
+
en 24 h.
|
| 72 |
+
- source_sentence: Complicaciones de la anorexia nerviosa
|
| 73 |
+
sentences:
|
| 74 |
+
- Complicaciones de la anorexia nerviosa • Prolapso de la válvula mitral. • Arritmias. •
|
| 75 |
+
Hipotensión. • Bradicardia. • Amenorrea (ausencia de menstruación en tres ciclos
|
| 76 |
+
consecutivos). • Nefrolitiasis. • Osteoporosis. • Fracturas múltiples por estrés. •
|
| 77 |
+
Pancitopenia. • Anomalías tiroideas. • Mortalidad por complicaciones o suicidio
|
| 78 |
+
>10%.
|
| 79 |
+
- Hallazgo clásico del Pityrosporum orbiculare (pitiriasis versicolor) en la preparación
|
| 80 |
+
de KOH Espagueti a la boloñesa (hifas + esporas)
|
| 81 |
+
- Indicación de cateterismo en pacientes con angina inestable o infarto miocárdico
|
| 82 |
+
sin elevación del segmento ST Pacientes con puntal TIMI ≥ 3 Pacientes con dolor
|
| 83 |
+
torácico refractario a tratamiento Pacientes con elevación de troponinas Pacientes
|
| 84 |
+
con depresión del segmento ST > 1 mm
|
| 85 |
+
pipeline_tag: sentence-similarity
|
| 86 |
+
library_name: sentence-transformers
|
| 87 |
+
metrics:
|
| 88 |
+
- cosine_accuracy@1
|
| 89 |
+
- cosine_accuracy@3
|
| 90 |
+
- cosine_accuracy@5
|
| 91 |
+
- cosine_accuracy@10
|
| 92 |
+
- cosine_precision@1
|
| 93 |
+
- cosine_precision@3
|
| 94 |
+
- cosine_precision@5
|
| 95 |
+
- cosine_precision@10
|
| 96 |
+
- cosine_recall@1
|
| 97 |
+
- cosine_recall@3
|
| 98 |
+
- cosine_recall@5
|
| 99 |
+
- cosine_recall@10
|
| 100 |
+
- cosine_ndcg@10
|
| 101 |
+
- cosine_mrr@10
|
| 102 |
+
- cosine_map@100
|
| 103 |
+
model-index:
|
| 104 |
+
- name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
|
| 105 |
+
results:
|
| 106 |
+
- task:
|
| 107 |
+
type: information-retrieval
|
| 108 |
+
name: Information Retrieval
|
| 109 |
+
dataset:
|
| 110 |
+
name: enarm ir
|
| 111 |
+
type: enarm-ir
|
| 112 |
+
metrics:
|
| 113 |
+
- type: cosine_accuracy@1
|
| 114 |
+
value: 0.9150510204081632
|
| 115 |
+
name: Cosine Accuracy@1
|
| 116 |
+
- type: cosine_accuracy@3
|
| 117 |
+
value: 0.9994897959183674
|
| 118 |
+
name: Cosine Accuracy@3
|
| 119 |
+
- type: cosine_accuracy@5
|
| 120 |
+
value: 1.0
|
| 121 |
+
name: Cosine Accuracy@5
|
| 122 |
+
- type: cosine_accuracy@10
|
| 123 |
+
value: 1.0
|
| 124 |
+
name: Cosine Accuracy@10
|
| 125 |
+
- type: cosine_precision@1
|
| 126 |
+
value: 0.9150510204081632
|
| 127 |
+
name: Cosine Precision@1
|
| 128 |
+
- type: cosine_precision@3
|
| 129 |
+
value: 0.3331632653061224
|
| 130 |
+
name: Cosine Precision@3
|
| 131 |
+
- type: cosine_precision@5
|
| 132 |
+
value: 0.20000000000000007
|
| 133 |
+
name: Cosine Precision@5
|
| 134 |
+
- type: cosine_precision@10
|
| 135 |
+
value: 0.10000000000000003
|
| 136 |
+
name: Cosine Precision@10
|
| 137 |
+
- type: cosine_recall@1
|
| 138 |
+
value: 0.9150510204081632
|
| 139 |
+
name: Cosine Recall@1
|
| 140 |
+
- type: cosine_recall@3
|
| 141 |
+
value: 0.9994897959183674
|
| 142 |
+
name: Cosine Recall@3
|
| 143 |
+
- type: cosine_recall@5
|
| 144 |
+
value: 1.0
|
| 145 |
+
name: Cosine Recall@5
|
| 146 |
+
- type: cosine_recall@10
|
| 147 |
+
value: 1.0
|
| 148 |
+
name: Cosine Recall@10
|
| 149 |
+
- type: cosine_ndcg@10
|
| 150 |
+
value: 0.9681782842497483
|
| 151 |
+
name: Cosine Ndcg@10
|
| 152 |
+
- type: cosine_mrr@10
|
| 153 |
+
value: 0.9569302721088436
|
| 154 |
+
name: Cosine Mrr@10
|
| 155 |
+
- type: cosine_map@100
|
| 156 |
+
value: 0.9569302721088435
|
| 157 |
+
name: Cosine Map@100
|
| 158 |
+
---
|
| 159 |
+
|
| 160 |
+
# SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
|
| 161 |
+
|
| 162 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on the `json` dataset (35,280 anchor–positive training pairs). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 163 |
+
|
| 164 |
+
## Model Details
|
| 165 |
+
|
| 166 |
+
### Model Description
|
| 167 |
+
- **Model Type:** Sentence Transformer
|
| 168 |
+
- **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision e8c3b32edf5434bc2275fc9bab85f82640a19130 -->
|
| 169 |
+
- **Maximum Sequence Length:** 384 tokens
|
| 170 |
+
- **Output Dimensionality:** 768 dimensions
|
| 171 |
+
- **Similarity Function:** Cosine Similarity
|
| 172 |
+
- **Training Dataset:**
|
| 173 |
+
- json
|
| 174 |
+
<!-- - **Language:** Unknown -->
|
| 175 |
+
<!-- - **License:** Unknown -->
|
| 176 |
+
|
| 177 |
+
### Model Sources
|
| 178 |
+
|
| 179 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 180 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 181 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 182 |
+
|
| 183 |
+
### Full Model Architecture
|
| 184 |
+
|
| 185 |
+
```
|
| 186 |
+
SentenceTransformer(
|
| 187 |
+
(0): Transformer({'max_seq_length': 384, 'do_lower_case': False, 'architecture': 'MPNetModel'})
|
| 188 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 189 |
+
(2): Normalize()
|
| 190 |
+
)
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
## Usage
|
| 194 |
+
|
| 195 |
+
### Direct Usage (Sentence Transformers)
|
| 196 |
+
|
| 197 |
+
First install the Sentence Transformers library:
|
| 198 |
+
|
| 199 |
+
```bash
|
| 200 |
+
pip install -U sentence-transformers
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
Then you can load this model and run inference.
|
| 204 |
+
```python
|
| 205 |
+
from sentence_transformers import SentenceTransformer
|
| 206 |
+
|
| 207 |
+
# Download from the 🤗 Hub (replace the placeholder id below with this model's repository id or a local path)
|
| 208 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 209 |
+
# Run inference
|
| 210 |
+
sentences = [
|
| 211 |
+
'Complicaciones de la anorexia nerviosa',
|
| 212 |
+
'Complicaciones de la anorexia nerviosa • Prolapso de la válvula mitral. • Arritmias. • Hipotensión. • Bradicardia. • Amenorrea (ausencia de menstruación en tres ciclos consecutivos). • Nefrolitiasis. • Osteoporosis. • Fracturas múltiples por estrés. • Pancitopenia. • Anomalías tiroideas. • Mortalidad por complicaciones o suicidio >10%.',
|
| 213 |
+
'Indicación de cateterismo en pacientes con angina inestable o infarto miocárdico sin elevación del segmento ST Pacientes con puntal TIMI ≥ 3 Pacientes con dolor torácico refractario a tratamiento Pacientes con elevación de troponinas Pacientes con depresión del segmento ST > 1 mm',
|
| 214 |
+
]
|
| 215 |
+
embeddings = model.encode(sentences)
|
| 216 |
+
print(embeddings.shape)
|
| 217 |
+
# (3, 768)
|
| 218 |
+
|
| 219 |
+
# Get the similarity scores for the embeddings
|
| 220 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 221 |
+
print(similarities)
|
| 222 |
+
# tensor([[1.0000, 0.7534, 0.0072],
|
| 223 |
+
# [0.7534, 1.0000, 0.0091],
|
| 224 |
+
# [0.0072, 0.0091, 1.0000]])
|
| 225 |
+
```
|
| 226 |
+
|
| 227 |
+
<!--
|
| 228 |
+
### Direct Usage (Transformers)
|
| 229 |
+
|
| 230 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 231 |
+
|
| 232 |
+
</details>
|
| 233 |
+
-->
|
| 234 |
+
|
| 235 |
+
<!--
|
| 236 |
+
### Downstream Usage (Sentence Transformers)
|
| 237 |
+
|
| 238 |
+
You can finetune this model on your own dataset.
|
| 239 |
+
|
| 240 |
+
<details><summary>Click to expand</summary>
|
| 241 |
+
|
| 242 |
+
</details>
|
| 243 |
+
-->
|
| 244 |
+
|
| 245 |
+
<!--
|
| 246 |
+
### Out-of-Scope Use
|
| 247 |
+
|
| 248 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 249 |
+
-->
|
| 250 |
+
|
| 251 |
+
## Evaluation
|
| 252 |
+
|
| 253 |
+
### Metrics
|
| 254 |
+
|
| 255 |
+
#### Information Retrieval
|
| 256 |
+
|
| 257 |
+
* Dataset: `enarm-ir`
|
| 258 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
| 259 |
+
|
| 260 |
+
| Metric | Value |
|
| 261 |
+
|:--------------------|:-----------|
|
| 262 |
+
| cosine_accuracy@1 | 0.9151 |
|
| 263 |
+
| cosine_accuracy@3 | 0.9995 |
|
| 264 |
+
| cosine_accuracy@5 | 1.0 |
|
| 265 |
+
| cosine_accuracy@10 | 1.0 |
|
| 266 |
+
| cosine_precision@1 | 0.9151 |
|
| 267 |
+
| cosine_precision@3 | 0.3332 |
|
| 268 |
+
| cosine_precision@5 | 0.2 |
|
| 269 |
+
| cosine_precision@10 | 0.1 |
|
| 270 |
+
| cosine_recall@1 | 0.9151 |
|
| 271 |
+
| cosine_recall@3 | 0.9995 |
|
| 272 |
+
| cosine_recall@5 | 1.0 |
|
| 273 |
+
| cosine_recall@10 | 1.0 |
|
| 274 |
+
| **cosine_ndcg@10** | **0.9682** |
|
| 275 |
+
| cosine_mrr@10 | 0.9569 |
|
| 276 |
+
| cosine_map@100 | 0.9569 |
|
| 277 |
+
|
| 278 |
+
<!--
|
| 279 |
+
## Bias, Risks and Limitations
|
| 280 |
+
|
| 281 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 282 |
+
-->
|
| 283 |
+
|
| 284 |
+
<!--
|
| 285 |
+
### Recommendations
|
| 286 |
+
|
| 287 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 288 |
+
-->
|
| 289 |
+
|
| 290 |
+
## Training Details
|
| 291 |
+
|
| 292 |
+
### Training Dataset
|
| 293 |
+
|
| 294 |
+
#### json
|
| 295 |
+
|
| 296 |
+
* Dataset: json
|
| 297 |
+
* Size: 35,280 training samples
|
| 298 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
| 299 |
+
* Approximate statistics based on the first 1000 samples:
|
| 300 |
+
| | anchor | positive |
|
| 301 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 302 |
+
| type | string | string |
|
| 303 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 30.93 tokens</li><li>max: 150 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 93.86 tokens</li><li>max: 197 tokens</li></ul> |
|
| 304 |
+
* Samples:
|
| 305 |
+
| anchor | positive |
|
| 306 |
+
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 307 |
+
| <code>• Principalmente por deficiencia dietética (p</code> | <code>Etiología de deficiencia de ácido fólico • Principalmente por deficiencia dietética (p.ej., en vegetarianos y alcohólicos) • Embarazo • Pérdidas cutáneas (p. ej., eczema o psoriasis) • Incremento de las pérdidas por diálisis • Anticonvulsivantes (fenitoína)</code> |
|
| 308 |
+
| <code>46,XX (en algunos casos 46,XY), lo cual ocurre por fertilización de un óvulo vacío por dos espermatozoides</code> | <code>Mola completa, cariotipo 46,XX (en algunos casos 46,XY), lo cual ocurre por fertilización de un óvulo vacío por dos espermatozoides. Por otro lado, la mola parcial tiene un cariotipo 69,XXY, lo cual ocurre por la fertilización de un óvulo por dos espermatozoides.</code> |
|
| 309 |
+
| <code>La enfermedad tiene dos picos de edad de los 20 a los 40 años y de los 60 a 80 años de edad, con mayor frecuencia en la etapa productiva y con una edad promedio de presentación a los 30 años</code> | <code>¿Cuál es la edad de presentación de la EII y la población más afectada? La enfermedad tiene dos picos de edad de los 20 a los 40 años y de los 60 a 80 años de edad, con mayor frecuencia en la etapa productiva y con una edad promedio de presentación a los 30 años. Se presenta más a menudo en países industrializados y zonas urbanas, con mayor prevalencia e incidencia en los países del norte de América y Europa; sin embargo, los judíos ashkenazí son los más afectados.</code> |
|
| 310 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 311 |
+
```json
|
| 312 |
+
{
|
| 313 |
+
"scale": 20.0,
|
| 314 |
+
"similarity_fct": "cos_sim",
|
| 315 |
+
"gather_across_devices": false
|
| 316 |
+
}
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
### Evaluation Dataset
|
| 320 |
+
|
| 321 |
+
#### json
|
| 322 |
+
|
| 323 |
+
* Dataset: json
|
| 324 |
+
* Size: 3,920 evaluation samples
|
| 325 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
| 326 |
+
* Approximate statistics based on the first 1000 samples:
|
| 327 |
+
| | anchor | positive |
|
| 328 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 329 |
+
| type | string | string |
|
| 330 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 29.98 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 89.54 tokens</li><li>max: 197 tokens</li></ul> |
|
| 331 |
+
* Samples:
|
| 332 |
+
| anchor | positive |
|
| 333 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 334 |
+
| <code>Focal (afecta a un único músculo o grupo muscular): blefaroespasmo, distonía oromandibular, distonía espasmódica o distonía, tortícolis o distonia cervical y espasmo del escribiente</code> | <code>¿Cuáles son los tipos de distonías? Focal (afecta a un único músculo o grupo muscular): blefaroespasmo, distonía oromandibular, distonía espasmódica o distonía, tortícolis o distonia cervical y espasmo del escribiente.Segmentaria (afecta a varios músculos de dos o más regiones contiguas): craneal (musculatura craneal y cervical y síndrome de Meige: blefaroespasmo con distonía oromandibular), braquiocervical (afectación de la musculatura de cuello y miembros superiores), axial (musculatura de cuello y tronco</code> |
|
| 335 |
+
| <code>¿Cuál es el diagnóstico diferencial de la cefalea en racimos?</code> | <code>¿Cuál es el diagnóstico diferencial de la cefalea en racimos? Incluye cefaleas primarias, como la cefalea hemicránea paroxística benigna y la neuralgia del trigérnino, al igual que cefaleas secundarias, como el sindrome de Tolosa-Hunt y la cefalea por arteritis temporal.</code> |
|
| 336 |
+
| <code>Tratamiento general de la dermatitis atópica</code> | <code>Tratamiento general de la dermatitis atópica Esteroides tópicos (no sistémicos), terapia PUVA, inmunomoduladores.</code> |
|
| 337 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 338 |
+
```json
|
| 339 |
+
{
|
| 340 |
+
"scale": 20.0,
|
| 341 |
+
"similarity_fct": "cos_sim",
|
| 342 |
+
"gather_across_devices": false
|
| 343 |
+
}
|
| 344 |
+
```
|
| 345 |
+
|
| 346 |
+
### Training Hyperparameters
|
| 347 |
+
#### Non-Default Hyperparameters
|
| 348 |
+
|
| 349 |
+
- `eval_strategy`: steps
|
| 350 |
+
- `per_device_train_batch_size`: 32
|
| 351 |
+
- `per_device_eval_batch_size`: 32
|
| 352 |
+
- `learning_rate`: 2e-05
|
| 353 |
+
- `num_train_epochs`: 2
|
| 354 |
+
- `warmup_ratio`: 0.1
|
| 355 |
+
- `fp16`: True
|
| 356 |
+
- `load_best_model_at_end`: True
|
| 357 |
+
|
| 358 |
+
#### All Hyperparameters
|
| 359 |
+
<details><summary>Click to expand</summary>
|
| 360 |
+
|
| 361 |
+
- `overwrite_output_dir`: False
|
| 362 |
+
- `do_predict`: False
|
| 363 |
+
- `eval_strategy`: steps
|
| 364 |
+
- `prediction_loss_only`: True
|
| 365 |
+
- `per_device_train_batch_size`: 32
|
| 366 |
+
- `per_device_eval_batch_size`: 32
|
| 367 |
+
- `per_gpu_train_batch_size`: None
|
| 368 |
+
- `per_gpu_eval_batch_size`: None
|
| 369 |
+
- `gradient_accumulation_steps`: 1
|
| 370 |
+
- `eval_accumulation_steps`: None
|
| 371 |
+
- `torch_empty_cache_steps`: None
|
| 372 |
+
- `learning_rate`: 2e-05
|
| 373 |
+
- `weight_decay`: 0.0
|
| 374 |
+
- `adam_beta1`: 0.9
|
| 375 |
+
- `adam_beta2`: 0.999
|
| 376 |
+
- `adam_epsilon`: 1e-08
|
| 377 |
+
- `max_grad_norm`: 1.0
|
| 378 |
+
- `num_train_epochs`: 2
|
| 379 |
+
- `max_steps`: -1
|
| 380 |
+
- `lr_scheduler_type`: linear
|
| 381 |
+
- `lr_scheduler_kwargs`: {}
|
| 382 |
+
- `warmup_ratio`: 0.1
|
| 383 |
+
- `warmup_steps`: 0
|
| 384 |
+
- `log_level`: passive
|
| 385 |
+
- `log_level_replica`: warning
|
| 386 |
+
- `log_on_each_node`: True
|
| 387 |
+
- `logging_nan_inf_filter`: True
|
| 388 |
+
- `save_safetensors`: True
|
| 389 |
+
- `save_on_each_node`: False
|
| 390 |
+
- `save_only_model`: False
|
| 391 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 392 |
+
- `no_cuda`: False
|
| 393 |
+
- `use_cpu`: False
|
| 394 |
+
- `use_mps_device`: False
|
| 395 |
+
- `seed`: 42
|
| 396 |
+
- `data_seed`: None
|
| 397 |
+
- `jit_mode_eval`: False
|
| 398 |
+
- `bf16`: False
|
| 399 |
+
- `fp16`: True
|
| 400 |
+
- `fp16_opt_level`: O1
|
| 401 |
+
- `half_precision_backend`: auto
|
| 402 |
+
- `bf16_full_eval`: False
|
| 403 |
+
- `fp16_full_eval`: False
|
| 404 |
+
- `tf32`: None
|
| 405 |
+
- `local_rank`: 0
|
| 406 |
+
- `ddp_backend`: None
|
| 407 |
+
- `tpu_num_cores`: None
|
| 408 |
+
- `tpu_metrics_debug`: False
|
| 409 |
+
- `debug`: []
|
| 410 |
+
- `dataloader_drop_last`: False
|
| 411 |
+
- `dataloader_num_workers`: 0
|
| 412 |
+
- `dataloader_prefetch_factor`: None
|
| 413 |
+
- `past_index`: -1
|
| 414 |
+
- `disable_tqdm`: False
|
| 415 |
+
- `remove_unused_columns`: True
|
| 416 |
+
- `label_names`: None
|
| 417 |
+
- `load_best_model_at_end`: True
|
| 418 |
+
- `ignore_data_skip`: False
|
| 419 |
+
- `fsdp`: []
|
| 420 |
+
- `fsdp_min_num_params`: 0
|
| 421 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 422 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 423 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 424 |
+
- `parallelism_config`: None
|
| 425 |
+
- `deepspeed`: None
|
| 426 |
+
- `label_smoothing_factor`: 0.0
|
| 427 |
+
- `optim`: adamw_torch
|
| 428 |
+
- `optim_args`: None
|
| 429 |
+
- `adafactor`: False
|
| 430 |
+
- `group_by_length`: False
|
| 431 |
+
- `length_column_name`: length
|
| 432 |
+
- `project`: huggingface
|
| 433 |
+
- `trackio_space_id`: trackio
|
| 434 |
+
- `ddp_find_unused_parameters`: None
|
| 435 |
+
- `ddp_bucket_cap_mb`: None
|
| 436 |
+
- `ddp_broadcast_buffers`: False
|
| 437 |
+
- `dataloader_pin_memory`: True
|
| 438 |
+
- `dataloader_persistent_workers`: False
|
| 439 |
+
- `skip_memory_metrics`: True
|
| 440 |
+
- `use_legacy_prediction_loop`: False
|
| 441 |
+
- `push_to_hub`: False
|
| 442 |
+
- `resume_from_checkpoint`: None
|
| 443 |
+
- `hub_model_id`: None
|
| 444 |
+
- `hub_strategy`: every_save
|
| 445 |
+
- `hub_private_repo`: None
|
| 446 |
+
- `hub_always_push`: False
|
| 447 |
+
- `hub_revision`: None
|
| 448 |
+
- `gradient_checkpointing`: False
|
| 449 |
+
- `gradient_checkpointing_kwargs`: None
|
| 450 |
+
- `include_inputs_for_metrics`: False
|
| 451 |
+
- `include_for_metrics`: []
|
| 452 |
+
- `eval_do_concat_batches`: True
|
| 453 |
+
- `fp16_backend`: auto
|
| 454 |
+
- `push_to_hub_model_id`: None
|
| 455 |
+
- `push_to_hub_organization`: None
|
| 456 |
+
- `mp_parameters`:
|
| 457 |
+
- `auto_find_batch_size`: False
|
| 458 |
+
- `full_determinism`: False
|
| 459 |
+
- `torchdynamo`: None
|
| 460 |
+
- `ray_scope`: last
|
| 461 |
+
- `ddp_timeout`: 1800
|
| 462 |
+
- `torch_compile`: False
|
| 463 |
+
- `torch_compile_backend`: None
|
| 464 |
+
- `torch_compile_mode`: None
|
| 465 |
+
- `include_tokens_per_second`: False
|
| 466 |
+
- `include_num_input_tokens_seen`: no
|
| 467 |
+
- `neftune_noise_alpha`: None
|
| 468 |
+
- `optim_target_modules`: None
|
| 469 |
+
- `batch_eval_metrics`: False
|
| 470 |
+
- `eval_on_start`: False
|
| 471 |
+
- `use_liger_kernel`: False
|
| 472 |
+
- `liger_kernel_config`: None
|
| 473 |
+
- `eval_use_gather_object`: False
|
| 474 |
+
- `average_tokens_across_devices`: True
|
| 475 |
+
- `prompts`: None
|
| 476 |
+
- `batch_sampler`: batch_sampler
|
| 477 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 478 |
+
- `router_mapping`: {}
|
| 479 |
+
- `learning_rate_mapping`: {}
|
| 480 |
+
|
| 481 |
+
</details>
|
| 482 |
+
|
| 483 |
+
### Training Logs
|
| 484 |
+
| Epoch | Step | Training Loss | Validation Loss | enarm-ir_cosine_ndcg@10 |
|
| 485 |
+
|:------:|:----:|:-------------:|:---------------:|:-----------------------:|
|
| 486 |
+
| 0.0009 | 1 | 0.3939 | - | - |
|
| 487 |
+
| 0.0907 | 100 | 0.1209 | - | - |
|
| 488 |
+
| 0.1813 | 200 | 0.014 | - | - |
|
| 489 |
+
| 0.2720 | 300 | 0.0057 | - | - |
|
| 490 |
+
| 0.3626 | 400 | 0.0047 | - | - |
|
| 491 |
+
| 0.4533 | 500 | 0.0048 | 0.0027 | 0.9629 |
|
| 492 |
+
| 0.5440 | 600 | 0.0038 | - | - |
|
| 493 |
+
| 0.6346 | 700 | 0.0041 | - | - |
|
| 494 |
+
| 0.7253 | 800 | 0.005 | - | - |
|
| 495 |
+
| 0.8160 | 900 | 0.003 | - | - |
|
| 496 |
+
| 0.9066 | 1000 | 0.0019 | 0.0023 | 0.9644 |
|
| 497 |
+
| 0.9973 | 1100 | 0.0021 | - | - |
|
| 498 |
+
| 1.0879 | 1200 | 0.0025 | - | - |
|
| 499 |
+
| 1.1786 | 1300 | 0.0025 | - | - |
|
| 500 |
+
| 1.2693 | 1400 | 0.0025 | - | - |
|
| 501 |
+
| 1.3599 | 1500 | 0.0031 | 0.0021 | 0.9682 |
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
### Framework Versions
|
| 505 |
+
- Python: 3.11.13
|
| 506 |
+
- Sentence Transformers: 5.1.1
|
| 507 |
+
- Transformers: 4.57.1
|
| 508 |
+
- PyTorch: 2.6.0+cu124
|
| 509 |
+
- Accelerate: 1.11.0
|
| 510 |
+
- Datasets: 4.0.0
|
| 511 |
+
- Tokenizers: 0.22.1
|
| 512 |
+
|
| 513 |
+
## Citation
|
| 514 |
+
|
| 515 |
+
### BibTeX
|
| 516 |
+
|
| 517 |
+
#### Sentence Transformers
|
| 518 |
+
```bibtex
|
| 519 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 520 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 521 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 522 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 523 |
+
month = "11",
|
| 524 |
+
year = "2019",
|
| 525 |
+
publisher = "Association for Computational Linguistics",
|
| 526 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 527 |
+
}
|
| 528 |
+
```
|
| 529 |
+
|
| 530 |
+
#### MultipleNegativesRankingLoss
|
| 531 |
+
```bibtex
|
| 532 |
+
@misc{henderson2017efficient,
|
| 533 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 534 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 535 |
+
year={2017},
|
| 536 |
+
eprint={1705.00652},
|
| 537 |
+
archivePrefix={arXiv},
|
| 538 |
+
primaryClass={cs.CL}
|
| 539 |
+
}
|
| 540 |
+
```
|
| 541 |
+
|
| 542 |
+
<!--
|
| 543 |
+
## Glossary
|
| 544 |
+
|
| 545 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 546 |
+
-->
|
| 547 |
+
|
| 548 |
+
<!--
|
| 549 |
+
## Model Card Authors
|
| 550 |
+
|
| 551 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 552 |
+
-->
|
| 553 |
+
|
| 554 |
+
<!--
|
| 555 |
+
## Model Card Contact
|
| 556 |
+
|
| 557 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 558 |
+
-->
|
checkpoint-1500/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MPNetModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "mpnet",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"pad_token_id": 1,
|
| 20 |
+
"relative_attention_num_buckets": 32,
|
| 21 |
+
"transformers_version": "4.57.1",
|
| 22 |
+
"vocab_size": 30527
|
| 23 |
+
}
|
checkpoint-1500/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.1",
|
| 4 |
+
"transformers": "4.57.1",
|
| 5 |
+
"pytorch": "2.6.0+cu124"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
checkpoint-1500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:571f9278950d04c5f38439e851971c56692b2a917856f9359039f7ae51b49d10
|
| 3 |
+
size 437967672
|
checkpoint-1500/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
checkpoint-1500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:590ca07de00cfc8621796d218273f00ce663b0c5641dc7d73ff31fdbd36e5a8f
|
| 3 |
+
size 871331770
|
checkpoint-1500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9435a795d5ff42bc90d448b321f80e0cc55a9b2d57d3378b3129955715e44a67
|
| 3 |
+
size 14244
|
checkpoint-1500/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10adb0b09091eaddc15edb42edba8192db65be1485724a45cd89098a4458e57d
|
| 3 |
+
size 988
|
checkpoint-1500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3ab5028a5a7a543f931b37f65347cb2b5e4af2d0c698d49315be20f9b0f94d9
|
| 3 |
+
size 1064
|
checkpoint-1500/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 384,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
checkpoint-1500/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "[UNK]",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
checkpoint-1500/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-1500/tokenizer_config.json
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"104": {
|
| 36 |
+
"content": "[UNK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"30526": {
|
| 44 |
+
"content": "<mask>",
|
| 45 |
+
"lstrip": true,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"bos_token": "<s>",
|
| 53 |
+
"clean_up_tokenization_spaces": false,
|
| 54 |
+
"cls_token": "<s>",
|
| 55 |
+
"do_lower_case": true,
|
| 56 |
+
"eos_token": "</s>",
|
| 57 |
+
"extra_special_tokens": {},
|
| 58 |
+
"mask_token": "<mask>",
|
| 59 |
+
"max_length": 128,
|
| 60 |
+
"model_max_length": 384,
|
| 61 |
+
"pad_to_multiple_of": null,
|
| 62 |
+
"pad_token": "<pad>",
|
| 63 |
+
"pad_token_type_id": 0,
|
| 64 |
+
"padding_side": "right",
|
| 65 |
+
"sep_token": "</s>",
|
| 66 |
+
"stride": 0,
|
| 67 |
+
"strip_accents": null,
|
| 68 |
+
"tokenize_chinese_chars": true,
|
| 69 |
+
"tokenizer_class": "MPNetTokenizer",
|
| 70 |
+
"truncation_side": "right",
|
| 71 |
+
"truncation_strategy": "longest_first",
|
| 72 |
+
"unk_token": "[UNK]"
|
| 73 |
+
}
|
checkpoint-1500/trainer_state.json
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1500,
|
| 3 |
+
"best_metric": 0.9681782842497483,
|
| 4 |
+
"best_model_checkpoint": "models/enarm-mpnet-v2\\checkpoint-1500",
|
| 5 |
+
"epoch": 1.3599274705349047,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 1500,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.0009066183136899365,
|
| 14 |
+
"grad_norm": 9.670710563659668,
|
| 15 |
+
"learning_rate": 0.0,
|
| 16 |
+
"loss": 0.3939,
|
| 17 |
+
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.09066183136899365,
|
| 21 |
+
"grad_norm": 0.3970909118652344,
|
| 22 |
+
"learning_rate": 8.95927601809955e-06,
|
| 23 |
+
"loss": 0.1209,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.1813236627379873,
|
| 28 |
+
"grad_norm": 0.46869727969169617,
|
| 29 |
+
"learning_rate": 1.8009049773755657e-05,
|
| 30 |
+
"loss": 0.014,
|
| 31 |
+
"step": 200
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.271985494106981,
|
| 35 |
+
"grad_norm": 0.05942446365952492,
|
| 36 |
+
"learning_rate": 1.9214105793450883e-05,
|
| 37 |
+
"loss": 0.0057,
|
| 38 |
+
"step": 300
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.3626473254759746,
|
| 42 |
+
"grad_norm": 0.1977270245552063,
|
| 43 |
+
"learning_rate": 1.820654911838791e-05,
|
| 44 |
+
"loss": 0.0047,
|
| 45 |
+
"step": 400
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.45330915684496825,
|
| 49 |
+
"grad_norm": 0.05510423332452774,
|
| 50 |
+
"learning_rate": 1.720906801007557e-05,
|
| 51 |
+
"loss": 0.0048,
|
| 52 |
+
"step": 500
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.45330915684496825,
|
| 56 |
+
"eval_enarm-ir_cosine_accuracy@1": 0.9020408163265307,
|
| 57 |
+
"eval_enarm-ir_cosine_accuracy@10": 1.0,
|
| 58 |
+
"eval_enarm-ir_cosine_accuracy@3": 0.9987244897959183,
|
| 59 |
+
"eval_enarm-ir_cosine_accuracy@5": 1.0,
|
| 60 |
+
"eval_enarm-ir_cosine_map@100": 0.9498681972789115,
|
| 61 |
+
"eval_enarm-ir_cosine_mrr@10": 0.9498681972789118,
|
| 62 |
+
"eval_enarm-ir_cosine_ndcg@10": 0.9629337873914321,
|
| 63 |
+
"eval_enarm-ir_cosine_precision@1": 0.9020408163265307,
|
| 64 |
+
"eval_enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 65 |
+
"eval_enarm-ir_cosine_precision@3": 0.3329081632653061,
|
| 66 |
+
"eval_enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 67 |
+
"eval_enarm-ir_cosine_recall@1": 0.9020408163265307,
|
| 68 |
+
"eval_enarm-ir_cosine_recall@10": 1.0,
|
| 69 |
+
"eval_enarm-ir_cosine_recall@3": 0.9987244897959183,
|
| 70 |
+
"eval_enarm-ir_cosine_recall@5": 1.0,
|
| 71 |
+
"eval_loss": 0.002733997767791152,
|
| 72 |
+
"eval_runtime": 69.6943,
|
| 73 |
+
"eval_samples_per_second": 56.246,
|
| 74 |
+
"eval_steps_per_second": 1.765,
|
| 75 |
+
"step": 500
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"epoch": 0.543970988213962,
|
| 79 |
+
"grad_norm": 0.07771875709295273,
|
| 80 |
+
"learning_rate": 1.6201511335012597e-05,
|
| 81 |
+
"loss": 0.0038,
|
| 82 |
+
"step": 600
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"epoch": 0.6346328195829556,
|
| 86 |
+
"grad_norm": 0.027955936267971992,
|
| 87 |
+
"learning_rate": 1.5193954659949624e-05,
|
| 88 |
+
"loss": 0.0041,
|
| 89 |
+
"step": 700
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"epoch": 0.7252946509519492,
|
| 93 |
+
"grad_norm": 0.009821675717830658,
|
| 94 |
+
"learning_rate": 1.418639798488665e-05,
|
| 95 |
+
"loss": 0.005,
|
| 96 |
+
"step": 800
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"epoch": 0.8159564823209429,
|
| 100 |
+
"grad_norm": 0.7323502898216248,
|
| 101 |
+
"learning_rate": 1.3178841309823678e-05,
|
| 102 |
+
"loss": 0.003,
|
| 103 |
+
"step": 900
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"epoch": 0.9066183136899365,
|
| 107 |
+
"grad_norm": 0.9905967712402344,
|
| 108 |
+
"learning_rate": 1.2171284634760707e-05,
|
| 109 |
+
"loss": 0.0019,
|
| 110 |
+
"step": 1000
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"epoch": 0.9066183136899365,
|
| 114 |
+
"eval_enarm-ir_cosine_accuracy@1": 0.9056122448979592,
|
| 115 |
+
"eval_enarm-ir_cosine_accuracy@10": 1.0,
|
| 116 |
+
"eval_enarm-ir_cosine_accuracy@3": 0.999234693877551,
|
| 117 |
+
"eval_enarm-ir_cosine_accuracy@5": 1.0,
|
| 118 |
+
"eval_enarm-ir_cosine_map@100": 0.9518069727891155,
|
| 119 |
+
"eval_enarm-ir_cosine_mrr@10": 0.951806972789116,
|
| 120 |
+
"eval_enarm-ir_cosine_ndcg@10": 0.9643764244686448,
|
| 121 |
+
"eval_enarm-ir_cosine_precision@1": 0.9056122448979592,
|
| 122 |
+
"eval_enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 123 |
+
"eval_enarm-ir_cosine_precision@3": 0.33307823129251696,
|
| 124 |
+
"eval_enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 125 |
+
"eval_enarm-ir_cosine_recall@1": 0.9056122448979592,
|
| 126 |
+
"eval_enarm-ir_cosine_recall@10": 1.0,
|
| 127 |
+
"eval_enarm-ir_cosine_recall@3": 0.999234693877551,
|
| 128 |
+
"eval_enarm-ir_cosine_recall@5": 1.0,
|
| 129 |
+
"eval_loss": 0.002290277509018779,
|
| 130 |
+
"eval_runtime": 75.2569,
|
| 131 |
+
"eval_samples_per_second": 52.088,
|
| 132 |
+
"eval_steps_per_second": 1.634,
|
| 133 |
+
"step": 1000
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"epoch": 0.9972801450589301,
|
| 137 |
+
"grad_norm": 0.05818323418498039,
|
| 138 |
+
"learning_rate": 1.1163727959697735e-05,
|
| 139 |
+
"loss": 0.0021,
|
| 140 |
+
"step": 1100
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"epoch": 1.087941976427924,
|
| 144 |
+
"grad_norm": 0.12362250685691833,
|
| 145 |
+
"learning_rate": 1.0156171284634761e-05,
|
| 146 |
+
"loss": 0.0025,
|
| 147 |
+
"step": 1200
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"epoch": 1.1786038077969174,
|
| 151 |
+
"grad_norm": 0.02448936738073826,
|
| 152 |
+
"learning_rate": 9.14861460957179e-06,
|
| 153 |
+
"loss": 0.0025,
|
| 154 |
+
"step": 1300
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 1.2692656391659112,
|
| 158 |
+
"grad_norm": 0.2795725166797638,
|
| 159 |
+
"learning_rate": 8.141057934508818e-06,
|
| 160 |
+
"loss": 0.0025,
|
| 161 |
+
"step": 1400
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 1.3599274705349047,
|
| 165 |
+
"grad_norm": 0.028418799862265587,
|
| 166 |
+
"learning_rate": 7.133501259445844e-06,
|
| 167 |
+
"loss": 0.0031,
|
| 168 |
+
"step": 1500
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"epoch": 1.3599274705349047,
|
| 172 |
+
"eval_enarm-ir_cosine_accuracy@1": 0.9150510204081632,
|
| 173 |
+
"eval_enarm-ir_cosine_accuracy@10": 1.0,
|
| 174 |
+
"eval_enarm-ir_cosine_accuracy@3": 0.9994897959183674,
|
| 175 |
+
"eval_enarm-ir_cosine_accuracy@5": 1.0,
|
| 176 |
+
"eval_enarm-ir_cosine_map@100": 0.9569302721088435,
|
| 177 |
+
"eval_enarm-ir_cosine_mrr@10": 0.9569302721088436,
|
| 178 |
+
"eval_enarm-ir_cosine_ndcg@10": 0.9681782842497483,
|
| 179 |
+
"eval_enarm-ir_cosine_precision@1": 0.9150510204081632,
|
| 180 |
+
"eval_enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 181 |
+
"eval_enarm-ir_cosine_precision@3": 0.3331632653061224,
|
| 182 |
+
"eval_enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 183 |
+
"eval_enarm-ir_cosine_recall@1": 0.9150510204081632,
|
| 184 |
+
"eval_enarm-ir_cosine_recall@10": 1.0,
|
| 185 |
+
"eval_enarm-ir_cosine_recall@3": 0.9994897959183674,
|
| 186 |
+
"eval_enarm-ir_cosine_recall@5": 1.0,
|
| 187 |
+
"eval_loss": 0.0020953374914824963,
|
| 188 |
+
"eval_runtime": 65.9618,
|
| 189 |
+
"eval_samples_per_second": 59.428,
|
| 190 |
+
"eval_steps_per_second": 1.865,
|
| 191 |
+
"step": 1500
|
| 192 |
+
}
|
| 193 |
+
],
|
| 194 |
+
"logging_steps": 100,
|
| 195 |
+
"max_steps": 2206,
|
| 196 |
+
"num_input_tokens_seen": 0,
|
| 197 |
+
"num_train_epochs": 2,
|
| 198 |
+
"save_steps": 500,
|
| 199 |
+
"stateful_callbacks": {
|
| 200 |
+
"TrainerControl": {
|
| 201 |
+
"args": {
|
| 202 |
+
"should_epoch_stop": false,
|
| 203 |
+
"should_evaluate": false,
|
| 204 |
+
"should_log": false,
|
| 205 |
+
"should_save": true,
|
| 206 |
+
"should_training_stop": false
|
| 207 |
+
},
|
| 208 |
+
"attributes": {}
|
| 209 |
+
}
|
| 210 |
+
},
|
| 211 |
+
"total_flos": 0.0,
|
| 212 |
+
"train_batch_size": 32,
|
| 213 |
+
"trial_name": null,
|
| 214 |
+
"trial_params": null
|
| 215 |
+
}
|
checkpoint-1500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46c00947826242ae61ca80531e511dbebe62d6dbf43d17a3c6b8a55f3ed6a086
|
| 3 |
+
size 5688
|
checkpoint-1500/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-2206/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
checkpoint-2206/README.md
ADDED
|
@@ -0,0 +1,565 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:35280
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: sentence-transformers/all-mpnet-base-v2
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: ¿Cuál es la utilidad de la valoración ecográfica del cérvix?
|
| 13 |
+
sentences:
|
| 14 |
+
- Papel del rituximab en pacientes con linfoma no Hodgkin En algunos pacientes con
|
| 15 |
+
linfoma No Hodgkin existe sobreexpresión del antígeno CD20. En estos pacientes
|
| 16 |
+
se deberá utilizar un anticuerpo anti CD20 (rituximab), el cual ha demostrado
|
| 17 |
+
tener toxicidad limitada y mayor supervivencia.
|
| 18 |
+
- '¿Cuál es la característica del material de sutura absorbible de origen sintético?
|
| 19 |
+
En general se absorbe más despacio que el catgut y se hace por hidrólisis lenta
|
| 20 |
+
y fagocitosis, es prácticamente inerte pero sí produce reacción tisular con invasión
|
| 21 |
+
de macrófagos. En términos generales pierde 50% de su fuerza de tensión en menos
|
| 22 |
+
de 25 días. El ácido poliglicólico pierde su fuerza de tensión en 15 días y permanece
|
| 23 |
+
en los tejidos durante alrededor de 80 días; la poliglactina 910 permanece en
|
| 24 |
+
los tejidos durante '
|
| 25 |
+
- ¿Cuál es la utilidad de la valoración ecográfica del cérvix? No está indicada
|
| 26 |
+
en todas las mujeres embarazadas sino con mayor precisión sólo en aquellas con
|
| 27 |
+
mayor riesgo de parto pretérmino o bien aquellas con sospecha de cérvix acortado.
|
| 28 |
+
La medición de la longitud cervical, si es superior a 30 mm, tiene un alto valor
|
| 29 |
+
predictivo negativo. La presencia de fenómeno de embudo o insinuación de membranas
|
| 30 |
+
en el canal cervical se asocia con elevada probabilidad de parto pretérmino.
|
| 31 |
+
- source_sentence: Inmunodeficiencia congénita más común, en la cual los pacientes
|
| 32 |
+
presentan infecciones sinopulmonares y gastrointestinales
|
| 33 |
+
sentences:
|
| 34 |
+
- Causas de miocardiopatía dilatada Alcoholismo, beri beri, cocaína, infección por
|
| 35 |
+
Coxsackie B, enfermedad de Chagas, doxorrubicina, embarazo
|
| 36 |
+
- Mecanismo de acción de los barbitúricos Facilitan la acción de los canales GABA,
|
| 37 |
+
al aumentar la duración de la apertura del canal de cloro, con lo cual disminuye
|
| 38 |
+
la despolarización
|
| 39 |
+
- Deficiencia selectiva de IgA Inmunodeficiencia congénita más común, en la cual
|
| 40 |
+
los pacientes presentan infecciones sinopulmonares y gastrointestinales. El tratamiento
|
| 41 |
+
es con antibióticos y la administración de inmunoglobulinas se contraindica.
|
| 42 |
+
- source_sentence: Bloqueo de los receptores a serotonina 5-HT2, alfa adrenérgicos,
|
| 43 |
+
H1 histaminérgicos, y dopaminérgicos
|
| 44 |
+
sentences:
|
| 45 |
+
- Etiología de la anemia sideroblástica • Causas hereditarias. Éstas ocurren ya
|
| 46 |
+
sea por un defecto en la sintasa de ácido aminolevulínico o una anormalidad en
|
| 47 |
+
el metabolismo de la vitamina B6. • Causas adquiridas. Ocurren por fármacos como
|
| 48 |
+
el cloranfenicol, la isoniazida y el alcohol. La intoxicación por hierro puede
|
| 49 |
+
ocasionar anemia sideroblástica, así como los síndromes mielodisplásicos y la
|
| 50 |
+
anemia refractaria (estos dos últimos puede progresar a leucemia mieloide aguda
|
| 51 |
+
en un pequeño porcentaje de paciente
|
| 52 |
+
- Tipo de fractura craneal más frecuente ocasionada durante el trabajo de parto
|
| 53 |
+
Fractura craneal lineal
|
| 54 |
+
- Mecanismo de acción de los antipsicóticos atípicos Bloqueo de los receptores a
|
| 55 |
+
serotonina 5-HT2, alfa adrenérgicos, H1 histaminérgicos, y dopaminérgicos. Incluyen
|
| 56 |
+
clozapina, olanzapina, risperidona, aripiprazol, quetiapina, ziprasidona.
|
| 57 |
+
- source_sentence: 'Pediatría: ¿Cómo son las crisis convulsivas febriles?'
|
| 58 |
+
sentences:
|
| 59 |
+
- Utilidad del cultivo de citomegalovirus de muestra de orina en caso de sospecha
|
| 60 |
+
de infección congénita Si es negativo se excluye la posibilidad de infección congénita
|
| 61 |
+
por citomegalovirus
|
| 62 |
+
- ¿Qué es la displasia broncopulmonar (DBP)? Es una enfermedad pulmonar crónica
|
| 63 |
+
caracterizada por dependencia de oxígeno por un periodo mayor a 28 días. Se produce
|
| 64 |
+
como consecuencia de la exposición del pulmón inmaduro del prematuro a noxas ambientales
|
| 65 |
+
( oxígeno, infecciones, barotrauma, volutrauma).
|
| 66 |
+
- ¿Cómo son las crisis convulsivas febriles? Pueden ser simples (90% de los casos)
|
| 67 |
+
con convulsiones generalizadas mayormente clónicas, de duración menor a 15 minutos,
|
| 68 |
+
simétricas y ocurren en pacientes dentro del grupo de edad de riesgo, sin recurrencia
|
| 69 |
+
en 24 h. También pueden ser complejas o atípicas (menos de 10%), las cuales son
|
| 70 |
+
focales o generalizadas, de duración mayor a quince minutos, múltiples o recurrentes
|
| 71 |
+
en 24 h.
|
| 72 |
+
- source_sentence: Complicaciones de la anorexia nerviosa
|
| 73 |
+
sentences:
|
| 74 |
+
- Complicaciones de la anorexia nerviosa • Prolapso de la válvula mitral. • Arritmias. •
|
| 75 |
+
Hipotensión. • Bradicardia. • Amenorrea (ausencia de menstruación en tres ciclos
|
| 76 |
+
consecutivos). • Nefrolitiasis. • Osteoporosis. • Fracturas múltiples por estrés. •
|
| 77 |
+
Pancitopenia. • Anomalías tiroideas. • Mortalidad por complicaciones o suicidio
|
| 78 |
+
>10%.
|
| 79 |
+
- Hallazgo clásico del Pityrosporum orbiculare (pitiriasis versicolor) en la preparación
|
| 80 |
+
de KOH Espagueti a la boloñesa (hifas + esporas)
|
| 81 |
+
- Indicación de cateterismo en pacientes con angina inestable o infarto miocárdico
|
| 82 |
+
sin elevación del segmento ST Pacientes con puntal TIMI ≥ 3 Pacientes con dolor
|
| 83 |
+
torácico refractario a tratamiento Pacientes con elevación de troponinas Pacientes
|
| 84 |
+
con depresión del segmento ST > 1 mm
|
| 85 |
+
pipeline_tag: sentence-similarity
|
| 86 |
+
library_name: sentence-transformers
|
| 87 |
+
metrics:
|
| 88 |
+
- cosine_accuracy@1
|
| 89 |
+
- cosine_accuracy@3
|
| 90 |
+
- cosine_accuracy@5
|
| 91 |
+
- cosine_accuracy@10
|
| 92 |
+
- cosine_precision@1
|
| 93 |
+
- cosine_precision@3
|
| 94 |
+
- cosine_precision@5
|
| 95 |
+
- cosine_precision@10
|
| 96 |
+
- cosine_recall@1
|
| 97 |
+
- cosine_recall@3
|
| 98 |
+
- cosine_recall@5
|
| 99 |
+
- cosine_recall@10
|
| 100 |
+
- cosine_ndcg@10
|
| 101 |
+
- cosine_mrr@10
|
| 102 |
+
- cosine_map@100
|
| 103 |
+
model-index:
|
| 104 |
+
- name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
|
| 105 |
+
results:
|
| 106 |
+
- task:
|
| 107 |
+
type: information-retrieval
|
| 108 |
+
name: Information Retrieval
|
| 109 |
+
dataset:
|
| 110 |
+
name: enarm ir
|
| 111 |
+
type: enarm-ir
|
| 112 |
+
metrics:
|
| 113 |
+
- type: cosine_accuracy@1
|
| 114 |
+
value: 0.9112244897959184
|
| 115 |
+
name: Cosine Accuracy@1
|
| 116 |
+
- type: cosine_accuracy@3
|
| 117 |
+
value: 0.999234693877551
|
| 118 |
+
name: Cosine Accuracy@3
|
| 119 |
+
- type: cosine_accuracy@5
|
| 120 |
+
value: 1.0
|
| 121 |
+
name: Cosine Accuracy@5
|
| 122 |
+
- type: cosine_accuracy@10
|
| 123 |
+
value: 1.0
|
| 124 |
+
name: Cosine Accuracy@10
|
| 125 |
+
- type: cosine_precision@1
|
| 126 |
+
value: 0.9112244897959184
|
| 127 |
+
name: Cosine Precision@1
|
| 128 |
+
- type: cosine_precision@3
|
| 129 |
+
value: 0.33307823129251696
|
| 130 |
+
name: Cosine Precision@3
|
| 131 |
+
- type: cosine_precision@5
|
| 132 |
+
value: 0.20000000000000007
|
| 133 |
+
name: Cosine Precision@5
|
| 134 |
+
- type: cosine_precision@10
|
| 135 |
+
value: 0.10000000000000003
|
| 136 |
+
name: Cosine Precision@10
|
| 137 |
+
- type: cosine_recall@1
|
| 138 |
+
value: 0.9112244897959184
|
| 139 |
+
name: Cosine Recall@1
|
| 140 |
+
- type: cosine_recall@3
|
| 141 |
+
value: 0.999234693877551
|
| 142 |
+
name: Cosine Recall@3
|
| 143 |
+
- type: cosine_recall@5
|
| 144 |
+
value: 1.0
|
| 145 |
+
name: Cosine Recall@5
|
| 146 |
+
- type: cosine_recall@10
|
| 147 |
+
value: 1.0
|
| 148 |
+
name: Cosine Recall@10
|
| 149 |
+
- type: cosine_ndcg@10
|
| 150 |
+
value: 0.9666147393128501
|
| 151 |
+
name: Cosine Ndcg@10
|
| 152 |
+
- type: cosine_mrr@10
|
| 153 |
+
value: 0.954825680272109
|
| 154 |
+
name: Cosine Mrr@10
|
| 155 |
+
- type: cosine_map@100
|
| 156 |
+
value: 0.9548256802721089
|
| 157 |
+
name: Cosine Map@100
|
| 158 |
+
---
|
| 159 |
+
|
| 160 |
+
# SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
|
| 161 |
+
|
| 162 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on the json dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 163 |
+
|
| 164 |
+
## Model Details
|
| 165 |
+
|
| 166 |
+
### Model Description
|
| 167 |
+
- **Model Type:** Sentence Transformer
|
| 168 |
+
- **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision e8c3b32edf5434bc2275fc9bab85f82640a19130 -->
|
| 169 |
+
- **Maximum Sequence Length:** 384 tokens
|
| 170 |
+
- **Output Dimensionality:** 768 dimensions
|
| 171 |
+
- **Similarity Function:** Cosine Similarity
|
| 172 |
+
- **Training Dataset:**
|
| 173 |
+
- json
|
| 174 |
+
<!-- - **Language:** Unknown -->
|
| 175 |
+
<!-- - **License:** Unknown -->
|
| 176 |
+
|
| 177 |
+
### Model Sources
|
| 178 |
+
|
| 179 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 180 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 181 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 182 |
+
|
| 183 |
+
### Full Model Architecture
|
| 184 |
+
|
| 185 |
+
```
|
| 186 |
+
SentenceTransformer(
|
| 187 |
+
(0): Transformer({'max_seq_length': 384, 'do_lower_case': False, 'architecture': 'MPNetModel'})
|
| 188 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 189 |
+
(2): Normalize()
|
| 190 |
+
)
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
## Usage
|
| 194 |
+
|
| 195 |
+
### Direct Usage (Sentence Transformers)
|
| 196 |
+
|
| 197 |
+
First install the Sentence Transformers library:
|
| 198 |
+
|
| 199 |
+
```bash
|
| 200 |
+
pip install -U sentence-transformers
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
Then you can load this model and run inference.
|
| 204 |
+
```python
|
| 205 |
+
from sentence_transformers import SentenceTransformer
|
| 206 |
+
|
| 207 |
+
# Download from the 🤗 Hub
|
| 208 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 209 |
+
# Run inference
|
| 210 |
+
sentences = [
|
| 211 |
+
'Complicaciones de la anorexia nerviosa',
|
| 212 |
+
'Complicaciones de la anorexia nerviosa • Prolapso de la válvula mitral. • Arritmias. • Hipotensión. • Bradicardia. • Amenorrea (ausencia de menstruación en tres ciclos consecutivos). • Nefrolitiasis. • Osteoporosis. • Fracturas múltiples por estrés. • Pancitopenia. • Anomalías tiroideas. • Mortalidad por complicaciones o suicidio >10%.',
|
| 213 |
+
'Indicación de cateterismo en pacientes con angina inestable o infarto miocárdico sin elevación del segmento ST Pacientes con puntal TIMI ≥ 3 Pacientes con dolor torácico refractario a tratamiento Pacientes con elevación de troponinas Pacientes con depresión del segmento ST > 1 mm',
|
| 214 |
+
]
|
| 215 |
+
embeddings = model.encode(sentences)
|
| 216 |
+
print(embeddings.shape)
|
| 217 |
+
# [3, 768]
|
| 218 |
+
|
| 219 |
+
# Get the similarity scores for the embeddings
|
| 220 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 221 |
+
print(similarities)
|
| 222 |
+
# tensor([[ 1.0000, 0.7277, -0.0115],
|
| 223 |
+
# [ 0.7277, 1.0000, -0.0359],
|
| 224 |
+
# [-0.0115, -0.0359, 1.0000]])
|
| 225 |
+
```
|
| 226 |
+
|
| 227 |
+
<!--
|
| 228 |
+
### Direct Usage (Transformers)
|
| 229 |
+
|
| 230 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 231 |
+
|
| 232 |
+
</details>
|
| 233 |
+
-->
|
| 234 |
+
|
| 235 |
+
<!--
|
| 236 |
+
### Downstream Usage (Sentence Transformers)
|
| 237 |
+
|
| 238 |
+
You can finetune this model on your own dataset.
|
| 239 |
+
|
| 240 |
+
<details><summary>Click to expand</summary>
|
| 241 |
+
|
| 242 |
+
</details>
|
| 243 |
+
-->
|
| 244 |
+
|
| 245 |
+
<!--
|
| 246 |
+
### Out-of-Scope Use
|
| 247 |
+
|
| 248 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 249 |
+
-->
|
| 250 |
+
|
| 251 |
+
## Evaluation
|
| 252 |
+
|
| 253 |
+
### Metrics
|
| 254 |
+
|
| 255 |
+
#### Information Retrieval
|
| 256 |
+
|
| 257 |
+
* Dataset: `enarm-ir`
|
| 258 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
| 259 |
+
|
| 260 |
+
| Metric | Value |
|
| 261 |
+
|:--------------------|:-----------|
|
| 262 |
+
| cosine_accuracy@1 | 0.9112 |
|
| 263 |
+
| cosine_accuracy@3 | 0.9992 |
|
| 264 |
+
| cosine_accuracy@5 | 1.0 |
|
| 265 |
+
| cosine_accuracy@10 | 1.0 |
|
| 266 |
+
| cosine_precision@1 | 0.9112 |
|
| 267 |
+
| cosine_precision@3 | 0.3331 |
|
| 268 |
+
| cosine_precision@5 | 0.2 |
|
| 269 |
+
| cosine_precision@10 | 0.1 |
|
| 270 |
+
| cosine_recall@1 | 0.9112 |
|
| 271 |
+
| cosine_recall@3 | 0.9992 |
|
| 272 |
+
| cosine_recall@5 | 1.0 |
|
| 273 |
+
| cosine_recall@10 | 1.0 |
|
| 274 |
+
| **cosine_ndcg@10** | **0.9666** |
|
| 275 |
+
| cosine_mrr@10 | 0.9548 |
|
| 276 |
+
| cosine_map@100 | 0.9548 |
|
| 277 |
+
|
| 278 |
+
<!--
|
| 279 |
+
## Bias, Risks and Limitations
|
| 280 |
+
|
| 281 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 282 |
+
-->
|
| 283 |
+
|
| 284 |
+
<!--
|
| 285 |
+
### Recommendations
|
| 286 |
+
|
| 287 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 288 |
+
-->
|
| 289 |
+
|
| 290 |
+
## Training Details
|
| 291 |
+
|
| 292 |
+
### Training Dataset
|
| 293 |
+
|
| 294 |
+
#### json
|
| 295 |
+
|
| 296 |
+
* Dataset: json
|
| 297 |
+
* Size: 35,280 training samples
|
| 298 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
| 299 |
+
* Approximate statistics based on the first 1000 samples:
|
| 300 |
+
| | anchor | positive |
|
| 301 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 302 |
+
| type | string | string |
|
| 303 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 30.93 tokens</li><li>max: 150 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 93.86 tokens</li><li>max: 197 tokens</li></ul> |
|
| 304 |
+
* Samples:
|
| 305 |
+
| anchor | positive |
|
| 306 |
+
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 307 |
+
| <code>• Principalmente por deficiencia dietética (p</code> | <code>Etiología de deficiencia de ácido fólico • Principalmente por deficiencia dietética (p.ej., en vegetarianos y alcohólicos) • Embarazo • Pérdidas cutáneas (p. ej., eczema o psoriasis) • Incremento de las pérdidas por diálisis • Anticonvulsivantes (fenitoína)</code> |
|
| 308 |
+
| <code>46,XX (en algunos casos 46,XY), lo cual ocurre por fertilización de un óvulo vacío por dos espermatozoides</code> | <code>Mola completa, cariotipo 46,XX (en algunos casos 46,XY), lo cual ocurre por fertilización de un óvulo vacío por dos espermatozoides. Por otro lado, la mola parcial tiene un cariotipo 69,XXY, lo cual ocurre por la fertilización de un óvulo por dos espermatozoides.</code> |
|
| 309 |
+
| <code>La enfermedad tiene dos picos de edad de los 20 a los 40 años y de los 60 a 80 años de edad, con mayor frecuencia en la etapa productiva y con una edad promedio de presentación a los 30 años</code> | <code>¿Cuál es la edad de presentación de la EII y la población más afectada? La enfermedad tiene dos picos de edad de los 20 a los 40 años y de los 60 a 80 años de edad, con mayor frecuencia en la etapa productiva y con una edad promedio de presentación a los 30 años. Se presenta más a menudo en países industrializados y zonas urbanas, con mayor prevalencia e incidencia en los países del norte de América y Europa; sin embargo, los judíos ashkenazí son los más afectados.</code> |
|
| 310 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 311 |
+
```json
|
| 312 |
+
{
|
| 313 |
+
"scale": 20.0,
|
| 314 |
+
"similarity_fct": "cos_sim",
|
| 315 |
+
"gather_across_devices": false
|
| 316 |
+
}
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
### Evaluation Dataset
|
| 320 |
+
|
| 321 |
+
#### json
|
| 322 |
+
|
| 323 |
+
* Dataset: json
|
| 324 |
+
* Size: 3,920 evaluation samples
|
| 325 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
| 326 |
+
* Approximate statistics based on the first 1000 samples:
|
| 327 |
+
| | anchor | positive |
|
| 328 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 329 |
+
| type | string | string |
|
| 330 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 29.98 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 89.54 tokens</li><li>max: 197 tokens</li></ul> |
|
| 331 |
+
* Samples:
|
| 332 |
+
| anchor | positive |
|
| 333 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 334 |
+
| <code>Focal (afecta a un único músculo o grupo muscular): blefaroespasmo, distonía oromandibular, distonía espasmódica o distonía, tortícolis o distonia cervical y espasmo del escribiente</code> | <code>¿Cuáles son los tipos de distonías? Focal (afecta a un único músculo o grupo muscular): blefaroespasmo, distonía oromandibular, distonía espasmódica o distonía, tortícolis o distonia cervical y espasmo del escribiente.Segmentaria (afecta a varios músculos de dos o más regiones contiguas): craneal (musculatura craneal y cervical y síndrome de Meige: blefaroespasmo con distonía oromandibular), braquiocervical (afectación de la musculatura de cuello y miembros superiores), axial (musculatura de cuello y tronco</code> |
|
| 335 |
+
| <code>¿Cuál es el diagnóstico diferencial de la cefalea en racimos?</code> | <code>¿Cuál es el diagnóstico diferencial de la cefalea en racimos? Incluye cefaleas primarias, como la cefalea hemicránea paroxística benigna y la neuralgia del trigérnino, al igual que cefaleas secundarias, como el sindrome de Tolosa-Hunt y la cefalea por arteritis temporal.</code> |
|
| 336 |
+
| <code>Tratamiento general de la dermatitis atópica</code> | <code>Tratamiento general de la dermatitis atópica Esteroides tópicos (no sistémicos), terapia PUVA, inmunomoduladores.</code> |
|
| 337 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 338 |
+
```json
|
| 339 |
+
{
|
| 340 |
+
"scale": 20.0,
|
| 341 |
+
"similarity_fct": "cos_sim",
|
| 342 |
+
"gather_across_devices": false
|
| 343 |
+
}
|
| 344 |
+
```
|
| 345 |
+
|
| 346 |
+
### Training Hyperparameters
|
| 347 |
+
#### Non-Default Hyperparameters
|
| 348 |
+
|
| 349 |
+
- `eval_strategy`: steps
|
| 350 |
+
- `per_device_train_batch_size`: 32
|
| 351 |
+
- `per_device_eval_batch_size`: 32
|
| 352 |
+
- `learning_rate`: 2e-05
|
| 353 |
+
- `num_train_epochs`: 2
|
| 354 |
+
- `warmup_ratio`: 0.1
|
| 355 |
+
- `fp16`: True
|
| 356 |
+
- `load_best_model_at_end`: True
|
| 357 |
+
|
| 358 |
+
#### All Hyperparameters
|
| 359 |
+
<details><summary>Click to expand</summary>
|
| 360 |
+
|
| 361 |
+
- `overwrite_output_dir`: False
|
| 362 |
+
- `do_predict`: False
|
| 363 |
+
- `eval_strategy`: steps
|
| 364 |
+
- `prediction_loss_only`: True
|
| 365 |
+
- `per_device_train_batch_size`: 32
|
| 366 |
+
- `per_device_eval_batch_size`: 32
|
| 367 |
+
- `per_gpu_train_batch_size`: None
|
| 368 |
+
- `per_gpu_eval_batch_size`: None
|
| 369 |
+
- `gradient_accumulation_steps`: 1
|
| 370 |
+
- `eval_accumulation_steps`: None
|
| 371 |
+
- `torch_empty_cache_steps`: None
|
| 372 |
+
- `learning_rate`: 2e-05
|
| 373 |
+
- `weight_decay`: 0.0
|
| 374 |
+
- `adam_beta1`: 0.9
|
| 375 |
+
- `adam_beta2`: 0.999
|
| 376 |
+
- `adam_epsilon`: 1e-08
|
| 377 |
+
- `max_grad_norm`: 1.0
|
| 378 |
+
- `num_train_epochs`: 2
|
| 379 |
+
- `max_steps`: -1
|
| 380 |
+
- `lr_scheduler_type`: linear
|
| 381 |
+
- `lr_scheduler_kwargs`: {}
|
| 382 |
+
- `warmup_ratio`: 0.1
|
| 383 |
+
- `warmup_steps`: 0
|
| 384 |
+
- `log_level`: passive
|
| 385 |
+
- `log_level_replica`: warning
|
| 386 |
+
- `log_on_each_node`: True
|
| 387 |
+
- `logging_nan_inf_filter`: True
|
| 388 |
+
- `save_safetensors`: True
|
| 389 |
+
- `save_on_each_node`: False
|
| 390 |
+
- `save_only_model`: False
|
| 391 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 392 |
+
- `no_cuda`: False
|
| 393 |
+
- `use_cpu`: False
|
| 394 |
+
- `use_mps_device`: False
|
| 395 |
+
- `seed`: 42
|
| 396 |
+
- `data_seed`: None
|
| 397 |
+
- `jit_mode_eval`: False
|
| 398 |
+
- `bf16`: False
|
| 399 |
+
- `fp16`: True
|
| 400 |
+
- `fp16_opt_level`: O1
|
| 401 |
+
- `half_precision_backend`: auto
|
| 402 |
+
- `bf16_full_eval`: False
|
| 403 |
+
- `fp16_full_eval`: False
|
| 404 |
+
- `tf32`: None
|
| 405 |
+
- `local_rank`: 0
|
| 406 |
+
- `ddp_backend`: None
|
| 407 |
+
- `tpu_num_cores`: None
|
| 408 |
+
- `tpu_metrics_debug`: False
|
| 409 |
+
- `debug`: []
|
| 410 |
+
- `dataloader_drop_last`: False
|
| 411 |
+
- `dataloader_num_workers`: 0
|
| 412 |
+
- `dataloader_prefetch_factor`: None
|
| 413 |
+
- `past_index`: -1
|
| 414 |
+
- `disable_tqdm`: False
|
| 415 |
+
- `remove_unused_columns`: True
|
| 416 |
+
- `label_names`: None
|
| 417 |
+
- `load_best_model_at_end`: True
|
| 418 |
+
- `ignore_data_skip`: False
|
| 419 |
+
- `fsdp`: []
|
| 420 |
+
- `fsdp_min_num_params`: 0
|
| 421 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 422 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 423 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 424 |
+
- `parallelism_config`: None
|
| 425 |
+
- `deepspeed`: None
|
| 426 |
+
- `label_smoothing_factor`: 0.0
|
| 427 |
+
- `optim`: adamw_torch
|
| 428 |
+
- `optim_args`: None
|
| 429 |
+
- `adafactor`: False
|
| 430 |
+
- `group_by_length`: False
|
| 431 |
+
- `length_column_name`: length
|
| 432 |
+
- `project`: huggingface
|
| 433 |
+
- `trackio_space_id`: trackio
|
| 434 |
+
- `ddp_find_unused_parameters`: None
|
| 435 |
+
- `ddp_bucket_cap_mb`: None
|
| 436 |
+
- `ddp_broadcast_buffers`: False
|
| 437 |
+
- `dataloader_pin_memory`: True
|
| 438 |
+
- `dataloader_persistent_workers`: False
|
| 439 |
+
- `skip_memory_metrics`: True
|
| 440 |
+
- `use_legacy_prediction_loop`: False
|
| 441 |
+
- `push_to_hub`: False
|
| 442 |
+
- `resume_from_checkpoint`: None
|
| 443 |
+
- `hub_model_id`: None
|
| 444 |
+
- `hub_strategy`: every_save
|
| 445 |
+
- `hub_private_repo`: None
|
| 446 |
+
- `hub_always_push`: False
|
| 447 |
+
- `hub_revision`: None
|
| 448 |
+
- `gradient_checkpointing`: False
|
| 449 |
+
- `gradient_checkpointing_kwargs`: None
|
| 450 |
+
- `include_inputs_for_metrics`: False
|
| 451 |
+
- `include_for_metrics`: []
|
| 452 |
+
- `eval_do_concat_batches`: True
|
| 453 |
+
- `fp16_backend`: auto
|
| 454 |
+
- `push_to_hub_model_id`: None
|
| 455 |
+
- `push_to_hub_organization`: None
|
| 456 |
+
- `mp_parameters`:
|
| 457 |
+
- `auto_find_batch_size`: False
|
| 458 |
+
- `full_determinism`: False
|
| 459 |
+
- `torchdynamo`: None
|
| 460 |
+
- `ray_scope`: last
|
| 461 |
+
- `ddp_timeout`: 1800
|
| 462 |
+
- `torch_compile`: False
|
| 463 |
+
- `torch_compile_backend`: None
|
| 464 |
+
- `torch_compile_mode`: None
|
| 465 |
+
- `include_tokens_per_second`: False
|
| 466 |
+
- `include_num_input_tokens_seen`: no
|
| 467 |
+
- `neftune_noise_alpha`: None
|
| 468 |
+
- `optim_target_modules`: None
|
| 469 |
+
- `batch_eval_metrics`: False
|
| 470 |
+
- `eval_on_start`: False
|
| 471 |
+
- `use_liger_kernel`: False
|
| 472 |
+
- `liger_kernel_config`: None
|
| 473 |
+
- `eval_use_gather_object`: False
|
| 474 |
+
- `average_tokens_across_devices`: True
|
| 475 |
+
- `prompts`: None
|
| 476 |
+
- `batch_sampler`: batch_sampler
|
| 477 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 478 |
+
- `router_mapping`: {}
|
| 479 |
+
- `learning_rate_mapping`: {}
|
| 480 |
+
|
| 481 |
+
</details>
|
| 482 |
+
|
| 483 |
+
### Training Logs
|
| 484 |
+
| Epoch | Step | Training Loss | Validation Loss | enarm-ir_cosine_ndcg@10 |
|
| 485 |
+
|:------:|:----:|:-------------:|:---------------:|:-----------------------:|
|
| 486 |
+
| 0.0009 | 1 | 0.3939 | - | - |
|
| 487 |
+
| 0.0907 | 100 | 0.1209 | - | - |
|
| 488 |
+
| 0.1813 | 200 | 0.014 | - | - |
|
| 489 |
+
| 0.2720 | 300 | 0.0057 | - | - |
|
| 490 |
+
| 0.3626 | 400 | 0.0047 | - | - |
|
| 491 |
+
| 0.4533 | 500 | 0.0048 | 0.0027 | 0.9629 |
|
| 492 |
+
| 0.5440 | 600 | 0.0038 | - | - |
|
| 493 |
+
| 0.6346 | 700 | 0.0041 | - | - |
|
| 494 |
+
| 0.7253 | 800 | 0.005 | - | - |
|
| 495 |
+
| 0.8160 | 900 | 0.003 | - | - |
|
| 496 |
+
| 0.9066 | 1000 | 0.0019 | 0.0023 | 0.9644 |
|
| 497 |
+
| 0.9973 | 1100 | 0.0021 | - | - |
|
| 498 |
+
| 1.0879 | 1200 | 0.0025 | - | - |
|
| 499 |
+
| 1.1786 | 1300 | 0.0025 | - | - |
|
| 500 |
+
| 1.2693 | 1400 | 0.0025 | - | - |
|
| 501 |
+
| 1.3599 | 1500 | 0.0031 | 0.0021 | 0.9682 |
|
| 502 |
+
| 1.4506 | 1600 | 0.0025 | - | - |
|
| 503 |
+
| 1.5413 | 1700 | 0.0016 | - | - |
|
| 504 |
+
| 1.6319 | 1800 | 0.003 | - | - |
|
| 505 |
+
| 1.7226 | 1900 | 0.0024 | - | - |
|
| 506 |
+
| 1.8132 | 2000 | 0.0024 | 0.0020 | 0.9666 |
|
| 507 |
+
| 1.9039 | 2100 | 0.0037 | - | - |
|
| 508 |
+
| 1.9946 | 2200 | 0.0013 | - | - |
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
### Framework Versions
|
| 512 |
+
- Python: 3.11.13
|
| 513 |
+
- Sentence Transformers: 5.1.1
|
| 514 |
+
- Transformers: 4.57.1
|
| 515 |
+
- PyTorch: 2.6.0+cu124
|
| 516 |
+
- Accelerate: 1.11.0
|
| 517 |
+
- Datasets: 4.0.0
|
| 518 |
+
- Tokenizers: 0.22.1
|
| 519 |
+
|
| 520 |
+
## Citation
|
| 521 |
+
|
| 522 |
+
### BibTeX
|
| 523 |
+
|
| 524 |
+
#### Sentence Transformers
|
| 525 |
+
```bibtex
|
| 526 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 527 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 528 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 529 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 530 |
+
month = "11",
|
| 531 |
+
year = "2019",
|
| 532 |
+
publisher = "Association for Computational Linguistics",
|
| 533 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 534 |
+
}
|
| 535 |
+
```
|
| 536 |
+
|
| 537 |
+
#### MultipleNegativesRankingLoss
|
| 538 |
+
```bibtex
|
| 539 |
+
@misc{henderson2017efficient,
|
| 540 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 541 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 542 |
+
year={2017},
|
| 543 |
+
eprint={1705.00652},
|
| 544 |
+
archivePrefix={arXiv},
|
| 545 |
+
primaryClass={cs.CL}
|
| 546 |
+
}
|
| 547 |
+
```
|
| 548 |
+
|
| 549 |
+
<!--
|
| 550 |
+
## Glossary
|
| 551 |
+
|
| 552 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 553 |
+
-->
|
| 554 |
+
|
| 555 |
+
<!--
|
| 556 |
+
## Model Card Authors
|
| 557 |
+
|
| 558 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 559 |
+
-->
|
| 560 |
+
|
| 561 |
+
<!--
|
| 562 |
+
## Model Card Contact
|
| 563 |
+
|
| 564 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 565 |
+
-->
|
checkpoint-2206/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MPNetModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "mpnet",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"pad_token_id": 1,
|
| 20 |
+
"relative_attention_num_buckets": 32,
|
| 21 |
+
"transformers_version": "4.57.1",
|
| 22 |
+
"vocab_size": 30527
|
| 23 |
+
}
|
checkpoint-2206/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.1",
|
| 4 |
+
"transformers": "4.57.1",
|
| 5 |
+
"pytorch": "2.6.0+cu124"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
checkpoint-2206/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2ad9ebd36fc6ce1a9bf00b14fc21f96a689aaba649a344b23c7ac87d819ca29
|
| 3 |
+
size 437967672
|
checkpoint-2206/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
checkpoint-2206/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:170e4344fbae89c47658174f37ceae005f7d05cf7b06cd081c6d4f7f1a384d9c
|
| 3 |
+
size 871331770
|
checkpoint-2206/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7570fa36c999d04a0075df8bbf927a5bc6a02cb4877caa0a8ee2a7a714d53b80
|
| 3 |
+
size 14244
|
checkpoint-2206/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ab8622b13e4eb873feb5df5eacaf8e84eb368695c24efcc351157b47a2ed4e0
|
| 3 |
+
size 988
|
checkpoint-2206/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:362f5906fbe19f3cb7fe0a549421029b65be1d540b671706e53abdad4b4d47b5
|
| 3 |
+
size 1064
|
checkpoint-2206/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 384,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
checkpoint-2206/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "[UNK]",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
checkpoint-2206/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-2206/tokenizer_config.json
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"104": {
|
| 36 |
+
"content": "[UNK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"30526": {
|
| 44 |
+
"content": "<mask>",
|
| 45 |
+
"lstrip": true,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"bos_token": "<s>",
|
| 53 |
+
"clean_up_tokenization_spaces": false,
|
| 54 |
+
"cls_token": "<s>",
|
| 55 |
+
"do_lower_case": true,
|
| 56 |
+
"eos_token": "</s>",
|
| 57 |
+
"extra_special_tokens": {},
|
| 58 |
+
"mask_token": "<mask>",
|
| 59 |
+
"max_length": 128,
|
| 60 |
+
"model_max_length": 384,
|
| 61 |
+
"pad_to_multiple_of": null,
|
| 62 |
+
"pad_token": "<pad>",
|
| 63 |
+
"pad_token_type_id": 0,
|
| 64 |
+
"padding_side": "right",
|
| 65 |
+
"sep_token": "</s>",
|
| 66 |
+
"stride": 0,
|
| 67 |
+
"strip_accents": null,
|
| 68 |
+
"tokenize_chinese_chars": true,
|
| 69 |
+
"tokenizer_class": "MPNetTokenizer",
|
| 70 |
+
"truncation_side": "right",
|
| 71 |
+
"truncation_strategy": "longest_first",
|
| 72 |
+
"unk_token": "[UNK]"
|
| 73 |
+
}
|
checkpoint-2206/trainer_state.json
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1500,
|
| 3 |
+
"best_metric": 0.9681782842497483,
|
| 4 |
+
"best_model_checkpoint": "models/enarm-mpnet-v2\\checkpoint-1500",
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 2206,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.0009066183136899365,
|
| 14 |
+
"grad_norm": 9.670710563659668,
|
| 15 |
+
"learning_rate": 0.0,
|
| 16 |
+
"loss": 0.3939,
|
| 17 |
+
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.09066183136899365,
|
| 21 |
+
"grad_norm": 0.3970909118652344,
|
| 22 |
+
"learning_rate": 8.95927601809955e-06,
|
| 23 |
+
"loss": 0.1209,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.1813236627379873,
|
| 28 |
+
"grad_norm": 0.46869727969169617,
|
| 29 |
+
"learning_rate": 1.8009049773755657e-05,
|
| 30 |
+
"loss": 0.014,
|
| 31 |
+
"step": 200
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.271985494106981,
|
| 35 |
+
"grad_norm": 0.05942446365952492,
|
| 36 |
+
"learning_rate": 1.9214105793450883e-05,
|
| 37 |
+
"loss": 0.0057,
|
| 38 |
+
"step": 300
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.3626473254759746,
|
| 42 |
+
"grad_norm": 0.1977270245552063,
|
| 43 |
+
"learning_rate": 1.820654911838791e-05,
|
| 44 |
+
"loss": 0.0047,
|
| 45 |
+
"step": 400
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.45330915684496825,
|
| 49 |
+
"grad_norm": 0.05510423332452774,
|
| 50 |
+
"learning_rate": 1.720906801007557e-05,
|
| 51 |
+
"loss": 0.0048,
|
| 52 |
+
"step": 500
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.45330915684496825,
|
| 56 |
+
"eval_enarm-ir_cosine_accuracy@1": 0.9020408163265307,
|
| 57 |
+
"eval_enarm-ir_cosine_accuracy@10": 1.0,
|
| 58 |
+
"eval_enarm-ir_cosine_accuracy@3": 0.9987244897959183,
|
| 59 |
+
"eval_enarm-ir_cosine_accuracy@5": 1.0,
|
| 60 |
+
"eval_enarm-ir_cosine_map@100": 0.9498681972789115,
|
| 61 |
+
"eval_enarm-ir_cosine_mrr@10": 0.9498681972789118,
|
| 62 |
+
"eval_enarm-ir_cosine_ndcg@10": 0.9629337873914321,
|
| 63 |
+
"eval_enarm-ir_cosine_precision@1": 0.9020408163265307,
|
| 64 |
+
"eval_enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 65 |
+
"eval_enarm-ir_cosine_precision@3": 0.3329081632653061,
|
| 66 |
+
"eval_enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 67 |
+
"eval_enarm-ir_cosine_recall@1": 0.9020408163265307,
|
| 68 |
+
"eval_enarm-ir_cosine_recall@10": 1.0,
|
| 69 |
+
"eval_enarm-ir_cosine_recall@3": 0.9987244897959183,
|
| 70 |
+
"eval_enarm-ir_cosine_recall@5": 1.0,
|
| 71 |
+
"eval_loss": 0.002733997767791152,
|
| 72 |
+
"eval_runtime": 69.6943,
|
| 73 |
+
"eval_samples_per_second": 56.246,
|
| 74 |
+
"eval_steps_per_second": 1.765,
|
| 75 |
+
"step": 500
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"epoch": 0.543970988213962,
|
| 79 |
+
"grad_norm": 0.07771875709295273,
|
| 80 |
+
"learning_rate": 1.6201511335012597e-05,
|
| 81 |
+
"loss": 0.0038,
|
| 82 |
+
"step": 600
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"epoch": 0.6346328195829556,
|
| 86 |
+
"grad_norm": 0.027955936267971992,
|
| 87 |
+
"learning_rate": 1.5193954659949624e-05,
|
| 88 |
+
"loss": 0.0041,
|
| 89 |
+
"step": 700
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"epoch": 0.7252946509519492,
|
| 93 |
+
"grad_norm": 0.009821675717830658,
|
| 94 |
+
"learning_rate": 1.418639798488665e-05,
|
| 95 |
+
"loss": 0.005,
|
| 96 |
+
"step": 800
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"epoch": 0.8159564823209429,
|
| 100 |
+
"grad_norm": 0.7323502898216248,
|
| 101 |
+
"learning_rate": 1.3178841309823678e-05,
|
| 102 |
+
"loss": 0.003,
|
| 103 |
+
"step": 900
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"epoch": 0.9066183136899365,
|
| 107 |
+
"grad_norm": 0.9905967712402344,
|
| 108 |
+
"learning_rate": 1.2171284634760707e-05,
|
| 109 |
+
"loss": 0.0019,
|
| 110 |
+
"step": 1000
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"epoch": 0.9066183136899365,
|
| 114 |
+
"eval_enarm-ir_cosine_accuracy@1": 0.9056122448979592,
|
| 115 |
+
"eval_enarm-ir_cosine_accuracy@10": 1.0,
|
| 116 |
+
"eval_enarm-ir_cosine_accuracy@3": 0.999234693877551,
|
| 117 |
+
"eval_enarm-ir_cosine_accuracy@5": 1.0,
|
| 118 |
+
"eval_enarm-ir_cosine_map@100": 0.9518069727891155,
|
| 119 |
+
"eval_enarm-ir_cosine_mrr@10": 0.951806972789116,
|
| 120 |
+
"eval_enarm-ir_cosine_ndcg@10": 0.9643764244686448,
|
| 121 |
+
"eval_enarm-ir_cosine_precision@1": 0.9056122448979592,
|
| 122 |
+
"eval_enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 123 |
+
"eval_enarm-ir_cosine_precision@3": 0.33307823129251696,
|
| 124 |
+
"eval_enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 125 |
+
"eval_enarm-ir_cosine_recall@1": 0.9056122448979592,
|
| 126 |
+
"eval_enarm-ir_cosine_recall@10": 1.0,
|
| 127 |
+
"eval_enarm-ir_cosine_recall@3": 0.999234693877551,
|
| 128 |
+
"eval_enarm-ir_cosine_recall@5": 1.0,
|
| 129 |
+
"eval_loss": 0.002290277509018779,
|
| 130 |
+
"eval_runtime": 75.2569,
|
| 131 |
+
"eval_samples_per_second": 52.088,
|
| 132 |
+
"eval_steps_per_second": 1.634,
|
| 133 |
+
"step": 1000
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"epoch": 0.9972801450589301,
|
| 137 |
+
"grad_norm": 0.05818323418498039,
|
| 138 |
+
"learning_rate": 1.1163727959697735e-05,
|
| 139 |
+
"loss": 0.0021,
|
| 140 |
+
"step": 1100
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"epoch": 1.087941976427924,
|
| 144 |
+
"grad_norm": 0.12362250685691833,
|
| 145 |
+
"learning_rate": 1.0156171284634761e-05,
|
| 146 |
+
"loss": 0.0025,
|
| 147 |
+
"step": 1200
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"epoch": 1.1786038077969174,
|
| 151 |
+
"grad_norm": 0.02448936738073826,
|
| 152 |
+
"learning_rate": 9.14861460957179e-06,
|
| 153 |
+
"loss": 0.0025,
|
| 154 |
+
"step": 1300
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 1.2692656391659112,
|
| 158 |
+
"grad_norm": 0.2795725166797638,
|
| 159 |
+
"learning_rate": 8.141057934508818e-06,
|
| 160 |
+
"loss": 0.0025,
|
| 161 |
+
"step": 1400
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 1.3599274705349047,
|
| 165 |
+
"grad_norm": 0.028418799862265587,
|
| 166 |
+
"learning_rate": 7.133501259445844e-06,
|
| 167 |
+
"loss": 0.0031,
|
| 168 |
+
"step": 1500
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"epoch": 1.3599274705349047,
|
| 172 |
+
"eval_enarm-ir_cosine_accuracy@1": 0.9150510204081632,
|
| 173 |
+
"eval_enarm-ir_cosine_accuracy@10": 1.0,
|
| 174 |
+
"eval_enarm-ir_cosine_accuracy@3": 0.9994897959183674,
|
| 175 |
+
"eval_enarm-ir_cosine_accuracy@5": 1.0,
|
| 176 |
+
"eval_enarm-ir_cosine_map@100": 0.9569302721088435,
|
| 177 |
+
"eval_enarm-ir_cosine_mrr@10": 0.9569302721088436,
|
| 178 |
+
"eval_enarm-ir_cosine_ndcg@10": 0.9681782842497483,
|
| 179 |
+
"eval_enarm-ir_cosine_precision@1": 0.9150510204081632,
|
| 180 |
+
"eval_enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 181 |
+
"eval_enarm-ir_cosine_precision@3": 0.3331632653061224,
|
| 182 |
+
"eval_enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 183 |
+
"eval_enarm-ir_cosine_recall@1": 0.9150510204081632,
|
| 184 |
+
"eval_enarm-ir_cosine_recall@10": 1.0,
|
| 185 |
+
"eval_enarm-ir_cosine_recall@3": 0.9994897959183674,
|
| 186 |
+
"eval_enarm-ir_cosine_recall@5": 1.0,
|
| 187 |
+
"eval_loss": 0.0020953374914824963,
|
| 188 |
+
"eval_runtime": 65.9618,
|
| 189 |
+
"eval_samples_per_second": 59.428,
|
| 190 |
+
"eval_steps_per_second": 1.865,
|
| 191 |
+
"step": 1500
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 1.4505893019038985,
|
| 195 |
+
"grad_norm": 0.5629938840866089,
|
| 196 |
+
"learning_rate": 6.1259445843828725e-06,
|
| 197 |
+
"loss": 0.0025,
|
| 198 |
+
"step": 1600
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 1.5412511332728922,
|
| 202 |
+
"grad_norm": 0.060413144528865814,
|
| 203 |
+
"learning_rate": 5.1183879093199e-06,
|
| 204 |
+
"loss": 0.0016,
|
| 205 |
+
"step": 1700
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 1.6319129646418857,
|
| 209 |
+
"grad_norm": 0.04670459404587746,
|
| 210 |
+
"learning_rate": 4.110831234256927e-06,
|
| 211 |
+
"loss": 0.003,
|
| 212 |
+
"step": 1800
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 1.7225747960108793,
|
| 216 |
+
"grad_norm": 0.6066415309906006,
|
| 217 |
+
"learning_rate": 3.103274559193955e-06,
|
| 218 |
+
"loss": 0.0024,
|
| 219 |
+
"step": 1900
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 1.8132366273798732,
|
| 223 |
+
"grad_norm": 0.03934232518076897,
|
| 224 |
+
"learning_rate": 2.0957178841309824e-06,
|
| 225 |
+
"loss": 0.0024,
|
| 226 |
+
"step": 2000
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 1.8132366273798732,
|
| 230 |
+
"eval_enarm-ir_cosine_accuracy@1": 0.9112244897959184,
|
| 231 |
+
"eval_enarm-ir_cosine_accuracy@10": 1.0,
|
| 232 |
+
"eval_enarm-ir_cosine_accuracy@3": 0.999234693877551,
|
| 233 |
+
"eval_enarm-ir_cosine_accuracy@5": 1.0,
|
| 234 |
+
"eval_enarm-ir_cosine_map@100": 0.9548256802721089,
|
| 235 |
+
"eval_enarm-ir_cosine_mrr@10": 0.954825680272109,
|
| 236 |
+
"eval_enarm-ir_cosine_ndcg@10": 0.9666147393128501,
|
| 237 |
+
"eval_enarm-ir_cosine_precision@1": 0.9112244897959184,
|
| 238 |
+
"eval_enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 239 |
+
"eval_enarm-ir_cosine_precision@3": 0.33307823129251696,
|
| 240 |
+
"eval_enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 241 |
+
"eval_enarm-ir_cosine_recall@1": 0.9112244897959184,
|
| 242 |
+
"eval_enarm-ir_cosine_recall@10": 1.0,
|
| 243 |
+
"eval_enarm-ir_cosine_recall@3": 0.999234693877551,
|
| 244 |
+
"eval_enarm-ir_cosine_recall@5": 1.0,
|
| 245 |
+
"eval_loss": 0.001998345600441098,
|
| 246 |
+
"eval_runtime": 66.1757,
|
| 247 |
+
"eval_samples_per_second": 59.236,
|
| 248 |
+
"eval_steps_per_second": 1.859,
|
| 249 |
+
"step": 2000
|
| 250 |
+
},
|
| 251 |
+
{
|
| 252 |
+
"epoch": 1.9038984587488668,
|
| 253 |
+
"grad_norm": 0.020388886332511902,
|
| 254 |
+
"learning_rate": 1.0881612090680102e-06,
|
| 255 |
+
"loss": 0.0037,
|
| 256 |
+
"step": 2100
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"epoch": 1.9945602901178603,
|
| 260 |
+
"grad_norm": 0.02650429680943489,
|
| 261 |
+
"learning_rate": 8.060453400503778e-08,
|
| 262 |
+
"loss": 0.0013,
|
| 263 |
+
"step": 2200
|
| 264 |
+
}
|
| 265 |
+
],
|
| 266 |
+
"logging_steps": 100,
|
| 267 |
+
"max_steps": 2206,
|
| 268 |
+
"num_input_tokens_seen": 0,
|
| 269 |
+
"num_train_epochs": 2,
|
| 270 |
+
"save_steps": 500,
|
| 271 |
+
"stateful_callbacks": {
|
| 272 |
+
"TrainerControl": {
|
| 273 |
+
"args": {
|
| 274 |
+
"should_epoch_stop": false,
|
| 275 |
+
"should_evaluate": false,
|
| 276 |
+
"should_log": false,
|
| 277 |
+
"should_save": true,
|
| 278 |
+
"should_training_stop": true
|
| 279 |
+
},
|
| 280 |
+
"attributes": {}
|
| 281 |
+
}
|
| 282 |
+
},
|
| 283 |
+
"total_flos": 0.0,
|
| 284 |
+
"train_batch_size": 32,
|
| 285 |
+
"trial_name": null,
|
| 286 |
+
"trial_params": null
|
| 287 |
+
}
|
checkpoint-2206/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46c00947826242ae61ca80531e511dbebe62d6dbf43d17a3c6b8a55f3ed6a086
|
| 3 |
+
size 5688
|
checkpoint-2206/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MPNetModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "mpnet",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"pad_token_id": 1,
|
| 20 |
+
"relative_attention_num_buckets": 32,
|
| 21 |
+
"transformers_version": "4.57.1",
|
| 22 |
+
"vocab_size": 30527
|
| 23 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.1",
|
| 4 |
+
"transformers": "4.57.1",
|
| 5 |
+
"pytorch": "2.6.0+cu124"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
eval/Information-Retrieval_evaluation_enarm-ir_results.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
|
| 2 |
+
0.45330915684496825,500,0.9020408163265307,0.9987244897959183,1.0,1.0,0.9020408163265307,0.9020408163265307,0.3329081632653061,0.9987244897959183,0.20000000000000007,1.0,0.10000000000000003,1.0,0.9498681972789118,0.9629337873914321,0.9498681972789115
|
| 3 |
+
0.45330915684496825,500,0.9020408163265307,0.9987244897959183,1.0,1.0,0.9020408163265307,0.9020408163265307,0.3329081632653061,0.9987244897959183,0.20000000000000007,1.0,0.10000000000000003,1.0,0.9498681972789118,0.9629337873914321,0.9498681972789115
|
| 4 |
+
0.45330915684496825,500,0.9020408163265307,0.9987244897959183,1.0,1.0,0.9020408163265307,0.9020408163265307,0.3329081632653061,0.9987244897959183,0.20000000000000007,1.0,0.10000000000000003,1.0,0.9498681972789118,0.9629337873914321,0.9498681972789115
|
| 5 |
+
0.9066183136899365,1000,0.9056122448979592,0.999234693877551,1.0,1.0,0.9056122448979592,0.9056122448979592,0.33307823129251696,0.999234693877551,0.20000000000000007,1.0,0.10000000000000003,1.0,0.951806972789116,0.9643764244686448,0.9518069727891155
|
| 6 |
+
1.3599274705349047,1500,0.9150510204081632,0.9994897959183674,1.0,1.0,0.9150510204081632,0.9150510204081632,0.3331632653061224,0.9994897959183674,0.20000000000000007,1.0,0.10000000000000003,1.0,0.9569302721088436,0.9681782842497483,0.9569302721088435
|
| 7 |
+
1.8132366273798732,2000,0.9112244897959184,0.999234693877551,1.0,1.0,0.9112244897959184,0.9112244897959184,0.33307823129251696,0.999234693877551,0.20000000000000007,1.0,0.10000000000000003,1.0,0.954825680272109,0.9666147393128501,0.9548256802721089
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:571f9278950d04c5f38439e851971c56692b2a917856f9359039f7ae51b49d10
|
| 3 |
+
size 437967672
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 384,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "[UNK]",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"104": {
|
| 36 |
+
"content": "[UNK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"30526": {
|
| 44 |
+
"content": "<mask>",
|
| 45 |
+
"lstrip": true,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"bos_token": "<s>",
|
| 53 |
+
"clean_up_tokenization_spaces": false,
|
| 54 |
+
"cls_token": "<s>",
|
| 55 |
+
"do_lower_case": true,
|
| 56 |
+
"eos_token": "</s>",
|
| 57 |
+
"extra_special_tokens": {},
|
| 58 |
+
"mask_token": "<mask>",
|
| 59 |
+
"max_length": 128,
|
| 60 |
+
"model_max_length": 384,
|
| 61 |
+
"pad_to_multiple_of": null,
|
| 62 |
+
"pad_token": "<pad>",
|
| 63 |
+
"pad_token_type_id": 0,
|
| 64 |
+
"padding_side": "right",
|
| 65 |
+
"sep_token": "</s>",
|
| 66 |
+
"stride": 0,
|
| 67 |
+
"strip_accents": null,
|
| 68 |
+
"tokenize_chinese_chars": true,
|
| 69 |
+
"tokenizer_class": "MPNetTokenizer",
|
| 70 |
+
"truncation_side": "right",
|
| 71 |
+
"truncation_strategy": "longest_first",
|
| 72 |
+
"unk_token": "[UNK]"
|
| 73 |
+
}
|
training_info.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"base_model": "sentence-transformers/all-mpnet-base-v2",
|
| 3 |
+
"output_dir": "models/enarm-mpnet-v2",
|
| 4 |
+
"train_examples": 35280,
|
| 5 |
+
"eval_examples": 3920,
|
| 6 |
+
"epochs": 2,
|
| 7 |
+
"batch_size": 32,
|
| 8 |
+
"learning_rate": 2e-05,
|
| 9 |
+
"final_metrics": {
|
| 10 |
+
"enarm-ir_cosine_accuracy@1": 0.9150510204081632,
|
| 11 |
+
"enarm-ir_cosine_accuracy@3": 0.9994897959183674,
|
| 12 |
+
"enarm-ir_cosine_accuracy@5": 1.0,
|
| 13 |
+
"enarm-ir_cosine_accuracy@10": 1.0,
|
| 14 |
+
"enarm-ir_cosine_precision@1": 0.9150510204081632,
|
| 15 |
+
"enarm-ir_cosine_precision@3": 0.3331632653061224,
|
| 16 |
+
"enarm-ir_cosine_precision@5": 0.20000000000000007,
|
| 17 |
+
"enarm-ir_cosine_precision@10": 0.10000000000000003,
|
| 18 |
+
"enarm-ir_cosine_recall@1": 0.9150510204081632,
|
| 19 |
+
"enarm-ir_cosine_recall@3": 0.9994897959183674,
|
| 20 |
+
"enarm-ir_cosine_recall@5": 1.0,
|
| 21 |
+
"enarm-ir_cosine_recall@10": 1.0,
|
| 22 |
+
"enarm-ir_cosine_ndcg@10": 0.9681782842497483,
|
| 23 |
+
"enarm-ir_cosine_mrr@10": 0.9569302721088436,
|
| 24 |
+
"enarm-ir_cosine_map@100": 0.9569302721088435
|
| 25 |
+
},
|
| 26 |
+
"timestamp": "2026-01-01 03:51:49.380086"
|
| 27 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|