Fine-tuned product classifier: 62% accuracy, a +34-point improvement
Browse files
- 1_Pooling/config.json +10 -0
- 2_Dense/config.json +6 -0
- 2_Dense/model.safetensors +3 -0
- README.md +393 -0
- config.json +25 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +67 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
2_Dense/config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"in_features": 768,
|
| 3 |
+
"out_features": 512,
|
| 4 |
+
"bias": true,
|
| 5 |
+
"activation_function": "torch.nn.modules.activation.Tanh"
|
| 6 |
+
}
|
2_Dense/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92e85b7bb07b644d41717c4ed16958d73e42ed80914a01e506974e80342bff86
|
| 3 |
+
size 1575072
|
README.md
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:3324
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: sentence-transformers/distiluse-base-multilingual-cased-v2
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: Pizarra acústica de guitarra Dunlop T213C negra - Top plate de
|
| 13 |
+
reemplazo para guitarras acústicas, fabricada en madera maciza (arce), con perforaciones
|
| 14 |
+
para mejor resonancia y acabado negro mate
|
| 15 |
+
sentences:
|
| 16 |
+
- Accesorios para instrumentos musicales / Musical instrument accessories
|
| 17 |
+
- Personal Care / Aseo
|
| 18 |
+
- Sistema limpiaparabrisas / Windshield wiper system
|
| 19 |
+
- source_sentence: Hisense 40" Clase FHD (1080P) Roku Smart LED TV (40H4030F1) - Hisense
|
| 20 |
+
40" Clase 1080P FHD LED Roku Smart TV 40H4030F1
|
| 21 |
+
sentences:
|
| 22 |
+
- Olla arrocera / rice cooker
|
| 23 |
+
- Estufa / Stove/Cooktop
|
| 24 |
+
- TV (-) 32 ''
|
| 25 |
+
- source_sentence: 'Reductor de Bushing de 3/4 in. x 1/2 in. PVC Schedule 40 - Las
|
| 26 |
+
tuberías y accesorios de presión de PVC Schedule 40 se utilizan en sistemas de
|
| 27 |
+
riego, aspersores subterráneos, piscinas, aplicaciones al aire libre y líneas
|
| 28 |
+
de suministro de agua fría. Estos accesorios son altamente resistentes, con alta
|
| 29 |
+
resistencia a la tracción y al impacto. Diseñados para uso bajo presión donde
|
| 30 |
+
los sistemas no superen los 140° F. Cumplen con las Normas: ASTM D 1784, ASTM
|
| 31 |
+
D 2466, NSF 14 y 61PVC Schedule 40. Tienen mejores propiedades de reducción de
|
| 32 |
+
sonido que el PVC Schedule 40 DWV Foam Core y el ABS Foam Core. La instalación
|
| 33 |
+
requiere el uso de imprimación y cemento solvente. Todos los productos Charlotte
|
| 34 |
+
Pipe se fabrican en los EE. UU.'
|
| 35 |
+
sentences:
|
| 36 |
+
- Base de TV / TV base
|
| 37 |
+
- Manufacturas para la construcción/ Manufactures for construction use
|
| 38 |
+
- Leather goods. (Briefcases, suitcases, portfolios.) / Artículos de talabartería.(Maletines,
|
| 39 |
+
maletas, portafolios. )
|
| 40 |
+
- source_sentence: Liebherr WCE81769BX Motor de Compresión con Inversor para Nevera
|
| 41 |
+
Combi, Tensión Dual (220-240V/50Hz) - 100% Reutilizable y Garantía Vida - Motor
|
| 42 |
+
de alta eficiencia y durabilidad diseñado específicamente para neveras y congeladores
|
| 43 |
+
Liebherr de gama media-alta. Este motor de compresión invertible con tensión dual
|
| 44 |
+
(220-240V/50Hz) está optimizado para reducir el consumo energético en hasta un
|
| 45 |
+
30% frente a modelos convencionales, garantizando un funcionamiento silencioso
|
| 46 |
+
(≤42dB) y una capacidad de refrigeración constante. Compatible con unidades de
|
| 47 |
+
neveras tipo 'Combi' de gran tamaño, como los modelos WCS5791 o WCF6589. Incluye
|
| 48 |
+
certificación ecológica A+++ por eficiencia energética y viene empaquetado en
|
| 49 |
+
caja hermética para evitar daños durante el transporte o almacenaje prolongado.
|
| 50 |
+
Ideal para reemplazo en reparaciones profesionales, con garantía de vida útil
|
| 51 |
+
del equipo original (hasta 10 años según uso). Diseño modular que permite instalación
|
| 52 |
+
rápida sin necesidad de herramientas especializadas
|
| 53 |
+
sentences:
|
| 54 |
+
- Piezas de repuesto de motos / Motorcycle spare parts
|
| 55 |
+
- Alternador / Alternator
|
| 56 |
+
- Motores de equipos electrodomésticos / Motors for household appliances
|
| 57 |
+
- source_sentence: Audífono Widex Moment A-RIC Plus Rechargeable - Talla Grande (RIC)
|
| 58 |
+
- Audífono intraauricular receptor en el canal (RIC) con tecnología Sound Recognizer
|
| 59 |
+
para optimizar sonidos importantes. Incluye cargador magnético, aplicación móvil
|
| 60 |
+
y compatibilidad con wearables como iOS/Android. Recomendado para adultos con
|
| 61 |
+
pérdida auditiva leve a moderada
|
| 62 |
+
sentences:
|
| 63 |
+
- Piezas de repuesto de motos / Motorcycle spare parts
|
| 64 |
+
- Caja de seguridad / Safe deposit box
|
| 65 |
+
- Audifono / Hearing Aids
|
| 66 |
+
pipeline_tag: sentence-similarity
|
| 67 |
+
library_name: sentence-transformers
|
| 68 |
+
---
|
| 69 |
+
|
| 70 |
+
# SentenceTransformer based on sentence-transformers/distiluse-base-multilingual-cased-v2
|
| 71 |
+
|
| 72 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2). It maps sentences & paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 73 |
+
|
| 74 |
+
## Model Details
|
| 75 |
+
|
| 76 |
+
### Model Description
|
| 77 |
+
- **Model Type:** Sentence Transformer
|
| 78 |
+
- **Base model:** [sentence-transformers/distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) <!-- at revision bfe45d0732ca50787611c0fe107ba278c7f3f889 -->
|
| 79 |
+
- **Maximum Sequence Length:** 128 tokens
|
| 80 |
+
- **Output Dimensionality:** 512 dimensions
|
| 81 |
+
- **Similarity Function:** Cosine Similarity
|
| 82 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 83 |
+
<!-- - **Language:** Unknown -->
|
| 84 |
+
<!-- - **License:** Unknown -->
|
| 85 |
+
|
| 86 |
+
### Model Sources
|
| 87 |
+
|
| 88 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 89 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
|
| 90 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 91 |
+
|
| 92 |
+
### Full Model Architecture
|
| 93 |
+
|
| 94 |
+
```
|
| 95 |
+
SentenceTransformer(
|
| 96 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'DistilBertModel'})
|
| 97 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 98 |
+
(2): Dense({'in_features': 768, 'out_features': 512, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
|
| 99 |
+
)
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## Usage
|
| 103 |
+
|
| 104 |
+
### Direct Usage (Sentence Transformers)
|
| 105 |
+
|
| 106 |
+
First install the Sentence Transformers library:
|
| 107 |
+
|
| 108 |
+
```bash
|
| 109 |
+
pip install -U sentence-transformers
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
Then you can load this model and run inference.
|
| 113 |
+
```python
|
| 114 |
+
from sentence_transformers import SentenceTransformer
|
| 115 |
+
|
| 116 |
+
# Download from the 🤗 Hub
|
| 117 |
+
model = SentenceTransformer("roig/compass-product-classifier")
|
| 118 |
+
# Run inference
|
| 119 |
+
sentences = [
|
| 120 |
+
'Audífono Widex Moment A-RIC Plus Rechargeable - Talla Grande (RIC) - Audífono intraauricular receptor en el canal (RIC) con tecnología Sound Recognizer para optimizar sonidos importantes. Incluye cargador magnético, aplicación móvil y compatibilidad con wearables como iOS/Android. Recomendado para adultos con pérdida auditiva leve a moderada',
|
| 121 |
+
'Audifono / Hearing Aids',
|
| 122 |
+
'Piezas de repuesto de motos / Motorcycle spare parts',
|
| 123 |
+
]
|
| 124 |
+
embeddings = model.encode(sentences)
|
| 125 |
+
print(embeddings.shape)
|
| 126 |
+
# [3, 512]
|
| 127 |
+
|
| 128 |
+
# Get the similarity scores for the embeddings
|
| 129 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 130 |
+
print(similarities)
|
| 131 |
+
# tensor([[ 1.0000, 0.7342, 0.0265],
|
| 132 |
+
# [ 0.7342, 1.0000, -0.0021],
|
| 133 |
+
# [ 0.0265, -0.0021, 1.0000]])
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
<!--
|
| 137 |
+
### Direct Usage (Transformers)
|
| 138 |
+
|
| 139 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 140 |
+
|
| 141 |
+
</details>
|
| 142 |
+
-->
|
| 143 |
+
|
| 144 |
+
<!--
|
| 145 |
+
### Downstream Usage (Sentence Transformers)
|
| 146 |
+
|
| 147 |
+
You can finetune this model on your own dataset.
|
| 148 |
+
|
| 149 |
+
<details><summary>Click to expand</summary>
|
| 150 |
+
|
| 151 |
+
</details>
|
| 152 |
+
-->
|
| 153 |
+
|
| 154 |
+
<!--
|
| 155 |
+
### Out-of-Scope Use
|
| 156 |
+
|
| 157 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 158 |
+
-->
|
| 159 |
+
|
| 160 |
+
<!--
|
| 161 |
+
## Bias, Risks and Limitations
|
| 162 |
+
|
| 163 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 164 |
+
-->
|
| 165 |
+
|
| 166 |
+
<!--
|
| 167 |
+
### Recommendations
|
| 168 |
+
|
| 169 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 170 |
+
-->
|
| 171 |
+
|
| 172 |
+
## Training Details
|
| 173 |
+
|
| 174 |
+
### Training Dataset
|
| 175 |
+
|
| 176 |
+
#### Unnamed Dataset
|
| 177 |
+
|
| 178 |
+
* Size: 3,324 training samples
|
| 179 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
| 180 |
+
* Approximate statistics based on the first 1000 samples:
|
| 181 |
+
| | sentence_0 | sentence_1 |
|
| 182 |
+
|:--------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
| 183 |
+
| type | string | string |
|
| 184 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 95.27 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 12.28 tokens</li><li>max: 53 tokens</li></ul> |
|
| 185 |
+
* Samples:
|
| 186 |
+
| sentence_0 | sentence_1 |
|
| 187 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------|
|
| 188 |
+
| <code>Nichia J16JL-UN - Bujía de rendimiento extremo - Bujía profesional con electrodo iridio, diseñada para motores deportivos o turismos de alto kilometraje. Resiste condiciones severas y ofrece mejor ignición que los modelos estándar</code> | <code>Bujías / Spark plugs</code> |
|
| 189 |
+
| <code>500 Watt Power Inverter, Dual Power Inverter, Two USB Charging Ports</code> | <code>Convertidor, Inversor de corriente / Converter, Power inverter</code> |
|
| 190 |
+
| <code>Colchón Híbrido de Firmeza Media Queen de 12 pulg., Aislamiento de Movimiento y Transpirable - Si buscas un colchón que sea tanto cómodo como asequible, entonces este colchón híbrido de 12 pulg. es perfecto para ti. Este colchón de 12 pulg. está diseñado con espuma de confort y un sistema de resortes ensacados, proporcionando una experiencia de sueño cómoda. La espuma de flujo de aire confort es suave y agradable, haciendo tu colchón cómodo y transpirable. La espuma de confort alivia los puntos de presión para que puedas despertar sin dolor de espalda. El diseño de resortes ensacados independientes es perfecto para durmientes sensibles, cada resorte funciona de manera independiente. No importa cuánto te muevas, no tendrás que preocuparte por molestar a tu compañero de sueño y disfrutarás de una noche de descanso tranquila e ininterrumpida. Nuestros colchones de 12 pulg. vienen en caja, llegan con una garantía limitada de 10 años; simplemente coloca el colchón en tu cama y deja pasar 72...</code> | <code>Mattress / Colchon</code> |
|
| 191 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 192 |
+
```json
|
| 193 |
+
{
|
| 194 |
+
"scale": 20.0,
|
| 195 |
+
"similarity_fct": "cos_sim",
|
| 196 |
+
"gather_across_devices": false
|
| 197 |
+
}
|
| 198 |
+
```
|
| 199 |
+
|
| 200 |
+
### Training Hyperparameters
|
| 201 |
+
#### Non-Default Hyperparameters
|
| 202 |
+
|
| 203 |
+
- `per_device_train_batch_size`: 16
|
| 204 |
+
- `per_device_eval_batch_size`: 16
|
| 205 |
+
- `num_train_epochs`: 4
|
| 206 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 207 |
+
|
| 208 |
+
#### All Hyperparameters
|
| 209 |
+
<details><summary>Click to expand</summary>
|
| 210 |
+
|
| 211 |
+
- `overwrite_output_dir`: False
|
| 212 |
+
- `do_predict`: False
|
| 213 |
+
- `eval_strategy`: no
|
| 214 |
+
- `prediction_loss_only`: True
|
| 215 |
+
- `per_device_train_batch_size`: 16
|
| 216 |
+
- `per_device_eval_batch_size`: 16
|
| 217 |
+
- `per_gpu_train_batch_size`: None
|
| 218 |
+
- `per_gpu_eval_batch_size`: None
|
| 219 |
+
- `gradient_accumulation_steps`: 1
|
| 220 |
+
- `eval_accumulation_steps`: None
|
| 221 |
+
- `torch_empty_cache_steps`: None
|
| 222 |
+
- `learning_rate`: 5e-05
|
| 223 |
+
- `weight_decay`: 0.0
|
| 224 |
+
- `adam_beta1`: 0.9
|
| 225 |
+
- `adam_beta2`: 0.999
|
| 226 |
+
- `adam_epsilon`: 1e-08
|
| 227 |
+
- `max_grad_norm`: 1
|
| 228 |
+
- `num_train_epochs`: 4
|
| 229 |
+
- `max_steps`: -1
|
| 230 |
+
- `lr_scheduler_type`: linear
|
| 231 |
+
- `lr_scheduler_kwargs`: {}
|
| 232 |
+
- `warmup_ratio`: 0.0
|
| 233 |
+
- `warmup_steps`: 0
|
| 234 |
+
- `log_level`: passive
|
| 235 |
+
- `log_level_replica`: warning
|
| 236 |
+
- `log_on_each_node`: True
|
| 237 |
+
- `logging_nan_inf_filter`: True
|
| 238 |
+
- `save_safetensors`: True
|
| 239 |
+
- `save_on_each_node`: False
|
| 240 |
+
- `save_only_model`: False
|
| 241 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 242 |
+
- `no_cuda`: False
|
| 243 |
+
- `use_cpu`: False
|
| 244 |
+
- `use_mps_device`: False
|
| 245 |
+
- `seed`: 42
|
| 246 |
+
- `data_seed`: None
|
| 247 |
+
- `jit_mode_eval`: False
|
| 248 |
+
- `bf16`: False
|
| 249 |
+
- `fp16`: False
|
| 250 |
+
- `fp16_opt_level`: O1
|
| 251 |
+
- `half_precision_backend`: auto
|
| 252 |
+
- `bf16_full_eval`: False
|
| 253 |
+
- `fp16_full_eval`: False
|
| 254 |
+
- `tf32`: None
|
| 255 |
+
- `local_rank`: 0
|
| 256 |
+
- `ddp_backend`: None
|
| 257 |
+
- `tpu_num_cores`: None
|
| 258 |
+
- `tpu_metrics_debug`: False
|
| 259 |
+
- `debug`: []
|
| 260 |
+
- `dataloader_drop_last`: False
|
| 261 |
+
- `dataloader_num_workers`: 0
|
| 262 |
+
- `dataloader_prefetch_factor`: None
|
| 263 |
+
- `past_index`: -1
|
| 264 |
+
- `disable_tqdm`: False
|
| 265 |
+
- `remove_unused_columns`: True
|
| 266 |
+
- `label_names`: None
|
| 267 |
+
- `load_best_model_at_end`: False
|
| 268 |
+
- `ignore_data_skip`: False
|
| 269 |
+
- `fsdp`: []
|
| 270 |
+
- `fsdp_min_num_params`: 0
|
| 271 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 272 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 273 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 274 |
+
- `parallelism_config`: None
|
| 275 |
+
- `deepspeed`: None
|
| 276 |
+
- `label_smoothing_factor`: 0.0
|
| 277 |
+
- `optim`: adamw_torch_fused
|
| 278 |
+
- `optim_args`: None
|
| 279 |
+
- `adafactor`: False
|
| 280 |
+
- `group_by_length`: False
|
| 281 |
+
- `length_column_name`: length
|
| 282 |
+
- `project`: huggingface
|
| 283 |
+
- `trackio_space_id`: trackio
|
| 284 |
+
- `ddp_find_unused_parameters`: None
|
| 285 |
+
- `ddp_bucket_cap_mb`: None
|
| 286 |
+
- `ddp_broadcast_buffers`: False
|
| 287 |
+
- `dataloader_pin_memory`: True
|
| 288 |
+
- `dataloader_persistent_workers`: False
|
| 289 |
+
- `skip_memory_metrics`: True
|
| 290 |
+
- `use_legacy_prediction_loop`: False
|
| 291 |
+
- `push_to_hub`: False
|
| 292 |
+
- `resume_from_checkpoint`: None
|
| 293 |
+
- `hub_model_id`: None
|
| 294 |
+
- `hub_strategy`: every_save
|
| 295 |
+
- `hub_private_repo`: None
|
| 296 |
+
- `hub_always_push`: False
|
| 297 |
+
- `hub_revision`: None
|
| 298 |
+
- `gradient_checkpointing`: False
|
| 299 |
+
- `gradient_checkpointing_kwargs`: None
|
| 300 |
+
- `include_inputs_for_metrics`: False
|
| 301 |
+
- `include_for_metrics`: []
|
| 302 |
+
- `eval_do_concat_batches`: True
|
| 303 |
+
- `fp16_backend`: auto
|
| 304 |
+
- `push_to_hub_model_id`: None
|
| 305 |
+
- `push_to_hub_organization`: None
|
| 306 |
+
- `mp_parameters`:
|
| 307 |
+
- `auto_find_batch_size`: False
|
| 308 |
+
- `full_determinism`: False
|
| 309 |
+
- `torchdynamo`: None
|
| 310 |
+
- `ray_scope`: last
|
| 311 |
+
- `ddp_timeout`: 1800
|
| 312 |
+
- `torch_compile`: False
|
| 313 |
+
- `torch_compile_backend`: None
|
| 314 |
+
- `torch_compile_mode`: None
|
| 315 |
+
- `include_tokens_per_second`: False
|
| 316 |
+
- `include_num_input_tokens_seen`: no
|
| 317 |
+
- `neftune_noise_alpha`: None
|
| 318 |
+
- `optim_target_modules`: None
|
| 319 |
+
- `batch_eval_metrics`: False
|
| 320 |
+
- `eval_on_start`: False
|
| 321 |
+
- `use_liger_kernel`: False
|
| 322 |
+
- `liger_kernel_config`: None
|
| 323 |
+
- `eval_use_gather_object`: False
|
| 324 |
+
- `average_tokens_across_devices`: True
|
| 325 |
+
- `prompts`: None
|
| 326 |
+
- `batch_sampler`: batch_sampler
|
| 327 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 328 |
+
- `router_mapping`: {}
|
| 329 |
+
- `learning_rate_mapping`: {}
|
| 330 |
+
|
| 331 |
+
</details>
|
| 332 |
+
|
| 333 |
+
### Training Logs
|
| 334 |
+
| Epoch | Step | Training Loss |
|
| 335 |
+
|:------:|:----:|:-------------:|
|
| 336 |
+
| 2.4038 | 500 | 0.5086 |
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
### Framework Versions
|
| 340 |
+
- Python: 3.14.2
|
| 341 |
+
- Sentence Transformers: 5.2.0
|
| 342 |
+
- Transformers: 4.57.3
|
| 343 |
+
- PyTorch: 2.9.1+cu128
|
| 344 |
+
- Accelerate: 1.12.0
|
| 345 |
+
- Datasets: 4.4.2
|
| 346 |
+
- Tokenizers: 0.22.2
|
| 347 |
+
|
| 348 |
+
## Citation
|
| 349 |
+
|
| 350 |
+
### BibTeX
|
| 351 |
+
|
| 352 |
+
#### Sentence Transformers
|
| 353 |
+
```bibtex
|
| 354 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 355 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 356 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 357 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 358 |
+
month = "11",
|
| 359 |
+
year = "2019",
|
| 360 |
+
publisher = "Association for Computational Linguistics",
|
| 361 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 362 |
+
}
|
| 363 |
+
```
|
| 364 |
+
|
| 365 |
+
#### MultipleNegativesRankingLoss
|
| 366 |
+
```bibtex
|
| 367 |
+
@misc{henderson2017efficient,
|
| 368 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 369 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 370 |
+
year={2017},
|
| 371 |
+
eprint={1705.00652},
|
| 372 |
+
archivePrefix={arXiv},
|
| 373 |
+
primaryClass={cs.CL}
|
| 374 |
+
}
|
| 375 |
+
```
|
| 376 |
+
|
| 377 |
+
<!--
|
| 378 |
+
## Glossary
|
| 379 |
+
|
| 380 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 381 |
+
-->
|
| 382 |
+
|
| 383 |
+
<!--
|
| 384 |
+
## Model Card Authors
|
| 385 |
+
|
| 386 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 387 |
+
-->
|
| 388 |
+
|
| 389 |
+
<!--
|
| 390 |
+
## Model Card Contact
|
| 391 |
+
|
| 392 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 393 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation": "gelu",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DistilBertModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_dropout": 0.1,
|
| 7 |
+
"dim": 768,
|
| 8 |
+
"dropout": 0.1,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"max_position_embeddings": 512,
|
| 13 |
+
"model_type": "distilbert",
|
| 14 |
+
"n_heads": 12,
|
| 15 |
+
"n_layers": 6,
|
| 16 |
+
"output_hidden_states": true,
|
| 17 |
+
"output_past": true,
|
| 18 |
+
"pad_token_id": 0,
|
| 19 |
+
"qa_dropout": 0.1,
|
| 20 |
+
"seq_classif_dropout": 0.2,
|
| 21 |
+
"sinusoidal_pos_embds": false,
|
| 22 |
+
"tie_weights_": true,
|
| 23 |
+
"transformers_version": "4.57.3",
|
| 24 |
+
"vocab_size": 119547
|
| 25 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.2.0",
|
| 4 |
+
"transformers": "4.57.3",
|
| 5 |
+
"pytorch": "2.9.1+cu128"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dbce1944d0dcc2a8bfd3d6a517684ab7fc48711802723edd8ca3b4cb5856422
|
| 3 |
+
size 538947416
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Dense",
|
| 18 |
+
"type": "sentence_transformers.models.Dense"
|
| 19 |
+
}
|
| 20 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 128,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"full_tokenizer_file": null,
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"max_len": 512,
|
| 52 |
+
"max_length": 128,
|
| 53 |
+
"model_max_length": 128,
|
| 54 |
+
"never_split": null,
|
| 55 |
+
"pad_to_multiple_of": null,
|
| 56 |
+
"pad_token": "[PAD]",
|
| 57 |
+
"pad_token_type_id": 0,
|
| 58 |
+
"padding_side": "right",
|
| 59 |
+
"sep_token": "[SEP]",
|
| 60 |
+
"stride": 0,
|
| 61 |
+
"strip_accents": null,
|
| 62 |
+
"tokenize_chinese_chars": true,
|
| 63 |
+
"tokenizer_class": "DistilBertTokenizer",
|
| 64 |
+
"truncation_side": "right",
|
| 65 |
+
"truncation_strategy": "longest_first",
|
| 66 |
+
"unk_token": "[UNK]"
|
| 67 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|