PhysiQuanty's picture
Update README.md
a8367f4 verified
|
Raw
History Blame Contribute Delete
2.92 kB
---
license: apache-2.0
pipeline_tag: sentence-similarity
language:
- fr
tags:
- embeddings
- french
- feature-extraction
- bfloat16
- sentence-similarity
- text-embeddings
---
<table>
<thead>
<tr>
<th scope="col">Evaluation task</th>
<th scope="col" align="center">Embeddings-Francais-BF16-BASE-50M</th>
<th scope="col" align="center">Test-Train-Avant-Main-Train</th>
</tr>
</thead>
<tbody>
<tr>
<td>SICKFr</td>
<td align="center">0.519713</td>
<td align="center"><strong>0.699325</strong></td>
</tr>
<tr>
<td>SyntecReranking</td>
<td align="center">0.313680</td>
<td align="center"><strong>0.328360</strong></td>
</tr>
<tr>
<td>SummEvalFr</td>
<td align="center"><strong>0.306903</strong></td>
<td align="center">0.305028</td>
</tr>
<tr>
<td>AlloProfClusteringS2S</td>
<td align="center"><strong>0.213383</strong></td>
<td align="center">0.209503</td>
</tr>
<tr>
<td>SyntecRetrieval</td>
<td align="center">0.051370</td>
<td align="center"><strong>0.123900</strong></td>
</tr>
<tr>
<td>HALClusteringS2S</td>
<td align="center">Failed</td>
<td align="center"><strong>0.042094</strong></td>
</tr>
</tbody>
<tbody>
<tr>
<th scope="col">Hyperparameter</th>
<th scope="col" align="center">Embeddings-Francais-BF16-BASE-50M</th>
<th scope="col" align="center">Test-Train-Avant-Main-Train</th>
</tr>
<tr>
<td>Training tokens seen</td>
<td align="center">2.46B</td>
<td align="center">61.44M + SFT</td>
</tr>
<tr>
<td>Parameters</td>
<td align="center">169,896,960</td>
<td align="center">21,240,576</td>
</tr>
<tr>
<td>Context length</td>
<td align="center">4096</td>
<td align="center">4096</td>
</tr>
<tr>
<td>Embedding dimension</td>
<td align="center">1536</td>
<td align="center">384</td>
</tr>
<tr>
<td>Vocabulary size</td>
<td align="center">32768</td>
<td align="center">32768</td>
</tr>
<tr>
<td>Layers</td>
<td align="center">4</td>
<td align="center">4</td>
</tr>
<tr>
<td>Heads</td>
<td align="center">12</td>
<td align="center">4</td>
</tr>
<tr>
<td>Head dimension</td>
<td align="center">128</td>
<td align="center">96</td>
</tr>
<tr>
<td>Precision</td>
<td align="center">bfloat16</td>
<td align="center">bfloat16</td>
</tr>
<tr>
<td>Attention backend</td>
<td align="center">SageAttention</td>
<td align="center">SageAttention</td>
</tr>
<tr>
<td>Pooling</td>
<td align="center">Mean pooling</td>
<td align="center">Mean pooling</td>
</tr>
<tr>
<td>Normalization</td>
<td align="center">L2 normalize</td>
<td align="center">L2 normalize</td>
</tr>
</tbody>
</table>