| --- |
| license: apache-2.0 |
| pipeline_tag: sentence-similarity |
| language: |
| - fr |
| tags: |
| - embeddings |
| - french |
| - feature-extraction |
| - bfloat16 |
| - sentence-similarity |
| - text-embeddings |
| --- |
| |
| <table> |
| <!-- Side-by-side comparison of two embedding models: evaluation scores first, then hyperparameters. --> |
| <thead> |
| <tr> |
| <th scope="col">Evaluation task</th> |
| <th scope="col" align="center">Embeddings-Francais-BF16-BASE-50M</th> |
| <th scope="col" align="center">Test-Train-Avant-Main-Train</th> |
| </tr> |
| </thead> |
| <!-- Evaluation scores; the higher score in each row is marked with strong. --> |
| <tbody> |
| <tr> |
| <td>SICKFr</td> |
| <td align="center">0.519713</td> |
| <td align="center"><strong>0.699325</strong></td> |
| </tr> |
| <tr> |
| <td>SyntecReranking</td> |
| <td align="center">0.313680</td> |
| <td align="center"><strong>0.328360</strong></td> |
| </tr> |
| <tr> |
| <td>SummEvalFr</td> |
| <td align="center"><strong>0.306903</strong></td> |
| <td align="center">0.305028</td> |
| </tr> |
| <tr> |
| <td>AlloProfClusteringS2S</td> |
| <td align="center"><strong>0.213383</strong></td> |
| <td align="center">0.209503</td> |
| </tr> |
| <tr> |
| <td>SyntecRetrieval</td> |
| <td align="center">0.051370</td> |
| <td align="center"><strong>0.123900</strong></td> |
| </tr> |
| <tr> |
| <td>HALClusteringS2S</td> |
| <td align="center">Failed</td> |
| <td align="center"><strong>0.042094</strong></td> |
| </tr> |
| </tbody> |
| <!-- Hyperparameters; this row group starts with its own sub-header row. --> |
| <tbody> |
| <tr> |
| <th scope="col">Hyperparameter</th> |
| <th scope="col" align="center">Embeddings-Francais-BF16-BASE-50M</th> |
| <th scope="col" align="center">Test-Train-Avant-Main-Train</th> |
| </tr> |
| <tr> |
| <td>Training tokens seen</td> |
| <td align="center">2.46B</td> |
| <td align="center">61.44M + SFT</td> |
| </tr> |
| <tr> |
| <td>Parameters</td> |
| <td align="center">169,896,960</td> |
| <td align="center">21,240,576</td> |
| </tr> |
| <tr> |
| <td>Context length</td> |
| <td align="center">4096</td> |
| <td align="center">4096</td> |
| </tr> |
| <tr> |
| <td>Embedding dimension</td> |
| <td align="center">1536</td> |
| <td align="center">384</td> |
| </tr> |
| <tr> |
| <td>Vocabulary size</td> |
| <td align="center">32768</td> |
| <td align="center">32768</td> |
| </tr> |
| <tr> |
| <td>Layers</td> |
| <td align="center">4</td> |
| <td align="center">4</td> |
| </tr> |
| <tr> |
| <td>Heads</td> |
| <td align="center">12</td> |
| <td align="center">4</td> |
| </tr> |
| <tr> |
| <td>Head dimension</td> |
| <td align="center">128</td> |
| <td align="center">96</td> |
| </tr> |
| <tr> |
| <td>Precision</td> |
| <td align="center">bfloat16</td> |
| <td align="center">bfloat16</td> |
| </tr> |
| <tr> |
| <td>Attention backend</td> |
| <td align="center">SageAttention</td> |
| <td align="center">SageAttention</td> |
| </tr> |
| <tr> |
| <td>Pooling</td> |
| <td align="center">Mean pooling</td> |
| <td align="center">Mean pooling</td> |
| </tr> |
| <tr> |
| <td>Normalization</td> |
| <td align="center">L2 normalize</td> |
| <td align="center">L2 normalize</td> |
| </tr> |
| </tbody> |
| </table> |