File size: 2,917 Bytes
a8e765d
 
a8367f4
a8e765d
a8367f4
a8e765d
a8367f4
 
 
 
 
 
a8e765d
 
 
 
a137c08
 
 
 
 
 
 
 
3dba654
a137c08
 
 
 
3dba654
a137c08
 
 
3dba654
 
a137c08
 
 
3dba654
 
a137c08
 
 
 
3dba654
a137c08
 
 
 
3dba654
a137c08
a8e765d
 
 
 
 
 
 
63bf8c3
 
 
a00d870
3dba654
346fe28
 
 
 
 
a8e765d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
---
license: apache-2.0
pipeline_tag: sentence-similarity
language:
- fr
tags:
- embeddings
- french
- feature-extraction
- bfloat16
- sentence-similarity
- text-embeddings
---

<table>
  <thead>
    <tr>
      <th>Evaluation task</th>
      <th align="center">Embeddings-Francais-BF16-BASE-50M</th>
      <th align="center">Test-Train-Avant-Main-Train</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>SICKFr</td>
      <td align="center">0.519713</td>
      <td align="center"><strong>0.699325</strong></td>
    </tr>
    <tr>
      <td>SyntecReranking</td>
      <td align="center">0.313680</td>
      <td align="center"><strong>0.328360</strong></td>
    </tr>
    <tr>
      <td>SummEvalFr</td>
      <td align="center"><strong>0.306903</strong></td>
      <td align="center">0.305028</td>
    </tr>
    <tr>
      <td>AlloProfClusteringS2S</td>
      <td align="center"><strong>0.213383</strong></td>
      <td align="center">0.209503</td>
    </tr>
    <tr>
      <td>SyntecRetrieval</td>
      <td align="center">0.051370</td>
      <td align="center"><strong>0.123900</strong></td>
    </tr>
    <tr>
      <td>HALClusteringS2S</td>
      <td align="center">Failed</td>
      <td align="center"><strong>0.042094</strong></td>
    </tr>
    <tr>
      <th>Hyperparameter</th>
      <th align="center">Embeddings-Francais-BF16-BASE-50M</th>
      <th align="center">Test-Train-Avant-Main-Train</th>
    </tr>
    <tr>
      <td>Training tokens seen</td>
      <td align="center">2.46B</td>
      <td align="center">61.44M + SFT</td>
    </tr>
    <tr>
      <td>Parameters</td>
      <td align="center">169,896,960</td>
      <td align="center">21,240,576</td>
    </tr>
    <tr>
      <td>Context length</td>
      <td align="center">4096</td>
      <td align="center">4096</td>
    </tr>
    <tr>
      <td>Embedding dimension</td>
      <td align="center">1536</td>
      <td align="center">384</td>
    </tr>
    <tr>
      <td>Vocabulary size</td>
      <td align="center">32768</td>
      <td align="center">32768</td>
    </tr>
    <tr>
      <td>Layers</td>
      <td align="center">4</td>
      <td align="center">4</td>
    </tr>
    <tr>
      <td>Heads</td>
      <td align="center">12</td>
      <td align="center">4</td>
    </tr>
    <tr>
      <td>Head dimension</td>
      <td align="center">128</td>
      <td align="center">96</td>
    </tr>
    <tr>
      <td>Precision</td>
      <td align="center">bfloat16</td>
      <td align="center">bfloat16</td>
    </tr>
    <tr>
      <td>Attention backend</td>
      <td align="center">SageAttention</td>
      <td align="center">SageAttention</td>
    </tr>
    <tr>
      <td>Pooling</td>
      <td align="center">Mean pooling</td>
      <td align="center">Mean pooling</td>
    </tr>
    <tr>
      <td>Normalization</td>
      <td align="center">L2 normalize</td>
      <td align="center">L2 normalize</td>
    </tr>
  </tbody>
</table>