parrishcorcoran commited on
Commit
0082594
·
verified ·
1 Parent(s): ccf50e8

Upload benchmark_headtohead.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. benchmark_headtohead.json +242 -0
benchmark_headtohead.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": [
3
+ {
4
+ "name": "BitNet b1.58 2B-4T (I2_S)",
5
+ "model_path": "/home/cpinchington/MedusaBitNet/models/bitnet-b1.58-2B-4T/ggml-model-i2_s.gguf",
6
+ "model_size_mb": 1187.310112,
7
+ "n_runs": 8,
8
+ "avg_gen_tok_s": 72.73875,
9
+ "avg_prefill_tok_s": 434.95375,
10
+ "avg_ms_per_tok": 13.75375,
11
+ "runs": [
12
+ {
13
+ "gen_tok_s": 75.47,
14
+ "gen_ms_per_tok": 13.25,
15
+ "prefill_tok_s": 473.97,
16
+ "wall_time": 3.8744120597839355
17
+ },
18
+ {
19
+ "gen_tok_s": 74.61,
20
+ "gen_ms_per_tok": 13.4,
21
+ "prefill_tok_s": 400.33,
22
+ "wall_time": 3.946519613265991
23
+ },
24
+ {
25
+ "gen_tok_s": 73.39,
26
+ "gen_ms_per_tok": 13.63,
27
+ "prefill_tok_s": 468.56,
28
+ "wall_time": 3.981218099594116
29
+ },
30
+ {
31
+ "gen_tok_s": 71.47,
32
+ "gen_ms_per_tok": 13.99,
33
+ "prefill_tok_s": 403.88,
34
+ "wall_time": 4.082836866378784
35
+ },
36
+ {
37
+ "gen_tok_s": 71.75,
38
+ "gen_ms_per_tok": 13.94,
39
+ "prefill_tok_s": 425.03,
40
+ "wall_time": 4.0725319385528564
41
+ },
42
+ {
43
+ "gen_tok_s": 71.9,
44
+ "gen_ms_per_tok": 13.91,
45
+ "prefill_tok_s": 399.68,
46
+ "wall_time": 4.0745580196380615
47
+ },
48
+ {
49
+ "gen_tok_s": 71.23,
50
+ "gen_ms_per_tok": 14.04,
51
+ "prefill_tok_s": 466.73,
52
+ "wall_time": 4.097683429718018
53
+ },
54
+ {
55
+ "gen_tok_s": 72.09,
56
+ "gen_ms_per_tok": 13.87,
57
+ "prefill_tok_s": 441.45,
58
+ "wall_time": 4.053175687789917
59
+ }
60
+ ]
61
+ },
62
+ {
63
+ "name": "Qwen2.5 1.5B (Q4_K_M)",
64
+ "model_path": "/home/cpinchington/MedusaBitNet/models/competing/qwen2.5-1.5b-instruct-q4_k_m.gguf",
65
+ "model_size_mb": 1117.320736,
66
+ "n_runs": 8,
67
+ "avg_gen_tok_s": 88.79125,
68
+ "avg_prefill_tok_s": 317.92375,
69
+ "avg_ms_per_tok": 11.2625,
70
+ "runs": [
71
+ {
72
+ "gen_tok_s": 88.83,
73
+ "gen_ms_per_tok": 11.26,
74
+ "prefill_tok_s": 277.6,
75
+ "wall_time": 3.4317729473114014
76
+ },
77
+ {
78
+ "gen_tok_s": 88.81,
79
+ "gen_ms_per_tok": 11.26,
80
+ "prefill_tok_s": 349.13,
81
+ "wall_time": 3.419360637664795
82
+ },
83
+ {
84
+ "gen_tok_s": 87.75,
85
+ "gen_ms_per_tok": 11.4,
86
+ "prefill_tok_s": 362.62,
87
+ "wall_time": 3.448280096054077
88
+ },
89
+ {
90
+ "gen_tok_s": 88.93,
91
+ "gen_ms_per_tok": 11.24,
92
+ "prefill_tok_s": 371.15,
93
+ "wall_time": 2.9640591144561768
94
+ },
95
+ {
96
+ "gen_tok_s": 88.83,
97
+ "gen_ms_per_tok": 11.26,
98
+ "prefill_tok_s": 278.95,
99
+ "wall_time": 3.4554200172424316
100
+ },
101
+ {
102
+ "gen_tok_s": 89.49,
103
+ "gen_ms_per_tok": 11.17,
104
+ "prefill_tok_s": 271.77,
105
+ "wall_time": 2.8726541996002197
106
+ },
107
+ {
108
+ "gen_tok_s": 88.94,
109
+ "gen_ms_per_tok": 11.24,
110
+ "prefill_tok_s": 278.1,
111
+ "wall_time": 3.42142915725708
112
+ },
113
+ {
114
+ "gen_tok_s": 88.75,
115
+ "gen_ms_per_tok": 11.27,
116
+ "prefill_tok_s": 354.07,
117
+ "wall_time": 3.4249227046966553
118
+ }
119
+ ]
120
+ },
121
+ {
122
+ "name": "Llama 3.2 1B (Q4_K_M)",
123
+ "model_path": "/home/cpinchington/MedusaBitNet/models/competing/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
124
+ "model_size_mb": 807.694464,
125
+ "n_runs": 8,
126
+ "avg_gen_tok_s": 115.94624999999999,
127
+ "avg_prefill_tok_s": 440.73375,
128
+ "avg_ms_per_tok": 8.62625,
129
+ "runs": [
130
+ {
131
+ "gen_tok_s": 115.23,
132
+ "gen_ms_per_tok": 8.68,
133
+ "prefill_tok_s": 373.11,
134
+ "wall_time": 3.3805642127990723
135
+ },
136
+ {
137
+ "gen_tok_s": 115.66,
138
+ "gen_ms_per_tok": 8.65,
139
+ "prefill_tok_s": 499.09,
140
+ "wall_time": 3.3459088802337646
141
+ },
142
+ {
143
+ "gen_tok_s": 115.25,
144
+ "gen_ms_per_tok": 8.68,
145
+ "prefill_tok_s": 505.72,
146
+ "wall_time": 3.353907346725464
147
+ },
148
+ {
149
+ "gen_tok_s": 116.84,
150
+ "gen_ms_per_tok": 8.56,
151
+ "prefill_tok_s": 324.38,
152
+ "wall_time": 3.3358867168426514
153
+ },
154
+ {
155
+ "gen_tok_s": 115.81,
156
+ "gen_ms_per_tok": 8.63,
157
+ "prefill_tok_s": 531.45,
158
+ "wall_time": 3.3553287982940674
159
+ },
160
+ {
161
+ "gen_tok_s": 116.16,
162
+ "gen_ms_per_tok": 8.61,
163
+ "prefill_tok_s": 524.34,
164
+ "wall_time": 3.340409278869629
165
+ },
166
+ {
167
+ "gen_tok_s": 117.13,
168
+ "gen_ms_per_tok": 8.54,
169
+ "prefill_tok_s": 341.84,
170
+ "wall_time": 3.348862648010254
171
+ },
172
+ {
173
+ "gen_tok_s": 115.49,
174
+ "gen_ms_per_tok": 8.66,
175
+ "prefill_tok_s": 425.94,
176
+ "wall_time": 3.349743366241455
177
+ }
178
+ ]
179
+ },
180
+ {
181
+ "name": "Gemma 2 2B (Q4_K_M)",
182
+ "model_path": "/home/cpinchington/MedusaBitNet/models/competing/gemma-2-2b-it-Q4_K_M.gguf",
183
+ "model_size_mb": 1708.582752,
184
+ "n_runs": 8,
185
+ "avg_gen_tok_s": 50.53125,
186
+ "avg_prefill_tok_s": 200.96,
187
+ "avg_ms_per_tok": 19.7875,
188
+ "runs": [
189
+ {
190
+ "gen_tok_s": 50.47,
191
+ "gen_ms_per_tok": 19.81,
192
+ "prefill_tok_s": 184.03,
193
+ "wall_time": 5.728861331939697
194
+ },
195
+ {
196
+ "gen_tok_s": 50.67,
197
+ "gen_ms_per_tok": 19.73,
198
+ "prefill_tok_s": 229.99,
199
+ "wall_time": 5.6800384521484375
200
+ },
201
+ {
202
+ "gen_tok_s": 50.37,
203
+ "gen_ms_per_tok": 19.85,
204
+ "prefill_tok_s": 165.91,
205
+ "wall_time": 5.7390992641448975
206
+ },
207
+ {
208
+ "gen_tok_s": 50.7,
209
+ "gen_ms_per_tok": 19.72,
210
+ "prefill_tok_s": 174.06,
211
+ "wall_time": 5.694071292877197
212
+ },
213
+ {
214
+ "gen_tok_s": 50.63,
215
+ "gen_ms_per_tok": 19.75,
216
+ "prefill_tok_s": 190.64,
217
+ "wall_time": 5.7339208126068115
218
+ },
219
+ {
220
+ "gen_tok_s": 50.33,
221
+ "gen_ms_per_tok": 19.87,
222
+ "prefill_tok_s": 253.86,
223
+ "wall_time": 5.723286867141724
224
+ },
225
+ {
226
+ "gen_tok_s": 50.52,
227
+ "gen_ms_per_tok": 19.79,
228
+ "prefill_tok_s": 233.19,
229
+ "wall_time": 5.69863224029541
230
+ },
231
+ {
232
+ "gen_tok_s": 50.56,
233
+ "gen_ms_per_tok": 19.78,
234
+ "prefill_tok_s": 176.0,
235
+ "wall_time": 5.730913162231445
236
+ }
237
+ ]
238
+ }
239
+ ],
240
+ "hardware": "AMD Ryzen AI MAX+ 395 (Strix Halo)",
241
+ "threads": 16
242
+ }