ror HF Staff commited on
Commit
46f4b10
·
1 Parent(s): 9aa1b3d

data backend

Browse files
Files changed (2) hide show
  1. data.json +1418 -0
  2. data.py +30 -0
data.json ADDED
@@ -0,0 +1,1418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eager_None_uncompiled_vanilla_with_cache": {
3
+ "metadata": {
4
+ "timestamp": "2025-09-26T12:00:15.841272",
5
+ "commit_id": null,
6
+ "hardware_info": {
7
+ "gpu_name": "AMD Instinct Mi325X VF",
8
+ "gpu_memory_total_mb": 255.6875,
9
+ "python_version": "3.12.10",
10
+ "torch_version": "2.7.1+rocm6.4.1.git2a215e4a"
11
+ },
12
+ "config": {
13
+ "name": "eager_None_uncompiled_vanilla_with_cache",
14
+ "warmup_iterations": 5,
15
+ "measurement_iterations": 20,
16
+ "gpu_monitoring": false,
17
+ "batch_size": 16,
18
+ "sequence_length": 128,
19
+ "num_tokens_to_generate": 128,
20
+ "attn_implementation": "eager",
21
+ "use_cache": true,
22
+ "sdpa_backend": null,
23
+ "compilation": false,
24
+ "compile_mode": null,
25
+ "compile_options": {},
26
+ "kernelize": false,
27
+ "device": "cuda",
28
+ "dtype": "torch.bfloat16"
29
+ }
30
+ },
31
+ "ttft": [
32
+ {
33
+ "wall_time": 0.07295582396909595,
34
+ "cuda_time": 0.07292783355712891,
35
+ "batch_size": 16,
36
+ "new_tokens": 1,
37
+ "use_cuda_time": false,
38
+ "gpu_metrics": null
39
+ },
40
+ {
41
+ "wall_time": 0.07316389994230121,
42
+ "cuda_time": 0.0731438217163086,
43
+ "batch_size": 16,
44
+ "new_tokens": 1,
45
+ "use_cuda_time": false,
46
+ "gpu_metrics": null
47
+ },
48
+ {
49
+ "wall_time": 0.07270528899971396,
50
+ "cuda_time": 0.0726801986694336,
51
+ "batch_size": 16,
52
+ "new_tokens": 1,
53
+ "use_cuda_time": false,
54
+ "gpu_metrics": null
55
+ },
56
+ {
57
+ "wall_time": 0.07304733199998736,
58
+ "cuda_time": 0.07302810668945313,
59
+ "batch_size": 16,
60
+ "new_tokens": 1,
61
+ "use_cuda_time": false,
62
+ "gpu_metrics": null
63
+ },
64
+ {
65
+ "wall_time": 0.07359540998004377,
66
+ "cuda_time": 0.07356825256347656,
67
+ "batch_size": 16,
68
+ "new_tokens": 1,
69
+ "use_cuda_time": false,
70
+ "gpu_metrics": null
71
+ },
72
+ {
73
+ "wall_time": 0.07285187696106732,
74
+ "cuda_time": 0.07282662963867187,
75
+ "batch_size": 16,
76
+ "new_tokens": 1,
77
+ "use_cuda_time": false,
78
+ "gpu_metrics": null
79
+ },
80
+ {
81
+ "wall_time": 0.07380507595371455,
82
+ "cuda_time": 0.07378445434570312,
83
+ "batch_size": 16,
84
+ "new_tokens": 1,
85
+ "use_cuda_time": false,
86
+ "gpu_metrics": null
87
+ },
88
+ {
89
+ "wall_time": 0.07294069498311728,
90
+ "cuda_time": 0.07292146301269531,
91
+ "batch_size": 16,
92
+ "new_tokens": 1,
93
+ "use_cuda_time": false,
94
+ "gpu_metrics": null
95
+ },
96
+ {
97
+ "wall_time": 0.0724824140779674,
98
+ "cuda_time": 0.07246123504638671,
99
+ "batch_size": 16,
100
+ "new_tokens": 1,
101
+ "use_cuda_time": false,
102
+ "gpu_metrics": null
103
+ },
104
+ {
105
+ "wall_time": 0.07335149694699794,
106
+ "cuda_time": 0.07333294677734375,
107
+ "batch_size": 16,
108
+ "new_tokens": 1,
109
+ "use_cuda_time": false,
110
+ "gpu_metrics": null
111
+ },
112
+ {
113
+ "wall_time": 0.07265867001842707,
114
+ "cuda_time": 0.07263875579833984,
115
+ "batch_size": 16,
116
+ "new_tokens": 1,
117
+ "use_cuda_time": false,
118
+ "gpu_metrics": null
119
+ },
120
+ {
121
+ "wall_time": 0.07336580497212708,
122
+ "cuda_time": 0.07334489440917968,
123
+ "batch_size": 16,
124
+ "new_tokens": 1,
125
+ "use_cuda_time": false,
126
+ "gpu_metrics": null
127
+ },
128
+ {
129
+ "wall_time": 0.07250520400702953,
130
+ "cuda_time": 0.07229901123046875,
131
+ "batch_size": 16,
132
+ "new_tokens": 1,
133
+ "use_cuda_time": false,
134
+ "gpu_metrics": null
135
+ },
136
+ {
137
+ "wall_time": 0.07315384992398322,
138
+ "cuda_time": 0.07313486480712891,
139
+ "batch_size": 16,
140
+ "new_tokens": 1,
141
+ "use_cuda_time": false,
142
+ "gpu_metrics": null
143
+ },
144
+ {
145
+ "wall_time": 0.07289102603681386,
146
+ "cuda_time": 0.07287146759033203,
147
+ "batch_size": 16,
148
+ "new_tokens": 1,
149
+ "use_cuda_time": false,
150
+ "gpu_metrics": null
151
+ },
152
+ {
153
+ "wall_time": 0.07284493604674935,
154
+ "cuda_time": 0.07282552337646485,
155
+ "batch_size": 16,
156
+ "new_tokens": 1,
157
+ "use_cuda_time": false,
158
+ "gpu_metrics": null
159
+ },
160
+ {
161
+ "wall_time": 0.07329084700904787,
162
+ "cuda_time": 0.07327109527587891,
163
+ "batch_size": 16,
164
+ "new_tokens": 1,
165
+ "use_cuda_time": false,
166
+ "gpu_metrics": null
167
+ },
168
+ {
169
+ "wall_time": 0.07241688505746424,
170
+ "cuda_time": 0.07239772033691406,
171
+ "batch_size": 16,
172
+ "new_tokens": 1,
173
+ "use_cuda_time": false,
174
+ "gpu_metrics": null
175
+ },
176
+ {
177
+ "wall_time": 0.07338041497860104,
178
+ "cuda_time": 0.07336121368408204,
179
+ "batch_size": 16,
180
+ "new_tokens": 1,
181
+ "use_cuda_time": false,
182
+ "gpu_metrics": null
183
+ },
184
+ {
185
+ "wall_time": 0.07338539499323815,
186
+ "cuda_time": 0.07336421966552735,
187
+ "batch_size": 16,
188
+ "new_tokens": 1,
189
+ "use_cuda_time": false,
190
+ "gpu_metrics": null
191
+ }
192
+ ],
193
+ "tpot": [
194
+ {
195
+ "wall_time": 2.031788578024134,
196
+ "cuda_time": 2.0317476806640626,
197
+ "batch_size": 16,
198
+ "new_tokens": 128,
199
+ "use_cuda_time": false,
200
+ "gpu_metrics": null
201
+ },
202
+ {
203
+ "wall_time": 2.0359795569675043,
204
+ "cuda_time": 2.0359508056640623,
205
+ "batch_size": 16,
206
+ "new_tokens": 128,
207
+ "use_cuda_time": false,
208
+ "gpu_metrics": null
209
+ },
210
+ {
211
+ "wall_time": 2.0377820000285283,
212
+ "cuda_time": 2.0377520751953124,
213
+ "batch_size": 16,
214
+ "new_tokens": 128,
215
+ "use_cuda_time": false,
216
+ "gpu_metrics": null
217
+ },
218
+ {
219
+ "wall_time": 2.03635143104475,
220
+ "cuda_time": 2.0363153076171874,
221
+ "batch_size": 16,
222
+ "new_tokens": 128,
223
+ "use_cuda_time": false,
224
+ "gpu_metrics": null
225
+ },
226
+ {
227
+ "wall_time": 2.0420283479616046,
228
+ "cuda_time": 2.0419971923828126,
229
+ "batch_size": 16,
230
+ "new_tokens": 128,
231
+ "use_cuda_time": false,
232
+ "gpu_metrics": null
233
+ },
234
+ {
235
+ "wall_time": 2.046406274079345,
236
+ "cuda_time": 2.046373046875,
237
+ "batch_size": 16,
238
+ "new_tokens": 128,
239
+ "use_cuda_time": false,
240
+ "gpu_metrics": null
241
+ },
242
+ {
243
+ "wall_time": 1.848314146976918,
244
+ "cuda_time": 1.848263916015625,
245
+ "batch_size": 16,
246
+ "new_tokens": 128,
247
+ "use_cuda_time": false,
248
+ "gpu_metrics": null
249
+ },
250
+ {
251
+ "wall_time": 1.8559249829268083,
252
+ "cuda_time": 1.855894287109375,
253
+ "batch_size": 16,
254
+ "new_tokens": 128,
255
+ "use_cuda_time": false,
256
+ "gpu_metrics": null
257
+ },
258
+ {
259
+ "wall_time": 1.967564046033658,
260
+ "cuda_time": 1.96753173828125,
261
+ "batch_size": 16,
262
+ "new_tokens": 128,
263
+ "use_cuda_time": false,
264
+ "gpu_metrics": null
265
+ },
266
+ {
267
+ "wall_time": 2.0448259089607745,
268
+ "cuda_time": 2.0447965087890627,
269
+ "batch_size": 16,
270
+ "new_tokens": 128,
271
+ "use_cuda_time": false,
272
+ "gpu_metrics": null
273
+ },
274
+ {
275
+ "wall_time": 1.8805117839947343,
276
+ "cuda_time": 1.8804791259765625,
277
+ "batch_size": 16,
278
+ "new_tokens": 128,
279
+ "use_cuda_time": false,
280
+ "gpu_metrics": null
281
+ },
282
+ {
283
+ "wall_time": 2.054183567990549,
284
+ "cuda_time": 2.054150634765625,
285
+ "batch_size": 16,
286
+ "new_tokens": 128,
287
+ "use_cuda_time": false,
288
+ "gpu_metrics": null
289
+ },
290
+ {
291
+ "wall_time": 2.0287541430443525,
292
+ "cuda_time": 2.02872314453125,
293
+ "batch_size": 16,
294
+ "new_tokens": 128,
295
+ "use_cuda_time": false,
296
+ "gpu_metrics": null
297
+ },
298
+ {
299
+ "wall_time": 2.031609061989002,
300
+ "cuda_time": 2.0315772705078126,
301
+ "batch_size": 16,
302
+ "new_tokens": 128,
303
+ "use_cuda_time": false,
304
+ "gpu_metrics": null
305
+ },
306
+ {
307
+ "wall_time": 2.030884437961504,
308
+ "cuda_time": 2.0308531494140625,
309
+ "batch_size": 16,
310
+ "new_tokens": 128,
311
+ "use_cuda_time": false,
312
+ "gpu_metrics": null
313
+ },
314
+ {
315
+ "wall_time": 2.039256637915969,
316
+ "cuda_time": 2.0392255859375,
317
+ "batch_size": 16,
318
+ "new_tokens": 128,
319
+ "use_cuda_time": false,
320
+ "gpu_metrics": null
321
+ },
322
+ {
323
+ "wall_time": 2.039441852015443,
324
+ "cuda_time": 2.0394091796875,
325
+ "batch_size": 16,
326
+ "new_tokens": 128,
327
+ "use_cuda_time": false,
328
+ "gpu_metrics": null
329
+ },
330
+ {
331
+ "wall_time": 2.028935077949427,
332
+ "cuda_time": 2.0289049072265626,
333
+ "batch_size": 16,
334
+ "new_tokens": 128,
335
+ "use_cuda_time": false,
336
+ "gpu_metrics": null
337
+ },
338
+ {
339
+ "wall_time": 2.0309303459944203,
340
+ "cuda_time": 2.0308995361328126,
341
+ "batch_size": 16,
342
+ "new_tokens": 128,
343
+ "use_cuda_time": false,
344
+ "gpu_metrics": null
345
+ },
346
+ {
347
+ "wall_time": 2.052516763098538,
348
+ "cuda_time": 2.052485107421875,
349
+ "batch_size": 16,
350
+ "new_tokens": 128,
351
+ "use_cuda_time": false,
352
+ "gpu_metrics": null
353
+ }
354
+ ]
355
+ },
356
+ "eager_None_compiled_vanilla_with_cache": {
357
+ "metadata": {
358
+ "timestamp": "2025-09-26T12:01:13.033342",
359
+ "commit_id": null,
360
+ "hardware_info": {
361
+ "gpu_name": "AMD Instinct Mi325X VF",
362
+ "gpu_memory_total_mb": 255.6875,
363
+ "python_version": "3.12.10",
364
+ "torch_version": "2.7.1+rocm6.4.1.git2a215e4a"
365
+ },
366
+ "config": {
367
+ "name": "eager_None_compiled_vanilla_with_cache",
368
+ "warmup_iterations": 5,
369
+ "measurement_iterations": 20,
370
+ "gpu_monitoring": false,
371
+ "batch_size": 16,
372
+ "sequence_length": 128,
373
+ "num_tokens_to_generate": 128,
374
+ "attn_implementation": "eager",
375
+ "use_cache": true,
376
+ "sdpa_backend": null,
377
+ "compilation": true,
378
+ "compile_mode": "max-autotune",
379
+ "compile_options": {},
380
+ "kernelize": false,
381
+ "device": "cuda",
382
+ "dtype": "torch.bfloat16"
383
+ }
384
+ },
385
+ "ttft": [
386
+ {
387
+ "wall_time": 0.07427955605089664,
388
+ "cuda_time": 0.07425287628173828,
389
+ "batch_size": 16,
390
+ "new_tokens": 1,
391
+ "use_cuda_time": false,
392
+ "gpu_metrics": null
393
+ },
394
+ {
395
+ "wall_time": 0.07495377992745489,
396
+ "cuda_time": 0.07493439483642578,
397
+ "batch_size": 16,
398
+ "new_tokens": 1,
399
+ "use_cuda_time": false,
400
+ "gpu_metrics": null
401
+ },
402
+ {
403
+ "wall_time": 0.07390031509567052,
404
+ "cuda_time": 0.07387985229492187,
405
+ "batch_size": 16,
406
+ "new_tokens": 1,
407
+ "use_cuda_time": false,
408
+ "gpu_metrics": null
409
+ },
410
+ {
411
+ "wall_time": 0.07471515703946352,
412
+ "cuda_time": 0.07469596099853516,
413
+ "batch_size": 16,
414
+ "new_tokens": 1,
415
+ "use_cuda_time": false,
416
+ "gpu_metrics": null
417
+ },
418
+ {
419
+ "wall_time": 0.07410621899180114,
420
+ "cuda_time": 0.07408612823486328,
421
+ "batch_size": 16,
422
+ "new_tokens": 1,
423
+ "use_cuda_time": false,
424
+ "gpu_metrics": null
425
+ },
426
+ {
427
+ "wall_time": 0.07513214694336057,
428
+ "cuda_time": 0.07511282348632813,
429
+ "batch_size": 16,
430
+ "new_tokens": 1,
431
+ "use_cuda_time": false,
432
+ "gpu_metrics": null
433
+ },
434
+ {
435
+ "wall_time": 0.07445675204508007,
436
+ "cuda_time": 0.07443669128417969,
437
+ "batch_size": 16,
438
+ "new_tokens": 1,
439
+ "use_cuda_time": false,
440
+ "gpu_metrics": null
441
+ },
442
+ {
443
+ "wall_time": 0.07454574899747968,
444
+ "cuda_time": 0.07452691650390625,
445
+ "batch_size": 16,
446
+ "new_tokens": 1,
447
+ "use_cuda_time": false,
448
+ "gpu_metrics": null
449
+ },
450
+ {
451
+ "wall_time": 0.07436268392484635,
452
+ "cuda_time": 0.07433811950683594,
453
+ "batch_size": 16,
454
+ "new_tokens": 1,
455
+ "use_cuda_time": false,
456
+ "gpu_metrics": null
457
+ },
458
+ {
459
+ "wall_time": 0.07478532497771084,
460
+ "cuda_time": 0.07476634979248047,
461
+ "batch_size": 16,
462
+ "new_tokens": 1,
463
+ "use_cuda_time": false,
464
+ "gpu_metrics": null
465
+ },
466
+ {
467
+ "wall_time": 0.07541929103899747,
468
+ "cuda_time": 0.075388427734375,
469
+ "batch_size": 16,
470
+ "new_tokens": 1,
471
+ "use_cuda_time": false,
472
+ "gpu_metrics": null
473
+ },
474
+ {
475
+ "wall_time": 0.07438250305131078,
476
+ "cuda_time": 0.07436360168457032,
477
+ "batch_size": 16,
478
+ "new_tokens": 1,
479
+ "use_cuda_time": false,
480
+ "gpu_metrics": null
481
+ },
482
+ {
483
+ "wall_time": 0.07475267606787384,
484
+ "cuda_time": 0.07473255157470703,
485
+ "batch_size": 16,
486
+ "new_tokens": 1,
487
+ "use_cuda_time": false,
488
+ "gpu_metrics": null
489
+ },
490
+ {
491
+ "wall_time": 0.07406081003136933,
492
+ "cuda_time": 0.07403617095947265,
493
+ "batch_size": 16,
494
+ "new_tokens": 1,
495
+ "use_cuda_time": false,
496
+ "gpu_metrics": null
497
+ },
498
+ {
499
+ "wall_time": 0.07498570007737726,
500
+ "cuda_time": 0.07496607208251953,
501
+ "batch_size": 16,
502
+ "new_tokens": 1,
503
+ "use_cuda_time": false,
504
+ "gpu_metrics": null
505
+ },
506
+ {
507
+ "wall_time": 0.07443700195290148,
508
+ "cuda_time": 0.07441815948486329,
509
+ "batch_size": 16,
510
+ "new_tokens": 1,
511
+ "use_cuda_time": false,
512
+ "gpu_metrics": null
513
+ },
514
+ {
515
+ "wall_time": 0.07433398498687893,
516
+ "cuda_time": 0.07422721099853516,
517
+ "batch_size": 16,
518
+ "new_tokens": 1,
519
+ "use_cuda_time": false,
520
+ "gpu_metrics": null
521
+ },
522
+ {
523
+ "wall_time": 0.07485785405151546,
524
+ "cuda_time": 0.07483879089355469,
525
+ "batch_size": 16,
526
+ "new_tokens": 1,
527
+ "use_cuda_time": false,
528
+ "gpu_metrics": null
529
+ },
530
+ {
531
+ "wall_time": 0.07518403592985123,
532
+ "cuda_time": 0.07516366577148438,
533
+ "batch_size": 16,
534
+ "new_tokens": 1,
535
+ "use_cuda_time": false,
536
+ "gpu_metrics": null
537
+ },
538
+ {
539
+ "wall_time": 0.0745964579982683,
540
+ "cuda_time": 0.07457791900634765,
541
+ "batch_size": 16,
542
+ "new_tokens": 1,
543
+ "use_cuda_time": false,
544
+ "gpu_metrics": null
545
+ }
546
+ ],
547
+ "tpot": [
548
+ {
549
+ "wall_time": 2.0495622069574893,
550
+ "cuda_time": 2.0495224609375,
551
+ "batch_size": 16,
552
+ "new_tokens": 128,
553
+ "use_cuda_time": false,
554
+ "gpu_metrics": null
555
+ },
556
+ {
557
+ "wall_time": 2.0417750619817525,
558
+ "cuda_time": 2.0417412109375,
559
+ "batch_size": 16,
560
+ "new_tokens": 128,
561
+ "use_cuda_time": false,
562
+ "gpu_metrics": null
563
+ },
564
+ {
565
+ "wall_time": 2.0438177799806,
566
+ "cuda_time": 2.043787109375,
567
+ "batch_size": 16,
568
+ "new_tokens": 128,
569
+ "use_cuda_time": false,
570
+ "gpu_metrics": null
571
+ },
572
+ {
573
+ "wall_time": 2.0555215378990397,
574
+ "cuda_time": 2.0554853515625,
575
+ "batch_size": 16,
576
+ "new_tokens": 128,
577
+ "use_cuda_time": false,
578
+ "gpu_metrics": null
579
+ },
580
+ {
581
+ "wall_time": 2.052577171009034,
582
+ "cuda_time": 2.05254443359375,
583
+ "batch_size": 16,
584
+ "new_tokens": 128,
585
+ "use_cuda_time": false,
586
+ "gpu_metrics": null
587
+ },
588
+ {
589
+ "wall_time": 2.057972935028374,
590
+ "cuda_time": 2.05794091796875,
591
+ "batch_size": 16,
592
+ "new_tokens": 128,
593
+ "use_cuda_time": false,
594
+ "gpu_metrics": null
595
+ },
596
+ {
597
+ "wall_time": 2.048640146967955,
598
+ "cuda_time": 2.048610107421875,
599
+ "batch_size": 16,
600
+ "new_tokens": 128,
601
+ "use_cuda_time": false,
602
+ "gpu_metrics": null
603
+ },
604
+ {
605
+ "wall_time": 2.0477576749399304,
606
+ "cuda_time": 2.047728759765625,
607
+ "batch_size": 16,
608
+ "new_tokens": 128,
609
+ "use_cuda_time": false,
610
+ "gpu_metrics": null
611
+ },
612
+ {
613
+ "wall_time": 2.043742422014475,
614
+ "cuda_time": 2.0437120361328125,
615
+ "batch_size": 16,
616
+ "new_tokens": 128,
617
+ "use_cuda_time": false,
618
+ "gpu_metrics": null
619
+ },
620
+ {
621
+ "wall_time": 2.066631429013796,
622
+ "cuda_time": 2.066598388671875,
623
+ "batch_size": 16,
624
+ "new_tokens": 128,
625
+ "use_cuda_time": false,
626
+ "gpu_metrics": null
627
+ },
628
+ {
629
+ "wall_time": 2.058080272981897,
630
+ "cuda_time": 2.058049072265625,
631
+ "batch_size": 16,
632
+ "new_tokens": 128,
633
+ "use_cuda_time": false,
634
+ "gpu_metrics": null
635
+ },
636
+ {
637
+ "wall_time": 2.0463295759400353,
638
+ "cuda_time": 2.0462979736328126,
639
+ "batch_size": 16,
640
+ "new_tokens": 128,
641
+ "use_cuda_time": false,
642
+ "gpu_metrics": null
643
+ },
644
+ {
645
+ "wall_time": 1.871039719088003,
646
+ "cuda_time": 1.871006103515625,
647
+ "batch_size": 16,
648
+ "new_tokens": 128,
649
+ "use_cuda_time": false,
650
+ "gpu_metrics": null
651
+ },
652
+ {
653
+ "wall_time": 1.9884425880154595,
654
+ "cuda_time": 1.9884063720703125,
655
+ "batch_size": 16,
656
+ "new_tokens": 128,
657
+ "use_cuda_time": false,
658
+ "gpu_metrics": null
659
+ },
660
+ {
661
+ "wall_time": 2.060368453967385,
662
+ "cuda_time": 2.060333984375,
663
+ "batch_size": 16,
664
+ "new_tokens": 128,
665
+ "use_cuda_time": false,
666
+ "gpu_metrics": null
667
+ },
668
+ {
669
+ "wall_time": 2.070050645968877,
670
+ "cuda_time": 2.070018798828125,
671
+ "batch_size": 16,
672
+ "new_tokens": 128,
673
+ "use_cuda_time": false,
674
+ "gpu_metrics": null
675
+ },
676
+ {
677
+ "wall_time": 2.0624818300129846,
678
+ "cuda_time": 2.062452880859375,
679
+ "batch_size": 16,
680
+ "new_tokens": 128,
681
+ "use_cuda_time": false,
682
+ "gpu_metrics": null
683
+ },
684
+ {
685
+ "wall_time": 2.072273188037798,
686
+ "cuda_time": 2.0722421875,
687
+ "batch_size": 16,
688
+ "new_tokens": 128,
689
+ "use_cuda_time": false,
690
+ "gpu_metrics": null
691
+ },
692
+ {
693
+ "wall_time": 2.0667856170330197,
694
+ "cuda_time": 2.06675244140625,
695
+ "batch_size": 16,
696
+ "new_tokens": 128,
697
+ "use_cuda_time": false,
698
+ "gpu_metrics": null
699
+ },
700
+ {
701
+ "wall_time": 2.039454172947444,
702
+ "cuda_time": 2.0394228515625,
703
+ "batch_size": 16,
704
+ "new_tokens": 128,
705
+ "use_cuda_time": false,
706
+ "gpu_metrics": null
707
+ }
708
+ ]
709
+ },
710
+ "sdpa_None_uncompiled_vanilla_with_cache": {
711
+ "metadata": {
712
+ "timestamp": "2025-09-26T12:01:57.531459",
713
+ "commit_id": null,
714
+ "hardware_info": {
715
+ "gpu_name": "AMD Instinct Mi325X VF",
716
+ "gpu_memory_total_mb": 255.6875,
717
+ "python_version": "3.12.10",
718
+ "torch_version": "2.7.1+rocm6.4.1.git2a215e4a"
719
+ },
720
+ "config": {
721
+ "name": "sdpa_None_uncompiled_vanilla_with_cache",
722
+ "warmup_iterations": 5,
723
+ "measurement_iterations": 20,
724
+ "gpu_monitoring": false,
725
+ "batch_size": 16,
726
+ "sequence_length": 128,
727
+ "num_tokens_to_generate": 128,
728
+ "attn_implementation": "sdpa",
729
+ "use_cache": true,
730
+ "sdpa_backend": null,
731
+ "compilation": false,
732
+ "compile_mode": null,
733
+ "compile_options": {},
734
+ "kernelize": false,
735
+ "device": "cuda",
736
+ "dtype": "torch.bfloat16"
737
+ }
738
+ },
739
+ "ttft": [
740
+ {
741
+ "wall_time": 0.06973504310008138,
742
+ "cuda_time": 0.06970318603515625,
743
+ "batch_size": 16,
744
+ "new_tokens": 1,
745
+ "use_cuda_time": false,
746
+ "gpu_metrics": null
747
+ },
748
+ {
749
+ "wall_time": 0.06962730409577489,
750
+ "cuda_time": 0.069607666015625,
751
+ "batch_size": 16,
752
+ "new_tokens": 1,
753
+ "use_cuda_time": false,
754
+ "gpu_metrics": null
755
+ },
756
+ {
757
+ "wall_time": 0.06969393300823867,
758
+ "cuda_time": 0.069668701171875,
759
+ "batch_size": 16,
760
+ "new_tokens": 1,
761
+ "use_cuda_time": false,
762
+ "gpu_metrics": null
763
+ },
764
+ {
765
+ "wall_time": 0.07013454497791827,
766
+ "cuda_time": 0.07011565399169922,
767
+ "batch_size": 16,
768
+ "new_tokens": 1,
769
+ "use_cuda_time": false,
770
+ "gpu_metrics": null
771
+ },
772
+ {
773
+ "wall_time": 0.06957653688732535,
774
+ "cuda_time": 0.06955630493164063,
775
+ "batch_size": 16,
776
+ "new_tokens": 1,
777
+ "use_cuda_time": false,
778
+ "gpu_metrics": null
779
+ },
780
+ {
781
+ "wall_time": 0.06952459702733904,
782
+ "cuda_time": 0.06950542449951172,
783
+ "batch_size": 16,
784
+ "new_tokens": 1,
785
+ "use_cuda_time": false,
786
+ "gpu_metrics": null
787
+ },
788
+ {
789
+ "wall_time": 0.0701530339429155,
790
+ "cuda_time": 0.07013269805908204,
791
+ "batch_size": 16,
792
+ "new_tokens": 1,
793
+ "use_cuda_time": false,
794
+ "gpu_metrics": null
795
+ },
796
+ {
797
+ "wall_time": 0.06968465296085924,
798
+ "cuda_time": 0.06966370391845703,
799
+ "batch_size": 16,
800
+ "new_tokens": 1,
801
+ "use_cuda_time": false,
802
+ "gpu_metrics": null
803
+ },
804
+ {
805
+ "wall_time": 0.06970080395694822,
806
+ "cuda_time": 0.06968074035644531,
807
+ "batch_size": 16,
808
+ "new_tokens": 1,
809
+ "use_cuda_time": false,
810
+ "gpu_metrics": null
811
+ },
812
+ {
813
+ "wall_time": 0.0705471959663555,
814
+ "cuda_time": 0.07052652740478516,
815
+ "batch_size": 16,
816
+ "new_tokens": 1,
817
+ "use_cuda_time": false,
818
+ "gpu_metrics": null
819
+ },
820
+ {
821
+ "wall_time": 0.06941466010175645,
822
+ "cuda_time": 0.06938239288330078,
823
+ "batch_size": 16,
824
+ "new_tokens": 1,
825
+ "use_cuda_time": false,
826
+ "gpu_metrics": null
827
+ },
828
+ {
829
+ "wall_time": 0.06946662906557322,
830
+ "cuda_time": 0.06944795227050782,
831
+ "batch_size": 16,
832
+ "new_tokens": 1,
833
+ "use_cuda_time": false,
834
+ "gpu_metrics": null
835
+ },
836
+ {
837
+ "wall_time": 0.07021664292551577,
838
+ "cuda_time": 0.0701964569091797,
839
+ "batch_size": 16,
840
+ "new_tokens": 1,
841
+ "use_cuda_time": false,
842
+ "gpu_metrics": null
843
+ },
844
+ {
845
+ "wall_time": 0.0697313129203394,
846
+ "cuda_time": 0.0697121810913086,
847
+ "batch_size": 16,
848
+ "new_tokens": 1,
849
+ "use_cuda_time": false,
850
+ "gpu_metrics": null
851
+ },
852
+ {
853
+ "wall_time": 0.07019988296087831,
854
+ "cuda_time": 0.07017969512939454,
855
+ "batch_size": 16,
856
+ "new_tokens": 1,
857
+ "use_cuda_time": false,
858
+ "gpu_metrics": null
859
+ },
860
+ {
861
+ "wall_time": 0.06973098393063992,
862
+ "cuda_time": 0.06971258544921875,
863
+ "batch_size": 16,
864
+ "new_tokens": 1,
865
+ "use_cuda_time": false,
866
+ "gpu_metrics": null
867
+ },
868
+ {
869
+ "wall_time": 0.06996345904190093,
870
+ "cuda_time": 0.06994325256347657,
871
+ "batch_size": 16,
872
+ "new_tokens": 1,
873
+ "use_cuda_time": false,
874
+ "gpu_metrics": null
875
+ },
876
+ {
877
+ "wall_time": 0.0691890650196001,
878
+ "cuda_time": 0.06917007446289063,
879
+ "batch_size": 16,
880
+ "new_tokens": 1,
881
+ "use_cuda_time": false,
882
+ "gpu_metrics": null
883
+ },
884
+ {
885
+ "wall_time": 0.07019514299463481,
886
+ "cuda_time": 0.07002085876464843,
887
+ "batch_size": 16,
888
+ "new_tokens": 1,
889
+ "use_cuda_time": false,
890
+ "gpu_metrics": null
891
+ },
892
+ {
893
+ "wall_time": 0.06993868900462985,
894
+ "cuda_time": 0.06991465759277343,
895
+ "batch_size": 16,
896
+ "new_tokens": 1,
897
+ "use_cuda_time": false,
898
+ "gpu_metrics": null
899
+ }
900
+ ],
901
+ "tpot": [
902
+ {
903
+ "wall_time": 1.568886692984961,
904
+ "cuda_time": 1.568811279296875,
905
+ "batch_size": 16,
906
+ "new_tokens": 128,
907
+ "use_cuda_time": false,
908
+ "gpu_metrics": null
909
+ },
910
+ {
911
+ "wall_time": 1.5646536540007219,
912
+ "cuda_time": 1.564624755859375,
913
+ "batch_size": 16,
914
+ "new_tokens": 128,
915
+ "use_cuda_time": false,
916
+ "gpu_metrics": null
917
+ },
918
+ {
919
+ "wall_time": 1.569601257913746,
920
+ "cuda_time": 1.5695728759765626,
921
+ "batch_size": 16,
922
+ "new_tokens": 128,
923
+ "use_cuda_time": false,
924
+ "gpu_metrics": null
925
+ },
926
+ {
927
+ "wall_time": 1.56203376094345,
928
+ "cuda_time": 1.5620035400390626,
929
+ "batch_size": 16,
930
+ "new_tokens": 128,
931
+ "use_cuda_time": false,
932
+ "gpu_metrics": null
933
+ },
934
+ {
935
+ "wall_time": 1.5592194920172915,
936
+ "cuda_time": 1.5591905517578124,
937
+ "batch_size": 16,
938
+ "new_tokens": 128,
939
+ "use_cuda_time": false,
940
+ "gpu_metrics": null
941
+ },
942
+ {
943
+ "wall_time": 1.5600104450713843,
944
+ "cuda_time": 1.5599837646484376,
945
+ "batch_size": 16,
946
+ "new_tokens": 128,
947
+ "use_cuda_time": false,
948
+ "gpu_metrics": null
949
+ },
950
+ {
951
+ "wall_time": 1.556260335026309,
952
+ "cuda_time": 1.5562335205078126,
953
+ "batch_size": 16,
954
+ "new_tokens": 128,
955
+ "use_cuda_time": false,
956
+ "gpu_metrics": null
957
+ },
958
+ {
959
+ "wall_time": 1.5623370950343087,
960
+ "cuda_time": 1.5623018798828125,
961
+ "batch_size": 16,
962
+ "new_tokens": 128,
963
+ "use_cuda_time": false,
964
+ "gpu_metrics": null
965
+ },
966
+ {
967
+ "wall_time": 1.56083326600492,
968
+ "cuda_time": 1.5608055419921876,
969
+ "batch_size": 16,
970
+ "new_tokens": 128,
971
+ "use_cuda_time": false,
972
+ "gpu_metrics": null
973
+ },
974
+ {
975
+ "wall_time": 1.561068672919646,
976
+ "cuda_time": 1.561042724609375,
977
+ "batch_size": 16,
978
+ "new_tokens": 128,
979
+ "use_cuda_time": false,
980
+ "gpu_metrics": null
981
+ },
982
+ {
983
+ "wall_time": 1.5576978439930826,
984
+ "cuda_time": 1.557672119140625,
985
+ "batch_size": 16,
986
+ "new_tokens": 128,
987
+ "use_cuda_time": false,
988
+ "gpu_metrics": null
989
+ },
990
+ {
991
+ "wall_time": 1.5631060280138627,
992
+ "cuda_time": 1.56307666015625,
993
+ "batch_size": 16,
994
+ "new_tokens": 128,
995
+ "use_cuda_time": false,
996
+ "gpu_metrics": null
997
+ },
998
+ {
999
+ "wall_time": 1.564229413983412,
1000
+ "cuda_time": 1.5642041015625,
1001
+ "batch_size": 16,
1002
+ "new_tokens": 128,
1003
+ "use_cuda_time": false,
1004
+ "gpu_metrics": null
1005
+ },
1006
+ {
1007
+ "wall_time": 1.5635379790328443,
1008
+ "cuda_time": 1.563511474609375,
1009
+ "batch_size": 16,
1010
+ "new_tokens": 128,
1011
+ "use_cuda_time": false,
1012
+ "gpu_metrics": null
1013
+ },
1014
+ {
1015
+ "wall_time": 1.5589708760380745,
1016
+ "cuda_time": 1.55894482421875,
1017
+ "batch_size": 16,
1018
+ "new_tokens": 128,
1019
+ "use_cuda_time": false,
1020
+ "gpu_metrics": null
1021
+ },
1022
+ {
1023
+ "wall_time": 1.5711359959095716,
1024
+ "cuda_time": 1.5711026611328125,
1025
+ "batch_size": 16,
1026
+ "new_tokens": 128,
1027
+ "use_cuda_time": false,
1028
+ "gpu_metrics": null
1029
+ },
1030
+ {
1031
+ "wall_time": 1.5941198619548231,
1032
+ "cuda_time": 1.5940926513671876,
1033
+ "batch_size": 16,
1034
+ "new_tokens": 128,
1035
+ "use_cuda_time": false,
1036
+ "gpu_metrics": null
1037
+ },
1038
+ {
1039
+ "wall_time": 1.5766646160045639,
1040
+ "cuda_time": 1.5766334228515626,
1041
+ "batch_size": 16,
1042
+ "new_tokens": 128,
1043
+ "use_cuda_time": false,
1044
+ "gpu_metrics": null
1045
+ },
1046
+ {
1047
+ "wall_time": 1.579302111058496,
1048
+ "cuda_time": 1.57926708984375,
1049
+ "batch_size": 16,
1050
+ "new_tokens": 128,
1051
+ "use_cuda_time": false,
1052
+ "gpu_metrics": null
1053
+ },
1054
+ {
1055
+ "wall_time": 1.5677615479798988,
1056
+ "cuda_time": 1.567731689453125,
1057
+ "batch_size": 16,
1058
+ "new_tokens": 128,
1059
+ "use_cuda_time": false,
1060
+ "gpu_metrics": null
1061
+ }
1062
+ ]
1063
+ },
1064
+ "sdpa_None_compiled_vanilla_with_cache": {
1065
+ "metadata": {
1066
+ "timestamp": "2025-09-26T12:02:42.414361",
1067
+ "commit_id": null,
1068
+ "hardware_info": {
1069
+ "gpu_name": "AMD Instinct Mi325X VF",
1070
+ "gpu_memory_total_mb": 255.6875,
1071
+ "python_version": "3.12.10",
1072
+ "torch_version": "2.7.1+rocm6.4.1.git2a215e4a"
1073
+ },
1074
+ "config": {
1075
+ "name": "sdpa_None_compiled_vanilla_with_cache",
1076
+ "warmup_iterations": 5,
1077
+ "measurement_iterations": 20,
1078
+ "gpu_monitoring": false,
1079
+ "batch_size": 16,
1080
+ "sequence_length": 128,
1081
+ "num_tokens_to_generate": 128,
1082
+ "attn_implementation": "sdpa",
1083
+ "use_cache": true,
1084
+ "sdpa_backend": null,
1085
+ "compilation": true,
1086
+ "compile_mode": "max-autotune",
1087
+ "compile_options": {},
1088
+ "kernelize": false,
1089
+ "device": "cuda",
1090
+ "dtype": "torch.bfloat16"
1091
+ }
1092
+ },
1093
+ "ttft": [
1094
+ {
1095
+ "wall_time": 0.06976361200213432,
1096
+ "cuda_time": 0.06957430267333985,
1097
+ "batch_size": 16,
1098
+ "new_tokens": 1,
1099
+ "use_cuda_time": false,
1100
+ "gpu_metrics": null
1101
+ },
1102
+ {
1103
+ "wall_time": 0.06963270506821573,
1104
+ "cuda_time": 0.06961402130126954,
1105
+ "batch_size": 16,
1106
+ "new_tokens": 1,
1107
+ "use_cuda_time": false,
1108
+ "gpu_metrics": null
1109
+ },
1110
+ {
1111
+ "wall_time": 0.06991932890377939,
1112
+ "cuda_time": 0.06972110748291016,
1113
+ "batch_size": 16,
1114
+ "new_tokens": 1,
1115
+ "use_cuda_time": false,
1116
+ "gpu_metrics": null
1117
+ },
1118
+ {
1119
+ "wall_time": 0.06952144694514573,
1120
+ "cuda_time": 0.06950206756591797,
1121
+ "batch_size": 16,
1122
+ "new_tokens": 1,
1123
+ "use_cuda_time": false,
1124
+ "gpu_metrics": null
1125
+ },
1126
+ {
1127
+ "wall_time": 0.06935929099563509,
1128
+ "cuda_time": 0.06933879089355469,
1129
+ "batch_size": 16,
1130
+ "new_tokens": 1,
1131
+ "use_cuda_time": false,
1132
+ "gpu_metrics": null
1133
+ },
1134
+ {
1135
+ "wall_time": 0.06907697801943868,
1136
+ "cuda_time": 0.06905767822265625,
1137
+ "batch_size": 16,
1138
+ "new_tokens": 1,
1139
+ "use_cuda_time": false,
1140
+ "gpu_metrics": null
1141
+ },
1142
+ {
1143
+ "wall_time": 0.06930454296525568,
1144
+ "cuda_time": 0.06927911376953125,
1145
+ "batch_size": 16,
1146
+ "new_tokens": 1,
1147
+ "use_cuda_time": false,
1148
+ "gpu_metrics": null
1149
+ },
1150
+ {
1151
+ "wall_time": 0.06966740405187011,
1152
+ "cuda_time": 0.06963846588134766,
1153
+ "batch_size": 16,
1154
+ "new_tokens": 1,
1155
+ "use_cuda_time": false,
1156
+ "gpu_metrics": null
1157
+ },
1158
+ {
1159
+ "wall_time": 0.06912526697851717,
1160
+ "cuda_time": 0.06909512329101562,
1161
+ "batch_size": 16,
1162
+ "new_tokens": 1,
1163
+ "use_cuda_time": false,
1164
+ "gpu_metrics": null
1165
+ },
1166
+ {
1167
+ "wall_time": 0.06950624799355865,
1168
+ "cuda_time": 0.06948722839355469,
1169
+ "batch_size": 16,
1170
+ "new_tokens": 1,
1171
+ "use_cuda_time": false,
1172
+ "gpu_metrics": null
1173
+ },
1174
+ {
1175
+ "wall_time": 0.06985958106815815,
1176
+ "cuda_time": 0.06983929443359375,
1177
+ "batch_size": 16,
1178
+ "new_tokens": 1,
1179
+ "use_cuda_time": false,
1180
+ "gpu_metrics": null
1181
+ },
1182
+ {
1183
+ "wall_time": 0.06962528603617102,
1184
+ "cuda_time": 0.06960670471191406,
1185
+ "batch_size": 16,
1186
+ "new_tokens": 1,
1187
+ "use_cuda_time": false,
1188
+ "gpu_metrics": null
1189
+ },
1190
+ {
1191
+ "wall_time": 0.06962068600114435,
1192
+ "cuda_time": 0.069595703125,
1193
+ "batch_size": 16,
1194
+ "new_tokens": 1,
1195
+ "use_cuda_time": false,
1196
+ "gpu_metrics": null
1197
+ },
1198
+ {
1199
+ "wall_time": 0.06950531899929047,
1200
+ "cuda_time": 0.06948622131347656,
1201
+ "batch_size": 16,
1202
+ "new_tokens": 1,
1203
+ "use_cuda_time": false,
1204
+ "gpu_metrics": null
1205
+ },
1206
+ {
1207
+ "wall_time": 0.06971742305904627,
1208
+ "cuda_time": 0.06969742584228515,
1209
+ "batch_size": 16,
1210
+ "new_tokens": 1,
1211
+ "use_cuda_time": false,
1212
+ "gpu_metrics": null
1213
+ },
1214
+ {
1215
+ "wall_time": 0.07027899206150323,
1216
+ "cuda_time": 0.07026025390625,
1217
+ "batch_size": 16,
1218
+ "new_tokens": 1,
1219
+ "use_cuda_time": false,
1220
+ "gpu_metrics": null
1221
+ },
1222
+ {
1223
+ "wall_time": 0.06971817300654948,
1224
+ "cuda_time": 0.06969794464111329,
1225
+ "batch_size": 16,
1226
+ "new_tokens": 1,
1227
+ "use_cuda_time": false,
1228
+ "gpu_metrics": null
1229
+ },
1230
+ {
1231
+ "wall_time": 0.06987840996589512,
1232
+ "cuda_time": 0.06985985565185547,
1233
+ "batch_size": 16,
1234
+ "new_tokens": 1,
1235
+ "use_cuda_time": false,
1236
+ "gpu_metrics": null
1237
+ },
1238
+ {
1239
+ "wall_time": 0.07004970591515303,
1240
+ "cuda_time": 0.07003005981445312,
1241
+ "batch_size": 16,
1242
+ "new_tokens": 1,
1243
+ "use_cuda_time": false,
1244
+ "gpu_metrics": null
1245
+ },
1246
+ {
1247
+ "wall_time": 0.07010770495980978,
1248
+ "cuda_time": 0.07008773803710938,
1249
+ "batch_size": 16,
1250
+ "new_tokens": 1,
1251
+ "use_cuda_time": false,
1252
+ "gpu_metrics": null
1253
+ }
1254
+ ],
1255
+ "tpot": [
1256
+ {
1257
+ "wall_time": 1.5897287370171398,
1258
+ "cuda_time": 1.589665771484375,
1259
+ "batch_size": 16,
1260
+ "new_tokens": 128,
1261
+ "use_cuda_time": false,
1262
+ "gpu_metrics": null
1263
+ },
1264
+ {
1265
+ "wall_time": 1.586211972986348,
1266
+ "cuda_time": 1.5861837158203125,
1267
+ "batch_size": 16,
1268
+ "new_tokens": 128,
1269
+ "use_cuda_time": false,
1270
+ "gpu_metrics": null
1271
+ },
1272
+ {
1273
+ "wall_time": 1.5882492290111259,
1274
+ "cuda_time": 1.5882193603515624,
1275
+ "batch_size": 16,
1276
+ "new_tokens": 128,
1277
+ "use_cuda_time": false,
1278
+ "gpu_metrics": null
1279
+ },
1280
+ {
1281
+ "wall_time": 1.5917098950594664,
1282
+ "cuda_time": 1.5916837158203125,
1283
+ "batch_size": 16,
1284
+ "new_tokens": 128,
1285
+ "use_cuda_time": false,
1286
+ "gpu_metrics": null
1287
+ },
1288
+ {
1289
+ "wall_time": 1.6004555160179734,
1290
+ "cuda_time": 1.600423828125,
1291
+ "batch_size": 16,
1292
+ "new_tokens": 128,
1293
+ "use_cuda_time": false,
1294
+ "gpu_metrics": null
1295
+ },
1296
+ {
1297
+ "wall_time": 1.5869074059883133,
1298
+ "cuda_time": 1.586880859375,
1299
+ "batch_size": 16,
1300
+ "new_tokens": 128,
1301
+ "use_cuda_time": false,
1302
+ "gpu_metrics": null
1303
+ },
1304
+ {
1305
+ "wall_time": 1.5914721400476992,
1306
+ "cuda_time": 1.5914462890625,
1307
+ "batch_size": 16,
1308
+ "new_tokens": 128,
1309
+ "use_cuda_time": false,
1310
+ "gpu_metrics": null
1311
+ },
1312
+ {
1313
+ "wall_time": 1.5851828149752691,
1314
+ "cuda_time": 1.58515673828125,
1315
+ "batch_size": 16,
1316
+ "new_tokens": 128,
1317
+ "use_cuda_time": false,
1318
+ "gpu_metrics": null
1319
+ },
1320
+ {
1321
+ "wall_time": 1.5776644350262359,
1322
+ "cuda_time": 1.5776337890625,
1323
+ "batch_size": 16,
1324
+ "new_tokens": 128,
1325
+ "use_cuda_time": false,
1326
+ "gpu_metrics": null
1327
+ },
1328
+ {
1329
+ "wall_time": 1.5777849049773067,
1330
+ "cuda_time": 1.577759521484375,
1331
+ "batch_size": 16,
1332
+ "new_tokens": 128,
1333
+ "use_cuda_time": false,
1334
+ "gpu_metrics": null
1335
+ },
1336
+ {
1337
+ "wall_time": 1.5800251649925485,
1338
+ "cuda_time": 1.5799962158203125,
1339
+ "batch_size": 16,
1340
+ "new_tokens": 128,
1341
+ "use_cuda_time": false,
1342
+ "gpu_metrics": null
1343
+ },
1344
+ {
1345
+ "wall_time": 1.5810497630154714,
1346
+ "cuda_time": 1.5809893798828125,
1347
+ "batch_size": 16,
1348
+ "new_tokens": 128,
1349
+ "use_cuda_time": false,
1350
+ "gpu_metrics": null
1351
+ },
1352
+ {
1353
+ "wall_time": 1.5747365789720789,
1354
+ "cuda_time": 1.57470703125,
1355
+ "batch_size": 16,
1356
+ "new_tokens": 128,
1357
+ "use_cuda_time": false,
1358
+ "gpu_metrics": null
1359
+ },
1360
+ {
1361
+ "wall_time": 1.5829719919711351,
1362
+ "cuda_time": 1.5828975830078125,
1363
+ "batch_size": 16,
1364
+ "new_tokens": 128,
1365
+ "use_cuda_time": false,
1366
+ "gpu_metrics": null
1367
+ },
1368
+ {
1369
+ "wall_time": 1.5985179379349574,
1370
+ "cuda_time": 1.5984254150390624,
1371
+ "batch_size": 16,
1372
+ "new_tokens": 128,
1373
+ "use_cuda_time": false,
1374
+ "gpu_metrics": null
1375
+ },
1376
+ {
1377
+ "wall_time": 1.5925404160516337,
1378
+ "cuda_time": 1.5925126953125,
1379
+ "batch_size": 16,
1380
+ "new_tokens": 128,
1381
+ "use_cuda_time": false,
1382
+ "gpu_metrics": null
1383
+ },
1384
+ {
1385
+ "wall_time": 1.5975769789656624,
1386
+ "cuda_time": 1.5975234375,
1387
+ "batch_size": 16,
1388
+ "new_tokens": 128,
1389
+ "use_cuda_time": false,
1390
+ "gpu_metrics": null
1391
+ },
1392
+ {
1393
+ "wall_time": 1.5931882929289714,
1394
+ "cuda_time": 1.593160400390625,
1395
+ "batch_size": 16,
1396
+ "new_tokens": 128,
1397
+ "use_cuda_time": false,
1398
+ "gpu_metrics": null
1399
+ },
1400
+ {
1401
+ "wall_time": 1.5850070579908788,
1402
+ "cuda_time": 1.5849803466796875,
1403
+ "batch_size": 16,
1404
+ "new_tokens": 128,
1405
+ "use_cuda_time": false,
1406
+ "gpu_metrics": null
1407
+ },
1408
+ {
1409
+ "wall_time": 1.5830075700068846,
1410
+ "cuda_time": 1.5829818115234375,
1411
+ "batch_size": 16,
1412
+ "new_tokens": 128,
1413
+ "use_cuda_time": false,
1414
+ "gpu_metrics": null
1415
+ }
1416
+ ]
1417
+ }
1418
+ }
data.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+
4
+
5
def estimate_from_measures(measures: list[float], estimator: str) -> float:
    """Collapse a list of measurements into a single representative value.

    Args:
        measures: Numeric samples to summarize.
        estimator: Name of the statistic to compute, ``"median"`` or ``"mean"``.

    Returns:
        The selected statistic converted to a built-in ``float``.

    Raises:
        ValueError: If ``estimator`` is neither ``"median"`` nor ``"mean"``.
    """
    # Reject unknown estimator names first so the rest is a single reduction.
    if estimator not in ("median", "mean"):
        raise ValueError(f"Invalid estimator: {estimator}")
    reduce_fn = np.median if estimator == "median" else np.mean
    return float(reduce_fn(measures))
11
+
12
+
13
class ModelBenchmarkData:
    """Benchmark results loaded from a JSON file, keyed by configuration name.

    Each top-level entry is read as a mapping with ``"ttft"`` and ``"tpot"``
    lists; every record in those lists must provide the ``"wall_time"`` and
    ``"cuda_time"`` keys used by :meth:`get_ttft_tpot_data`.
    """

    def __init__(self, json_path: str) -> None:
        """Parse the JSON document at ``json_path`` into ``self.data``.

        Args:
            json_path: Path of the benchmark JSON file to load.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(json_path, "r", encoding="utf-8") as f:
            self.data = json.load(f)

    def get_ttft_tpot_data(self, model_name: str, estimator: str = "median", use_cuda_time: bool = False) -> list[dict]:
        """Build one summary point per benchmark configuration.

        Args:
            model_name: Currently unused by this method; kept so existing
                callers keep working.
            estimator: Statistic name forwarded to ``estimate_from_measures``.
            use_cuda_time: Read ``"cuda_time"`` from each record when true,
                otherwise ``"wall_time"``.

        Returns:
            A list with one dict per configuration, in the iteration order of
            ``self.data``. Each dict has ``"x"`` (estimate over the ``"ttft"``
            records), ``"y"`` (estimate over the ``"tpot"`` records) and
            ``"label"`` (the configuration name). Empty data yields ``[]``.
        """
        time_key = "cuda_time" if use_cuda_time else "wall_time"
        return [
            {
                "x": estimate_from_measures([m[time_key] for m in results["ttft"]], estimator),
                "y": estimate_from_measures([m[time_key] for m in results["tpot"]], estimator),
                "label": cfg_name,
            }
            for cfg_name, results in self.data.items()
        ]