ilessio-aiflowlab commited on
Commit
cebf12c
·
verified ·
1 Parent(s): a99be3a

Upload benchmarks/bench_09_multi_gpu.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. benchmarks/bench_09_multi_gpu.json +234 -0
benchmarks/bench_09_multi_gpu.json ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "benchmark": "multi_gpu",
3
+ "timestamp": "2026-03-19T12:21:06.000529+00:00",
4
+ "n_gpus_available": 4,
5
+ "gpu_names": [
6
+ "NVIDIA L4",
7
+ "NVIDIA L4",
8
+ "NVIDIA L4",
9
+ "NVIDIA L4"
10
+ ],
11
+ "inference": {
12
+ "gpu_1": {
13
+ "n_gpus": 1,
14
+ "batch_results": {
15
+ "batch_1": {
16
+ "p50_ms": 128.19,
17
+ "p95_ms": 130.86,
18
+ "mean_ms": 128.42,
19
+ "fps": 7.8,
20
+ "per_sample_ms": 128.42
21
+ },
22
+ "batch_4": {
23
+ "p50_ms": 430.18,
24
+ "p95_ms": 440.71,
25
+ "mean_ms": 429.76,
26
+ "fps": 9.3,
27
+ "per_sample_ms": 107.44
28
+ },
29
+ "batch_8": {
30
+ "p50_ms": 853.99,
31
+ "p95_ms": 873.4,
32
+ "mean_ms": 857.72,
33
+ "fps": 9.3,
34
+ "per_sample_ms": 107.21
35
+ },
36
+ "batch_16": {
37
+ "p50_ms": 1734.01,
38
+ "p95_ms": 1759.55,
39
+ "mean_ms": 1727.97,
40
+ "fps": 9.3,
41
+ "per_sample_ms": 108.0
42
+ }
43
+ },
44
+ "memory": {
45
+ "gpu_0_allocated_gb": 3.65,
46
+ "gpu_0_reserved_gb": 5.07
47
+ }
48
+ },
49
+ "gpu_2": {
50
+ "n_gpus": 2,
51
+ "batch_results": {
52
+ "batch_1": {
53
+ "p50_ms": 162.23,
54
+ "p95_ms": 168.32,
55
+ "mean_ms": 164.65,
56
+ "fps": 6.1,
57
+ "per_sample_ms": 164.65
58
+ },
59
+ "batch_4": {
60
+ "p50_ms": 611.69,
61
+ "p95_ms": 613.66,
62
+ "mean_ms": 611.27,
63
+ "fps": 6.5,
64
+ "per_sample_ms": 152.82
65
+ },
66
+ "batch_8": {
67
+ "p50_ms": 799.07,
68
+ "p95_ms": 802.8,
69
+ "mean_ms": 799.2,
70
+ "fps": 10.0,
71
+ "per_sample_ms": 99.9
72
+ },
73
+ "batch_16": {
74
+ "p50_ms": 1185.3,
75
+ "p95_ms": 1190.2,
76
+ "mean_ms": 1184.72,
77
+ "fps": 13.5,
78
+ "per_sample_ms": 74.04
79
+ }
80
+ },
81
+ "memory": {
82
+ "gpu_0_allocated_gb": 3.66,
83
+ "gpu_0_reserved_gb": 4.47,
84
+ "gpu_1_allocated_gb": 0.01,
85
+ "gpu_1_reserved_gb": 4.47
86
+ }
87
+ },
88
+ "gpu_4": {
89
+ "n_gpus": 4,
90
+ "batch_results": {
91
+ "batch_1": {
92
+ "p50_ms": 164.7,
93
+ "p95_ms": 171.49,
94
+ "mean_ms": 167.37,
95
+ "fps": 6.0,
96
+ "per_sample_ms": 167.37
97
+ },
98
+ "batch_4": {
99
+ "p50_ms": 913.4,
100
+ "p95_ms": 915.7,
101
+ "mean_ms": 912.74,
102
+ "fps": 4.4,
103
+ "per_sample_ms": 228.19
104
+ },
105
+ "batch_8": {
106
+ "p50_ms": 1003.53,
107
+ "p95_ms": 1007.34,
108
+ "mean_ms": 1002.41,
109
+ "fps": 8.0,
110
+ "per_sample_ms": 125.3
111
+ },
112
+ "batch_16": {
113
+ "p50_ms": 1178.17,
114
+ "p95_ms": 1182.78,
115
+ "mean_ms": 1178.2,
116
+ "fps": 13.6,
117
+ "per_sample_ms": 73.64
118
+ }
119
+ },
120
+ "memory": {
121
+ "gpu_0_allocated_gb": 3.66,
122
+ "gpu_0_reserved_gb": 4.49,
123
+ "gpu_1_allocated_gb": 0.01,
124
+ "gpu_1_reserved_gb": 4.16,
125
+ "gpu_2_allocated_gb": 0.01,
126
+ "gpu_2_reserved_gb": 4.16,
127
+ "gpu_3_allocated_gb": 0.01,
128
+ "gpu_3_reserved_gb": 4.16
129
+ }
130
+ }
131
+ },
132
+ "training": {
133
+ "gpu_1": {
134
+ "n_gpus": 1,
135
+ "batch_size": 2,
136
+ "n_steps": 30,
137
+ "step_time_mean_ms": 432.4,
138
+ "steps_per_sec": 2.31,
139
+ "samples_per_sec": 4.63,
140
+ "loss_start": 4.0196,
141
+ "loss_end": 1.7553,
142
+ "loss_reduction_pct": 56.3,
143
+ "memory": {
144
+ "gpu_0_peak_gb": 9.0
145
+ }
146
+ },
147
+ "gpu_2": {
148
+ "n_gpus": 2,
149
+ "batch_size": 4,
150
+ "n_steps": 30,
151
+ "step_time_mean_ms": 1264.8,
152
+ "steps_per_sec": 0.79,
153
+ "samples_per_sec": 3.16,
154
+ "loss_start": 1.3165,
155
+ "loss_end": 1.4857,
156
+ "loss_reduction_pct": -12.9,
157
+ "memory": {
158
+ "gpu_0_peak_gb": 14.59,
159
+ "gpu_1_peak_gb": 4.07
160
+ }
161
+ },
162
+ "gpu_4": {
163
+ "n_gpus": 4,
164
+ "batch_size": 8,
165
+ "n_steps": 30,
166
+ "step_time_mean_ms": 2005.1,
167
+ "steps_per_sec": 0.5,
168
+ "samples_per_sec": 3.99,
169
+ "loss_start": 6.7918,
170
+ "loss_end": 1.182,
171
+ "loss_reduction_pct": 82.6,
172
+ "memory": {
173
+ "gpu_0_peak_gb": 14.6,
174
+ "gpu_1_peak_gb": 4.07,
175
+ "gpu_2_peak_gb": 4.07,
176
+ "gpu_3_peak_gb": 4.07
177
+ }
178
+ }
179
+ },
180
+ "fp16": {
181
+ "fp16_gpu_1": {
182
+ "n_gpus": 1,
183
+ "precision": "fp16",
184
+ "batch_results": {
185
+ "batch_4": {
186
+ "p50_ms": 122.14,
187
+ "fps": 32.7,
188
+ "per_sample_ms": 30.56
189
+ },
190
+ "batch_8": {
191
+ "p50_ms": 234.11,
192
+ "fps": 34.2,
193
+ "per_sample_ms": 29.26
194
+ },
195
+ "batch_16": {
196
+ "p50_ms": 486.22,
197
+ "fps": 32.9,
198
+ "per_sample_ms": 30.43
199
+ },
200
+ "batch_32": {
201
+ "p50_ms": 950.24,
202
+ "fps": 33.6,
203
+ "per_sample_ms": 29.73
204
+ }
205
+ }
206
+ },
207
+ "fp16_gpu_4": {
208
+ "n_gpus": 4,
209
+ "precision": "fp16",
210
+ "batch_results": {
211
+ "batch_4": {
212
+ "p50_ms": 901.49,
213
+ "fps": 4.4,
214
+ "per_sample_ms": 225.52
215
+ },
216
+ "batch_8": {
217
+ "p50_ms": 903.67,
218
+ "fps": 8.8,
219
+ "per_sample_ms": 113.02
220
+ },
221
+ "batch_16": {
222
+ "p50_ms": 911.97,
223
+ "fps": 17.5,
224
+ "per_sample_ms": 57.07
225
+ },
226
+ "batch_32": {
227
+ "p50_ms": 1013.11,
228
+ "fps": 31.6,
229
+ "per_sample_ms": 31.67
230
+ }
231
+ }
232
+ }
233
+ }
234
+ }