File size: 1,404 Bytes
38e2d68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
{
  "benchmarked_devices": [
    "cuda"
  ],
  "parameters": {
    "total_params": 4030606,
    "trainable_params": 3178798,
    "total_params_m": 4.030606,
    "trainable_params_m": 3.178798
  },
  "cuda": {
    "memory": {
      "baseline_memory_mb": 70.193359375,
      "peak_memory_mb": 80.720703125,
      "current_memory_mb": 70.76806640625,
      "inference_memory_mb": 10.52734375,
      "device": "cuda"
    },
    "inference_single": {
      "batch_size": 1,
      "mean_latency_ms": 5.136066239720094,
      "std_latency_ms": 0.008697872240662149,
      "median_latency_ms": 5.135121497005457,
      "p95_latency_ms": 5.151860694604693,
      "p99_latency_ms": 5.156857259207754,
      "min_latency_ms": 5.121371003042441,
      "max_latency_ms": 5.1598530044429936,
      "latency_per_image_ms": 5.136066239720094,
      "throughput_img_per_sec": 194.70153875089005,
      "device": "cuda"
    },
    "inference_batch32": {
      "batch_size": 32,
      "mean_latency_ms": 18.34433564043138,
      "std_latency_ms": 0.22169971671972608,
      "median_latency_ms": 18.217552998976316,
      "p95_latency_ms": 18.770105749717914,
      "p99_latency_ms": 18.77734032314038,
      "min_latency_ms": 18.15599100518739,
      "max_latency_ms": 18.78215800388716,
      "latency_per_image_ms": 0.5732604887634807,
      "throughput_img_per_sec": 1744.407681326501,
      "device": "cuda"
    }
  }
}