File size: 5,982 Bytes
e234a15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
{
    "prefill": {
        "memory": {
            "unit": "MB",
            "max_ram": 3581.227008,
            "max_vram": 14426.308608,
            "max_reserved": 13933.477888,
            "max_allocated": 11182.528
        },
        "latency": {
            "unit": "s",
            "mean": 0.4189964497884115,
            "stdev": 0.004250981279472382,
            "values": [
                0.4385589294433594,
                0.423773193359375,
                0.41801422119140624,
                0.41807257080078125,
                0.4178309020996094,
                0.4186828918457031,
                0.41806951904296874,
                0.4179261474609375,
                0.41801318359375,
                0.4178155517578125,
                0.41779302978515626,
                0.41764556884765625,
                0.4177346496582031,
                0.41765786743164063,
                0.41786880493164064,
                0.41775518798828126,
                0.41766604614257813,
                0.417807373046875,
                0.4177121276855469,
                0.41813299560546874,
                0.41812069702148436,
                0.41768341064453124,
                0.41801422119140624,
                0.4175657043457031
            ]
        },
        "throughput": {
            "unit": "tokens/s",
            "value": 1145.594432225845
        },
        "energy": null,
        "efficiency": null
    },
    "decode": {
        "memory": {
            "unit": "MB",
            "max_ram": 3601.494016,
            "max_vram": 14321.451008,
            "max_reserved": 13828.620288,
            "max_allocated": 11336.600064
        },
        "latency": {
            "unit": "s",
            "mean": 29.4220535583496,
            "stdev": 0,
            "values": [
                29.4220535583496
            ]
        },
        "throughput": {
            "unit": "tokens/s",
            "value": 100.94468743012509
        },
        "energy": null,
        "efficiency": null
    },
    "per_token": {
        "memory": null,
        "latency": {
            "unit": "s",
            "mean": 0.2971924601853495,
            "stdev": 0.006030435507012704,
            "values": [
                0.3147960205078125,
                0.30984600830078124,
                0.30846875,
                0.30889166259765627,
                0.3081308288574219,
                0.30789120483398436,
                0.30744577026367187,
                0.3064289245605469,
                0.3073105773925781,
                0.3058995056152344,
                0.30636135864257813,
                0.3053465576171875,
                0.30569369506835936,
                0.3047475280761719,
                0.30424884033203126,
                0.30414337158203125,
                0.3038443603515625,
                0.3047669677734375,
                0.3036334228515625,
                0.3034777526855469,
                0.30244659423828124,
                0.30226739501953126,
                0.3015546875,
                0.30261453247070313,
                0.3014553527832031,
                0.3018014831542969,
                0.30137139892578124,
                0.3006924743652344,
                0.3007979431152344,
                0.30056549072265626,
                0.30052557373046873,
                0.299931640625,
                0.2989137878417969,
                0.2993786926269531,
                0.2994145202636719,
                0.2986608581542969,
                0.29832498168945315,
                0.2994288635253906,
                0.2982072448730469,
                0.29848779296875,
                0.297818115234375,
                0.2969241638183594,
                0.29708697509765625,
                0.2975580139160156,
                0.29730816650390623,
                0.29664154052734376,
                0.2972283020019531,
                0.296310791015625,
                0.29754879760742187,
                0.2962001953125,
                0.29624114990234374,
                0.296637451171875,
                0.2953021545410156,
                0.2963804016113281,
                0.29485159301757813,
                0.294877197265625,
                0.2944604187011719,
                0.2946324462890625,
                0.2944860229492188,
                0.2944901123046875,
                0.2939412536621094,
                0.29395968627929686,
                0.29282098388671873,
                0.29332992553710935,
                0.2928732299804688,
                0.291852294921875,
                0.29285171508789065,
                0.2923458557128906,
                0.29213082885742186,
                0.292485107421875,
                0.2925527038574219,
                0.29165057373046877,
                0.29194955444335935,
                0.292611083984375,
                0.29196389770507813,
                0.2922905578613281,
                0.2916874389648437,
                0.291746826171875,
                0.2913392639160156,
                0.2908856201171875,
                0.29074432373046877,
                0.2905907287597656,
                0.2906265563964844,
                0.29140069580078126,
                0.289986572265625,
                0.2901381225585937,
                0.29077197265625,
                0.29088052368164063,
                0.2900203552246094,
                0.2901637268066406,
                0.29011456298828125,
                0.2901842041015625,
                0.29018316650390624,
                0.290260986328125,
                0.29000909423828125,
                0.29029067993164065,
                0.28940081787109373,
                0.2889861145019531,
                0.2888335266113281
            ]
        },
        "throughput": {
            "unit": "tokens/s",
            "value": 100.94468743012509
        },
        "energy": null,
        "efficiency": null
    }
}