File size: 5,982 Bytes
e234a15 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3581.227008,
"max_vram": 14426.308608,
"max_reserved": 13933.477888,
"max_allocated": 11182.528
},
"latency": {
"unit": "s",
"mean": 0.4189964497884115,
"stdev": 0.004250981279472382,
"values": [
0.4385589294433594,
0.423773193359375,
0.41801422119140624,
0.41807257080078125,
0.4178309020996094,
0.4186828918457031,
0.41806951904296874,
0.4179261474609375,
0.41801318359375,
0.4178155517578125,
0.41779302978515626,
0.41764556884765625,
0.4177346496582031,
0.41765786743164063,
0.41786880493164064,
0.41775518798828126,
0.41766604614257813,
0.417807373046875,
0.4177121276855469,
0.41813299560546874,
0.41812069702148436,
0.41768341064453124,
0.41801422119140624,
0.4175657043457031
]
},
"throughput": {
"unit": "tokens/s",
"value": 1145.594432225845
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3601.494016,
"max_vram": 14321.451008,
"max_reserved": 13828.620288,
"max_allocated": 11336.600064
},
"latency": {
"unit": "s",
"mean": 29.4220535583496,
"stdev": 0,
"values": [
29.4220535583496
]
},
"throughput": {
"unit": "tokens/s",
"value": 100.94468743012509
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.2971924601853495,
"stdev": 0.006030435507012704,
"values": [
0.3147960205078125,
0.30984600830078124,
0.30846875,
0.30889166259765627,
0.3081308288574219,
0.30789120483398436,
0.30744577026367187,
0.3064289245605469,
0.3073105773925781,
0.3058995056152344,
0.30636135864257813,
0.3053465576171875,
0.30569369506835936,
0.3047475280761719,
0.30424884033203126,
0.30414337158203125,
0.3038443603515625,
0.3047669677734375,
0.3036334228515625,
0.3034777526855469,
0.30244659423828124,
0.30226739501953126,
0.3015546875,
0.30261453247070313,
0.3014553527832031,
0.3018014831542969,
0.30137139892578124,
0.3006924743652344,
0.3007979431152344,
0.30056549072265626,
0.30052557373046873,
0.299931640625,
0.2989137878417969,
0.2993786926269531,
0.2994145202636719,
0.2986608581542969,
0.29832498168945315,
0.2994288635253906,
0.2982072448730469,
0.29848779296875,
0.297818115234375,
0.2969241638183594,
0.29708697509765625,
0.2975580139160156,
0.29730816650390623,
0.29664154052734376,
0.2972283020019531,
0.296310791015625,
0.29754879760742187,
0.2962001953125,
0.29624114990234374,
0.296637451171875,
0.2953021545410156,
0.2963804016113281,
0.29485159301757813,
0.294877197265625,
0.2944604187011719,
0.2946324462890625,
0.2944860229492188,
0.2944901123046875,
0.2939412536621094,
0.29395968627929686,
0.29282098388671873,
0.29332992553710935,
0.2928732299804688,
0.291852294921875,
0.29285171508789065,
0.2923458557128906,
0.29213082885742186,
0.292485107421875,
0.2925527038574219,
0.29165057373046877,
0.29194955444335935,
0.292611083984375,
0.29196389770507813,
0.2922905578613281,
0.2916874389648437,
0.291746826171875,
0.2913392639160156,
0.2908856201171875,
0.29074432373046877,
0.2905907287597656,
0.2906265563964844,
0.29140069580078126,
0.289986572265625,
0.2901381225585937,
0.29077197265625,
0.29088052368164063,
0.2900203552246094,
0.2901637268066406,
0.29011456298828125,
0.2901842041015625,
0.29018316650390624,
0.290260986328125,
0.29000909423828125,
0.29029067993164065,
0.28940081787109373,
0.2889861145019531,
0.2888335266113281
]
},
"throughput": {
"unit": "tokens/s",
"value": 100.94468743012509
},
"energy": null,
"efficiency": null
}
} |