| { | |
| "prefill": { | |
| "memory": { | |
| "unit": "MB", | |
| "max_ram": 3581.227008, | |
| "max_vram": 14426.308608, | |
| "max_reserved": 13933.477888, | |
| "max_allocated": 11182.528 | |
| }, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 0.4189964497884115, | |
| "stdev": 0.004250981279472382, | |
| "values": [ | |
| 0.4385589294433594, | |
| 0.423773193359375, | |
| 0.41801422119140624, | |
| 0.41807257080078125, | |
| 0.4178309020996094, | |
| 0.4186828918457031, | |
| 0.41806951904296874, | |
| 0.4179261474609375, | |
| 0.41801318359375, | |
| 0.4178155517578125, | |
| 0.41779302978515626, | |
| 0.41764556884765625, | |
| 0.4177346496582031, | |
| 0.41765786743164063, | |
| 0.41786880493164064, | |
| 0.41775518798828126, | |
| 0.41766604614257813, | |
| 0.417807373046875, | |
| 0.4177121276855469, | |
| 0.41813299560546874, | |
| 0.41812069702148436, | |
| 0.41768341064453124, | |
| 0.41801422119140624, | |
| 0.4175657043457031 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 1145.594432225845 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| }, | |
| "decode": { | |
| "memory": { | |
| "unit": "MB", | |
| "max_ram": 3601.494016, | |
| "max_vram": 14321.451008, | |
| "max_reserved": 13828.620288, | |
| "max_allocated": 11336.600064 | |
| }, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 29.4220535583496, | |
| "stdev": 0, | |
| "values": [ | |
| 29.4220535583496 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 100.94468743012509 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| }, | |
| "per_token": { | |
| "memory": null, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 0.2971924601853495, | |
| "stdev": 0.006030435507012704, | |
| "values": [ | |
| 0.3147960205078125, | |
| 0.30984600830078124, | |
| 0.30846875, | |
| 0.30889166259765627, | |
| 0.3081308288574219, | |
| 0.30789120483398436, | |
| 0.30744577026367187, | |
| 0.3064289245605469, | |
| 0.3073105773925781, | |
| 0.3058995056152344, | |
| 0.30636135864257813, | |
| 0.3053465576171875, | |
| 0.30569369506835936, | |
| 0.3047475280761719, | |
| 0.30424884033203126, | |
| 0.30414337158203125, | |
| 0.3038443603515625, | |
| 0.3047669677734375, | |
| 0.3036334228515625, | |
| 0.3034777526855469, | |
| 0.30244659423828124, | |
| 0.30226739501953126, | |
| 0.3015546875, | |
| 0.30261453247070313, | |
| 0.3014553527832031, | |
| 0.3018014831542969, | |
| 0.30137139892578124, | |
| 0.3006924743652344, | |
| 0.3007979431152344, | |
| 0.30056549072265626, | |
| 0.30052557373046873, | |
| 0.299931640625, | |
| 0.2989137878417969, | |
| 0.2993786926269531, | |
| 0.2994145202636719, | |
| 0.2986608581542969, | |
| 0.29832498168945315, | |
| 0.2994288635253906, | |
| 0.2982072448730469, | |
| 0.29848779296875, | |
| 0.297818115234375, | |
| 0.2969241638183594, | |
| 0.29708697509765625, | |
| 0.2975580139160156, | |
| 0.29730816650390623, | |
| 0.29664154052734376, | |
| 0.2972283020019531, | |
| 0.296310791015625, | |
| 0.29754879760742187, | |
| 0.2962001953125, | |
| 0.29624114990234374, | |
| 0.296637451171875, | |
| 0.2953021545410156, | |
| 0.2963804016113281, | |
| 0.29485159301757813, | |
| 0.294877197265625, | |
| 0.2944604187011719, | |
| 0.2946324462890625, | |
| 0.2944860229492188, | |
| 0.2944901123046875, | |
| 0.2939412536621094, | |
| 0.29395968627929686, | |
| 0.29282098388671873, | |
| 0.29332992553710935, | |
| 0.2928732299804688, | |
| 0.291852294921875, | |
| 0.29285171508789065, | |
| 0.2923458557128906, | |
| 0.29213082885742186, | |
| 0.292485107421875, | |
| 0.2925527038574219, | |
| 0.29165057373046877, | |
| 0.29194955444335935, | |
| 0.292611083984375, | |
| 0.29196389770507813, | |
| 0.2922905578613281, | |
| 0.2916874389648437, | |
| 0.291746826171875, | |
| 0.2913392639160156, | |
| 0.2908856201171875, | |
| 0.29074432373046877, | |
| 0.2905907287597656, | |
| 0.2906265563964844, | |
| 0.29140069580078126, | |
| 0.289986572265625, | |
| 0.2901381225585937, | |
| 0.29077197265625, | |
| 0.29088052368164063, | |
| 0.2900203552246094, | |
| 0.2901637268066406, | |
| 0.29011456298828125, | |
| 0.2901842041015625, | |
| 0.29018316650390624, | |
| 0.290260986328125, | |
| 0.29000909423828125, | |
| 0.29029067993164065, | |
| 0.28940081787109373, | |
| 0.2889861145019531, | |
| 0.2888335266113281 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 100.94468743012509 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| } | |
| } |