{ "prefill": { "memory": { "unit": "MB", "max_ram": 3581.227008, "max_vram": 14426.308608, "max_reserved": 13933.477888, "max_allocated": 11182.528 }, "latency": { "unit": "s", "mean": 0.4189964497884115, "stdev": 0.004250981279472382, "values": [ 0.4385589294433594, 0.423773193359375, 0.41801422119140624, 0.41807257080078125, 0.4178309020996094, 0.4186828918457031, 0.41806951904296874, 0.4179261474609375, 0.41801318359375, 0.4178155517578125, 0.41779302978515626, 0.41764556884765625, 0.4177346496582031, 0.41765786743164063, 0.41786880493164064, 0.41775518798828126, 0.41766604614257813, 0.417807373046875, 0.4177121276855469, 0.41813299560546874, 0.41812069702148436, 0.41768341064453124, 0.41801422119140624, 0.4175657043457031 ] }, "throughput": { "unit": "tokens/s", "value": 1145.594432225845 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3601.494016, "max_vram": 14321.451008, "max_reserved": 13828.620288, "max_allocated": 11336.600064 }, "latency": { "unit": "s", "mean": 29.4220535583496, "stdev": 0, "values": [ 29.4220535583496 ] }, "throughput": { "unit": "tokens/s", "value": 100.94468743012509 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.2971924601853495, "stdev": 0.006030435507012704, "values": [ 0.3147960205078125, 0.30984600830078124, 0.30846875, 0.30889166259765627, 0.3081308288574219, 0.30789120483398436, 0.30744577026367187, 0.3064289245605469, 0.3073105773925781, 0.3058995056152344, 0.30636135864257813, 0.3053465576171875, 0.30569369506835936, 0.3047475280761719, 0.30424884033203126, 0.30414337158203125, 0.3038443603515625, 0.3047669677734375, 0.3036334228515625, 0.3034777526855469, 0.30244659423828124, 0.30226739501953126, 0.3015546875, 0.30261453247070313, 0.3014553527832031, 0.3018014831542969, 0.30137139892578124, 0.3006924743652344, 0.3007979431152344, 0.30056549072265626, 0.30052557373046873, 0.299931640625, 0.2989137878417969, 0.2993786926269531, 0.2994145202636719, 0.2986608581542969, 0.29832498168945315, 0.2994288635253906, 0.2982072448730469, 0.29848779296875, 0.297818115234375, 0.2969241638183594, 0.29708697509765625, 0.2975580139160156, 0.29730816650390623, 0.29664154052734376, 0.2972283020019531, 0.296310791015625, 0.29754879760742187, 0.2962001953125, 0.29624114990234374, 0.296637451171875, 0.2953021545410156, 0.2963804016113281, 0.29485159301757813, 0.294877197265625, 0.2944604187011719, 0.2946324462890625, 0.2944860229492188, 0.2944901123046875, 0.2939412536621094, 0.29395968627929686, 0.29282098388671873, 0.29332992553710935, 0.2928732299804688, 0.291852294921875, 0.29285171508789065, 0.2923458557128906, 0.29213082885742186, 0.292485107421875, 0.2925527038574219, 0.29165057373046877, 0.29194955444335935, 0.292611083984375, 0.29196389770507813, 0.2922905578613281, 0.2916874389648437, 0.291746826171875, 0.2913392639160156, 0.2908856201171875, 0.29074432373046877, 0.2905907287597656, 0.2906265563964844, 0.29140069580078126, 0.289986572265625, 0.2901381225585937, 0.29077197265625, 0.29088052368164063, 0.2900203552246094, 0.2901637268066406, 0.29011456298828125, 0.2901842041015625, 0.29018316650390624, 0.290260986328125, 0.29000909423828125, 0.29029067993164065, 0.28940081787109373, 0.2889861145019531, 0.2888335266113281 ] }, "throughput": { "unit": "tokens/s", "value": 100.94468743012509 }, "energy": null, "efficiency": null } }