{ "prefill": { "memory": { "unit": "MB", "max_ram": 3470.56128, "max_vram": 12769.558528, "max_reserved": 12276.727808, "max_allocated": 10493.853184 }, "latency": { "unit": "s", "mean": 0.3322811594317036, "stdev": 0.004201918534069472, "values": [ 0.3548702697753906, 0.3352504272460938, 0.3326003112792969, 0.33182821655273437, 0.3319469299316406, 0.33254296875, 0.3314892883300781, 0.33137869262695313, 0.33092608642578125, 0.3313489990234375, 0.3318661193847656, 0.3311390686035156, 0.33138790893554687, 0.3314851989746094, 0.33103564453125, 0.3311247253417969, 0.3315466003417969, 0.33106533813476563, 0.3314124755859375, 0.331188232421875, 0.33160498046875, 0.3317739562988281, 0.3311513671875, 0.33090765380859377, 0.33081649780273437, 0.33114727783203124, 0.33095269775390623, 0.3309649963378906, 0.3314667663574219, 0.33160498046875, 0.33089126586914064 ] }, "throughput": { "unit": "tokens/s", "value": 722.2798921565973 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3471.822848, "max_vram": 13524.533248, "max_reserved": 13031.702528, "max_allocated": 10594.238464 }, "latency": { "unit": "s", "mean": 27.10930010986329, "stdev": 0, "values": [ 27.10930010986329 ] }, "throughput": { "unit": "tokens/s", "value": 54.77824930860927 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.27383131424104334, "stdev": 0.003865402878377042, "values": [ 0.28423577880859374, 0.2813306884765625, 0.28021041870117186, 0.2805688171386719, 0.2805350341796875, 0.280511474609375, 0.28056781005859377, 0.28048178100585935, 0.2796636047363281, 0.279478271484375, 0.2780057678222656, 0.2784327697753906, 0.27773236083984376, 0.2771885986328125, 0.2785331115722656, 0.27619430541992185, 0.27745895385742186, 0.27661618041992186, 0.2762188720703125, 0.27671243286132813, 0.27565362548828126, 0.2763612060546875, 0.27662130737304685, 0.2756331481933594, 0.27525225830078126, 0.2755604248046875, 0.27471261596679686, 0.2745743103027344, 0.27438796997070314, 0.27482009887695313, 0.2740787048339844, 0.2747821960449219, 0.2742958068847656, 0.27405517578125, 0.27351553344726565, 0.27380224609375, 0.2732697448730469, 0.273517578125, 0.27277618408203125, 0.2734888916015625, 0.27336294555664065, 0.27980593872070314, 0.27267788696289064, 0.27327078247070313, 0.2725263366699219, 0.2733834228515625, 0.2763018493652344, 0.28083709716796873, 0.2731366271972656, 0.28008447265625, 0.27196417236328124, 0.27271783447265624, 0.27304241943359375, 0.27160986328125, 0.2719815673828125, 0.2721392517089844, 0.2742210693359375, 0.27142962646484375, 0.2707630004882812, 0.27627825927734373, 0.2755911560058594, 0.27026943969726563, 0.27018649291992186, 0.2707906494140625, 0.2697359313964844, 0.26930584716796874, 0.27066574096679685, 0.26956494140625, 0.2701271057128906, 0.2692567138671875, 0.2692587585449219, 0.2694625244140625, 0.2702479248046875, 0.27053466796875, 0.27034725952148436, 0.2764933166503906, 0.27599053955078123, 0.2792693786621094, 0.2697052307128906, 0.2699970703125, 0.2686484375, 0.2691286926269531, 0.2699591674804688, 0.2695352783203125, 0.2692228698730469, 0.2694993896484375, 0.26948300170898437, 0.26922906494140625, 0.26847027587890626, 0.27224267578125, 0.2729758605957031, 0.27673394775390625, 0.2686033935546875, 0.26899252319335937, 0.269348876953125, 0.2679552001953125, 0.2716129150390625, 0.2688880615234375, 0.26859930419921874 ] }, "throughput": { "unit": "tokens/s", "value": 54.77824930860927 }, "energy": null, "efficiency": null } }