{ "prefill": { "memory": { "unit": "MB", "max_ram": 3525.599232, "max_vram": 11714.691072, "max_reserved": 11221.860352, "max_allocated": 9991.724544 }, "latency": { "unit": "s", "mean": 0.3010050461713006, "stdev": 0.005007823178304763, "values": [ 0.32892837524414065, 0.3053179016113281, 0.3002685546875, 0.2994657287597656, 0.3013898315429687, 0.29939508056640624, 0.29945855712890623, 0.3003228149414062, 0.29954458618164065, 0.29985791015625, 0.3006095275878906, 0.3026851806640625, 0.2997637023925781, 0.300037109375, 0.2996305847167969, 0.29964801025390625, 0.3008983154296875, 0.2997923889160156, 0.2989916076660156, 0.29947698974609377, 0.2995968017578125, 0.2994698181152344, 0.29934283447265625, 0.299536376953125, 0.29968487548828127, 0.29946783447265624, 0.30125567626953126, 0.30218035888671874, 0.2998056945800781, 0.29990194702148437, 0.2995118103027344, 0.29968487548828127, 0.29941860961914063, 0.299831298828125 ] }, "throughput": { "unit": "tokens/s", "value": 531.5525504809801 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3536.162816, "max_vram": 12268.3392, "max_reserved": 11775.50848, "max_allocated": 10078.902272 }, "latency": { "unit": "s", "mean": 25.19557006835937, "stdev": 0, "values": [ 25.19557006835937 ] }, "throughput": { "unit": "tokens/s", "value": 39.292621572521725 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.25450070776120576, "stdev": 0.0028173351708351006, "values": [ 0.2622341003417969, 0.2604031982421875, 0.2598922119140625, 0.259346435546875, 0.26023934936523435, 0.259125244140625, 0.259641357421875, 0.2600816650390625, 0.2582261657714844, 0.2586593322753906, 0.2587289733886719, 0.2589450378417969, 0.2583848876953125, 0.2578052978515625, 0.25718887329101564, 0.2568939514160156, 0.25893478393554686, 0.2582753295898437, 0.25697381591796875, 0.2581626892089844, 0.256964599609375, 0.256321533203125, 0.25642291259765626, 0.25666763305664064, 0.25534873962402344, 0.25577267456054686, 0.25576243591308595, 0.25529037475585936, 0.25610955810546876, 0.2554173431396484, 0.2555606994628906, 0.25545318603515627, 0.2541045684814453, 0.2553528289794922, 0.2549780426025391, 0.2547394561767578, 0.2549903411865234, 0.2602659912109375, 0.2590689392089844, 0.25401242065429686, 0.2539376678466797, 0.254271484375, 0.2540543975830078, 0.25435136413574216, 0.2539878387451172, 0.25366937255859373, 0.25358233642578126, 0.25377484130859373, 0.2529126434326172, 0.2540585021972656, 0.25315840148925783, 0.2530897979736328, 0.2536099853515625, 0.2528061370849609, 0.25310617065429686, 0.25320346069335936, 0.2535393218994141, 0.2527958984375, 0.253264892578125, 0.25287986755371095, 0.2530160675048828, 0.2529689636230469, 0.25187532043457034, 0.25355877685546874, 0.25151078796386717, 0.2520494079589844, 0.25327001953125, 0.25154150390625, 0.2519019470214844, 0.25225625610351565, 0.25124659729003906, 0.25213848876953127, 0.25257369995117185, 0.2515240936279297, 0.2524241943359375, 0.2529085388183594, 0.2525675506591797, 0.25248051452636716, 0.2522879943847656, 0.2524682312011719, 0.2532095947265625, 0.25251429748535154, 0.2515650634765625, 0.25252351379394533, 0.25155378723144534, 0.2517022705078125, 0.25210067749023435, 0.25160902404785157, 0.25191014099121095, 0.25165721130371094, 0.25126296997070313, 0.25138584899902344, 0.250998779296875, 0.2516172790527344, 0.2514114532470703, 0.25093426513671874, 0.25146060180664065, 0.251789306640625, 0.25099366760253905 ] }, "throughput": { "unit": "tokens/s", "value": 39.292621572521725 }, "energy": null, "efficiency": null } }