{ "prefill": { "memory": { "unit": "MB", "max_ram": 3576.500224, "max_vram": 11024.728064, "max_reserved": 10531.897344, "max_allocated": 9458.51392 }, "latency": { "unit": "s", "mean": 0.26589927994577517, "stdev": 0.004717404774457072, "values": [ 0.2945403137207031, 0.2661632080078125, 0.2650818176269531, 0.26499276733398436, 0.2650798034667969, 0.2648647766113281, 0.2649354248046875, 0.26499072265625, 0.2650746765136719, 0.2650808410644531, 0.2649825134277344, 0.26509210205078126, 0.2649774169921875, 0.2652323913574219, 0.2649415588378906, 0.2650306701660156, 0.26549758911132815, 0.26528564453125, 0.26555596923828123, 0.2653173828125, 0.26431591796875, 0.2648350830078125, 0.26553549194335935, 0.26530612182617186, 0.26522317504882814, 0.2651463623046875, 0.26528460693359374, 0.26509005737304686, 0.2648668212890625, 0.26478387451171875, 0.2652835693359375, 0.26543923950195314, 0.2648074340820312, 0.2652200927734375, 0.2651146240234375, 0.26522113037109374, 0.264890380859375, 0.265091064453125 ] }, "throughput": { "unit": "tokens/s", "value": 300.8658015783811 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3577.966592, "max_vram": 11127.488512, "max_reserved": 10634.657792, "max_allocated": 9518.949888 }, "latency": { "unit": "s", "mean": 23.714270278930663, "stdev": 0, "values": [ 23.714270278930663 ] }, "throughput": { "unit": "tokens/s", "value": 20.873507562228934 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.23953808362556225, "stdev": 0.0011223838576208927, "values": [ 0.24126976013183593, 0.24184934997558594, 0.241944580078125, 0.24157696533203124, 0.24142233276367187, 0.24156364440917968, 0.24111923217773437, 0.24104243469238282, 0.24155647277832032, 0.2407731170654297, 0.24124211120605468, 0.24097996520996093, 0.24022732543945313, 0.24134962463378906, 0.24109158325195312, 0.24052633666992188, 0.24072601318359374, 0.24031642150878907, 0.2408704071044922, 0.24035328674316406, 0.24031436157226563, 0.2403256378173828, 0.2404638671875, 0.2411847686767578, 0.23977984619140624, 0.24041165161132813, 0.23975117492675782, 0.24032357788085937, 0.24015565490722657, 0.24032666015625, 0.23952691650390626, 0.23980543518066405, 0.2395371551513672, 0.24026419067382812, 0.24166502380371094, 0.2394122314453125, 0.24026419067382812, 0.2393456573486328, 0.23959449768066407, 0.23952793884277343, 0.23927090454101563, 0.23895552062988282, 0.23910604858398438, 0.23962112426757812, 0.2393866271972656, 0.23915008544921876, 0.23919512939453125, 0.2399006652832031, 0.23890739440917969, 0.23990170288085938, 0.23840255737304689, 0.23848141479492188, 0.2383585205078125, 0.23940914916992187, 0.23918080139160156, 0.23795199584960938, 0.23889100646972655, 0.23912037658691407, 0.23861759948730468, 0.23844248962402342, 0.23861759948730468, 0.2423388214111328, 0.23819981384277344, 0.23846400451660157, 0.23878758239746095, 0.2382704620361328, 0.23828684997558594, 0.23864218139648438, 0.23916134643554687, 0.23734579467773437, 0.23761715698242186, 0.23818240356445314, 0.2382940216064453, 0.238097412109375, 0.23909068298339844, 0.2390118408203125, 0.23936819458007813, 0.23946751403808594, 0.23883673095703126, 0.23915110778808593, 0.23877529907226563, 0.23924327087402344, 0.23862168884277343, 0.23861351013183593, 0.2389811248779297, 0.2389422149658203, 0.2384701385498047, 0.2389063720703125, 0.2385039367675781, 0.23919821166992186, 0.2386544647216797, 0.23751475524902343, 0.23883775329589843, 0.2382458953857422, 0.23910195922851563, 0.2384701385498047, 0.2383462371826172, 0.2390118408203125, 0.23856742858886718 ] }, "throughput": { "unit": "tokens/s", "value": 20.873507562228934 }, "energy": null, "efficiency": null } }