{ "prefill": { "memory": { "unit": "MB", "max_ram": 2261.991424, "max_vram": 6025.117696, "max_reserved": 5540.675584, "max_allocated": 5394.580992 }, "latency": { "unit": "s", "mean": 0.28051526896158857, "stdev": 0.0010279945260692365, "values": [ 0.2865542297363281, 0.2806671447753906, 0.2802841491699219, 0.2804653930664063, 0.2802114562988281, 0.2804633483886719, 0.28021658325195314, 0.2804223937988281, 0.2802063293457031, 0.2805329895019531, 0.28019915771484377, 0.28044082641601564, 0.2802318725585938, 0.28040499877929687, 0.2802206726074219, 0.280469482421875, 0.28022171020507813, 0.2803619689941406, 0.2803384399414062, 0.2805390625, 0.2801838073730469, 0.2803732604980469, 0.2802288513183594, 0.28042547607421875, 0.2802565002441406, 0.28036505126953126, 0.2802288513183594, 0.28041632080078127, 0.2801919860839844, 0.280416259765625, 0.2802309265136719, 0.28041012573242186, 0.2803465576171875, 0.28036294555664065, 0.2801705017089844, 0.28049005126953125 ] }, "throughput": { "unit": "tokens/s", "value": 2281.5157348444955 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 2261.991424, "max_vram": 7392.4608, "max_reserved": 6908.018688, "max_allocated": 6645.015552 }, "latency": { "unit": "s", "mean": 15.30309118652344, "stdev": 0, "values": [ 15.30309118652344 ] }, "throughput": { "unit": "tokens/s", "value": 258.7712476997684 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.15457667865175193, "stdev": 0.0035198829598372676, "values": [ 0.14853631591796876, 0.1485629425048828, 0.14871142578125, 0.14902067565917967, 0.14889369201660158, 0.149138427734375, 0.14918450927734375, 0.14940774536132811, 0.1493780517578125, 0.1495572509765625, 0.14969036865234375, 0.14998118591308593, 0.14973338317871093, 0.15008358764648438, 0.14999449157714845, 0.15047987365722656, 0.1502412872314453, 0.1506007080078125, 0.15072767639160156, 0.15112704467773438, 0.15085055541992187, 0.15127040100097655, 0.15110552978515626, 0.15161856079101563, 0.15133798217773436, 0.15164723205566405, 0.151625732421875, 0.15236813354492187, 0.15195341491699219, 0.15232614135742187, 0.15218482971191405, 0.15276133728027344, 0.15240089416503907, 0.15273983764648438, 0.15265689086914064, 0.15338700866699218, 0.15289138793945312, 0.15316275024414064, 0.15308082580566407, 0.15389286804199218, 0.153491455078125, 0.15368704223632812, 0.153523193359375, 0.15430963134765624, 0.15378125, 0.15412428283691407, 0.1539727325439453, 0.1546598358154297, 0.1542451171875, 0.15469056701660155, 0.15449600219726561, 0.15528857421875, 0.15475302124023438, 0.1551595458984375, 0.1550049285888672, 0.15575039672851562, 0.15514828491210939, 0.1556326446533203, 0.15556915283203124, 0.15636582946777344, 0.15577293395996095, 0.1561333770751953, 0.15592037963867186, 0.1568194580078125, 0.15625625610351562, 0.1566033935546875, 0.15639141845703125, 0.15731610107421876, 0.156579833984375, 0.1570672607421875, 0.15684402465820313, 0.1577379913330078, 0.15710514831542968, 0.15760794067382813, 0.15734579467773438, 0.15827763366699218, 0.15748812866210937, 0.15787930297851563, 0.1578219451904297, 0.15878656005859376, 0.15806874084472655, 0.15856640625, 0.158350341796875, 0.15942041015625, 0.15851519775390624, 0.1590292510986328, 0.15878656005859376, 0.15979519653320312, 0.15904768371582031, 0.15951052856445314, 0.15930572509765625, 0.16033587646484376, 0.15943577575683593, 0.16, 0.15970303344726564, 0.16082124328613281, 0.15998976135253906, 0.1604679718017578, 0.16022015380859375 ] }, "throughput": { "unit": "tokens/s", "value": 258.7712476997684 }, "energy": null, "efficiency": null } }