{ "prefill": { "memory": { "unit": "MB", "max_ram": 3610.86976, "max_vram": 13706.985472, "max_reserved": 13214.154752, "max_allocated": 11027.453952 }, "latency": { "unit": "s", "mean": 0.4016479968261719, "stdev": 0.00450307941805889, "values": [ 0.4229984130859375, 0.4061747131347656, 0.4007014465332031, 0.4005621643066406, 0.4003113098144531, 0.4004822998046875, 0.4004290466308594, 0.4008570861816406, 0.40047308349609373, 0.40001739501953126, 0.4006717529296875, 0.4008058776855469, 0.40065023803710936, 0.4005826416015625, 0.40037786865234376, 0.40050381469726565, 0.40064306640625, 0.40063385009765623, 0.40045669555664065, 0.39976141357421874, 0.40033383178710935, 0.4008027648925781, 0.40047308349609373, 0.4009308166503906, 0.40056524658203124 ] }, "throughput": { "unit": "tokens/s", "value": 995.8969126220113 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3630.583808, "max_vram": 13736.3456, "max_reserved": 13243.51488, "max_allocated": 11166.721536 }, "latency": { "unit": "s", "mean": 28.560530487060543, "stdev": 0, "values": [ 28.560530487060543 ] }, "throughput": { "unit": "tokens/s", "value": 86.65805423751874 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.2884902069400055, "stdev": 0.005574553256331573, "values": [ 0.30573773193359377, 0.3014727783203125, 0.29969100952148436, 0.29973300170898437, 0.29858203125, 0.298608642578125, 0.29833624267578124, 0.2978570251464844, 0.29740133666992186, 0.29682278442382815, 0.29672549438476564, 0.2960639953613281, 0.2946611328125, 0.2954721374511719, 0.2954915771484375, 0.2945587158203125, 0.29500518798828124, 0.2944102478027344, 0.29504205322265625, 0.29280154418945314, 0.2928322448730469, 0.29225778198242186, 0.2919669799804688, 0.29329202270507815, 0.2921922607421875, 0.29252301025390626, 0.2918021240234375, 0.29138430786132813, 0.29127166748046873, 0.2904176635742188, 0.2911313781738281, 0.2917529602050781, 0.2908026733398438, 0.29039410400390625, 0.2901688232421875, 0.28997222900390623, 0.28976434326171874, 0.2902865905761719, 0.28897689819335937, 0.28997222900390623, 0.2897592468261719, 0.28981964111328123, 0.28855502319335935, 0.2880440368652344, 0.2889666442871094, 0.28792318725585936, 0.28774911499023437, 0.2884403076171875, 0.2875832214355469, 0.28695040893554685, 0.2873180236816406, 0.2875217895507812, 0.2857482299804687, 0.28675787353515625, 0.2872637329101563, 0.28659710693359375, 0.2864322509765625, 0.28636672973632815, 0.28585574340820313, 0.28576461791992186, 0.2865428466796875, 0.2852802429199219, 0.2851758117675781, 0.284943359375, 0.28499661254882813, 0.28454400634765625, 0.28392242431640624, 0.2840729675292969, 0.28438323974609375, 0.28357427978515626, 0.28338687133789064, 0.28420913696289063, 0.28362240600585936, 0.2831769714355469, 0.2835486755371094, 0.28436376953125, 0.28338381958007813, 0.2831790161132812, 0.28350567626953127, 0.28333056640625, 0.28241510009765625, 0.2824325256347656, 0.2824560546875, 0.2829588623046875, 0.2829619140625, 0.28229632568359375, 0.2825932922363281, 0.28196762084960936, 0.2830182495117187, 0.28210791015625, 0.2823055419921875, 0.28156927490234374, 0.2821396484375, 0.2817843322753906, 0.28104806518554687, 0.28229119873046876, 0.2816358337402344, 0.281427978515625, 0.2809241638183594 ] }, "throughput": { "unit": "tokens/s", "value": 86.65805423751874 }, "energy": null, "efficiency": null } }