| { | |
| "prefill": { | |
| "memory": { | |
| "unit": "MB", | |
| "max_ram": 3610.86976, | |
| "max_vram": 13706.985472, | |
| "max_reserved": 13214.154752, | |
| "max_allocated": 11027.453952 | |
| }, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 0.4016479968261719, | |
| "stdev": 0.00450307941805889, | |
| "values": [ | |
| 0.4229984130859375, | |
| 0.4061747131347656, | |
| 0.4007014465332031, | |
| 0.4005621643066406, | |
| 0.4003113098144531, | |
| 0.4004822998046875, | |
| 0.4004290466308594, | |
| 0.4008570861816406, | |
| 0.40047308349609373, | |
| 0.40001739501953126, | |
| 0.4006717529296875, | |
| 0.4008058776855469, | |
| 0.40065023803710936, | |
| 0.4005826416015625, | |
| 0.40037786865234376, | |
| 0.40050381469726565, | |
| 0.40064306640625, | |
| 0.40063385009765623, | |
| 0.40045669555664065, | |
| 0.39976141357421874, | |
| 0.40033383178710935, | |
| 0.4008027648925781, | |
| 0.40047308349609373, | |
| 0.4009308166503906, | |
| 0.40056524658203124 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 995.8969126220113 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| }, | |
| "decode": { | |
| "memory": { | |
| "unit": "MB", | |
| "max_ram": 3630.583808, | |
| "max_vram": 13736.3456, | |
| "max_reserved": 13243.51488, | |
| "max_allocated": 11166.721536 | |
| }, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 28.560530487060543, | |
| "stdev": 0, | |
| "values": [ | |
| 28.560530487060543 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 86.65805423751874 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| }, | |
| "per_token": { | |
| "memory": null, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 0.2884902069400055, | |
| "stdev": 0.005574553256331573, | |
| "values": [ | |
| 0.30573773193359377, | |
| 0.3014727783203125, | |
| 0.29969100952148436, | |
| 0.29973300170898437, | |
| 0.29858203125, | |
| 0.298608642578125, | |
| 0.29833624267578124, | |
| 0.2978570251464844, | |
| 0.29740133666992186, | |
| 0.29682278442382815, | |
| 0.29672549438476564, | |
| 0.2960639953613281, | |
| 0.2946611328125, | |
| 0.2954721374511719, | |
| 0.2954915771484375, | |
| 0.2945587158203125, | |
| 0.29500518798828124, | |
| 0.2944102478027344, | |
| 0.29504205322265625, | |
| 0.29280154418945314, | |
| 0.2928322448730469, | |
| 0.29225778198242186, | |
| 0.2919669799804688, | |
| 0.29329202270507815, | |
| 0.2921922607421875, | |
| 0.29252301025390626, | |
| 0.2918021240234375, | |
| 0.29138430786132813, | |
| 0.29127166748046873, | |
| 0.2904176635742188, | |
| 0.2911313781738281, | |
| 0.2917529602050781, | |
| 0.2908026733398438, | |
| 0.29039410400390625, | |
| 0.2901688232421875, | |
| 0.28997222900390623, | |
| 0.28976434326171874, | |
| 0.2902865905761719, | |
| 0.28897689819335937, | |
| 0.28997222900390623, | |
| 0.2897592468261719, | |
| 0.28981964111328123, | |
| 0.28855502319335935, | |
| 0.2880440368652344, | |
| 0.2889666442871094, | |
| 0.28792318725585936, | |
| 0.28774911499023437, | |
| 0.2884403076171875, | |
| 0.2875832214355469, | |
| 0.28695040893554685, | |
| 0.2873180236816406, | |
| 0.2875217895507812, | |
| 0.2857482299804687, | |
| 0.28675787353515625, | |
| 0.2872637329101563, | |
| 0.28659710693359375, | |
| 0.2864322509765625, | |
| 0.28636672973632815, | |
| 0.28585574340820313, | |
| 0.28576461791992186, | |
| 0.2865428466796875, | |
| 0.2852802429199219, | |
| 0.2851758117675781, | |
| 0.284943359375, | |
| 0.28499661254882813, | |
| 0.28454400634765625, | |
| 0.28392242431640624, | |
| 0.2840729675292969, | |
| 0.28438323974609375, | |
| 0.28357427978515626, | |
| 0.28338687133789064, | |
| 0.28420913696289063, | |
| 0.28362240600585936, | |
| 0.2831769714355469, | |
| 0.2835486755371094, | |
| 0.28436376953125, | |
| 0.28338381958007813, | |
| 0.2831790161132812, | |
| 0.28350567626953127, | |
| 0.28333056640625, | |
| 0.28241510009765625, | |
| 0.2824325256347656, | |
| 0.2824560546875, | |
| 0.2829588623046875, | |
| 0.2829619140625, | |
| 0.28229632568359375, | |
| 0.2825932922363281, | |
| 0.28196762084960936, | |
| 0.2830182495117187, | |
| 0.28210791015625, | |
| 0.2823055419921875, | |
| 0.28156927490234374, | |
| 0.2821396484375, | |
| 0.2817843322753906, | |
| 0.28104806518554687, | |
| 0.28229119873046876, | |
| 0.2816358337402344, | |
| 0.281427978515625, | |
| 0.2809241638183594 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 86.65805423751874 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| } | |
| } |