benchmarks_8bit_batch_size25 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
1d8dc5f verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3610.86976,
"max_vram": 13706.985472,
"max_reserved": 13214.154752,
"max_allocated": 11027.453952
},
"latency": {
"unit": "s",
"mean": 0.4016479968261719,
"stdev": 0.00450307941805889,
"values": [
0.4229984130859375,
0.4061747131347656,
0.4007014465332031,
0.4005621643066406,
0.4003113098144531,
0.4004822998046875,
0.4004290466308594,
0.4008570861816406,
0.40047308349609373,
0.40001739501953126,
0.4006717529296875,
0.4008058776855469,
0.40065023803710936,
0.4005826416015625,
0.40037786865234376,
0.40050381469726565,
0.40064306640625,
0.40063385009765623,
0.40045669555664065,
0.39976141357421874,
0.40033383178710935,
0.4008027648925781,
0.40047308349609373,
0.4009308166503906,
0.40056524658203124
]
},
"throughput": {
"unit": "tokens/s",
"value": 995.8969126220113
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3630.583808,
"max_vram": 13736.3456,
"max_reserved": 13243.51488,
"max_allocated": 11166.721536
},
"latency": {
"unit": "s",
"mean": 28.560530487060543,
"stdev": 0,
"values": [
28.560530487060543
]
},
"throughput": {
"unit": "tokens/s",
"value": 86.65805423751874
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.2884902069400055,
"stdev": 0.005574553256331573,
"values": [
0.30573773193359377,
0.3014727783203125,
0.29969100952148436,
0.29973300170898437,
0.29858203125,
0.298608642578125,
0.29833624267578124,
0.2978570251464844,
0.29740133666992186,
0.29682278442382815,
0.29672549438476564,
0.2960639953613281,
0.2946611328125,
0.2954721374511719,
0.2954915771484375,
0.2945587158203125,
0.29500518798828124,
0.2944102478027344,
0.29504205322265625,
0.29280154418945314,
0.2928322448730469,
0.29225778198242186,
0.2919669799804688,
0.29329202270507815,
0.2921922607421875,
0.29252301025390626,
0.2918021240234375,
0.29138430786132813,
0.29127166748046873,
0.2904176635742188,
0.2911313781738281,
0.2917529602050781,
0.2908026733398438,
0.29039410400390625,
0.2901688232421875,
0.28997222900390623,
0.28976434326171874,
0.2902865905761719,
0.28897689819335937,
0.28997222900390623,
0.2897592468261719,
0.28981964111328123,
0.28855502319335935,
0.2880440368652344,
0.2889666442871094,
0.28792318725585936,
0.28774911499023437,
0.2884403076171875,
0.2875832214355469,
0.28695040893554685,
0.2873180236816406,
0.2875217895507812,
0.2857482299804687,
0.28675787353515625,
0.2872637329101563,
0.28659710693359375,
0.2864322509765625,
0.28636672973632815,
0.28585574340820313,
0.28576461791992186,
0.2865428466796875,
0.2852802429199219,
0.2851758117675781,
0.284943359375,
0.28499661254882813,
0.28454400634765625,
0.28392242431640624,
0.2840729675292969,
0.28438323974609375,
0.28357427978515626,
0.28338687133789064,
0.28420913696289063,
0.28362240600585936,
0.2831769714355469,
0.2835486755371094,
0.28436376953125,
0.28338381958007813,
0.2831790161132812,
0.28350567626953127,
0.28333056640625,
0.28241510009765625,
0.2824325256347656,
0.2824560546875,
0.2829588623046875,
0.2829619140625,
0.28229632568359375,
0.2825932922363281,
0.28196762084960936,
0.2830182495117187,
0.28210791015625,
0.2823055419921875,
0.28156927490234374,
0.2821396484375,
0.2817843322753906,
0.28104806518554687,
0.28229119873046876,
0.2816358337402344,
0.281427978515625,
0.2809241638183594
]
},
"throughput": {
"unit": "tokens/s",
"value": 86.65805423751874
},
"energy": null,
"efficiency": null
}
}