benchmarks_8bit_batch_size5 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
45696ce verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3576.500224,
"max_vram": 11024.728064,
"max_reserved": 10531.897344,
"max_allocated": 9458.51392
},
"latency": {
"unit": "s",
"mean": 0.26589927994577517,
"stdev": 0.004717404774457072,
"values": [
0.2945403137207031,
0.2661632080078125,
0.2650818176269531,
0.26499276733398436,
0.2650798034667969,
0.2648647766113281,
0.2649354248046875,
0.26499072265625,
0.2650746765136719,
0.2650808410644531,
0.2649825134277344,
0.26509210205078126,
0.2649774169921875,
0.2652323913574219,
0.2649415588378906,
0.2650306701660156,
0.26549758911132815,
0.26528564453125,
0.26555596923828123,
0.2653173828125,
0.26431591796875,
0.2648350830078125,
0.26553549194335935,
0.26530612182617186,
0.26522317504882814,
0.2651463623046875,
0.26528460693359374,
0.26509005737304686,
0.2648668212890625,
0.26478387451171875,
0.2652835693359375,
0.26543923950195314,
0.2648074340820312,
0.2652200927734375,
0.2651146240234375,
0.26522113037109374,
0.264890380859375,
0.265091064453125
]
},
"throughput": {
"unit": "tokens/s",
"value": 300.8658015783811
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3577.966592,
"max_vram": 11127.488512,
"max_reserved": 10634.657792,
"max_allocated": 9518.949888
},
"latency": {
"unit": "s",
"mean": 23.714270278930663,
"stdev": 0,
"values": [
23.714270278930663
]
},
"throughput": {
"unit": "tokens/s",
"value": 20.873507562228934
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.23953808362556225,
"stdev": 0.0011223838576208927,
"values": [
0.24126976013183593,
0.24184934997558594,
0.241944580078125,
0.24157696533203124,
0.24142233276367187,
0.24156364440917968,
0.24111923217773437,
0.24104243469238282,
0.24155647277832032,
0.2407731170654297,
0.24124211120605468,
0.24097996520996093,
0.24022732543945313,
0.24134962463378906,
0.24109158325195312,
0.24052633666992188,
0.24072601318359374,
0.24031642150878907,
0.2408704071044922,
0.24035328674316406,
0.24031436157226563,
0.2403256378173828,
0.2404638671875,
0.2411847686767578,
0.23977984619140624,
0.24041165161132813,
0.23975117492675782,
0.24032357788085937,
0.24015565490722657,
0.24032666015625,
0.23952691650390626,
0.23980543518066405,
0.2395371551513672,
0.24026419067382812,
0.24166502380371094,
0.2394122314453125,
0.24026419067382812,
0.2393456573486328,
0.23959449768066407,
0.23952793884277343,
0.23927090454101563,
0.23895552062988282,
0.23910604858398438,
0.23962112426757812,
0.2393866271972656,
0.23915008544921876,
0.23919512939453125,
0.2399006652832031,
0.23890739440917969,
0.23990170288085938,
0.23840255737304689,
0.23848141479492188,
0.2383585205078125,
0.23940914916992187,
0.23918080139160156,
0.23795199584960938,
0.23889100646972655,
0.23912037658691407,
0.23861759948730468,
0.23844248962402342,
0.23861759948730468,
0.2423388214111328,
0.23819981384277344,
0.23846400451660157,
0.23878758239746095,
0.2382704620361328,
0.23828684997558594,
0.23864218139648438,
0.23916134643554687,
0.23734579467773437,
0.23761715698242186,
0.23818240356445314,
0.2382940216064453,
0.238097412109375,
0.23909068298339844,
0.2390118408203125,
0.23936819458007813,
0.23946751403808594,
0.23883673095703126,
0.23915110778808593,
0.23877529907226563,
0.23924327087402344,
0.23862168884277343,
0.23861351013183593,
0.2389811248779297,
0.2389422149658203,
0.2384701385498047,
0.2389063720703125,
0.2385039367675781,
0.23919821166992186,
0.2386544647216797,
0.23751475524902343,
0.23883775329589843,
0.2382458953857422,
0.23910195922851563,
0.2384701385498047,
0.2383462371826172,
0.2390118408203125,
0.23856742858886718
]
},
"throughput": {
"unit": "tokens/s",
"value": 20.873507562228934
},
"energy": null,
"efficiency": null
}
}