benchmarks_4bit_batch_size45 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
ac5b365 verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3494.793216,
"max_vram": 6029.312,
"max_reserved": 5544.869888,
"max_allocated": 5402.05824
},
"latency": {
"unit": "s",
"mean": 0.3201763744354248,
"stdev": 0.0009697763372516204,
"values": [
0.3255560607910156,
0.32034609985351564,
0.31998052978515623,
0.32002969360351563,
0.319931396484375,
0.3200337829589844,
0.31998052978515623,
0.31996417236328123,
0.3199129638671875,
0.31991192626953124,
0.32001638793945314,
0.3199836120605469,
0.32001739501953125,
0.31995391845703125,
0.3199856567382813,
0.31999591064453126,
0.319857666015625,
0.320005126953125,
0.3201034240722656,
0.32001739501953125,
0.3199559631347656,
0.3200604248046875,
0.32007986450195314,
0.32001739501953125,
0.3200419921875,
0.3199754333496094,
0.32003073120117187,
0.3198873596191406,
0.3200091857910156,
0.3200081481933594,
0.32007986450195314,
0.31991397094726565
]
},
"throughput": {
"unit": "tokens/s",
"value": 2248.7605503984937
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3494.793216,
"max_vram": 7511.998464,
"max_reserved": 7027.556352,
"max_allocated": 6811.666944
},
"latency": {
"unit": "s",
"mean": 15.420443618774414,
"stdev": 0,
"values": [
15.420443618774414
]
},
"throughput": {
"unit": "tokens/s",
"value": 288.90219439446156
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.1557620567552971,
"stdev": 0.003946284964659726,
"values": [
0.14904631042480468,
0.14914352416992188,
0.14919577026367187,
0.14954393005371094,
0.14946202087402344,
0.14966680908203125,
0.1497876434326172,
0.15007539367675782,
0.15004876708984374,
0.15028839111328124,
0.15029862976074218,
0.1507368927001953,
0.150614013671875,
0.15076658630371093,
0.15071334838867187,
0.15137893676757813,
0.15104103088378906,
0.15133491516113282,
0.15136665344238281,
0.15196876525878905,
0.15157862854003906,
0.15214591979980469,
0.1517322235107422,
0.15250125122070313,
0.15216844177246094,
0.1525913543701172,
0.15239474487304688,
0.15318118286132812,
0.1527019500732422,
0.15311564636230468,
0.15294566345214844,
0.1536522216796875,
0.1532508087158203,
0.153891845703125,
0.15347097778320312,
0.15422361755371095,
0.15370445251464843,
0.1540894775390625,
0.15398809814453124,
0.15469056701660155,
0.1542635498046875,
0.15466188049316407,
0.1545963592529297,
0.15538482666015624,
0.1548871612548828,
0.15524354553222655,
0.15513186645507812,
0.15583334350585937,
0.15544216918945314,
0.15583027648925782,
0.15565927124023438,
0.15656755065917968,
0.1559746551513672,
0.15632691955566405,
0.15616921997070313,
0.1570529327392578,
0.15650816345214844,
0.15684402465820313,
0.15670477294921875,
0.15763763427734376,
0.15698841857910156,
0.15741439819335937,
0.1571768341064453,
0.1581230010986328,
0.1574481964111328,
0.158055419921875,
0.15774412536621094,
0.15876710510253905,
0.15799090576171876,
0.15849267578125,
0.15835443115234374,
0.15930776977539063,
0.15858073425292968,
0.15907225036621095,
0.1588940734863281,
0.15995904541015624,
0.15911424255371093,
0.15965798950195312,
0.15930982971191407,
0.160500732421875,
0.15971328735351562,
0.16013311767578126,
0.16002149963378906,
0.16116018676757812,
0.16029592895507813,
0.16075059509277342,
0.16051303100585937,
0.16163737487792967,
0.16082432556152343,
0.16137522888183595,
0.16113357543945311,
0.16221696472167968,
0.1612349395751953,
0.16177049255371093,
0.16158924865722657,
0.16294706726074218,
0.16203570556640626,
0.16257023620605468,
0.16235110473632813
]
},
"throughput": {
"unit": "tokens/s",
"value": 288.90219439446156
},
"energy": null,
"efficiency": null
}
}