benchmarks_4bit_batch_size30 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
36a6b96 verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3377.37728,
"max_vram": 6058.672128,
"max_reserved": 5576.327168,
"max_allocated": 5355.380736
},
"latency": {
"unit": "s",
"mean": 0.24580386203672833,
"stdev": 0.00022219677637899774,
"values": [
0.24691427612304687,
0.24614707946777345,
0.24578355407714844,
0.24561459350585937,
0.2458961944580078,
0.24560025024414062,
0.2456575927734375,
0.24577023315429689,
0.24575999450683594,
0.24567091369628907,
0.2458787841796875,
0.24583171081542968,
0.24586341857910157,
0.24571495056152343,
0.24590336608886718,
0.24583168029785157,
0.24580096435546875,
0.24568524169921874,
0.24596377563476562,
0.24577023315429689,
0.24546406555175782,
0.24596377563476562,
0.24557772827148439,
0.24591360473632812,
0.24575999450683594,
0.2457108154296875,
0.24578866577148437,
0.2458992614746094,
0.24572927856445312,
0.2458675231933594,
0.24559309387207032,
0.2457733154296875,
0.24587980651855468,
0.24561048889160156,
0.24586239624023437,
0.2460037078857422,
0.24556236267089843,
0.24560127258300782,
0.24583680725097656,
0.2457057342529297,
0.24579583740234376
]
},
"throughput": {
"unit": "tokens/s",
"value": 1952.7764780533748
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3377.37728,
"max_vram": 7197.425664,
"max_reserved": 6712.983552,
"max_allocated": 6292.99456
},
"latency": {
"unit": "s",
"mean": 14.316073944091789,
"stdev": 0,
"values": [
14.316073944091789
]
},
"throughput": {
"unit": "tokens/s",
"value": 207.45911285445072
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.14460680751607868,
"stdev": 0.002633980375935467,
"values": [
0.14012416076660156,
0.14033407592773436,
0.14042930603027343,
0.14048460388183595,
0.14043341064453124,
0.14061260986328125,
0.14064947509765624,
0.14090342712402343,
0.14090138244628905,
0.14114303588867189,
0.14110208129882812,
0.14144717407226562,
0.14132838439941406,
0.14151271057128906,
0.14150758361816407,
0.14181170654296876,
0.14139903259277345,
0.14184550476074217,
0.14165708923339843,
0.14212300109863282,
0.14184141540527342,
0.14210560607910155,
0.14203392028808592,
0.1424148406982422,
0.1421895751953125,
0.1424486389160156,
0.14228172302246095,
0.14273228454589842,
0.14244146728515625,
0.14266983032226563,
0.14270361328125,
0.1431470031738281,
0.14275686645507812,
0.14305381774902343,
0.14302105712890625,
0.14357913208007814,
0.14316645812988282,
0.14344499206542968,
0.14331596374511718,
0.1438586883544922,
0.1435484161376953,
0.14384640502929688,
0.14373989868164064,
0.14430105590820314,
0.14400717163085938,
0.1441095733642578,
0.1440245819091797,
0.14447718811035157,
0.14414540100097656,
0.14444032287597655,
0.14435224914550782,
0.1449502716064453,
0.14458265686035157,
0.14494207763671876,
0.14483045959472657,
0.14541413879394532,
0.14501580810546874,
0.14532608032226563,
0.14521241760253906,
0.14594866943359375,
0.14552268981933594,
0.14566502380371094,
0.14550834655761719,
0.14620364379882814,
0.14571417236328124,
0.14605722045898437,
0.14588825988769533,
0.14663066101074218,
0.14609100341796874,
0.14640538024902344,
0.1462906951904297,
0.14695526123046876,
0.1464596405029297,
0.14691635131835937,
0.14672076416015625,
0.14758912658691406,
0.1468211212158203,
0.1471631317138672,
0.14699417114257812,
0.14770176696777343,
0.14713037109375,
0.14749798583984375,
0.14733413696289063,
0.1480693817138672,
0.14749900817871095,
0.1478656005859375,
0.14795468139648438,
0.14866021728515624,
0.148168701171875,
0.14849331665039062,
0.1483356170654297,
0.14925926208496093,
0.14847488403320314,
0.14881587219238282,
0.14865408325195312,
0.1494906921386719,
0.1488107452392578,
0.14924082946777345,
0.1488486328125
]
},
"throughput": {
"unit": "tokens/s",
"value": 207.45911285445072
},
"energy": null,
"efficiency": null
}
}