{ "prefill": { "memory": { "unit": "MB", "max_ram": 3377.37728, "max_vram": 6058.672128, "max_reserved": 5576.327168, "max_allocated": 5355.380736 }, "latency": { "unit": "s", "mean": 0.24580386203672833, "stdev": 0.00022219677637899774, "values": [ 0.24691427612304687, 0.24614707946777345, 0.24578355407714844, 0.24561459350585937, 0.2458961944580078, 0.24560025024414062, 0.2456575927734375, 0.24577023315429689, 0.24575999450683594, 0.24567091369628907, 0.2458787841796875, 0.24583171081542968, 0.24586341857910157, 0.24571495056152343, 0.24590336608886718, 0.24583168029785157, 0.24580096435546875, 0.24568524169921874, 0.24596377563476562, 0.24577023315429689, 0.24546406555175782, 0.24596377563476562, 0.24557772827148439, 0.24591360473632812, 0.24575999450683594, 0.2457108154296875, 0.24578866577148437, 0.2458992614746094, 0.24572927856445312, 0.2458675231933594, 0.24559309387207032, 0.2457733154296875, 0.24587980651855468, 0.24561048889160156, 0.24586239624023437, 0.2460037078857422, 0.24556236267089843, 0.24560127258300782, 0.24583680725097656, 0.2457057342529297, 0.24579583740234376 ] }, "throughput": { "unit": "tokens/s", "value": 1952.7764780533748 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3377.37728, "max_vram": 7197.425664, "max_reserved": 6712.983552, "max_allocated": 6292.99456 }, "latency": { "unit": "s", "mean": 14.316073944091789, "stdev": 0, "values": [ 14.316073944091789 ] }, "throughput": { "unit": "tokens/s", "value": 207.45911285445072 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.14460680751607868, "stdev": 0.002633980375935467, "values": [ 0.14012416076660156, 0.14033407592773436, 0.14042930603027343, 0.14048460388183595, 0.14043341064453124, 0.14061260986328125, 0.14064947509765624, 0.14090342712402343, 0.14090138244628905, 0.14114303588867189, 0.14110208129882812, 0.14144717407226562, 0.14132838439941406, 0.14151271057128906, 0.14150758361816407, 0.14181170654296876, 0.14139903259277345, 0.14184550476074217, 0.14165708923339843, 0.14212300109863282, 0.14184141540527342, 0.14210560607910155, 0.14203392028808592, 0.1424148406982422, 0.1421895751953125, 0.1424486389160156, 0.14228172302246095, 0.14273228454589842, 0.14244146728515625, 0.14266983032226563, 0.14270361328125, 0.1431470031738281, 0.14275686645507812, 0.14305381774902343, 0.14302105712890625, 0.14357913208007814, 0.14316645812988282, 0.14344499206542968, 0.14331596374511718, 0.1438586883544922, 0.1435484161376953, 0.14384640502929688, 0.14373989868164064, 0.14430105590820314, 0.14400717163085938, 0.1441095733642578, 0.1440245819091797, 0.14447718811035157, 0.14414540100097656, 0.14444032287597655, 0.14435224914550782, 0.1449502716064453, 0.14458265686035157, 0.14494207763671876, 0.14483045959472657, 0.14541413879394532, 0.14501580810546874, 0.14532608032226563, 0.14521241760253906, 0.14594866943359375, 0.14552268981933594, 0.14566502380371094, 0.14550834655761719, 0.14620364379882814, 0.14571417236328124, 0.14605722045898437, 0.14588825988769533, 0.14663066101074218, 0.14609100341796874, 0.14640538024902344, 0.1462906951904297, 0.14695526123046876, 0.1464596405029297, 0.14691635131835937, 0.14672076416015625, 0.14758912658691406, 0.1468211212158203, 0.1471631317138672, 0.14699417114257812, 0.14770176696777343, 0.14713037109375, 0.14749798583984375, 0.14733413696289063, 0.1480693817138672, 0.14749900817871095, 0.1478656005859375, 0.14795468139648438, 0.14866021728515624, 0.148168701171875, 0.14849331665039062, 0.1483356170654297, 0.14925926208496093, 0.14847488403320314, 0.14881587219238282, 0.14865408325195312, 0.1494906921386719, 0.1488107452392578, 0.14924082946777345, 0.1488486328125 ] }, "throughput": { "unit": "tokens/s", "value": 207.45911285445072 }, "energy": null, "efficiency": null } }