{ "prefill": { "memory": { "unit": "MB", "max_ram": 3385.741312, "max_vram": 6058.672128, "max_reserved": 5574.230016, "max_allocated": 5342.596608 }, "latency": { "unit": "s", "mean": 0.24166012536911738, "stdev": 0.000219597102317531, "values": [ 0.24255186462402345, 0.24223948669433593, 0.24154214477539063, 0.24127078247070313, 0.24128614807128906, 0.24127999877929687, 0.24149395751953126, 0.24154010009765625, 0.24160870361328124, 0.24123802185058593, 0.24157901000976562, 0.2417223663330078, 0.24163839721679686, 0.24168858337402344, 0.24178279113769532, 0.2415667266845703, 0.24151962280273437, 0.24177253723144532, 0.2415749053955078, 0.24157594299316407, 0.24172647094726563, 0.24174490356445313, 0.2417664031982422, 0.24172134399414064, 0.24179916381835936, 0.24168345642089845, 0.24153701782226564, 0.24150323486328126, 0.24169676208496094, 0.2416660461425781, 0.2417039337158203, 0.2417664031982422, 0.24159539794921875, 0.2418032989501953, 0.2417274932861328, 0.2416680908203125, 0.24169573974609376, 0.24168960571289064, 0.2416680908203125, 0.24168960571289064, 0.24173977661132812, 0.2416609344482422 ] }, "throughput": { "unit": "tokens/s", "value": 1655.217216282705 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3385.741312, "max_vram": 7077.888, "max_reserved": 6593.445888, "max_allocated": 6122.28864 }, "latency": { "unit": "s", "mean": 14.189453353881829, "stdev": 0, "values": [ 14.189453353881829 ] }, "throughput": { "unit": "tokens/s", "value": 174.42532409628808 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.143327811655372, "stdev": 0.0022867196159492443, "values": [ 0.13951898193359374, 0.13945138549804686, 0.13960397338867186, 0.13967257690429688, 0.13968896484375, 0.13975347900390625, 0.13986099243164063, 0.13998284912109374, 0.14005349731445313, 0.14017945861816405, 0.14014259338378907, 0.14040371704101562, 0.1403453369140625, 0.1405982666015625, 0.1405050811767578, 0.14077542114257813, 0.14066073608398438, 0.14082354736328126, 0.14084197998046874, 0.14116557312011718, 0.1409556427001953, 0.14115122985839842, 0.14108876037597656, 0.1414082489013672, 0.14131814575195312, 0.1414615020751953, 0.14137344360351561, 0.14176358032226563, 0.14150962829589844, 0.141765625, 0.1416663055419922, 0.14203085327148438, 0.14181887817382813, 0.14205952453613283, 0.14198272705078124, 0.14248959350585938, 0.14217625427246094, 0.14242201232910157, 0.1422592010498047, 0.1427097625732422, 0.1424435272216797, 0.14264012145996094, 0.1426616668701172, 0.14317872619628907, 0.14284902954101564, 0.1429381103515625, 0.14289715576171874, 0.14333030700683594, 0.1430476837158203, 0.14330368041992186, 0.14318899536132812, 0.1438074951171875, 0.14332722473144532, 0.14364364624023437, 0.14344908142089843, 0.1440010223388672, 0.14353511047363282, 0.14397030639648437, 0.14398361206054688, 0.14445671081542968, 0.1439303741455078, 0.14429592895507812, 0.14409834289550782, 0.14469013977050782, 0.14440447998046874, 0.14462567138671875, 0.14453146362304686, 0.1451182098388672, 0.1446297607421875, 0.14496357727050782, 0.14478643798828125, 0.14544178771972657, 0.14492466735839843, 0.14537318420410156, 0.14522982788085936, 0.1460438995361328, 0.1452605438232422, 0.14575718688964845, 0.1454571533203125, 0.14606643676757813, 0.14558003234863282, 0.14593434143066406, 0.1458831329345703, 0.14647705078125, 0.1459271697998047, 0.14626509094238282, 0.14605311584472655, 0.1467658233642578, 0.1461995544433594, 0.14658253479003908, 0.14628863525390626, 0.14716517639160157, 0.14658253479003908, 0.14690304565429688, 0.14673715209960939, 0.14730650329589845, 0.1468272705078125, 0.14720819091796875, 0.14704537963867187 ] }, "throughput": { "unit": "tokens/s", "value": 174.42532409628808 }, "energy": null, "efficiency": null } }