{ "prefill": { "memory": { "unit": "MB", "max_ram": 3494.793216, "max_vram": 6029.312, "max_reserved": 5544.869888, "max_allocated": 5402.05824 }, "latency": { "unit": "s", "mean": 0.3201763744354248, "stdev": 0.0009697763372516204, "values": [ 0.3255560607910156, 0.32034609985351564, 0.31998052978515623, 0.32002969360351563, 0.319931396484375, 0.3200337829589844, 0.31998052978515623, 0.31996417236328123, 0.3199129638671875, 0.31991192626953124, 0.32001638793945314, 0.3199836120605469, 0.32001739501953125, 0.31995391845703125, 0.3199856567382813, 0.31999591064453126, 0.319857666015625, 0.320005126953125, 0.3201034240722656, 0.32001739501953125, 0.3199559631347656, 0.3200604248046875, 0.32007986450195314, 0.32001739501953125, 0.3200419921875, 0.3199754333496094, 0.32003073120117187, 0.3198873596191406, 0.3200091857910156, 0.3200081481933594, 0.32007986450195314, 0.31991397094726565 ] }, "throughput": { "unit": "tokens/s", "value": 2248.7605503984937 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3494.793216, "max_vram": 7511.998464, "max_reserved": 7027.556352, "max_allocated": 6811.666944 }, "latency": { "unit": "s", "mean": 15.420443618774414, "stdev": 0, "values": [ 15.420443618774414 ] }, "throughput": { "unit": "tokens/s", "value": 288.90219439446156 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.1557620567552971, "stdev": 0.003946284964659726, "values": [ 0.14904631042480468, 0.14914352416992188, 0.14919577026367187, 0.14954393005371094, 0.14946202087402344, 0.14966680908203125, 0.1497876434326172, 0.15007539367675782, 0.15004876708984374, 0.15028839111328124, 0.15029862976074218, 0.1507368927001953, 0.150614013671875, 0.15076658630371093, 0.15071334838867187, 0.15137893676757813, 0.15104103088378906, 0.15133491516113282, 0.15136665344238281, 0.15196876525878905, 0.15157862854003906, 0.15214591979980469, 0.1517322235107422, 0.15250125122070313, 0.15216844177246094, 0.1525913543701172, 0.15239474487304688, 0.15318118286132812, 0.1527019500732422, 0.15311564636230468, 0.15294566345214844, 0.1536522216796875, 0.1532508087158203, 0.153891845703125, 0.15347097778320312, 0.15422361755371095, 0.15370445251464843, 0.1540894775390625, 0.15398809814453124, 0.15469056701660155, 0.1542635498046875, 0.15466188049316407, 0.1545963592529297, 0.15538482666015624, 0.1548871612548828, 0.15524354553222655, 0.15513186645507812, 0.15583334350585937, 0.15544216918945314, 0.15583027648925782, 0.15565927124023438, 0.15656755065917968, 0.1559746551513672, 0.15632691955566405, 0.15616921997070313, 0.1570529327392578, 0.15650816345214844, 0.15684402465820313, 0.15670477294921875, 0.15763763427734376, 0.15698841857910156, 0.15741439819335937, 0.1571768341064453, 0.1581230010986328, 0.1574481964111328, 0.158055419921875, 0.15774412536621094, 0.15876710510253905, 0.15799090576171876, 0.15849267578125, 0.15835443115234374, 0.15930776977539063, 0.15858073425292968, 0.15907225036621095, 0.1588940734863281, 0.15995904541015624, 0.15911424255371093, 0.15965798950195312, 0.15930982971191407, 0.160500732421875, 0.15971328735351562, 0.16013311767578126, 0.16002149963378906, 0.16116018676757812, 0.16029592895507813, 0.16075059509277342, 0.16051303100585937, 0.16163737487792967, 0.16082432556152343, 0.16137522888183595, 0.16113357543945311, 0.16221696472167968, 0.1612349395751953, 0.16177049255371093, 0.16158924865722657, 0.16294706726074218, 0.16203570556640626, 0.16257023620605468, 0.16235110473632813 ] }, "throughput": { "unit": "tokens/s", "value": 288.90219439446156 }, "energy": null, "efficiency": null } }