{ "prefill": { "memory": { "unit": "MB", "max_ram": 3537.494016, "max_vram": 14564.72064, "max_reserved": 14071.88992, "max_allocated": 11367.31392 }, "latency": { "unit": "s", "mean": 0.4472763897439709, "stdev": 0.0047645116545721, "values": [ 0.4695675048828125, 0.44648654174804686, 0.44618341064453126, 0.4461240234375, 0.446529541015625, 0.4470425720214844, 0.44600115966796877, 0.44631143188476563, 0.4458629150390625, 0.4460113830566406, 0.4461997985839844, 0.4461055908203125, 0.44638311767578126, 0.44606976318359376, 0.44569393920898437, 0.4462243957519531, 0.44664935302734377, 0.4462377014160156, 0.44663296508789063, 0.4456673278808594, 0.44621517944335937, 0.4461690979003906, 0.4469882507324219 ] }, "throughput": { "unit": "tokens/s", "value": 1252.022268200998 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3539.29216, "max_vram": 14508.097536, "max_reserved": 14015.266816, "max_allocated": 11559.135232 }, "latency": { "unit": "s", "mean": 29.939338134765624, "stdev": 0, "values": [ 29.939338134765624 ] }, "throughput": { "unit": "tokens/s", "value": 115.7340213869469 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.3024175569168245, "stdev": 0.006495047756518274, "values": [ 0.3167109069824219, 0.31577908325195314, 0.3155199890136719, 0.31530496215820314, 0.31362252807617186, 0.31696792602539064, 0.31384576416015625, 0.31296615600585936, 0.3133685607910156, 0.31271627807617186, 0.311510009765625, 0.31146701049804687, 0.3107440490722656, 0.3104020385742188, 0.31054949951171873, 0.30999551391601565, 0.3091507263183594, 0.3089141845703125, 0.30916915893554686, 0.3088988037109375, 0.3080447998046875, 0.30975588989257813, 0.3072440185546875, 0.30704129028320315, 0.3072491455078125, 0.30681805419921876, 0.3068426208496094, 0.3062528076171875, 0.3066050415039063, 0.3056844787597656, 0.3046492309570312, 0.30730548095703125, 0.30522265625, 0.3050281066894531, 0.305033203125, 0.3047301025390625, 0.3045478515625, 0.3045509033203125, 0.3057029113769531, 0.3038760986328125, 0.30308966064453124, 0.303994873046875, 0.3024803771972656, 0.30274252319335937, 0.3025059814453125, 0.30242303466796877, 0.3026882629394531, 0.30180352783203124, 0.30188235473632813, 0.3011389465332031, 0.3005388793945313, 0.3004610595703125, 0.30039654541015626, 0.29937765502929686, 0.3015362548828125, 0.29953536987304685, 0.2986639404296875, 0.29938278198242185, 0.298392578125, 0.2982430725097656, 0.29859329223632813, 0.2981652526855469, 0.29810891723632815, 0.29872332763671877, 0.29826150512695315, 0.29788363647460936, 0.2971463623046875, 0.2972651672363281, 0.29730508422851565, 0.2972364807128906, 0.29666302490234375, 0.30092901611328127, 0.2961336364746094, 0.29574554443359374, 0.29620736694335936, 0.2961448974609375, 0.29597183227539064, 0.2953799743652344, 0.29584384155273435, 0.29593701171875, 0.29542706298828125, 0.2955735168457031, 0.2946253051757812, 0.2954874572753906, 0.29425869750976563, 0.2946621398925781, 0.2944389038085938, 0.2949775390625, 0.2961080322265625, 0.29468057250976565, 0.2936627197265625, 0.29617459106445315, 0.2940631103515625, 0.2944245910644531, 0.2944296875, 0.29573223876953125, 0.29399038696289065, 0.294677490234375, 0.2972333984375 ] }, "throughput": { "unit": "tokens/s", "value": 115.7340213869469 }, "energy": null, "efficiency": null } }