File size: 5,949 Bytes
523d850 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3537.494016,
"max_vram": 14564.72064,
"max_reserved": 14071.88992,
"max_allocated": 11367.31392
},
"latency": {
"unit": "s",
"mean": 0.4472763897439709,
"stdev": 0.0047645116545721,
"values": [
0.4695675048828125,
0.44648654174804686,
0.44618341064453126,
0.4461240234375,
0.446529541015625,
0.4470425720214844,
0.44600115966796877,
0.44631143188476563,
0.4458629150390625,
0.4460113830566406,
0.4461997985839844,
0.4461055908203125,
0.44638311767578126,
0.44606976318359376,
0.44569393920898437,
0.4462243957519531,
0.44664935302734377,
0.4462377014160156,
0.44663296508789063,
0.4456673278808594,
0.44621517944335937,
0.4461690979003906,
0.4469882507324219
]
},
"throughput": {
"unit": "tokens/s",
"value": 1252.022268200998
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3539.29216,
"max_vram": 14508.097536,
"max_reserved": 14015.266816,
"max_allocated": 11559.135232
},
"latency": {
"unit": "s",
"mean": 29.939338134765624,
"stdev": 0,
"values": [
29.939338134765624
]
},
"throughput": {
"unit": "tokens/s",
"value": 115.7340213869469
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.3024175569168245,
"stdev": 0.006495047756518274,
"values": [
0.3167109069824219,
0.31577908325195314,
0.3155199890136719,
0.31530496215820314,
0.31362252807617186,
0.31696792602539064,
0.31384576416015625,
0.31296615600585936,
0.3133685607910156,
0.31271627807617186,
0.311510009765625,
0.31146701049804687,
0.3107440490722656,
0.3104020385742188,
0.31054949951171873,
0.30999551391601565,
0.3091507263183594,
0.3089141845703125,
0.30916915893554686,
0.3088988037109375,
0.3080447998046875,
0.30975588989257813,
0.3072440185546875,
0.30704129028320315,
0.3072491455078125,
0.30681805419921876,
0.3068426208496094,
0.3062528076171875,
0.3066050415039063,
0.3056844787597656,
0.3046492309570312,
0.30730548095703125,
0.30522265625,
0.3050281066894531,
0.305033203125,
0.3047301025390625,
0.3045478515625,
0.3045509033203125,
0.3057029113769531,
0.3038760986328125,
0.30308966064453124,
0.303994873046875,
0.3024803771972656,
0.30274252319335937,
0.3025059814453125,
0.30242303466796877,
0.3026882629394531,
0.30180352783203124,
0.30188235473632813,
0.3011389465332031,
0.3005388793945313,
0.3004610595703125,
0.30039654541015626,
0.29937765502929686,
0.3015362548828125,
0.29953536987304685,
0.2986639404296875,
0.29938278198242185,
0.298392578125,
0.2982430725097656,
0.29859329223632813,
0.2981652526855469,
0.29810891723632815,
0.29872332763671877,
0.29826150512695315,
0.29788363647460936,
0.2971463623046875,
0.2972651672363281,
0.29730508422851565,
0.2972364807128906,
0.29666302490234375,
0.30092901611328127,
0.2961336364746094,
0.29574554443359374,
0.29620736694335936,
0.2961448974609375,
0.29597183227539064,
0.2953799743652344,
0.29584384155273435,
0.29593701171875,
0.29542706298828125,
0.2955735168457031,
0.2946253051757812,
0.2954874572753906,
0.29425869750976563,
0.2946621398925781,
0.2944389038085938,
0.2949775390625,
0.2961080322265625,
0.29468057250976565,
0.2936627197265625,
0.29617459106445315,
0.2940631103515625,
0.2944245910644531,
0.2944296875,
0.29573223876953125,
0.29399038696289065,
0.294677490234375,
0.2972333984375
]
},
"throughput": {
"unit": "tokens/s",
"value": 115.7340213869469
},
"energy": null,
"efficiency": null
}
} |