// This file aims to test a scenario where 80 users send 3 messages to ONE model (pessimistic).
import http from 'k6/http';
import { sleep, check } from 'k6';
import { SharedArray } from 'k6/data';
// Load the chat prompts once per test run; SharedArray stores a single
// read-only copy that all 80 VUs share instead of 80 duplicates.
const message_examples = new SharedArray('chat messages', function () {
  const raw = open('./message_examples.txt');
  const lines = [];
  // Keep one prompt per non-blank line, stripped of surrounding whitespace
  // (this also drops stray '\r' on Windows-style line endings).
  for (const line of raw.split('\n')) {
    const trimmed = line.trim();
    if (trimmed.length > 0) {
      lines.push(trimmed);
    }
  }
  return lines;
});
// k6 test configuration: a single spike scenario in which every virtual user
// executes the default function exactly once (80 users total).
export const options = {
  scenarios: {
    my_spike_test: {
      // 'per-vu-iterations': each VU runs a fixed number of iterations,
      // rather than looping for a fixed duration.
      executor: 'per-vu-iterations',
      vus: 80, // 80 total users
      iterations: 1, // Each user runs the function exactly once.
    },
  },
};
// Entry point: each VU simulates one user holding a 3-message conversation
// with the model behind __ENV.URL.
export default function () {
  // Each VU must wait a random time period (0-10s) to prevent them from
  // sending their messages at the exact same time.
  sleep(Math.random() * 10);

  const url = __ENV.URL;

  // Each VU sends 3 messages.
  for (let i = 0; i < 3; i++) {
    // Give every VU its own 3-prompt slice of the message pool, wrapping
    // around if the file contains fewer than vus * 3 lines.
    const messageIndex = ((__VU - 1) * 3 + i) % message_examples.length;
    const userMessage = message_examples[messageIndex];

    const payload = {
      user_id: `VU${__VU}`,
      session_id: `VU${__VU}`,
      conversation_id: `VU${__VU}`,
      human_message: userMessage,
      model_type: __ENV.MODEL_TYPE,
      consent: true,
      age_group: "0-18",
      gender: "M",
      roles: ["other"],
      participant_id: `VU${__VU}`,
      lang: "en"
    };
    const params = { headers: { 'Content-Type': 'application/json' } };

    const res = http.post(url + "/chat", JSON.stringify(payload), params);
    check(res, { 'status is 200': (r) => r.status === 200 });

    let reply = '';
    if (res.status === 200) {
      // k6 does not support streaming response bodies. It waits for the entire response until
      // the stream is 'done'. Therefore, we do not need to read the chunks one by one.
      let data;
      try {
        data = res.json();
      } catch (error) {
        // However, if the response contains streamed data, it is not in JSON format.
        // We would have to read the body to access that data.
        data = res.body;
      }
      // res.json() may legally return null (body "null") or a non-object;
      // guard the property access so a malformed reply can't crash the VU.
      reply = (data && data.reply) || 'no_reply';
    } else {
      console.error(res.status);
      console.error(res.body);
    }

    // Simulating reading time, thinking time and writing time.
    // Simulate reading speed: ~200ms per word in the reply + 2s thinking time
    const readingTime = (reply.split(' ').length * 0.2) + 2;
    // Cap it so it doesn't wait forever, but add some randomness (jitter)
    const finalSleep = Math.min(readingTime, 15) + (Math.random() * 3);
    sleep(finalSleep);
  }
}
// TEST RESULT ANALYSIS
// CHAMP
// The bottleneck associated with CHAMP's performance is the HuggingFace Space hardware.
// CHAMP actually requires a lot of computing power, because of the FAISS retrieval system.
// Using a T4 GPU for accelerated retrieval has significantly reduced the average request duration, from 11.75s
// to 2.04s. On the basic free tier, the CPU utilization stays at 99-100%.
// The performance of CHAMP could be improved by selecting a more powerful GPU (such as L4 or A10G)
// or by running a simpler vector search algorithm. The current algorithm (maximal marginal relevance)
// optimizes for similarity to query and diversity among selected documents. A basic similarity search
// should be faster to run. However, 2.04s is an acceptable performance.
// Google (Gemini)
// The average request duration is 2.46s with a maximum of 6.26s. This level of performance is acceptable.
// OpenAI (GPT-5-mini)
// Previous tests used GPT-5-nano. However, this model had strict rate limits and tests would exceed them
// which caused them to fail. The model was switched to GPT-5-mini.
// The performance is quite poor, with an average request duration of 20.44s and a maximum duration of 50.35s.
// The bottleneck lies within the OpenAI API, which leaves us with little room for optimization.
// RAW TEST RESULTS
// CHAMP (GROK) - BASIC CPU - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 2.409599/s
// checks_succeeded...: 100.00% 240 out of 240
// checks_failed......: 0.00% 0 out of 240
// ✓ status is 200
// HTTP
// http_req_duration..............: avg=11.75s min=74.68ms med=11.66s max=20.75s p(90)=15.8s p(95)=17.43s
// { expected_response:true }...: avg=11.75s min=74.68ms med=11.66s max=20.75s p(90)=15.8s p(95)=17.43s
// http_req_failed................: 0.00% 0 out of 240
// http_reqs......................: 240 2.409599/s
// EXECUTION
// iteration_duration.............: avg=1m25s min=1m0s med=1m26s max=1m39s p(90)=1m37s p(95)=1m38s
// iterations.....................: 80 0.8032/s
// vus............................: 2 min=2 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 546 kB 5.5 kB/s
// data_sent......................: 230 kB 2.3 kB/s
// running (01m39.6s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 01m39.6s/10m0s 80/80 iters, 1 per VU
// CHAMP (GROK) - T4 small - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 2.153089/s
// checks_succeeded...: 99.58% 239 out of 240
// checks_failed......: 0.41% 1 out of 240
// ✗ status is 200
// ↳ 99% — ✓ 239 / ✗ 1
// HTTP
// http_req_duration..............: avg=2.28s min=448.72ms med=1.94s max=1m0s p(90)=3.54s p(95)=3.87s
// { expected_response:true }...: avg=2.04s min=448.72ms med=1.94s max=5.01s p(90)=3.5s p(95)=3.86s
// http_req_failed................: 0.41% 1 out of 240
// http_reqs......................: 240 2.153089/s
// EXECUTION
// iteration_duration.............: avg=55.82s min=32.56s med=56.91s max=1m51s p(90)=1m4s p(95)=1m5s
// iterations.....................: 80 0.717696/s
// vus............................: 1 min=1 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 543 kB 4.9 kB/s
// data_sent......................: 230 kB 2.1 kB/s
// running (01m51.5s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 01m51.5s/10m0s 80/80 iters, 1 per VU
// GEMINI (conservative) - T4 small - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 3.343352/s
// checks_succeeded...: 100.00% 240 out of 240
// checks_failed......: 0.00% 0 out of 240
// ✓ status is 200
// HTTP
// http_req_duration..............: avg=2.46s min=672.91ms med=2.1s max=6.26s p(90)=4.25s p(95)=5.02s
// { expected_response:true }...: avg=2.46s min=672.91ms med=2.1s max=6.26s p(90)=4.25s p(95)=5.02s
// http_req_failed................: 0.00% 0 out of 240
// http_reqs......................: 240 3.343352/s
// EXECUTION
// iteration_duration.............: avg=1m2s min=51.14s med=1m2s max=1m11s p(90)=1m9s p(95)=1m10s
// iterations.....................: 80 1.114451/s
// vus............................: 4 min=4 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 935 kB 13 kB/s
// data_sent......................: 236 kB 3.3 kB/s
// running (01m11.8s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 01m11.8s/10m0s 80/80 iters, 1 per VU
// GPT-5-mini - T4 small - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 1.971286/s
// checks_succeeded...: 100.00% 240 out of 240
// checks_failed......: 0.00% 0 out of 240
// ✓ status is 200
// HTTP
// http_req_duration..............: avg=20.44s min=9.48s med=20.02s max=50.35s p(90)=27.07s p(95)=29.45s
// { expected_response:true }...: avg=20.44s min=9.48s med=20.02s max=50.35s p(90)=27.07s p(95)=29.45s
// http_req_failed................: 0.00% 0 out of 240
// http_reqs......................: 240 1.971286/s
// EXECUTION
// iteration_duration.............: avg=1m17s min=56.02s med=1m16s max=2m1s p(90)=1m31s p(95)=1m34s
// iterations.....................: 80 0.657095/s
// vus............................: 1 min=1 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 4.9 MB 41 kB/s
// data_sent......................: 233 kB 1.9 kB/s
// running (02m01.7s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 02m01.7s/10m0s 80/80 iters, 1 per VU