// This file aims to test a scenario where 80 users send 3 messages to ONE model (pessimistic).
import http from 'k6/http';
import { sleep, check } from 'k6';
import { SharedArray } from 'k6/data';

// Example chat messages, loaded once and shared read-only across all VUs
// (SharedArray avoids duplicating the file contents in every VU's memory).
const message_examples = new SharedArray('chat messages', function () {
  const f = open('./message_examples.txt');
  // .split('\n') creates an array where each line is one element
  // .filter removes empty lines at the end of the file
  return f.split('\n').map((line) => line.trim()).filter((line) => line.length > 0);
});

export const options = {
  scenarios: {
    my_spike_test: {
      executor: 'per-vu-iterations',
      vus: 80, // 80 total users
      iterations: 1, // Each user runs the function exactly once.
    },
  },
};

export default function () {
  // Each VU must wait a random time period to prevent them from
  // sending their messages at the exact same time.
  sleep(Math.random() * 10);

  const url = __ENV.URL; // fixed: was missing a semicolon (relied on ASI)

  // Each VU sends 3 messages
  for (let i = 0; i < 3; i++) {
    // Deterministic, non-overlapping message selection per VU;
    // wraps around if the file has fewer than 80 * 3 messages.
    const messageIndex = ((__VU - 1) * 3 + i) % message_examples.length;
    const userMessage = message_examples[messageIndex];

    const payload = {
      user_id: `VU${__VU}`,
      session_id: `VU${__VU}`,
      conversation_id: `VU${__VU}`,
      human_message: userMessage,
      model_type: __ENV.MODEL_TYPE,
      consent: true,
      age_group: "0-18",
      gender: "M",
      roles: ["other"],
      participant_id: `VU${__VU}`,
      lang: "en"
    };
    const params = { headers: { 'Content-Type': 'application/json' } };

    const res = http.post(url + "/chat", JSON.stringify(payload), params);
    check(res, { 'status is 200': (r) => r.status === 200 }); // fixed: was missing a semicolon

    let reply = '';
    if (res.status === 200) {
      // k6 does not support streaming response bodies. It waits for the entire response until
      // the stream is 'done'. Therefore, we do not need to read the chunks one by one.
      let data = null;
      try {
        data = res.json();
      } catch (error) {
        // However, if the response contains streamed data, it is not in JSON format.
        // We would have to read the body to access that data.
        data = res.body;
      }
      // When `data` is the raw string body (streamed response) it has no `.reply`,
      // so optional chaining yields undefined and we fall back to 'no_reply' —
      // same behavior as before, but now explicit instead of accidental.
      reply = data?.reply || 'no_reply';
    } else {
      console.error(res.status);
      console.error(res.body);
    }

    // Simulating reading time, thinking time and writing time.
    // Simulate reading speed: ~200ms per word in the reply + 2s thinking time
    const readingTime = (reply.split(' ').length * 0.2) + 2;
    // Cap it so it doesn't wait forever, but add some randomness (jitter)
    const finalSleep = Math.min(readingTime, 15) + (Math.random() * 3);
    sleep(finalSleep);
  }
}

// TEST RESULT ANALYSIS

// CHAMP
// The bottleneck associated with CHAMP's performance is the HuggingFace Space hardware.
// CHAMP actually requires a lot of computing power, because of the FAISS retrieval system.
// Using a T4 GPU for accelerated retrieval has significantly reduced the average request duration, from 11.75s
// to 2.04s. On the basic free tier, the CPU utilization stays at 99-100%.
// The performance of CHAMP could be improved by selecting a more powerful GPU (such as L4 or A10G)
// or by running a simpler vector search algorithm. The current algorithm (maximal marginal relevance)
// optimizes for similarity to query and diversity among selected documents. A basic similarity search
// should be faster to run. However, 2.04s is an acceptable performance.

// Google (Gemini)
// The average request duration is 2.46s with a maximum of 6.26s. This level of performance is acceptable.

// OpenAI (GPT-5-mini)
// Previous tests used GPT-5-nano. However, this model had strict rate limits and tests would exceed them,
// which caused them to fail. The model was switched to GPT-5-mini.
// The performance is quite poor, with an average request duration of 20.44s and a maximum duration of 50.35s.
// The bottleneck lies within the OpenAI API, which leaves us with little room for optimization.
// RAW TEST RESULTS

// CHAMP (GROK) - BASIC CPU - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 2.409599/s
// checks_succeeded...: 100.00% 240 out of 240
// checks_failed......: 0.00% 0 out of 240
// ✓ status is 200
// HTTP
// http_req_duration..............: avg=11.75s min=74.68ms med=11.66s max=20.75s p(90)=15.8s p(95)=17.43s
// { expected_response:true }...: avg=11.75s min=74.68ms med=11.66s max=20.75s p(90)=15.8s p(95)=17.43s
// http_req_failed................: 0.00% 0 out of 240
// http_reqs......................: 240 2.409599/s
// EXECUTION
// iteration_duration.............: avg=1m25s min=1m0s med=1m26s max=1m39s p(90)=1m37s p(95)=1m38s
// iterations.....................: 80 0.8032/s
// vus............................: 2 min=2 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 546 kB 5.5 kB/s
// data_sent......................: 230 kB 2.3 kB/s
// running (01m39.6s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 01m39.6s/10m0s 80/80 iters, 1 per VU

// CHAMP (GROK) - T4 small - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 2.153089/s
// checks_succeeded...: 99.58% 239 out of 240
// checks_failed......: 0.41% 1 out of 240
// ✗ status is 200
// ↳ 99% — ✓ 239 / ✗ 1
// HTTP
// http_req_duration..............: avg=2.28s min=448.72ms med=1.94s max=1m0s p(90)=3.54s p(95)=3.87s
// { expected_response:true }...: avg=2.04s min=448.72ms med=1.94s max=5.01s p(90)=3.5s p(95)=3.86s
// http_req_failed................: 0.41% 1 out of 240
// http_reqs......................: 240 2.153089/s
// EXECUTION
// iteration_duration.............: avg=55.82s min=32.56s med=56.91s max=1m51s p(90)=1m4s p(95)=1m5s
// iterations.....................: 80 0.717696/s
// vus............................: 1 min=1 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 543 kB 4.9 kB/s
// data_sent......................: 230 kB 2.1 kB/s
// running (01m51.5s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 01m51.5s/10m0s 80/80 iters, 1 per VU

// GEMINI (conservative) - T4 small - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 3.343352/s
// checks_succeeded...: 100.00% 240 out of 240
// checks_failed......: 0.00% 0 out of 240
// ✓ status is 200
// HTTP
// http_req_duration..............: avg=2.46s min=672.91ms med=2.1s max=6.26s p(90)=4.25s p(95)=5.02s
// { expected_response:true }...: avg=2.46s min=672.91ms med=2.1s max=6.26s p(90)=4.25s p(95)=5.02s
// http_req_failed................: 0.00% 0 out of 240
// http_reqs......................: 240 3.343352/s
// EXECUTION
// iteration_duration.............: avg=1m2s min=51.14s med=1m2s max=1m11s p(90)=1m9s p(95)=1m10s
// iterations.....................: 80 1.114451/s
// vus............................: 4 min=4 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 935 kB 13 kB/s
// data_sent......................: 236 kB 3.3 kB/s
// running (01m11.8s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 01m11.8s/10m0s 80/80 iters, 1 per VU

// GPT-5-mini - T4 small - 80VUs
// █ TOTAL RESULTS
// checks_total.......: 240 1.971286/s
// checks_succeeded...: 100.00% 240 out of 240
// checks_failed......: 0.00% 0 out of 240
// ✓ status is 200
// HTTP
// http_req_duration..............: avg=20.44s min=9.48s med=20.02s max=50.35s p(90)=27.07s p(95)=29.45s
// { expected_response:true }...: avg=20.44s min=9.48s med=20.02s max=50.35s p(90)=27.07s p(95)=29.45s
// http_req_failed................: 0.00% 0 out of 240
// http_reqs......................: 240 1.971286/s
// EXECUTION
// iteration_duration.............: avg=1m17s min=56.02s med=1m16s max=2m1s p(90)=1m31s p(95)=1m34s
// iterations.....................: 80 0.657095/s
// vus............................: 1 min=1 max=80
// vus_max........................: 80 min=80 max=80
// NETWORK
// data_received..................: 4.9 MB 41 kB/s
// data_sent......................: 233 kB 1.9 kB/s
// running (02m01.7s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs 02m01.7s/10m0s 80/80 iters, 1 per VU