champ-chatbot

Paused

File size: 9,154 Bytes

// This k6 script tests a pessimistic scenario in which 80 users each send 3 messages to ONE model.

import http from 'k6/http';
import { sleep, check } from 'k6';
import { SharedArray } from 'k6/data';

// Example chat messages read from ./message_examples.txt and exposed through a k6 SharedArray.
const message_examples = new SharedArray('chat messages', () => {
  const rawText = open('./message_examples.txt');
  const messages = [];

  // One message per line. Surrounding whitespace is trimmed and lines that
  // end up empty (anywhere in the file) are skipped.
  for (const rawLine of rawText.split('\n')) {
    const line = rawLine.trim();
    if (line.length > 0) {
      messages.push(line);
    }
  }

  return messages;
});

// Settings for the single scenario: 80 VUs, each running the default function exactly once.
const spikeScenario = {
  executor: 'per-vu-iterations',
  vus: 80,
  iterations: 1,
};

// k6 test options: registers the scenario above under the name `my_spike_test`.
export const options = {
  scenarios: { my_spike_test: spikeScenario },
};

// Number of chat messages each virtual user (VU) sends during its single iteration.
const MESSAGES_PER_VU = 3;

// Placeholder used when a successful response carries no usable reply text.
const NO_REPLY = 'no_reply';

/**
 * Extracts the chatbot's reply text from a successful /chat response.
 *
 * k6 does not support streaming response bodies: it waits until the stream is
 * done, so a streamed reply is available as the complete (non-JSON) body.
 *
 * @param {object} res - Response object returned by `http.post`.
 * @returns {string} The reply text, or 'no_reply' when none can be found.
 */
function extractReply(res) {
  let data;
  try {
    data = res.json();
  } catch (error) {
    // Deliberate fallback: a streamed response is not JSON, so the reply is
    // the raw body itself.
    data = res.body;
  }

  if (typeof data === 'string') {
    return data.length > 0 ? data : NO_REPLY;
  }

  const hasReplyText =
    data !== null &&
    typeof data === 'object' &&
    typeof data.reply === 'string' &&
    data.reply.length > 0;
  return hasReplyText ? data.reply : NO_REPLY;
}

/**
 * Computes how long a simulated user pauses after receiving a reply.
 *
 * @param {string} reply - Reply text ('' when the request failed).
 * @returns {number} Pause in seconds.
 */
function computeThinkTime(reply) {
  // Reading speed: ~200ms per word in the reply + 2s thinking time.
  const readingTime = reply.split(' ').length * 0.2 + 2;
  // Cap it so it doesn't wait forever, but add some randomness (jitter).
  return Math.min(readingTime, 15) + Math.random() * 3;
}

/**
 * k6 entry point: one virtual user sends MESSAGES_PER_VU chat messages to the
 * `/chat` endpoint of the server given by the URL environment variable,
 * pausing between messages to imitate reading and typing.
 *
 * Reads `__ENV.URL` (required) and `__ENV.MODEL_TYPE`.
 *
 * @throws {Error} If URL is not set or no example messages were loaded.
 */
export default function runChatSession() {
  const baseUrl = __ENV.URL;
  if (!baseUrl) {
    // Without this guard the requests would silently go to "undefined/chat".
    throw new Error('The URL environment variable must be set (e.g. k6 run -e URL=https://host ...)');
  }
  if (message_examples.length === 0) {
    // Guards the modulo below: an empty pool would yield a NaN index.
    throw new Error('No example messages were loaded from message_examples.txt');
  }

  // Tolerate a trailing slash in URL so the endpoint never becomes "//chat".
  const chatUrl = `${baseUrl.replace(/\/+$/, '')}/chat`;
  const vuTag = `VU${__VU}`;
  const params = { headers: { 'Content-Type': 'application/json' } };

  // Each VU must wait a random time period to prevent them from
  // sending their messages at the exact same time.
  sleep(Math.random() * 10);

  for (let i = 0; i < MESSAGES_PER_VU; i++) {
    // Consecutive VUs take consecutive slices of the message pool, wrapping around.
    const messageIndex = ((__VU - 1) * MESSAGES_PER_VU + i) % message_examples.length;
    const userMessage = message_examples[messageIndex];

    const payload = {
      user_id: vuTag,
      session_id: vuTag,
      conversation_id: vuTag,
      human_message: userMessage,
      model_type: __ENV.MODEL_TYPE,
      consent: true,
      age_group: "0-18",
      gender: "M",
      roles: ["other"],
      participant_id: vuTag,
      lang: "en",
    };

    const res = http.post(chatUrl, JSON.stringify(payload), params);

    check(res, { 'status is 200': (r) => r.status === 200 });

    let reply = '';
    if (res.status === 200) {
      reply = extractReply(res);
    } else {
      console.error(res.status);
      console.error(res.body);
    }

    // Simulating reading time, thinking time and writing time.
    sleep(computeThinkTime(reply));
  }
}

// TEST RESULT ANALYSIS

// CHAMP
// The bottleneck associated with CHAMP's performance is the HuggingFace Space hardware.
// CHAMP requires a lot of computing power because of the FAISS retrieval system.
// Using a T4 GPU for accelerated retrieval significantly reduced the average duration of successful
// requests, from 11.75s to 2.04s. (One of the 240 requests in the T4 run failed after 1m0s, which raises
// the overall average to 2.28s.) On the basic free tier, the CPU utilization stays at 99-100%.
// The performance of CHAMP could be improved further by selecting a more powerful GPU (such as L4 or A10G)
// or by running a simpler vector search algorithm. The current algorithm (maximal marginal relevance)
// optimizes for both similarity to the query and diversity among the selected documents, so a basic
// similarity search should be faster to run. However, 2.04s is acceptable performance.

// Google (Gemini)
// The average request duration is 2.46s with a maximum of 6.26s. This level of performance is acceptable.

// OpenAI (GPT-5-mini)
// Previous tests used GPT-5-nano. However, that model has strict rate limits; the tests exceeded them,
// which caused the tests to fail, so the model was switched to GPT-5-mini.
// The performance is quite poor, with an average request duration of 20.44s and a maximum duration of 50.35s.
// The bottleneck lies within the OpenAI API, which leaves us with little room for optimization.


// RAW TEST RESULTS

// CHAMP (GROK) - BASIC CPU - 80VUs
//   █ TOTAL RESULTS

//     checks_total.......: 240     2.409599/s
//     checks_succeeded...: 100.00% 240 out of 240
//     checks_failed......: 0.00%   0 out of 240

//     ✓ status is 200

//     HTTP
//     http_req_duration..............: avg=11.75s min=74.68ms med=11.66s max=20.75s p(90)=15.8s p(95)=17.43s
//       { expected_response:true }...: avg=11.75s min=74.68ms med=11.66s max=20.75s p(90)=15.8s p(95)=17.43s
//     http_req_failed................: 0.00%  0 out of 240
//     http_reqs......................: 240    2.409599/s

//     EXECUTION
//     iteration_duration.............: avg=1m25s  min=1m0s    med=1m26s  max=1m39s  p(90)=1m37s p(95)=1m38s
//     iterations.....................: 80     0.8032/s
//     vus............................: 2      min=2        max=80
//     vus_max........................: 80     min=80       max=80

//     NETWORK
//     data_received..................: 546 kB 5.5 kB/s
//     data_sent......................: 230 kB 2.3 kB/s

// running (01m39.6s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs  01m39.6s/10m0s  80/80 iters, 1 per VU



// CHAMP (GROK) - T4 small - 80VUs
//   █ TOTAL RESULTS

//     checks_total.......: 240    2.153089/s
//     checks_succeeded...: 99.58% 239 out of 240
//     checks_failed......: 0.41%  1 out of 240

//     ✗ status is 200
//       ↳  99% — ✓ 239 / ✗ 1

//     HTTP
//     http_req_duration..............: avg=2.28s  min=448.72ms med=1.94s  max=1m0s  p(90)=3.54s p(95)=3.87s
//       { expected_response:true }...: avg=2.04s  min=448.72ms med=1.94s  max=5.01s p(90)=3.5s  p(95)=3.86s
//     http_req_failed................: 0.41%  1 out of 240
//     http_reqs......................: 240    2.153089/s

//     EXECUTION
//     iteration_duration.............: avg=55.82s min=32.56s   med=56.91s max=1m51s p(90)=1m4s  p(95)=1m5s
//     iterations.....................: 80     0.717696/s
//     vus............................: 1      min=1        max=80
//     vus_max........................: 80     min=80       max=80

//     NETWORK
//     data_received..................: 543 kB 4.9 kB/s
//     data_sent......................: 230 kB 2.1 kB/s

// running (01m51.5s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs  01m51.5s/10m0s  80/80 iters, 1 per VU



// GEMINI (conservative) - T4 small - 80VUs
//   █ TOTAL RESULTS

//     checks_total.......: 240     3.343352/s
//     checks_succeeded...: 100.00% 240 out of 240
//     checks_failed......: 0.00%   0 out of 240

//     ✓ status is 200

//     HTTP
//     http_req_duration..............: avg=2.46s min=672.91ms med=2.1s max=6.26s p(90)=4.25s p(95)=5.02s
//       { expected_response:true }...: avg=2.46s min=672.91ms med=2.1s max=6.26s p(90)=4.25s p(95)=5.02s
//     http_req_failed................: 0.00%  0 out of 240
//     http_reqs......................: 240    3.343352/s

//     EXECUTION
//     iteration_duration.............: avg=1m2s  min=51.14s   med=1m2s max=1m11s p(90)=1m9s  p(95)=1m10s
//     iterations.....................: 80     1.114451/s
//     vus............................: 4      min=4        max=80
//     vus_max........................: 80     min=80       max=80

//     NETWORK
//     data_received..................: 935 kB 13 kB/s
//     data_sent......................: 236 kB 3.3 kB/s

// running (01m11.8s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs  01m11.8s/10m0s  80/80 iters, 1 per VU



// GPT-5-mini - T4 small - 80VUs
//   █ TOTAL RESULTS

//     checks_total.......: 240     1.971286/s
//     checks_succeeded...: 100.00% 240 out of 240
//     checks_failed......: 0.00%   0 out of 240

//     ✓ status is 200

//     HTTP
//     http_req_duration..............: avg=20.44s min=9.48s  med=20.02s max=50.35s p(90)=27.07s p(95)=29.45s
//       { expected_response:true }...: avg=20.44s min=9.48s  med=20.02s max=50.35s p(90)=27.07s p(95)=29.45s
//     http_req_failed................: 0.00%  0 out of 240
//     http_reqs......................: 240    1.971286/s

//     EXECUTION
//     iteration_duration.............: avg=1m17s  min=56.02s med=1m16s  max=2m1s   p(90)=1m31s  p(95)=1m34s
//     iterations.....................: 80     0.657095/s
//     vus............................: 1      min=1        max=80
//     vus_max........................: 80     min=80       max=80

//     NETWORK
//     data_received..................: 4.9 MB 41 kB/s
//     data_sent......................: 233 kB 1.9 kB/s

// running (02m01.7s), 00/80 VUs, 80 complete and 0 interrupted iterations
// my_spike_test ✓ [======================================] 80 VUs  02m01.7s/10m0s  80/80 iters, 1 per VU