qyle commited on
Commit
944b1b5
·
verified ·
1 Parent(s): 044875d

pii removal file upload

Browse files
.gitignore CHANGED
@@ -3,4 +3,5 @@ __pycache__/
3
  .venv/
4
  venv/
5
  .env
6
- .venv*/
 
 
3
  .venv/
4
  venv/
5
  .env
6
+ .venv*/
7
+ conversations.json
champ/agent.py CHANGED
@@ -6,12 +6,16 @@ from langchain.agents.middleware import dynamic_prompt, ModelRequest
6
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
7
  from langchain_community.vectorstores import FAISS as LCFAISS
8
 
 
 
9
  from classes.prompt_sanitizer import PromptSanitizer
10
 
11
  # from classes.guardrail_manager import GuardrailManager
12
 
13
  from .prompts import CHAMP_SYSTEM_PROMPT_V4
14
 
 
 
15
 
16
  def _build_retrieval_query(messages) -> str:
17
  user_turns = []
@@ -33,17 +37,18 @@ def make_prompt_with_context(
33
  ):
34
  @dynamic_prompt
35
  def prompt_with_context(request: ModelRequest) -> str:
36
- retrieval_query = _build_retrieval_query(request.state["messages"])
37
- fetch_k = 20
38
- try:
39
- retrieved_docs = vector_store.max_marginal_relevance_search(
40
- retrieval_query,
41
- k=k,
42
- fetch_k=fetch_k,
43
- lambda_mult=0.5, # 0.0 = diverse, 1.0 = similar; 0.3–0.7 is typical
44
- )
45
- except Exception:
46
- retrieved_docs = vector_store.similarity_search(retrieval_query, k=k)
 
47
 
48
  seen = set()
49
  unique_docs = []
@@ -56,19 +61,18 @@ def make_prompt_with_context(
56
 
57
  docs_content = "\n\n".join(doc.page_content for doc in unique_docs)
58
 
59
- # guardrails = GuardrailManager(is_champ=True)
60
- # sanitized_docs = guardrails.sanitize(docs_content)
61
- # sanitized_last_query = guardrails.sanitize(retrieval_query)
62
-
63
- sanitizer = PromptSanitizer()
64
- sanitized_docs_content = sanitizer.sanitize(docs_content)
65
- sanitized_retrieval_query = sanitizer.sanitize(retrieval_query)
66
 
67
  language = "English" if lang == "en" else "French"
68
 
69
  return CHAMP_SYSTEM_PROMPT_V4.format(
70
  last_query=sanitized_retrieval_query,
71
- context=sanitized_docs_content,
72
  language=language,
73
  )
74
 
 
6
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
7
  from langchain_community.vectorstores import FAISS as LCFAISS
8
 
9
+ from opentelemetry import trace
10
+
11
  from classes.prompt_sanitizer import PromptSanitizer
12
 
13
  # from classes.guardrail_manager import GuardrailManager
14
 
15
  from .prompts import CHAMP_SYSTEM_PROMPT_V4
16
 
17
+ tracer = trace.get_tracer(__name__)
18
+
19
 
20
  def _build_retrieval_query(messages) -> str:
21
  user_turns = []
 
37
  ):
38
  @dynamic_prompt
39
  def prompt_with_context(request: ModelRequest) -> str:
40
+ with tracer.start_as_current_span("retrieving documents"):
41
+ retrieval_query = _build_retrieval_query(request.state["messages"])
42
+ fetch_k = 20
43
+ try:
44
+ retrieved_docs = vector_store.max_marginal_relevance_search(
45
+ retrieval_query,
46
+ k=k,
47
+ fetch_k=fetch_k,
48
+ lambda_mult=0.5, # 0.0 = diverse, 1.0 = similar; 0.3–0.7 is typical
49
+ )
50
+ except Exception:
51
+ retrieved_docs = vector_store.similarity_search(retrieval_query, k=k)
52
 
53
  seen = set()
54
  unique_docs = []
 
61
 
62
  docs_content = "\n\n".join(doc.page_content for doc in unique_docs)
63
 
64
+ # No need to sanitize the docs_content as the documents are sanitized
65
+ # when received at the file PUT endpoint.
66
+ with tracer.start_as_current_span("PromptSanitizer"):
67
+ sanitizer = PromptSanitizer()
68
+ with tracer.start_as_current_span("sanitize retrieval_query"):
69
+ sanitized_retrieval_query = sanitizer.sanitize(retrieval_query)
 
70
 
71
  language = "English" if lang == "en" else "French"
72
 
73
  return CHAMP_SYSTEM_PROMPT_V4.format(
74
  last_query=sanitized_retrieval_query,
75
+ context=docs_content,
76
  language=language,
77
  )
78
 
load_conversations.ipynb ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "fc07d69e",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import boto3\n",
11
+ "import json\n",
12
+ "import os\n",
13
+ "import pytz\n",
14
+ "\n",
15
+ "from boto3.dynamodb.conditions import Attr\n",
16
+ "from collections import defaultdict\n",
17
+ "from datetime import datetime\n",
18
+ "from decimal import Decimal\n",
19
+ "from dotenv import load_dotenv"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "id": "aa2cda42",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "load_dotenv()"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": null,
35
+ "id": "2a2214ff",
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "AWS_REGION = os.getenv(\"AWS_REGION\", \"us-east-1\")\n",
40
+ "AWS_ACCESS_KEY = os.getenv(\"AWS_ACCESS_KEY\", None)\n",
41
+ "AWS_SECRET_ACCESS_KEY = os.getenv(\"AWS_SECRET_ACCESS_KEY\", None)"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "id": "290b04ff",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "# 1. Initialize the DynamoDB resource\n",
52
+ "dynamodb = boto3.resource(\n",
53
+ " \"dynamodb\",\n",
54
+ " region_name=AWS_REGION,\n",
55
+ " aws_access_key_id=AWS_ACCESS_KEY,\n",
56
+ " aws_secret_access_key=AWS_SECRET_ACCESS_KEY,\n",
57
+ ")\n",
58
+ "client = dynamodb.meta.client"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "id": "81477e4d",
65
+ "metadata": {},
66
+ "outputs": [],
67
+ "source": [
68
+ "existing_tables = client.list_tables()[\"TableNames\"]\n",
69
+ "existing_tables"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": null,
75
+ "id": "a3426a87",
76
+ "metadata": {},
77
+ "outputs": [],
78
+ "source": [
79
+ "table = dynamodb.Table(\"chatbot-conversations\")\n",
80
+ "table"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "id": "3111db17",
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "def get_all_grouped_sessions(date_string):\n",
91
+ " grouped_data = defaultdict(list)\n",
92
+ "\n",
93
+ " # Scan the entire table\n",
94
+ " response = table.scan(FilterExpression=Attr(\"timestamp\").gte(date_string))\n",
95
+ " items = response.get(\"Items\", [])\n",
96
+ "\n",
97
+ " # Handle pagination if the table is large\n",
98
+ " while \"LastEvaluatedKey\" in response:\n",
99
+ " response = table.scan(\n",
100
+ " ExclusiveStartKey=response[\"LastEvaluatedKey\"],\n",
101
+ " FilterExpression=Attr(\"timestamp\").gte(date_string),\n",
102
+ " )\n",
103
+ " items.extend(response.get(\"Items\", []))\n",
104
+ "\n",
105
+ " # Grouping logic\n",
106
+ " for item in items:\n",
107
+ " sid = item[\"session_id\"]\n",
108
+ " grouped_data[sid].append(item)\n",
109
+ "\n",
110
+ " return dict(grouped_data)"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": null,
116
+ "id": "8b72d9ae",
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "local_timezone = pytz.timezone(\"America/Montreal\")\n",
121
+ "\n",
122
+ "# Date of the demo\n",
123
+ "# We want to extract every conversation since that date\n",
124
+ "local_date = datetime(2026, 2, 10, 0, 0, 0)\n",
125
+ "\n",
126
+ "localized_date = local_timezone.localize(local_date)\n",
127
+ "\n",
128
+ "utc_date = localized_date.astimezone(pytz.utc)\n",
129
+ "\n",
130
+ "# We format the date for dynamodb\n",
131
+ "utc_date_dynamodb = utc_date.strftime(\"%Y-%m-%dT%H:%M:%SZ\")"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": null,
137
+ "id": "a5444c0f",
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "sessions = get_all_grouped_sessions(utc_date_dynamodb)\n",
142
+ "sessions"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "id": "eb4bfda0",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "# Helper to handle DynamoDB Decimal types when saving the conversations\n",
153
+ "def decimal_default(obj):\n",
154
+ " if isinstance(obj, Decimal):\n",
155
+ " return float(obj) if obj % 1 > 0 else int(obj)\n",
156
+ " raise TypeError\n",
157
+ "\n",
158
+ "\n",
159
+ "with open(\"conversations.json\", \"w\", encoding=\"utf-8\") as f:\n",
160
+ " json.dump(\n",
161
+ " sessions,\n",
162
+ " f,\n",
163
+ " indent=4, # Makes the file human-readable (pretty-print)\n",
164
+ " ensure_ascii=True, # Allows special characters (like emojis or accents)\n",
165
+ " default=decimal_default, # Uses our helper for Decimals\n",
166
+ " )"
167
+ ]
168
+ },
169
+ {
170
+ "cell_type": "code",
171
+ "execution_count": null,
172
+ "id": "a53d80a5",
173
+ "metadata": {},
174
+ "outputs": [],
175
+ "source": [
176
+ "with open(\"conversations.json\", \"r\", encoding=\"utf-8\") as f:\n",
177
+ " conversations = json.load(f)"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "id": "55cf17e5",
184
+ "metadata": {},
185
+ "outputs": [],
186
+ "source": [
187
+ "conversations"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": null,
193
+ "id": "b03b5f25",
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "def filter_grouped_sessions(grouped_data, target_timestamp):\n",
198
+ " return {\n",
199
+ " sid: [item for item in items if item[\"timestamp\"] >= target_timestamp]\n",
200
+ " for sid, items in grouped_data.items()\n",
201
+ " # Optional: remove the session_id if the resulting list is empty\n",
202
+ " if any(item[\"timestamp\"] >= target_timestamp for item in items)\n",
203
+ " }"
204
+ ]
205
+ },
206
+ {
207
+ "cell_type": "code",
208
+ "execution_count": null,
209
+ "id": "42f7cc13",
210
+ "metadata": {},
211
+ "outputs": [],
212
+ "source": [
213
+ "local_date = datetime(2026, 2, 6, 14, 41, 0)\n",
214
+ "\n",
215
+ "localized_date = local_timezone.localize(local_date)\n",
216
+ "\n",
217
+ "utc_date = localized_date.astimezone(pytz.utc)\n",
218
+ "\n",
219
+ "# We format the date for dynamodb\n",
220
+ "utc_date_dynamodb = utc_date.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
221
+ "\n",
222
+ "filtered_conversations = filter_grouped_sessions(conversations, utc_date_dynamodb)\n",
223
+ "filtered_conversations"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": null,
229
+ "id": "0b229170",
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "problematic_vus = []\n",
234
+ "for vu, conv in filtered_conversations.items():\n",
235
+ " if any(message[\"data\"][\"reply\"] == \"\" for message in conv):\n",
236
+ " problematic_vus.append(vu)\n",
237
+ "\n",
238
+ "print(problematic_vus)\n",
239
+ "print(len(problematic_vus))"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "id": "5e450ab2",
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
249
+ "filtered_conversations[problematic_vus[7]]"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": null,
255
+ "id": "5ec5002b",
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": [
259
+ "for conv in filtered_conversations.values():\n",
260
+ " if any(\"error\" in message for message in conv):\n",
261
+ " print(conv)"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": null,
267
+ "id": "3b20b4f2",
268
+ "metadata": {},
269
+ "outputs": [],
270
+ "source": [
271
+ "conversations[\"VU78\"]"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "id": "67fad2b2",
278
+ "metadata": {},
279
+ "outputs": [],
280
+ "source": [
281
+ "def filter_comments(grouped_data):\n",
282
+ " return {\n",
283
+ " sid: filtered_items\n",
284
+ " for sid, items in grouped_data.items()\n",
285
+ " if (filtered_items := [item for item in items if \"comment\" in item[\"data\"]])\n",
286
+ " }"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": null,
292
+ "id": "4fae7474",
293
+ "metadata": {},
294
+ "outputs": [],
295
+ "source": [
296
+ "comments = filter_comments(conversations)\n",
297
+ "comments"
298
+ ]
299
+ }
300
+ ],
301
+ "metadata": {
302
+ "kernelspec": {
303
+ "display_name": ".venv (3.13.11)",
304
+ "language": "python",
305
+ "name": "python3"
306
+ },
307
+ "language_info": {
308
+ "codemirror_mode": {
309
+ "name": "ipython",
310
+ "version": 3
311
+ },
312
+ "file_extension": ".py",
313
+ "mimetype": "text/x-python",
314
+ "name": "python",
315
+ "nbconvert_exporter": "python",
316
+ "pygments_lexer": "ipython3",
317
+ "version": "3.13.11"
318
+ }
319
+ },
320
+ "nbformat": 4,
321
+ "nbformat_minor": 5
322
+ }
main.py CHANGED
@@ -11,11 +11,12 @@ from typing import AsyncGenerator, List, Literal, Tuple, Dict, Any
11
  from dotenv import load_dotenv
12
 
13
  from fastapi import FastAPI, File, Form, Request, BackgroundTasks, Response, UploadFile
14
- from fastapi.middleware.cors import CORSMiddleware
15
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
16
  from fastapi.staticfiles import StaticFiles
17
  from fastapi.templating import Jinja2Templates
18
 
 
 
19
  from champ.rag import (
20
  create_embedding_model,
21
  create_session_vector_store,
@@ -59,8 +60,6 @@ from champ.prompts import (
59
  )
60
  from champ.service import ChampService
61
 
62
- from pyinstrument import Profiler
63
-
64
  from helpers.file_helper import (
65
  extract_text_from_docx,
66
  extract_text_from_img,
@@ -70,6 +69,7 @@ from helpers.file_helper import (
70
  replace_spaces_in_filename,
71
  )
72
  from classes.session_document_store import SessionDocumentStore
 
73
 
74
  load_dotenv()
75
 
@@ -96,9 +96,6 @@ if GEMINI_API_KEY is None:
96
  "GEMINI_API_KEY is not set. "
97
  "Go to Space → Settings → Variables & secrets and add one."
98
  )
99
- ORIGIN = os.getenv("ORIGIN")
100
- if ORIGIN is None:
101
- ORIGIN = "http://localhost:8000"
102
 
103
  openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
104
  gemini_client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
@@ -209,30 +206,29 @@ def _call_gemini(model_id: str, msgs: list[dict], temperature: float) -> str:
209
  def call_llm(
210
  req: ChatRequest,
211
  ) -> AsyncGenerator[str, None] | Tuple[str, Dict[str, Any]]:
212
- if DEV:
213
- profiler = Profiler()
214
- profiler.start()
215
-
216
  session_id = req.session_id
217
 
 
 
218
  if req.model_type == "champ":
219
  session_documents = session_document_store.get_documents(session_id)
220
- vector_store = (
221
- base_vector_store
222
- if session_documents is None
223
- else create_session_vector_store(
224
- base_vector_store, embedding_model, session_documents
 
 
225
  )
226
- )
227
 
228
- champ = ChampService(vector_store=vector_store, lang=req.lang)
 
229
 
230
- msgs = convert_messages_langchain(req.messages)
231
- reply, triage_meta = champ.invoke(msgs)
232
 
233
- if DEV:
234
- profiler.stop()
235
- profiler.print()
236
 
237
  return reply, triage_meta
238
 
@@ -279,6 +275,8 @@ async def lifespan(app: FastAPI):
279
 
280
 
281
  app = FastAPI(lifespan=lifespan)
 
 
282
  app.mount("/static", StaticFiles(directory="static"), name="static")
283
  templates = Jinja2Templates(directory="templates")
284
 
@@ -288,6 +286,9 @@ async def home(request: Request):
288
  return templates.TemplateResponse("index.html", {"request": request})
289
 
290
 
 
 
 
291
  @app.post("/chat")
292
  async def chat_endpoint(payload: ChatRequest, background_tasks: BackgroundTasks):
293
  if not payload.messages:
@@ -300,7 +301,8 @@ async def chat_endpoint(payload: ChatRequest, background_tasks: BackgroundTasks)
300
 
301
  try:
302
  loop = asyncio.get_running_loop()
303
- result = await loop.run_in_executor(None, call_llm, payload)
 
304
 
305
  if isinstance(result, AsyncGenerator):
306
 
@@ -421,13 +423,16 @@ async def upload_file(
421
 
422
  _, extension = os.path.splitext(file_name)
423
  if extension not in SUPPORTED_FILE_EXTENSIONS:
 
424
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
425
 
426
  file_mime = file.headers.get("content-type")
427
  if file_mime is None:
 
428
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
429
 
430
  if file_mime not in SUPPORTED_FILE_TYPES:
 
431
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
432
 
433
  # Read in chunks to avoid RAM spikes
@@ -444,6 +449,7 @@ async def upload_file(
444
 
445
  file_mime = magic.from_buffer(file_content[:2048], mime=True)
446
  if file_mime not in SUPPORTED_FILE_TYPES:
 
447
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
448
 
449
  if file_mime == "application/pdf":
@@ -465,7 +471,10 @@ async def upload_file(
465
  if file_text is None:
466
  return Response(status_code=STATUS_CODE_INTERNAL_SERVER_ERROR)
467
 
468
- session_document_store.create_document(session_id, file_text, file_name)
 
 
 
469
  session_tracker.add_session(session_id)
470
 
471
  # Should the logging event be coupled to the LLM call instead of the API call?
 
11
  from dotenv import load_dotenv
12
 
13
  from fastapi import FastAPI, File, Form, Request, BackgroundTasks, Response, UploadFile
 
14
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
15
  from fastapi.staticfiles import StaticFiles
16
  from fastapi.templating import Jinja2Templates
17
 
18
+ from opentelemetry import trace
19
+
20
  from champ.rag import (
21
  create_embedding_model,
22
  create_session_vector_store,
 
60
  )
61
  from champ.service import ChampService
62
 
 
 
63
  from helpers.file_helper import (
64
  extract_text_from_docx,
65
  extract_text_from_img,
 
69
  replace_spaces_in_filename,
70
  )
71
  from classes.session_document_store import SessionDocumentStore
72
+ from telemetry import setup_telemetry
73
 
74
  load_dotenv()
75
 
 
96
  "GEMINI_API_KEY is not set. "
97
  "Go to Space → Settings → Variables & secrets and add one."
98
  )
 
 
 
99
 
100
  openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
101
  gemini_client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
 
206
  def call_llm(
207
  req: ChatRequest,
208
  ) -> AsyncGenerator[str, None] | Tuple[str, Dict[str, Any]]:
 
 
 
 
209
  session_id = req.session_id
210
 
211
+ tracer = trace.get_tracer(__name__)
212
+
213
  if req.model_type == "champ":
214
  session_documents = session_document_store.get_documents(session_id)
215
+ with tracer.start_as_current_span("vector_store"):
216
+ vector_store = (
217
+ base_vector_store
218
+ if session_documents is None
219
+ else create_session_vector_store(
220
+ base_vector_store, embedding_model, session_documents
221
+ )
222
  )
 
223
 
224
+ with tracer.start_as_current_span("ChampService"):
225
+ champ = ChampService(vector_store=vector_store, lang=req.lang)
226
 
227
+ with tracer.start_as_current_span("convert_messages_langchain"):
228
+ msgs = convert_messages_langchain(req.messages)
229
 
230
+ with tracer.start_as_current_span("invoke"):
231
+ reply, triage_meta = champ.invoke(msgs)
 
232
 
233
  return reply, triage_meta
234
 
 
275
 
276
 
277
  app = FastAPI(lifespan=lifespan)
278
+ setup_telemetry(app)
279
+
280
  app.mount("/static", StaticFiles(directory="static"), name="static")
281
  templates = Jinja2Templates(directory="templates")
282
 
 
286
  return templates.TemplateResponse("index.html", {"request": request})
287
 
288
 
289
+ tracer = trace.get_tracer(__name__)
290
+
291
+
292
  @app.post("/chat")
293
  async def chat_endpoint(payload: ChatRequest, background_tasks: BackgroundTasks):
294
  if not payload.messages:
 
301
 
302
  try:
303
  loop = asyncio.get_running_loop()
304
+ with tracer.start_as_current_span("call_llm"):
305
+ result = await loop.run_in_executor(None, call_llm, payload)
306
 
307
  if isinstance(result, AsyncGenerator):
308
 
 
423
 
424
  _, extension = os.path.splitext(file_name)
425
  if extension not in SUPPORTED_FILE_EXTENSIONS:
426
+ print("Unsupported extension")
427
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
428
 
429
  file_mime = file.headers.get("content-type")
430
  if file_mime is None:
431
+ print("None content-type")
432
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
433
 
434
  if file_mime not in SUPPORTED_FILE_TYPES:
435
+ print(f"Unsupported file_mime: {file_mime}")
436
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
437
 
438
  # Read in chunks to avoid RAM spikes
 
449
 
450
  file_mime = magic.from_buffer(file_content[:2048], mime=True)
451
  if file_mime not in SUPPORTED_FILE_TYPES:
452
+ print("magic file_mime unsupported")
453
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
454
 
455
  if file_mime == "application/pdf":
 
471
  if file_text is None:
472
  return Response(status_code=STATUS_CODE_INTERNAL_SERVER_ERROR)
473
 
474
+ sanitizer = PromptSanitizer()
475
+ sanitized_file_text = sanitizer.sanitize(file_text)
476
+
477
+ session_document_store.create_document(session_id, sanitized_file_text, file_name)
478
  session_tracker.add_session(session_id)
479
 
480
  # Should the logging event be coupled to the LLM call instead of the API call?
requirements.txt CHANGED
@@ -127,7 +127,6 @@ xxhash==3.6.0
127
  yarl==1.22.0
128
  zstandard==0.25.0
129
  pytz==2025.2
130
- pyinstrument==5.1.2
131
  pymupdf==1.27.1
132
  python-docx==1.2.0
133
  nh3==0.3.2
@@ -137,4 +136,8 @@ easyocr==1.7.2
137
  langdetect==1.0.9
138
  spacy==3.8.11
139
  presidio_analyzer==2.2.361
140
- presidio_anonymizer==2.2.361
 
 
 
 
 
127
  yarl==1.22.0
128
  zstandard==0.25.0
129
  pytz==2025.2
 
130
  pymupdf==1.27.1
131
  python-docx==1.2.0
132
  nh3==0.3.2
 
136
  langdetect==1.0.9
137
  spacy==3.8.11
138
  presidio_analyzer==2.2.361
139
+ presidio_anonymizer==2.2.361
140
+ opentelemetry-api==1.39.1
141
+ opentelemetry-sdk==1.39.1
142
+ opentelemetry-instrumentation-fastapi==0.60b1
143
+ opentelemetry-instrumentation-httpx==0.60b1
telemetry.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from opentelemetry import trace
3
+ from opentelemetry.sdk.trace import TracerProvider
4
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
5
+ from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
6
+ from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
7
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
8
+
9
+
10
class FilteredConsoleExporter(SpanExporter):
    """Console span exporter that only reports an allow-list of named spans.

    Prints one "[name] duration: X.XXms" line per completed watched span and
    silently drops everything else, keeping dev-console output readable.
    """

    # Span names emitted by our own tracer.start_as_current_span() calls;
    # anything outside this set (e.g. auto-instrumented HTTP spans) is ignored.
    WATCHED_SPANS = {
        "call_llm",
        "vector_store",
        "ChampService",
        "convert_messages_langchain",
        "invoke",
        "retrieving documents",
        "PromptSanitizer",
        "sanitize docs_content",
        "sanitize retrieval_query",
    }

    def export(self, spans):
        """Print durations for watched, fully-timed spans; always report success."""
        for current in spans:
            if current.name not in self.WATCHED_SPANS:
                continue
            # Guard against spans missing either timestamp (not yet ended, etc.).
            if current.start_time is None or current.end_time is None:
                continue
            # Span timestamps are nanoseconds; convert to milliseconds.
            elapsed_ms = (current.end_time - current.start_time) / 1e6
            print(f"[{current.name}] duration: {elapsed_ms:.2f}ms")
        return SpanExportResult.SUCCESS

    def shutdown(self):
        """Nothing to release for a console-only exporter."""
        pass
+
38
+
39
+ def setup_telemetry(app):
40
+ if os.getenv("ENV", "dev").lower() != "dev":
41
+ return
42
+
43
+ provider = TracerProvider()
44
+ provider.add_span_processor(BatchSpanProcessor(FilteredConsoleExporter()))
45
+ trace.set_tracer_provider(provider)
46
+
47
+ FastAPIInstrumentor.instrument_app(app)
48
+ HTTPXClientInstrumentor().instrument()
tests/stress_tests/chat_session.js CHANGED
@@ -38,10 +38,15 @@ export default function () {
38
  const payload = {
39
  user_id: `VU${__VU}`,
40
  session_id: `VU${__VU}`,
 
41
  messages: conversation,
42
- temperature: 0.2,
43
  model_type: __ENV.MODEL_TYPE,
44
  consent: true,
 
 
 
 
 
45
  };
46
  const params = { headers: { 'Content-Type': 'application/json' } };
47
 
@@ -61,12 +66,12 @@ export default function () {
61
  // We would have to read the body to access that data.
62
  data = res.body;
63
  }
64
- reply = data.reply || '(No reply)';
65
  conversation.push({ role: 'assistant', content: reply });
66
  } else {
67
  console.error(res.status);
68
  console.error(res.body);
69
- conversation.push({ role: 'assistant', content: '(No reply)' });
70
  }
71
 
72
  // Simulating reading time, thinking time and writing time.
 
38
  const payload = {
39
  user_id: `VU${__VU}`,
40
  session_id: `VU${__VU}`,
41
+ conversation_id: `VU${__VU}`,
42
  messages: conversation,
 
43
  model_type: __ENV.MODEL_TYPE,
44
  consent: true,
45
+ age_group: "0-18",
46
+ gender: "M",
47
+ roles: ["other"],
48
+ participant_id: `VU${__VU}`,
49
+ lang: "en"
50
  };
51
  const params = { headers: { 'Content-Type': 'application/json' } };
52
 
 
66
  // We would have to read the body to access that data.
67
  data = res.body;
68
  }
69
+ reply = data.reply || 'no_reply';
70
  conversation.push({ role: 'assistant', content: reply });
71
  } else {
72
  console.error(res.status);
73
  console.error(res.body);
74
+ conversation.push({ role: 'assistant', content: 'no_reply' });
75
  }
76
 
77
  // Simulating reading time, thinking time and writing time.
tests/stress_tests/file_uploads.js ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import http from 'k6/http';
2
+ import { sleep, check } from 'k6';
3
+
4
+ // 1. Open the file as binary outside the default function
5
+ const testFile = open('./small_file.pdf', 'b');
6
+
7
+ export const options = {
8
+ scenarios: {
9
+ my_spike_test: {
10
+ executor: 'per-vu-iterations',
11
+ vus: 80,
12
+ iterations: 1,
13
+ },
14
+ },
15
+ };
16
+
17
export default function () {
    // Stagger VU start times (0-10s) so the spike is not perfectly synchronized.
    sleep(Math.random() * 10);
    const targetUrl = __ENV.URL;

    for (let attempt = 0; attempt < 3; attempt++) {
        // Multipart form body: the test PDF plus a per-VU/per-attempt session id.
        const formData = {
            file: http.file(testFile, 'small_file.pdf', 'application/pdf'), // The file
            session_id: `VU${__VU}_${attempt}`,
        };

        // No explicit Content-Type header here: k6 sets the correct
        // 'multipart/form-data' header with a boundary automatically.
        const res = http.put(targetUrl, formData);

        check(res, { 'status is 200': (r) => r.status === 200 });
    }
}
36
+
37
+ // TEST RESULT ANALYSIS
38
+
39
+ // The average HTTP request duration varies greatly with the size of the uploaded file.
40
+ // The system supports very easily small uploaded files (15 KB for example). For that
41
+ // scenario, the average HTTP request duration was about 40ms
42
+
43
+ // However, the system cannot support 80 users sending 3 large files (8.5 MB). The
44
+ // requests simply timeout.
45
+
46
+ // SMALL FILES
47
+ // █ TOTAL RESULTS
48
+
49
+ // checks_total.......: 240 14.904509/s
50
+ // checks_succeeded...: 100.00% 240 out of 240
51
+ // checks_failed......: 0.00% 0 out of 240
52
+
53
+ // ✓ status is 200
54
+
55
+ // HTTP
56
+ // http_req_duration..............: avg=43.66ms min=21.72ms med=36.84ms max=304.38ms p(90)=74.06ms p(95)=79.16ms
57
+ // { expected_response:true }...: avg=43.66ms min=21.72ms med=36.84ms max=304.38ms p(90)=74.06ms p(95)=79.16ms
58
+ // http_req_failed................: 0.00% 0 out of 240
59
+ // http_reqs......................: 240 14.904509/s
60
+
61
+ // EXECUTION
62
+ // iteration_duration.............: avg=11.33s min=6.22s med=11.38s max=16.1s p(90)=15.18s p(95)=15.8s
63
+ // iterations.....................: 80 4.96817/s
64
+ // vus............................: 3 min=3 max=80
65
+ // vus_max........................: 80 min=80 max=80
66
+
67
+ // NETWORK
68
+ // data_received..................: 446 kB 28 kB/s
69
+ // data_sent......................: 3.8 MB 234 kB/s
70
+
71
+
72
+
73
+
74
+ // running (00m16.1s), 00/80 VUs, 80 complete and 0 interrupted iterations
75
+ // my_spike_test ✓ [======================================] 80 VUs 00m16.1s/10m0s 80/80 iters, 1 per VU
76
+
77
+
78
+ // LARGE FILES
79
+ // A list of error messages (request timeout) appears.