qyle commited on
Commit
944b1b5
·
verified ·
1 Parent(s): 044875d

pii removal file upload

Browse files
.gitignore CHANGED
@@ -3,4 +3,5 @@ __pycache__/
3
  .venv/
4
  venv/
5
  .env
6
- .venv*/
 
 
3
  .venv/
4
  venv/
5
  .env
6
+ .venv*/
7
+ conversations.json
champ/agent.py CHANGED
@@ -6,12 +6,16 @@ from langchain.agents.middleware import dynamic_prompt, ModelRequest
6
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
7
  from langchain_community.vectorstores import FAISS as LCFAISS
8
 
 
 
9
  from classes.prompt_sanitizer import PromptSanitizer
10
 
11
  # from classes.guardrail_manager import GuardrailManager
12
 
13
  from .prompts import CHAMP_SYSTEM_PROMPT_V4
14
 
 
 
15
 
16
  def _build_retrieval_query(messages) -> str:
17
  user_turns = []
@@ -33,17 +37,18 @@ def make_prompt_with_context(
33
  ):
34
  @dynamic_prompt
35
  def prompt_with_context(request: ModelRequest) -> str:
36
- retrieval_query = _build_retrieval_query(request.state["messages"])
37
- fetch_k = 20
38
- try:
39
- retrieved_docs = vector_store.max_marginal_relevance_search(
40
- retrieval_query,
41
- k=k,
42
- fetch_k=fetch_k,
43
- lambda_mult=0.5, # 0.0 = diverse, 1.0 = similar; 0.3–0.7 is typical
44
- )
45
- except Exception:
46
- retrieved_docs = vector_store.similarity_search(retrieval_query, k=k)
 
47
 
48
  seen = set()
49
  unique_docs = []
@@ -56,19 +61,18 @@ def make_prompt_with_context(
56
 
57
  docs_content = "\n\n".join(doc.page_content for doc in unique_docs)
58
 
59
- # guardrails = GuardrailManager(is_champ=True)
60
- # sanitized_docs = guardrails.sanitize(docs_content)
61
- # sanitized_last_query = guardrails.sanitize(retrieval_query)
62
-
63
- sanitizer = PromptSanitizer()
64
- sanitized_docs_content = sanitizer.sanitize(docs_content)
65
- sanitized_retrieval_query = sanitizer.sanitize(retrieval_query)
66
 
67
  language = "English" if lang == "en" else "French"
68
 
69
  return CHAMP_SYSTEM_PROMPT_V4.format(
70
  last_query=sanitized_retrieval_query,
71
- context=sanitized_docs_content,
72
  language=language,
73
  )
74
 
 
6
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
7
  from langchain_community.vectorstores import FAISS as LCFAISS
8
 
9
+ from opentelemetry import trace
10
+
11
  from classes.prompt_sanitizer import PromptSanitizer
12
 
13
  # from classes.guardrail_manager import GuardrailManager
14
 
15
  from .prompts import CHAMP_SYSTEM_PROMPT_V4
16
 
17
+ tracer = trace.get_tracer(__name__)
18
+
19
 
20
  def _build_retrieval_query(messages) -> str:
21
  user_turns = []
 
37
  ):
38
  @dynamic_prompt
39
  def prompt_with_context(request: ModelRequest) -> str:
40
+ with tracer.start_as_current_span("retrieving documents"):
41
+ retrieval_query = _build_retrieval_query(request.state["messages"])
42
+ fetch_k = 20
43
+ try:
44
+ retrieved_docs = vector_store.max_marginal_relevance_search(
45
+ retrieval_query,
46
+ k=k,
47
+ fetch_k=fetch_k,
48
+ lambda_mult=0.5, # 0.0 = diverse, 1.0 = similar; 0.3–0.7 is typical
49
+ )
50
+ except Exception:
51
+ retrieved_docs = vector_store.similarity_search(retrieval_query, k=k)
52
 
53
  seen = set()
54
  unique_docs = []
 
61
 
62
  docs_content = "\n\n".join(doc.page_content for doc in unique_docs)
63
 
64
+ # No need to sanitize the docs_content as the documents are sanitized
65
+ # when received at the file PUT endpoint.
66
+ with tracer.start_as_current_span("PromptSanitizer"):
67
+ sanitizer = PromptSanitizer()
68
+ with tracer.start_as_current_span("sanitize retrieval_query"):
69
+ sanitized_retrieval_query = sanitizer.sanitize(retrieval_query)
 
70
 
71
  language = "English" if lang == "en" else "French"
72
 
73
  return CHAMP_SYSTEM_PROMPT_V4.format(
74
  last_query=sanitized_retrieval_query,
75
+ context=docs_content,
76
  language=language,
77
  )
78
 
load_conversations.ipynb ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "fc07d69e",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import boto3\n",
11
+ "import json\n",
12
+ "import os\n",
13
+ "import pytz\n",
14
+ "\n",
15
+ "from boto3.dynamodb.conditions import Attr\n",
16
+ "from collections import defaultdict\n",
17
+ "from datetime import datetime\n",
18
+ "from decimal import Decimal\n",
19
+ "from dotenv import load_dotenv"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "id": "aa2cda42",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "load_dotenv()"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": null,
35
+ "id": "2a2214ff",
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "AWS_REGION = os.getenv(\"AWS_REGION\", \"us-east-1\")\n",
40
+ "AWS_ACCESS_KEY = os.getenv(\"AWS_ACCESS_KEY\", None)\n",
41
+ "AWS_SECRET_ACCESS_KEY = os.getenv(\"AWS_SECRET_ACCESS_KEY\", None)"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "id": "290b04ff",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "# 1. Initialize the DynamoDB resource\n",
52
+ "dynamodb = boto3.resource(\n",
53
+ " \"dynamodb\",\n",
54
+ " region_name=AWS_REGION,\n",
55
+ " aws_access_key_id=AWS_ACCESS_KEY,\n",
56
+ " aws_secret_access_key=AWS_SECRET_ACCESS_KEY,\n",
57
+ ")\n",
58
+ "client = dynamodb.meta.client"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "id": "81477e4d",
65
+ "metadata": {},
66
+ "outputs": [],
67
+ "source": [
68
+ "existing_tables = client.list_tables()[\"TableNames\"]\n",
69
+ "existing_tables"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": null,
75
+ "id": "a3426a87",
76
+ "metadata": {},
77
+ "outputs": [],
78
+ "source": [
79
+ "table = dynamodb.Table(\"chatbot-conversations\")\n",
80
+ "table"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "id": "3111db17",
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "def get_all_grouped_sessions(date_string):\n",
91
+ " grouped_data = defaultdict(list)\n",
92
+ "\n",
93
+ " # Scan the entire table\n",
94
+ " response = table.scan(FilterExpression=Attr(\"timestamp\").gte(date_string))\n",
95
+ " items = response.get(\"Items\", [])\n",
96
+ "\n",
97
+ " # Handle pagination if the table is large\n",
98
+ " while \"LastEvaluatedKey\" in response:\n",
99
+ " response = table.scan(\n",
100
+ " ExclusiveStartKey=response[\"LastEvaluatedKey\"],\n",
101
+ " FilterExpression=Attr(\"timestamp\").gte(date_string),\n",
102
+ " )\n",
103
+ " items.extend(response.get(\"Items\", []))\n",
104
+ "\n",
105
+ " # Grouping logic\n",
106
+ " for item in items:\n",
107
+ " sid = item[\"session_id\"]\n",
108
+ " grouped_data[sid].append(item)\n",
109
+ "\n",
110
+ " return dict(grouped_data)"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": null,
116
+ "id": "8b72d9ae",
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "local_timezone = pytz.timezone(\"America/Montreal\")\n",
121
+ "\n",
122
+ "# Date of the demo\n",
123
+ "# We want to extract every conversation since that date\n",
124
+ "local_date = datetime(2026, 2, 10, 0, 0, 0)\n",
125
+ "\n",
126
+ "localized_date = local_timezone.localize(local_date)\n",
127
+ "\n",
128
+ "utc_date = localized_date.astimezone(pytz.utc)\n",
129
+ "\n",
130
+ "# We format the date for dynamodb\n",
131
+ "utc_date_dynamodb = utc_date.strftime(\"%Y-%m-%dT%H:%M:%SZ\")"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": null,
137
+ "id": "a5444c0f",
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "sessions = get_all_grouped_sessions(utc_date_dynamodb)\n",
142
+ "sessions"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "id": "eb4bfda0",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "# Helper to handle DynamoDB Decimal types when saving the conversations\n",
153
+ "def decimal_default(obj):\n",
154
+ " if isinstance(obj, Decimal):\n",
155
+ " return float(obj) if obj % 1 > 0 else int(obj)\n",
156
+ " raise TypeError\n",
157
+ "\n",
158
+ "\n",
159
+ "with open(\"conversations.json\", \"w\", encoding=\"utf-8\") as f:\n",
160
+ " json.dump(\n",
161
+ " sessions,\n",
162
+ " f,\n",
163
+ " indent=4, # Makes the file human-readable (pretty-print)\n",
164
+ " ensure_ascii=True, # Allows special characters (like emojis or accents)\n",
165
+ " default=decimal_default, # Uses our helper for Decimals\n",
166
+ " )"
167
+ ]
168
+ },
169
+ {
170
+ "cell_type": "code",
171
+ "execution_count": null,
172
+ "id": "a53d80a5",
173
+ "metadata": {},
174
+ "outputs": [],
175
+ "source": [
176
+ "with open(\"conversations.json\", \"r\", encoding=\"utf-8\") as f:\n",
177
+ " conversations = json.load(f)"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "id": "55cf17e5",
184
+ "metadata": {},
185
+ "outputs": [],
186
+ "source": [
187
+ "conversations"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": null,
193
+ "id": "b03b5f25",
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "def filter_grouped_sessions(grouped_data, target_timestamp):\n",
198
+ " return {\n",
199
+ " sid: [item for item in items if item[\"timestamp\"] >= target_timestamp]\n",
200
+ " for sid, items in grouped_data.items()\n",
201
+ " # Optional: remove the session_id if the resulting list is empty\n",
202
+ " if any(item[\"timestamp\"] >= target_timestamp for item in items)\n",
203
+ " }"
204
+ ]
205
+ },
206
+ {
207
+ "cell_type": "code",
208
+ "execution_count": null,
209
+ "id": "42f7cc13",
210
+ "metadata": {},
211
+ "outputs": [],
212
+ "source": [
213
+ "local_date = datetime(2026, 2, 6, 14, 41, 0)\n",
214
+ "\n",
215
+ "localized_date = local_timezone.localize(local_date)\n",
216
+ "\n",
217
+ "utc_date = localized_date.astimezone(pytz.utc)\n",
218
+ "\n",
219
+ "# We format the date for dynamodb\n",
220
+ "utc_date_dynamodb = utc_date.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
221
+ "\n",
222
+ "filtered_conversations = filter_grouped_sessions(conversations, utc_date_dynamodb)\n",
223
+ "filtered_conversations"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": null,
229
+ "id": "0b229170",
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "problematic_vus = []\n",
234
+ "for vu, conv in filtered_conversations.items():\n",
235
+ " if any(message[\"data\"][\"reply\"] == \"\" for message in conv):\n",
236
+ " problematic_vus.append(vu)\n",
237
+ "\n",
238
+ "print(problematic_vus)\n",
239
+ "print(len(problematic_vus))"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "id": "5e450ab2",
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
249
+ "filtered_conversations[problematic_vus[7]]"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": null,
255
+ "id": "5ec5002b",
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": [
259
+ "for conv in filtered_conversations.values():\n",
260
+ " if any(\"error\" in message for message in conv):\n",
261
+ " print(conv)"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": null,
267
+ "id": "3b20b4f2",
268
+ "metadata": {},
269
+ "outputs": [],
270
+ "source": [
271
+ "conversations[\"VU78\"]"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "id": "67fad2b2",
278
+ "metadata": {},
279
+ "outputs": [],
280
+ "source": [
281
+ "def filter_comments(grouped_data):\n",
282
+ " return {\n",
283
+ " sid: filtered_items\n",
284
+ " for sid, items in grouped_data.items()\n",
285
+ " if (filtered_items := [item for item in items if \"comment\" in item[\"data\"]])\n",
286
+ " }"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": null,
292
+ "id": "4fae7474",
293
+ "metadata": {},
294
+ "outputs": [],
295
+ "source": [
296
+ "comments = filter_comments(conversations)\n",
297
+ "comments"
298
+ ]
299
+ }
300
+ ],
301
+ "metadata": {
302
+ "kernelspec": {
303
+ "display_name": ".venv (3.13.11)",
304
+ "language": "python",
305
+ "name": "python3"
306
+ },
307
+ "language_info": {
308
+ "codemirror_mode": {
309
+ "name": "ipython",
310
+ "version": 3
311
+ },
312
+ "file_extension": ".py",
313
+ "mimetype": "text/x-python",
314
+ "name": "python",
315
+ "nbconvert_exporter": "python",
316
+ "pygments_lexer": "ipython3",
317
+ "version": "3.13.11"
318
+ }
319
+ },
320
+ "nbformat": 4,
321
+ "nbformat_minor": 5
322
+ }
main.py CHANGED
@@ -11,11 +11,12 @@ from typing import AsyncGenerator, List, Literal, Tuple, Dict, Any
11
  from dotenv import load_dotenv
12
 
13
  from fastapi import FastAPI, File, Form, Request, BackgroundTasks, Response, UploadFile
14
- from fastapi.middleware.cors import CORSMiddleware
15
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
16
  from fastapi.staticfiles import StaticFiles
17
  from fastapi.templating import Jinja2Templates
18
 
 
 
19
  from champ.rag import (
20
  create_embedding_model,
21
  create_session_vector_store,
@@ -59,8 +60,6 @@ from champ.prompts import (
59
  )
60
  from champ.service import ChampService
61
 
62
- from pyinstrument import Profiler
63
-
64
  from helpers.file_helper import (
65
  extract_text_from_docx,
66
  extract_text_from_img,
@@ -70,6 +69,7 @@ from helpers.file_helper import (
70
  replace_spaces_in_filename,
71
  )
72
  from classes.session_document_store import SessionDocumentStore
 
73
 
74
  load_dotenv()
75
 
@@ -96,9 +96,6 @@ if GEMINI_API_KEY is None:
96
  "GEMINI_API_KEY is not set. "
97
  "Go to Space → Settings → Variables & secrets and add one."
98
  )
99
- ORIGIN = os.getenv("ORIGIN")
100
- if ORIGIN is None:
101
- ORIGIN = "http://localhost:8000"
102
 
103
  openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
104
  gemini_client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
@@ -209,30 +206,29 @@ def _call_gemini(model_id: str, msgs: list[dict], temperature: float) -> str:
209
  def call_llm(
210
  req: ChatRequest,
211
  ) -> AsyncGenerator[str, None] | Tuple[str, Dict[str, Any]]:
212
- if DEV:
213
- profiler = Profiler()
214
- profiler.start()
215
-
216
  session_id = req.session_id
217
 
 
 
218
  if req.model_type == "champ":
219
  session_documents = session_document_store.get_documents(session_id)
220
- vector_store = (
221
- base_vector_store
222
- if session_documents is None
223
- else create_session_vector_store(
224
- base_vector_store, embedding_model, session_documents
 
 
225
  )
226
- )
227
 
228
- champ = ChampService(vector_store=vector_store, lang=req.lang)
 
229
 
230
- msgs = convert_messages_langchain(req.messages)
231
- reply, triage_meta = champ.invoke(msgs)
232
 
233
- if DEV:
234
- profiler.stop()
235
- profiler.print()
236
 
237
  return reply, triage_meta
238
 
@@ -279,6 +275,8 @@ async def lifespan(app: FastAPI):
279
 
280
 
281
  app = FastAPI(lifespan=lifespan)
 
 
282
  app.mount("/static", StaticFiles(directory="static"), name="static")
283
  templates = Jinja2Templates(directory="templates")
284
 
@@ -288,6 +286,9 @@ async def home(request: Request):
288
  return templates.TemplateResponse("index.html", {"request": request})
289
 
290
 
 
 
 
291
  @app.post("/chat")
292
  async def chat_endpoint(payload: ChatRequest, background_tasks: BackgroundTasks):
293
  if not payload.messages:
@@ -300,7 +301,8 @@ async def chat_endpoint(payload: ChatRequest, background_tasks: BackgroundTasks)
300
 
301
  try:
302
  loop = asyncio.get_running_loop()
303
- result = await loop.run_in_executor(None, call_llm, payload)
 
304
 
305
  if isinstance(result, AsyncGenerator):
306
 
@@ -421,13 +423,16 @@ async def upload_file(
421
 
422
  _, extension = os.path.splitext(file_name)
423
  if extension not in SUPPORTED_FILE_EXTENSIONS:
 
424
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
425
 
426
  file_mime = file.headers.get("content-type")
427
  if file_mime is None:
 
428
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
429
 
430
  if file_mime not in SUPPORTED_FILE_TYPES:
 
431
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
432
 
433
  # Read in chunks to avoid RAM spikes
@@ -444,6 +449,7 @@ async def upload_file(
444
 
445
  file_mime = magic.from_buffer(file_content[:2048], mime=True)
446
  if file_mime not in SUPPORTED_FILE_TYPES:
 
447
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
448
 
449
  if file_mime == "application/pdf":
@@ -465,7 +471,10 @@ async def upload_file(
465
  if file_text is None:
466
  return Response(status_code=STATUS_CODE_INTERNAL_SERVER_ERROR)
467
 
468
- session_document_store.create_document(session_id, file_text, file_name)
 
 
 
469
  session_tracker.add_session(session_id)
470
 
471
  # Should the logging event be coupled to the LLM call instead of the API call?
 
11
  from dotenv import load_dotenv
12
 
13
  from fastapi import FastAPI, File, Form, Request, BackgroundTasks, Response, UploadFile
 
14
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
15
  from fastapi.staticfiles import StaticFiles
16
  from fastapi.templating import Jinja2Templates
17
 
18
+ from opentelemetry import trace
19
+
20
  from champ.rag import (
21
  create_embedding_model,
22
  create_session_vector_store,
 
60
  )
61
  from champ.service import ChampService
62
 
 
 
63
  from helpers.file_helper import (
64
  extract_text_from_docx,
65
  extract_text_from_img,
 
69
  replace_spaces_in_filename,
70
  )
71
  from classes.session_document_store import SessionDocumentStore
72
+ from telemetry import setup_telemetry
73
 
74
  load_dotenv()
75
 
 
96
  "GEMINI_API_KEY is not set. "
97
  "Go to Space → Settings → Variables & secrets and add one."
98
  )
 
 
 
99
 
100
  openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
101
  gemini_client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
 
206
  def call_llm(
207
  req: ChatRequest,
208
  ) -> AsyncGenerator[str, None] | Tuple[str, Dict[str, Any]]:
 
 
 
 
209
  session_id = req.session_id
210
 
211
+ tracer = trace.get_tracer(__name__)
212
+
213
  if req.model_type == "champ":
214
  session_documents = session_document_store.get_documents(session_id)
215
+ with tracer.start_as_current_span("vector_store"):
216
+ vector_store = (
217
+ base_vector_store
218
+ if session_documents is None
219
+ else create_session_vector_store(
220
+ base_vector_store, embedding_model, session_documents
221
+ )
222
  )
 
223
 
224
+ with tracer.start_as_current_span("ChampService"):
225
+ champ = ChampService(vector_store=vector_store, lang=req.lang)
226
 
227
+ with tracer.start_as_current_span("convert_messages_langchain"):
228
+ msgs = convert_messages_langchain(req.messages)
229
 
230
+ with tracer.start_as_current_span("invoke"):
231
+ reply, triage_meta = champ.invoke(msgs)
 
232
 
233
  return reply, triage_meta
234
 
 
275
 
276
 
277
  app = FastAPI(lifespan=lifespan)
278
+ setup_telemetry(app)
279
+
280
  app.mount("/static", StaticFiles(directory="static"), name="static")
281
  templates = Jinja2Templates(directory="templates")
282
 
 
286
  return templates.TemplateResponse("index.html", {"request": request})
287
 
288
 
289
+ tracer = trace.get_tracer(__name__)
290
+
291
+
292
  @app.post("/chat")
293
  async def chat_endpoint(payload: ChatRequest, background_tasks: BackgroundTasks):
294
  if not payload.messages:
 
301
 
302
  try:
303
  loop = asyncio.get_running_loop()
304
+ with tracer.start_as_current_span("call_llm"):
305
+ result = await loop.run_in_executor(None, call_llm, payload)
306
 
307
  if isinstance(result, AsyncGenerator):
308
 
 
423
 
424
  _, extension = os.path.splitext(file_name)
425
  if extension not in SUPPORTED_FILE_EXTENSIONS:
426
+ print("Unsupported extension")
427
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
428
 
429
  file_mime = file.headers.get("content-type")
430
  if file_mime is None:
431
+ print("None content-type")
432
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
433
 
434
  if file_mime not in SUPPORTED_FILE_TYPES:
435
+ print(f"Unsupported file_mime: {file_mime}")
436
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
437
 
438
  # Read in chunks to avoid RAM spikes
 
449
 
450
  file_mime = magic.from_buffer(file_content[:2048], mime=True)
451
  if file_mime not in SUPPORTED_FILE_TYPES:
452
+ print("magic file_mime unsupported")
453
  return Response(status_code=STATUS_CODE_UNSUPPORTED_MEDIA_TYPE)
454
 
455
  if file_mime == "application/pdf":
 
471
  if file_text is None:
472
  return Response(status_code=STATUS_CODE_INTERNAL_SERVER_ERROR)
473
 
474
+ sanitizer = PromptSanitizer()
475
+ sanitized_file_text = sanitizer.sanitize(file_text)
476
+
477
+ session_document_store.create_document(session_id, sanitized_file_text, file_name)
478
  session_tracker.add_session(session_id)
479
 
480
  # Should the logging event be coupled to the LLM call instead of the API call?
requirements.txt CHANGED
@@ -127,7 +127,6 @@ xxhash==3.6.0
127
  yarl==1.22.0
128
  zstandard==0.25.0
129
  pytz==2025.2
130
- pyinstrument==5.1.2
131
  pymupdf==1.27.1
132
  python-docx==1.2.0
133
  nh3==0.3.2
@@ -137,4 +136,8 @@ easyocr==1.7.2
137
  langdetect==1.0.9
138
  spacy==3.8.11
139
  presidio_analyzer==2.2.361
140
- presidio_anonymizer==2.2.361
 
 
 
 
 
127
  yarl==1.22.0
128
  zstandard==0.25.0
129
  pytz==2025.2
 
130
  pymupdf==1.27.1
131
  python-docx==1.2.0
132
  nh3==0.3.2
 
136
  langdetect==1.0.9
137
  spacy==3.8.11
138
  presidio_analyzer==2.2.361
139
+ presidio_anonymizer==2.2.361
140
+ opentelemetry-api==1.39.1
141
+ opentelemetry-sdk==1.39.1
142
+ opentelemetry-instrumentation-fastapi==0.60b1
143
+ opentelemetry-instrumentation-httpx==0.60b1
telemetry.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from opentelemetry import trace
3
+ from opentelemetry.sdk.trace import TracerProvider
4
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
5
+ from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
6
+ from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
7
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
8
+
9
+
10
class FilteredConsoleExporter(SpanExporter):
    """Console span exporter that only reports an allow-list of named spans.

    Prints one "[name] duration: X.XXms" line per completed watched span and
    silently drops everything else, keeping dev-console output readable.
    """

    # Span names emitted by our own tracer.start_as_current_span() calls;
    # anything outside this set (e.g. auto-instrumented HTTP spans) is ignored.
    WATCHED_SPANS = {
        "call_llm",
        "vector_store",
        "ChampService",
        "convert_messages_langchain",
        "invoke",
        "retrieving documents",
        "PromptSanitizer",
        "sanitize docs_content",
        "sanitize retrieval_query",
    }

    def export(self, spans):
        """Print durations for watched, fully-timed spans; always report success."""
        for current in spans:
            if current.name not in self.WATCHED_SPANS:
                continue
            # Guard against spans missing either timestamp (not yet ended, etc.).
            if current.start_time is None or current.end_time is None:
                continue
            # Span timestamps are nanoseconds; convert to milliseconds.
            elapsed_ms = (current.end_time - current.start_time) / 1e6
            print(f"[{current.name}] duration: {elapsed_ms:.2f}ms")
        return SpanExportResult.SUCCESS

    def shutdown(self):
        """Nothing to release for a console-only exporter."""
        pass
+
38
+
39
+ def setup_telemetry(app):
40
+ if os.getenv("ENV", "dev").lower() != "dev":
41
+ return
42
+
43
+ provider = TracerProvider()
44
+ provider.add_span_processor(BatchSpanProcessor(FilteredConsoleExporter()))
45
+ trace.set_tracer_provider(provider)
46
+
47
+ FastAPIInstrumentor.instrument_app(app)
48
+ HTTPXClientInstrumentor().instrument()
tests/stress_tests/chat_session.js CHANGED
@@ -38,10 +38,15 @@ export default function () {
38
  const payload = {
39
  user_id: `VU${__VU}`,
40
  session_id: `VU${__VU}`,
 
41
  messages: conversation,
42
- temperature: 0.2,
43
  model_type: __ENV.MODEL_TYPE,
44
  consent: true,
 
 
 
 
 
45
  };
46
  const params = { headers: { 'Content-Type': 'application/json' } };
47
 
@@ -61,12 +66,12 @@ export default function () {
61
  // We would have to read the body to access that data.
62
  data = res.body;
63
  }
64
- reply = data.reply || '(No reply)';
65
  conversation.push({ role: 'assistant', content: reply });
66
  } else {
67
  console.error(res.status);
68
  console.error(res.body);
69
- conversation.push({ role: 'assistant', content: '(No reply)' });
70
  }
71
 
72
  // Simulating reading time, thinking time and writing time.
 
38
  const payload = {
39
  user_id: `VU${__VU}`,
40
  session_id: `VU${__VU}`,
41
+ conversation_id: `VU${__VU}`,
42
  messages: conversation,
 
43
  model_type: __ENV.MODEL_TYPE,
44
  consent: true,
45
+ age_group: "0-18",
46
+ gender: "M",
47
+ roles: ["other"],
48
+ participant_id: `VU${__VU}`,
49
+ lang: "en"
50
  };
51
  const params = { headers: { 'Content-Type': 'application/json' } };
52
 
 
66
  // We would have to read the body to access that data.
67
  data = res.body;
68
  }
69
+ reply = data.reply || 'no_reply';
70
  conversation.push({ role: 'assistant', content: reply });
71
  } else {
72
  console.error(res.status);
73
  console.error(res.body);
74
+ conversation.push({ role: 'assistant', content: 'no_reply' });
75
  }
76
 
77
  // Simulating reading time, thinking time and writing time.
tests/stress_tests/file_uploads.js ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import http from 'k6/http';
2
+ import { sleep, check } from 'k6';
3
+
4
+ // 1. Open the file as binary outside the default function
5
+ const testFile = open('./small_file.pdf', 'b');
6
+
7
+ export const options = {
8
+ scenarios: {
9
+ my_spike_test: {
10
+ executor: 'per-vu-iterations',
11
+ vus: 80,
12
+ iterations: 1,
13
+ },
14
+ },
15
+ };
16
+
17
export default function () {
    // Stagger VU start times (0-10s) so the spike is not perfectly synchronized.
    sleep(Math.random() * 10);
    const targetUrl = __ENV.URL;

    for (let attempt = 0; attempt < 3; attempt++) {
        // Multipart form body: the test PDF plus a per-VU/per-attempt session id.
        const formData = {
            file: http.file(testFile, 'small_file.pdf', 'application/pdf'), // The file
            session_id: `VU${__VU}_${attempt}`,
        };

        // No explicit Content-Type header here: k6 sets the correct
        // 'multipart/form-data' header with a boundary automatically.
        const res = http.put(targetUrl, formData);

        check(res, { 'status is 200': (r) => r.status === 200 });
    }
}
36
+
37
+ // TEST RESULT ANALYSIS
38
+
39
+ // The average HTTP request duration varies greatly with the size of the uploaded file.
40
+ // The system supports very easily small uploaded files (15 KB for example). For that
41
+ // scenario, the average HTTP request duration was about 40ms
42
+
43
+ // However, the system cannot support 80 users sending 3 large files (8.5 MB). The
44
+ // requests simply timeout.
45
+
46
+ // SMALL FILES
47
+ // █ TOTAL RESULTS
48
+
49
+ // checks_total.......: 240 14.904509/s
50
+ // checks_succeeded...: 100.00% 240 out of 240
51
+ // checks_failed......: 0.00% 0 out of 240
52
+
53
+ // ✓ status is 200
54
+
55
+ // HTTP
56
+ // http_req_duration..............: avg=43.66ms min=21.72ms med=36.84ms max=304.38ms p(90)=74.06ms p(95)=79.16ms
57
+ // { expected_response:true }...: avg=43.66ms min=21.72ms med=36.84ms max=304.38ms p(90)=74.06ms p(95)=79.16ms
58
+ // http_req_failed................: 0.00% 0 out of 240
59
+ // http_reqs......................: 240 14.904509/s
60
+
61
+ // EXECUTION
62
+ // iteration_duration.............: avg=11.33s min=6.22s med=11.38s max=16.1s p(90)=15.18s p(95)=15.8s
63
+ // iterations.....................: 80 4.96817/s
64
+ // vus............................: 3 min=3 max=80
65
+ // vus_max........................: 80 min=80 max=80
66
+
67
+ // NETWORK
68
+ // data_received..................: 446 kB 28 kB/s
69
+ // data_sent......................: 3.8 MB 234 kB/s
70
+
71
+
72
+
73
+
74
+ // running (00m16.1s), 00/80 VUs, 80 complete and 0 interrupted iterations
75
+ // my_spike_test ✓ [======================================] 80 VUs 00m16.1s/10m0s 80/80 iters, 1 per VU
76
+
77
+
78
+ // LARGE FILES
79
+ // A list of error messages (request timeout) appears.