ikram98ai committed on
Commit
9f772a8
·
1 Parent(s): 4cfbb41

refactoring the code and adding mcp

Browse files
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: indigo
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.49.1
8
- app_file: app.py
9
  pinned: false
10
  python_version: 3.13
11
  ---
 
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.49.1
8
+ app_file: src/app.py
9
  pinned: false
10
  python_version: 3.13
11
  ---
requirements.txt CHANGED
@@ -1,150 +1,426 @@
 
 
1
  aiofiles==24.1.0
 
2
  aiohappyeyeballs==2.6.1
3
- aiohttp==3.13.1
 
 
4
  aiosignal==1.4.0
 
5
  annotated-doc==0.0.3
 
6
  annotated-types==0.7.0
 
7
  anyio==4.11.0
8
- asttokens==3.0.0
 
 
 
 
 
 
9
  attrs==25.4.0
 
 
 
 
10
  audioop-lts==0.2.2
 
11
  brotli==1.1.0
 
12
  cachetools==6.2.1
 
13
  certifi==2025.10.5
 
 
 
 
14
  cffi==2.0.0
 
15
  charset-normalizer==3.4.4
 
 
 
16
  click==8.3.0
17
- comm==0.2.3
 
 
 
18
  cryptography==46.0.3
 
19
  dataclasses-json==0.6.7
20
- debugpy==1.8.17
21
- decorator==5.2.1
22
  distro==1.9.0
23
- executing==2.2.1
24
- fastapi==0.120.1
 
25
  ffmpy==0.6.4
 
26
  filelock==3.20.0
 
27
  filetype==1.2.0
 
28
  frozenlist==1.8.0
29
- fsspec==2025.9.0
 
 
 
 
 
 
30
  google-ai-generativelanguage==0.9.0
31
- google-api-core==2.28.0
32
- google-auth==2.41.1
 
 
 
 
 
33
  googleapis-common-protos==1.71.0
 
 
 
34
  gradio==5.49.1
 
35
  gradio-client==1.13.3
 
36
  greenlet==3.2.4
 
37
  groovy==0.1.2
 
38
  grpcio==1.76.0
 
 
 
 
 
39
  grpcio-status==1.76.0
 
40
  h11==0.16.0
 
 
 
41
  hf-xet==1.2.0
 
42
  httpcore==1.0.9
 
43
  httpx==0.28.1
 
 
 
 
 
 
 
 
 
44
  httpx-sse==0.4.3
45
- huggingface-hub==1.0.0
 
 
 
 
 
 
46
  idna==3.11
47
- iniconfig==2.3.0
48
- ipykernel==7.1.0
49
- ipython==9.6.0
50
- ipython-pygments-lexers==1.1.1
51
- jedi==0.19.2
52
  jinja2==3.1.6
 
53
  jiter==0.11.1
 
 
 
54
  jsonpatch==1.33
 
55
  jsonpointer==3.0.0
56
- jupyter-client==8.6.3
57
- jupyter-core==5.9.1
 
 
 
58
  langchain==1.0.2
 
59
  langchain-classic==1.0.0
 
60
  langchain-community==0.4.1
61
- langchain-core==1.0.1
 
 
 
 
 
 
 
 
 
 
 
 
62
  langchain-google-genai==3.0.0
 
63
  langchain-milvus==0.2.2
 
64
  langchain-openai==1.0.1
 
65
  langchain-text-splitters==1.0.0
66
- langgraph==1.0.1
 
 
 
 
67
  langgraph-checkpoint==3.0.0
68
- langgraph-prebuilt==1.0.1
 
 
 
 
69
  langgraph-sdk==0.2.9
70
- langsmith==0.4.38
 
 
 
 
 
71
  markdown-it-py==4.0.0
 
72
  markupsafe==3.0.3
 
 
 
73
  marshmallow==3.26.1
74
- matplotlib-inline==0.2.1
 
 
75
  mdurl==0.1.2
 
76
  milvus-lite==2.5.1
 
77
  multidict==6.7.0
 
 
 
78
  mypy-extensions==1.1.0
79
- nest-asyncio==1.6.0
80
  numpy==2.3.4
 
 
 
 
 
 
 
81
  openai==2.6.1
 
82
  orjson==3.11.4
 
 
 
 
 
83
  ormsgpack==1.11.0
 
84
  packaging==25.0
 
 
 
 
 
 
 
85
  pandas==2.3.3
86
- parso==0.8.5
 
 
87
  pdfminer-six==20250506
88
- pexpect==4.9.0
89
  pillow==11.3.0
90
- platformdirs==4.5.0
91
- pluggy==1.6.0
92
- prompt-toolkit==3.0.52
93
  propcache==0.4.1
 
 
 
94
  proto-plus==1.26.1
 
 
 
95
  protobuf==6.33.0
96
- psutil==7.1.2
97
- ptyprocess==0.7.0
98
- pure-eval==0.2.3
 
 
 
 
99
  pyasn1==0.6.1
 
 
 
100
  pyasn1-modules==0.4.2
 
101
  pycparser==2.23
 
102
  pydantic==2.11.10
 
 
 
 
 
 
 
 
 
 
 
 
103
  pydantic-core==2.33.2
 
104
  pydantic-settings==2.11.0
 
 
 
105
  pydub==0.25.1
 
106
  pygments==2.19.2
107
- pymilvus==2.6.2
108
- pytest==8.4.2
 
109
  python-dateutil==2.9.0.post0
 
110
  python-dotenv==1.2.1
 
 
 
111
  python-multipart==0.0.20
 
 
 
112
  pytz==2025.2
 
113
  pyyaml==6.0.3
114
- pyzmq==27.1.0
 
 
 
 
 
115
  rank-bm25==0.2.2
 
 
 
 
 
116
  regex==2025.10.23
 
117
  requests==2.32.5
 
 
 
 
 
 
 
118
  requests-toolbelt==1.0.0
 
119
  rich==14.2.0
 
 
 
 
 
120
  rsa==4.9.1
121
- ruff==0.14.2
 
 
122
  safehttpx==0.1.7
 
 
 
 
 
123
  semantic-version==2.10.0
 
124
  setuptools==80.9.0
 
125
  shellingham==1.5.4
 
 
 
126
  six==1.17.0
 
127
  sniffio==1.3.1
 
 
 
128
  sqlalchemy==2.0.44
129
- stack-data==0.6.3
130
- starlette==0.48.0
 
 
 
 
 
 
 
 
131
  tenacity==9.1.2
 
 
 
 
 
132
  tiktoken==0.12.0
 
133
  tomlkit==0.13.3
134
- tornado==6.5.2
135
  tqdm==4.67.1
136
- traitlets==5.14.3
 
 
 
137
  typer==0.20.0
 
138
  typer-slim==0.20.0
 
139
  typing-extensions==4.15.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  typing-inspect==0.9.0
 
141
  typing-inspection==0.4.2
 
 
 
142
  tzdata==2025.2
143
- ujson==5.11.0
144
- urllib3==2.3.0
 
145
  uvicorn==0.38.0
146
- wcwidth==0.2.14
 
 
147
  websockets==15.0.1
 
148
  xxhash==3.6.0
 
149
  yarl==1.22.0
 
150
  zstandard==0.25.0
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o hierRAG/requirements.txt
3
  aiofiles==24.1.0
4
+ # via gradio
5
  aiohappyeyeballs==2.6.1
6
+ # via aiohttp
7
+ aiohttp==3.13.2
8
+ # via langchain-community
9
  aiosignal==1.4.0
10
+ # via aiohttp
11
  annotated-doc==0.0.3
12
+ # via fastapi
13
  annotated-types==0.7.0
14
+ # via pydantic
15
  anyio==4.11.0
16
+ # via
17
+ # gradio
18
+ # httpx
19
+ # mcp
20
+ # openai
21
+ # sse-starlette
22
+ # starlette
23
  attrs==25.4.0
24
+ # via
25
+ # aiohttp
26
+ # jsonschema
27
+ # referencing
28
  audioop-lts==0.2.2
29
+ # via gradio
30
  brotli==1.1.0
31
+ # via gradio
32
  cachetools==6.2.1
33
+ # via google-auth
34
  certifi==2025.10.5
35
+ # via
36
+ # httpcore
37
+ # httpx
38
+ # requests
39
  cffi==2.0.0
40
+ # via cryptography
41
  charset-normalizer==3.4.4
42
+ # via
43
+ # pdfminer-six
44
+ # requests
45
  click==8.3.0
46
+ # via
47
+ # typer
48
+ # typer-slim
49
+ # uvicorn
50
  cryptography==46.0.3
51
+ # via pdfminer-six
52
  dataclasses-json==0.6.7
53
+ # via langchain-community
 
54
  distro==1.9.0
55
+ # via openai
56
+ fastapi==0.120.4
57
+ # via gradio
58
  ffmpy==0.6.4
59
+ # via gradio
60
  filelock==3.20.0
61
+ # via huggingface-hub
62
  filetype==1.2.0
63
+ # via langchain-google-genai
64
  frozenlist==1.8.0
65
+ # via
66
+ # aiohttp
67
+ # aiosignal
68
+ fsspec==2025.10.0
69
+ # via
70
+ # gradio-client
71
+ # huggingface-hub
72
  google-ai-generativelanguage==0.9.0
73
+ # via langchain-google-genai
74
+ google-api-core==2.28.1
75
+ # via google-ai-generativelanguage
76
+ google-auth==2.42.1
77
+ # via
78
+ # google-ai-generativelanguage
79
+ # google-api-core
80
  googleapis-common-protos==1.71.0
81
+ # via
82
+ # google-api-core
83
+ # grpcio-status
84
  gradio==5.49.1
85
+ # via hier-rag (pyproject.toml)
86
  gradio-client==1.13.3
87
+ # via gradio
88
  greenlet==3.2.4
89
+ # via sqlalchemy
90
  groovy==0.1.2
91
+ # via gradio
92
  grpcio==1.76.0
93
+ # via
94
+ # google-ai-generativelanguage
95
+ # google-api-core
96
+ # grpcio-status
97
+ # pymilvus
98
  grpcio-status==1.76.0
99
+ # via google-api-core
100
  h11==0.16.0
101
+ # via
102
+ # httpcore
103
+ # uvicorn
104
  hf-xet==1.2.0
105
+ # via huggingface-hub
106
  httpcore==1.0.9
107
+ # via httpx
108
  httpx==0.28.1
109
+ # via
110
+ # gradio
111
+ # gradio-client
112
+ # huggingface-hub
113
+ # langgraph-sdk
114
+ # langsmith
115
+ # mcp
116
+ # openai
117
+ # safehttpx
118
  httpx-sse==0.4.3
119
+ # via
120
+ # langchain-community
121
+ # mcp
122
+ huggingface-hub==1.0.1
123
+ # via
124
+ # gradio
125
+ # gradio-client
126
  idna==3.11
127
+ # via
128
+ # anyio
129
+ # httpx
130
+ # requests
131
+ # yarl
132
  jinja2==3.1.6
133
+ # via gradio
134
  jiter==0.11.1
135
+ # via openai
136
+ joblib==1.5.2
137
+ # via scikit-learn
138
  jsonpatch==1.33
139
+ # via langchain-core
140
  jsonpointer==3.0.0
141
+ # via jsonpatch
142
+ jsonschema==4.25.1
143
+ # via mcp
144
+ jsonschema-specifications==2025.9.1
145
+ # via jsonschema
146
  langchain==1.0.2
147
+ # via hier-rag (pyproject.toml)
148
  langchain-classic==1.0.0
149
+ # via langchain-community
150
  langchain-community==0.4.1
151
+ # via hier-rag (pyproject.toml)
152
+ langchain-core==1.0.2
153
+ # via
154
+ # langchain
155
+ # langchain-classic
156
+ # langchain-community
157
+ # langchain-google-genai
158
+ # langchain-milvus
159
+ # langchain-openai
160
+ # langchain-text-splitters
161
+ # langgraph
162
+ # langgraph-checkpoint
163
+ # langgraph-prebuilt
164
  langchain-google-genai==3.0.0
165
+ # via langchain
166
  langchain-milvus==0.2.2
167
+ # via hier-rag (pyproject.toml)
168
  langchain-openai==1.0.1
169
+ # via langchain
170
  langchain-text-splitters==1.0.0
171
+ # via
172
+ # hier-rag (pyproject.toml)
173
+ # langchain-classic
174
+ langgraph==1.0.2
175
+ # via langchain
176
  langgraph-checkpoint==3.0.0
177
+ # via
178
+ # langgraph
179
+ # langgraph-prebuilt
180
+ langgraph-prebuilt==1.0.2
181
+ # via langgraph
182
  langgraph-sdk==0.2.9
183
+ # via langgraph
184
+ langsmith==0.4.39
185
+ # via
186
+ # langchain-classic
187
+ # langchain-community
188
+ # langchain-core
189
  markdown-it-py==4.0.0
190
+ # via rich
191
  markupsafe==3.0.3
192
+ # via
193
+ # gradio
194
+ # jinja2
195
  marshmallow==3.26.1
196
+ # via dataclasses-json
197
+ mcp==1.10.1
198
+ # via gradio
199
  mdurl==0.1.2
200
+ # via markdown-it-py
201
  milvus-lite==2.5.1
202
+ # via hier-rag (pyproject.toml)
203
  multidict==6.7.0
204
+ # via
205
+ # aiohttp
206
+ # yarl
207
  mypy-extensions==1.1.0
208
+ # via typing-inspect
209
  numpy==2.3.4
210
+ # via
211
+ # gradio
212
+ # langchain-community
213
+ # pandas
214
+ # rank-bm25
215
+ # scikit-learn
216
+ # scipy
217
  openai==2.6.1
218
+ # via langchain-openai
219
  orjson==3.11.4
220
+ # via
221
+ # gradio
222
+ # langgraph-sdk
223
+ # langsmith
224
+ # pymilvus
225
  ormsgpack==1.11.0
226
+ # via langgraph-checkpoint
227
  packaging==25.0
228
+ # via
229
+ # gradio
230
+ # gradio-client
231
+ # huggingface-hub
232
+ # langchain-core
233
+ # langsmith
234
+ # marshmallow
235
  pandas==2.3.3
236
+ # via
237
+ # gradio
238
+ # pymilvus
239
  pdfminer-six==20250506
240
+ # via hier-rag (pyproject.toml)
241
  pillow==11.3.0
242
+ # via gradio
 
 
243
  propcache==0.4.1
244
+ # via
245
+ # aiohttp
246
+ # yarl
247
  proto-plus==1.26.1
248
+ # via
249
+ # google-ai-generativelanguage
250
+ # google-api-core
251
  protobuf==6.33.0
252
+ # via
253
+ # google-ai-generativelanguage
254
+ # google-api-core
255
+ # googleapis-common-protos
256
+ # grpcio-status
257
+ # proto-plus
258
+ # pymilvus
259
  pyasn1==0.6.1
260
+ # via
261
+ # pyasn1-modules
262
+ # rsa
263
  pyasn1-modules==0.4.2
264
+ # via google-auth
265
  pycparser==2.23
266
+ # via cffi
267
  pydantic==2.11.10
268
+ # via
269
+ # fastapi
270
+ # gradio
271
+ # langchain
272
+ # langchain-classic
273
+ # langchain-core
274
+ # langchain-google-genai
275
+ # langgraph
276
+ # langsmith
277
+ # mcp
278
+ # openai
279
+ # pydantic-settings
280
  pydantic-core==2.33.2
281
+ # via pydantic
282
  pydantic-settings==2.11.0
283
+ # via
284
+ # langchain-community
285
+ # mcp
286
  pydub==0.25.1
287
+ # via gradio
288
  pygments==2.19.2
289
+ # via rich
290
+ pymilvus==2.6.3
291
+ # via langchain-milvus
292
  python-dateutil==2.9.0.post0
293
+ # via pandas
294
  python-dotenv==1.2.1
295
+ # via
296
+ # pydantic-settings
297
+ # pymilvus
298
  python-multipart==0.0.20
299
+ # via
300
+ # gradio
301
+ # mcp
302
  pytz==2025.2
303
+ # via pandas
304
  pyyaml==6.0.3
305
+ # via
306
+ # gradio
307
+ # huggingface-hub
308
+ # langchain-classic
309
+ # langchain-community
310
+ # langchain-core
311
  rank-bm25==0.2.2
312
+ # via hier-rag (pyproject.toml)
313
+ referencing==0.37.0
314
+ # via
315
+ # jsonschema
316
+ # jsonschema-specifications
317
  regex==2025.10.23
318
+ # via tiktoken
319
  requests==2.32.5
320
+ # via
321
+ # google-api-core
322
+ # langchain-classic
323
+ # langchain-community
324
+ # langsmith
325
+ # requests-toolbelt
326
+ # tiktoken
327
  requests-toolbelt==1.0.0
328
+ # via langsmith
329
  rich==14.2.0
330
+ # via typer
331
+ rpds-py==0.28.0
332
+ # via
333
+ # jsonschema
334
+ # referencing
335
  rsa==4.9.1
336
+ # via google-auth
337
+ ruff==0.14.3
338
+ # via gradio
339
  safehttpx==0.1.7
340
+ # via gradio
341
+ scikit-learn==1.7.2
342
+ # via hier-rag (pyproject.toml)
343
+ scipy==1.16.3
344
+ # via scikit-learn
345
  semantic-version==2.10.0
346
+ # via gradio
347
  setuptools==80.9.0
348
+ # via pymilvus
349
  shellingham==1.5.4
350
+ # via
351
+ # huggingface-hub
352
+ # typer
353
  six==1.17.0
354
+ # via python-dateutil
355
  sniffio==1.3.1
356
+ # via
357
+ # anyio
358
+ # openai
359
  sqlalchemy==2.0.44
360
+ # via
361
+ # langchain-classic
362
+ # langchain-community
363
+ sse-starlette==3.0.3
364
+ # via mcp
365
+ starlette==0.49.3
366
+ # via
367
+ # fastapi
368
+ # gradio
369
+ # mcp
370
  tenacity==9.1.2
371
+ # via
372
+ # langchain-community
373
+ # langchain-core
374
+ threadpoolctl==3.6.0
375
+ # via scikit-learn
376
  tiktoken==0.12.0
377
+ # via langchain-openai
378
  tomlkit==0.13.3
379
+ # via gradio
380
  tqdm==4.67.1
381
+ # via
382
+ # huggingface-hub
383
+ # milvus-lite
384
+ # openai
385
  typer==0.20.0
386
+ # via gradio
387
  typer-slim==0.20.0
388
+ # via huggingface-hub
389
  typing-extensions==4.15.0
390
+ # via
391
+ # fastapi
392
+ # gradio
393
+ # gradio-client
394
+ # grpcio
395
+ # huggingface-hub
396
+ # langchain-core
397
+ # openai
398
+ # pydantic
399
+ # pydantic-core
400
+ # sqlalchemy
401
+ # typer
402
+ # typer-slim
403
+ # typing-inspect
404
+ # typing-inspection
405
  typing-inspect==0.9.0
406
+ # via dataclasses-json
407
  typing-inspection==0.4.2
408
+ # via
409
+ # pydantic
410
+ # pydantic-settings
411
  tzdata==2025.2
412
+ # via pandas
413
+ urllib3==2.5.0
414
+ # via requests
415
  uvicorn==0.38.0
416
+ # via
417
+ # gradio
418
+ # mcp
419
  websockets==15.0.1
420
+ # via gradio-client
421
  xxhash==3.6.0
422
+ # via langgraph
423
  yarl==1.22.0
424
+ # via aiohttp
425
  zstandard==0.25.0
426
+ # via langsmith
{core → src}/__init__.py RENAMED
File without changes
app.py → src/app.py RENAMED
@@ -1,18 +1,25 @@
1
  import gradio as gr
2
  import time
3
- from pathlib import Path
4
- from core.ingest import ingest
5
- from core.retrieval import generate, retrieval
6
- from core.index import MetaData
7
  import yaml
 
 
 
 
 
 
 
 
 
8
 
9
- # Import evaluation functions
10
- from core.eval import (
 
 
 
11
  run_full_evaluation,
12
  save_results,
13
  generate_summary_report,
14
  setup_test_data,
15
- EVAL_QUERIES
16
  )
17
 
18
 
@@ -183,12 +190,12 @@ def setup_synthetic_data(collections):
183
 
184
  try:
185
  docs_length = setup_test_data(collections)
186
- return f"βœ… Successfully ingested {docs_length} synthetic test data for each: {', '.join(collections)}"
187
  except Exception as e:
188
  return f"❌ Error setting up test data: {str(e)}"
189
 
190
 
191
- def run_evaluation_batch(collections, output_dir):
192
  """Run full batch evaluation"""
193
  if not collections:
194
  return (
@@ -248,11 +255,6 @@ def run_evaluation_batch(collections, output_dir):
248
  f"Error: {str(e)}"
249
  )
250
 
251
- def get_predefined_queries_list():
252
- """Get list of predefined queries for dropdown"""
253
- return [""] + [f"{i}: {q.model_dump()}" for i, q in enumerate(EVAL_QUERIES)]
254
-
255
-
256
  # --- Static choices (not from YAML) ---
257
  LANG_CHOICES = ["en", "ja"]
258
  DOC_TYPE_CHOICES = [None, "policy", "manual", "faq"]
@@ -423,12 +425,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RAG Evaluation System") as demo:
423
  with gr.Tab("πŸ§ͺ Evaluation"):
424
 
425
 
426
- gr.Markdown("""
427
  ### Run Complete Evaluation
428
 
429
  This will:
430
- 1. Initial ingest synthetic test data (60 documents)
431
- 2. Run 15 predefined evaluation queries
432
  3. Generate comprehensive reports (CSV, JSON, Markdown)
433
  4. Compare Base RAG vs Hierarchical RAG
434
  """)
@@ -447,6 +449,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RAG Evaluation System") as demo:
447
  value="reports",
448
  info="Directory where evaluation reports will be saved"
449
  )
 
 
 
 
 
450
 
451
  with gr.Row():
452
  setup_data_btn = gr.Button(
@@ -501,7 +508,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RAG Evaluation System") as demo:
501
  csv_download,
502
  json_download,
503
  eval_summary_md
504
- ]
 
505
  )
506
 
507
  # --- Event Handlers ---
@@ -537,4 +545,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RAG Evaluation System") as demo:
537
 
538
 
539
  if __name__ == "__main__":
540
- demo.launch()
 
1
  import gradio as gr
2
  import time
 
 
 
 
3
  import yaml
4
+ import sys
5
+ from pathlib import Path
6
+ from dataclasses import asdict
7
+
8
+ # Ensure project root is on sys.path when running this module as a script.
9
+ _project_root = Path(__file__).resolve().parents[1]
10
+ if str(_project_root) not in sys.path:
11
+ sys.path.insert(0, str(_project_root))
12
+
13
 
14
+ from src.core.ingest import ingest
15
+ from src.core.retrieval import generate, retrieval
16
+ from src.core.index import MetaData
17
+ from src.core.synthetic_data import EVAL_QUERIES, SYNTHETIC_DOCUMENTS
18
+ from src.core.eval import (
19
  run_full_evaluation,
20
  save_results,
21
  generate_summary_report,
22
  setup_test_data,
 
23
  )
24
 
25
 
 
190
 
191
  try:
192
  docs_length = setup_test_data(collections)
193
+ return f"βœ… Successfully ingested {docs_length} synthetic test data for: {', '.join(collections)}"
194
  except Exception as e:
195
  return f"❌ Error setting up test data: {str(e)}"
196
 
197
 
198
+ def run_evaluation_batch(collections, output_dir, progress=gr.Progress(track_tqdm=True)):
199
  """Run full batch evaluation"""
200
  if not collections:
201
  return (
 
255
  f"Error: {str(e)}"
256
  )
257
 
 
 
 
 
 
258
  # --- Static choices (not from YAML) ---
259
  LANG_CHOICES = ["en", "ja"]
260
  DOC_TYPE_CHOICES = [None, "policy", "manual", "faq"]
 
425
  with gr.Tab("πŸ§ͺ Evaluation"):
426
 
427
 
428
+ gr.Markdown(f"""
429
  ### Run Complete Evaluation
430
 
431
  This will:
432
+ 1. Initial ingest synthetic test data ({sum(len(docs) for docs in SYNTHETIC_DOCUMENTS.values())} documents)
433
+ 2. Run {len(EVAL_QUERIES)} predefined evaluation queries
434
  3. Generate comprehensive reports (CSV, JSON, Markdown)
435
  4. Compare Base RAG vs Hierarchical RAG
436
  """)
 
449
  value="reports",
450
  info="Directory where evaluation reports will be saved"
451
  )
452
+
453
+ with gr.Accordion("SYNTHETIC_DOCUMENTS", open=False):
454
+ gr.JSON(value=SYNTHETIC_DOCUMENTS)
455
+ with gr.Accordion("EVAL_QUERIES", open=False):
456
+ gr.JSON(value=[asdict(q) for q in EVAL_QUERIES])
457
 
458
  with gr.Row():
459
  setup_data_btn = gr.Button(
 
508
  csv_download,
509
  json_download,
510
  eval_summary_md
511
+ ],
512
+ show_progress="full"
513
  )
514
 
515
  # --- Event Handlers ---
 
545
 
546
 
547
  if __name__ == "__main__":
548
+ demo.launch(mcp_server=True)
src/core/__init__.py ADDED
File without changes
{core → src/core}/eval.py RENAMED
@@ -7,6 +7,8 @@ import json
7
  import csv
8
  import time
9
  import uuid
 
 
10
  from pathlib import Path
11
  from typing import List, Dict
12
  from datetime import datetime
@@ -190,27 +192,19 @@ def run_full_evaluation(
190
 
191
  # Filter queries by requested collections
192
  queries_to_eval = [q for q in EVAL_QUERIES if q.collection in collections]
193
-
194
  print(f"\n{'='*70}")
195
  print(f"Starting Evaluation: {len(queries_to_eval)} queries across {len(collections)} collections")
196
  print(f"{'='*70}\n")
197
 
198
- for i, eval_query in enumerate(queries_to_eval, 1):
199
- print(f"[{i}/{len(queries_to_eval)}] Evaluating: {eval_query.description}")
200
- print(f" Collection: {eval_query.collection}")
201
- print(f" Query: {eval_query.query[:60]}...")
202
-
203
  # Evaluate with base RAG
204
- print(" - Running Base RAG...")
205
  base_result = evaluate_single_query(eval_query, "base")
206
  all_results["base"].append(base_result)
207
 
208
  # Evaluate with hierarchical RAG
209
- print(" - Running Hierarchical RAG...")
210
  hier_result = evaluate_single_query(eval_query, "hierarchical")
211
  all_results["hierarchical"].append(hier_result)
212
-
213
- print(f" βœ“ Complete (Base: {base_result.total_latency_ms:.0f}ms, Hier: {hier_result.total_latency_ms:.0f}ms)\n")
214
 
215
  return all_results
216
 
@@ -408,8 +402,10 @@ def generate_summary_report(results: Dict[str, List[EvalResult]], output_dir: st
408
  f.write("## Detailed Query Results\n\n")
409
 
410
  # Sample queries with comparison
411
- for i, (base_r, hier_r) in enumerate(zip(base_results[:5], hier_results[:5]), 1):
412
  f.write(f"### Query {i}: {base_r.query}\n\n")
 
 
413
  f.write(f"**Collection:** {base_r.collection}\n\n")
414
 
415
  f.write("| Aspect | Base RAG | Hierarchical RAG |\n")
@@ -431,7 +427,7 @@ def setup_test_data(collections: List[str] = None):
431
  print("\n" + "="*70)
432
  print("Setting up test data for evaluation")
433
  print("="*70 + "\n")
434
-
435
  for collection_name in collections:
436
  if collection_name not in SYNTHETIC_DOCUMENTS:
437
  print(f"⚠️ No synthetic data available for '{collection_name}', skipping...")
@@ -450,11 +446,11 @@ def setup_test_data(collections: List[str] = None):
450
  vectorstore = get_vectorstore(collection_name)
451
  ids = [str(uuid.uuid4()) for _ in range(len(documents))]
452
  vectorstore.add_documents(documents, ids=ids)
453
-
454
  print(f"βœ“ Completed '{collection_name}' collection")
455
 
456
  print("\n" + "="*70)
457
  print("Test data setup complete!")
458
  print("="*70 + "\n")
459
 
460
- return len(documents)
 
7
  import csv
8
  import time
9
  import uuid
10
+ from tqdm import tqdm
11
+ from random import shuffle
12
  from pathlib import Path
13
  from typing import List, Dict
14
  from datetime import datetime
 
192
 
193
  # Filter queries by requested collections
194
  queries_to_eval = [q for q in EVAL_QUERIES if q.collection in collections]
195
+ shuffle(queries_to_eval)
196
  print(f"\n{'='*70}")
197
  print(f"Starting Evaluation: {len(queries_to_eval)} queries across {len(collections)} collections")
198
  print(f"{'='*70}\n")
199
 
200
+ for eval_query in tqdm(queries_to_eval, desc="Running evaluation queries"):
 
 
 
 
201
  # Evaluate with base RAG
 
202
  base_result = evaluate_single_query(eval_query, "base")
203
  all_results["base"].append(base_result)
204
 
205
  # Evaluate with hierarchical RAG
 
206
  hier_result = evaluate_single_query(eval_query, "hierarchical")
207
  all_results["hierarchical"].append(hier_result)
 
 
208
 
209
  return all_results
210
 
 
402
  f.write("## Detailed Query Results\n\n")
403
 
404
  # Sample queries with comparison
405
+ for i, (base_r, hier_r) in enumerate(zip(base_results[:20], hier_results[:20]), 1):
406
  f.write(f"### Query {i}: {base_r.query}\n\n")
407
+ f.write(f"### Base Response {i}:\n{base_r.generated_answer}\n\n")
408
+ f.write(f"### Hier Response {i}:\n{hier_r.generated_answer}\n\n")
409
  f.write(f"**Collection:** {base_r.collection}\n\n")
410
 
411
  f.write("| Aspect | Base RAG | Hierarchical RAG |\n")
 
427
  print("\n" + "="*70)
428
  print("Setting up test data for evaluation")
429
  print("="*70 + "\n")
430
+ tot_docs = 0
431
  for collection_name in collections:
432
  if collection_name not in SYNTHETIC_DOCUMENTS:
433
  print(f"⚠️ No synthetic data available for '{collection_name}', skipping...")
 
446
  vectorstore = get_vectorstore(collection_name)
447
  ids = [str(uuid.uuid4()) for _ in range(len(documents))]
448
  vectorstore.add_documents(documents, ids=ids)
449
+ tot_docs += len(documents)
450
  print(f"βœ“ Completed '{collection_name}' collection")
451
 
452
  print("\n" + "="*70)
453
  print("Test data setup complete!")
454
  print("="*70 + "\n")
455
 
456
+ return tot_docs
{core → src/core}/index.py RENAMED
@@ -23,7 +23,7 @@ class MetaData(BaseModel):
23
  model = ChatOpenAI(model="gpt-5-nano")
24
  emb_model = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=1536)
25
 
26
- MILVUS_URI = "./rag_task.db"
27
 
28
 
29
  def get_vectorstore(collection_name: str) -> Milvus:
 
23
  model = ChatOpenAI(model="gpt-5-nano")
24
  emb_model = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=1536)
25
 
26
+ MILVUS_URI = "./data/rag_task.db"
27
 
28
 
29
  def get_vectorstore(collection_name: str) -> Milvus:
{core → src/core}/ingest.py RENAMED
@@ -1,4 +1,4 @@
1
- from langchain_community.document_loaders import PDFMinerLoader
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
  from langchain_core.documents import Document
4
  from langchain_openai import ChatOpenAI
@@ -17,7 +17,10 @@ model = ChatOpenAI(model="gpt-5-nano")
17
  def ingest(file_paths: List[str], collection_name: str, metadata: MetaData):
18
  documents: list[Document] = []
19
  for file_path in file_paths:
20
- docs = PDFMinerLoader(file_path).load()
 
 
 
21
  documents.extend(docs)
22
  for doc in docs:
23
  doc.metadata["source"] = file_path.split("/")[-1]
@@ -39,7 +42,6 @@ def ingest(file_paths: List[str], collection_name: str, metadata: MetaData):
39
  "doc_id": doc_id,
40
  "chunk_id": str(uuid.uuid4()),
41
  "source_name": chunk.metadata["source"],
42
- "total_pages": chunk.metadata["total_pages"],
43
  "start_index": chunk.metadata["start_index"],
44
  **metadata.model_dump(),
45
  },
 
1
+ from langchain_community.document_loaders import PDFMinerLoader,TextLoader
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
  from langchain_core.documents import Document
4
  from langchain_openai import ChatOpenAI
 
17
  def ingest(file_paths: List[str], collection_name: str, metadata: MetaData):
18
  documents: list[Document] = []
19
  for file_path in file_paths:
20
+ if file_path.endswith(".txt"):
21
+ docs = TextLoader(file_path, encoding="utf-8").load()
22
+ elif file_path.endswith(".pdf"):
23
+ docs = PDFMinerLoader(file_path).load()
24
  documents.extend(docs)
25
  for doc in docs:
26
  doc.metadata["source"] = file_path.split("/")[-1]
 
42
  "doc_id": doc_id,
43
  "chunk_id": str(uuid.uuid4()),
44
  "source_name": chunk.metadata["source"],
 
45
  "start_index": chunk.metadata["start_index"],
46
  **metadata.model_dump(),
47
  },
src/core/rag.ipynb ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "a57aab57",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI\n",
11
+ "from langchain_openai import ChatOpenAI\n",
12
+ "from langchain_openai.embeddings import OpenAIEmbeddings\n",
13
+ "from langchain_milvus import Milvus, BM25BuiltInFunction\n",
14
+ "from typing import Literal, Optional\n",
15
+ "from pydantic import BaseModel\n",
16
+ "from dotenv import load_dotenv, find_dotenv\n",
17
+ "\n",
18
+ "find_dotenv()\n",
19
+ "load_dotenv()\n",
20
+ "\n",
21
+ "\n",
22
+ "class MetaData(BaseModel):\n",
23
+ " language: Literal[\"ja\", \"en\"]\n",
24
+ " domain: Optional[str] = None\n",
25
+ " section: Optional[str] = None\n",
26
+ " topic: Optional[str] = None\n",
27
+ " doc_type: Optional[Literal[\"policy\", \"manual\", \"faq\"]] = None\n",
28
+ "\n",
29
+ "\n",
30
+ "# model = ChatGoogleGenerativeAI(model=\"models/gemini-2.5-flash-lite\")\n",
31
+ "# emb_model = GoogleGenerativeAIEmbeddings(model=\"models/gemini-embedding-001\", output_dimensionality=1536)\n",
32
+ "model = ChatOpenAI(model=\"gpt-5-nano\")\n",
33
+ "emb_model = OpenAIEmbeddings(model=\"text-embedding-3-small\", dimensions=1536)\n",
34
+ "\n",
35
+ "MILVUS_URI = \"./rag_task.db\"\n",
36
+ "\n",
37
+ "\n",
38
+ "def get_vectorstore(collection_name: str) -> Milvus:\n",
39
+ " vectorstore = Milvus(\n",
40
+ " embedding_function=emb_model,\n",
41
+ " collection_name=collection_name,\n",
42
+ " connection_args={\"uri\": MILVUS_URI},\n",
43
+ " index_params={\"index_type\": \"FLAT\", \"metric_type\": \"L2\"},\n",
44
+ " )\n",
45
+ " # builtin_function=BM25BuiltInFunction(output_field_names=\"sparse\"),\n",
46
+ " # text_field=\"text\",\n",
47
+ " # vector_field=[\"dense\", \"sparse\"],\n",
48
+ " print(f\"vectorstore successfully initialized for {collection_name}\")\n",
49
+ " return vectorstore\n"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 3,
55
+ "id": "db72701e",
56
+ "metadata": {},
57
+ "outputs": [],
58
+ "source": [
59
+ "import re\n",
60
+ "\n",
61
+ "\n",
62
+ "def mask_pii(text: str) -> str:\n",
63
+ " \"\"\"Mask Personally Identifiable Information\"\"\"\n",
64
+ " # Email addresses\n",
65
+ " text = re.sub(r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b', '[EMAIL]', text)\n",
66
+ " \n",
67
+ " # Phone numbers\n",
68
+ " text = re.sub(r'\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b', '[PHONE]', text)\n",
69
+ " \n",
70
+ " # Credit card numbers\n",
71
+ " text = re.sub(r'\\b\\d{4}[- ]?\\d{4}[- ]?\\d{4}[- ]?\\d{4}\\b', '[CREDIT_CARD]', text)\n",
72
+ " \n",
73
+ " # Social Security Numbers\n",
74
+ " text = re.sub(r'\\b\\d{3}-\\d{2}-\\d{4}\\b', '[SSN]', text)\n",
75
+ " \n",
76
+ " return text\n"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 4,
82
+ "id": "f6037cfd",
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "from langchain_community.document_loaders import PDFMinerLoader\n",
87
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
88
+ "from langchain_core.documents import Document\n",
89
+ "from langchain_openai import ChatOpenAI\n",
90
+ "from dotenv import load_dotenv, find_dotenv\n",
91
+ "from typing import List\n",
92
+ "import uuid\n",
93
+ "\n",
94
+ "\n",
95
+ "find_dotenv()\n",
96
+ "load_dotenv()\n",
97
+ "\n",
98
+ "model = ChatOpenAI(model=\"gpt-5-nano\")\n",
99
+ "\n",
100
+ "\n",
101
+ "def ingest(file_paths: List[str], collection_name: str, metadata: MetaData):\n",
102
+ " documents: list[Document] = []\n",
103
+ " for file_path in file_paths:\n",
104
+ " docs = PDFMinerLoader(file_path).load()\n",
105
+ " documents.extend(docs)\n",
106
+ " for doc in docs:\n",
107
+ " doc.metadata[\"source\"] = file_path.split(\"/\")[-1]\n",
108
+ " \n",
109
+ " print(f\"loaded {len(documents)} documents from {len(file_paths)} files.\")\n",
110
+ " text_splitter = RecursiveCharacterTextSplitter(\n",
111
+ " chunk_size=1200, # chunk size (characters)\n",
112
+ " chunk_overlap=200, # chunk overlap (characters)\n",
113
+ " add_start_index=True, # track index in original document\n",
114
+ " )\n",
115
+ " chunks = text_splitter.split_documents(documents)\n",
116
+ " print(f\"generated {len(chunks)} chunks.\")\n",
117
+ "\n",
118
+ " doc_id = str(uuid.uuid4())\n",
119
+ " docs = [\n",
120
+ " Document(\n",
121
+ " page_content=mask_pii(chunk.page_content),\n",
122
+ " metadata={\n",
123
+ " \"doc_id\": doc_id,\n",
124
+ " \"chunk_id\": str(uuid.uuid4()),\n",
125
+ " \"source_name\": chunk.metadata[\"source\"],\n",
126
+ " \"total_pages\": chunk.metadata[\"total_pages\"],\n",
127
+ " \"start_index\": chunk.metadata[\"start_index\"],\n",
128
+ " **metadata.model_dump(),\n",
129
+ " },\n",
130
+ " )\n",
131
+ " for chunk in chunks\n",
132
+ " ]\n",
133
+ "\n",
134
+ " vectorstore = get_vectorstore(collection_name)\n",
135
+ " ids = [str(uuid.uuid4()) for _ in range(len(docs))]\n",
136
+ " vectorstore.add_documents(docs, ids=ids)\n",
137
+ " success_message = f\"Ingested {len(docs)} documents into {collection_name} index.\"\n",
138
+ " print(success_message)\n",
139
+ " return success_message\n"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 5,
145
+ "id": "92a1751f",
146
+ "metadata": {},
147
+ "outputs": [],
148
+ "source": [
149
+ "from langchain_core.documents import Document\n",
150
+ "from langchain_openai import ChatOpenAI\n",
151
+ "from langchain_community.retrievers import BM25Retriever\n",
152
+ "from dotenv import load_dotenv, find_dotenv\n",
153
+ "from typing import List\n",
154
+ "\n",
155
+ "find_dotenv()\n",
156
+ "load_dotenv()\n",
157
+ "\n",
158
+ "model = ChatOpenAI(model=\"gpt-5-nano\")\n",
159
+ "\n",
160
+ "\n",
161
+ "def reranker(query: str, docs: List[Document]) -> List[Document]:\n",
162
+ " print(f\"Retrieved {len(docs)} documents\")\n",
163
+ " retriever = BM25Retriever.from_documents(docs)\n",
164
+ " result = retriever.invoke(query)\n",
165
+ " print(\"RERANKER Result: \", len(result), result[0])\n",
166
+ " return result\n",
167
+ "\n",
168
+ "\n",
169
+ "def retrieval(\n",
170
+ " query: str, collection_name: str, filter_data: MetaData\n",
171
+ ") -> List[tuple[Document, float]]:\n",
172
+ " vectorstore = get_vectorstore(collection_name)\n",
173
+ " print(\n",
174
+ " f\"RETRIEVAL query: {query[:40]}, for {collection_name} collection, with filters: {filter_data}\"\n",
175
+ " )\n",
176
+ "\n",
177
+ " filters = [f'language == \"{filter_data.language}\"']\n",
178
+ " if filter_data.doc_type:\n",
179
+ " filters.append(f'doc_type == \"{filter_data.doc_type}\"')\n",
180
+ " if filter_data.domain:\n",
181
+ " filters.append(f'domain == \"{filter_data.domain}\"')\n",
182
+ " if filter_data.section:\n",
183
+ " filters.append(f'section == \"{filter_data.section}\"')\n",
184
+ " if filter_data.topic:\n",
185
+ " filters.append(f'topic == \"{filter_data.topic}\"')\n",
186
+ "\n",
187
+ " expr = \" and \".join(filters) if filters else None\n",
188
+ " try:\n",
189
+ " results = vectorstore.similarity_search_with_relevance_scores(\n",
190
+ " query, k=5, expr=expr\n",
191
+ " )\n",
192
+ " except ValueError as e:\n",
193
+ " print(f\"Error in retrieval: {str(e)}\")\n",
194
+ " return []\n",
195
+ " docs = []\n",
196
+ " for doc, score in results:\n",
197
+ " doc.metadata[\"similarity_score\"] = score\n",
198
+ " docs.append(doc)\n",
199
+ " # docs = reranker(query, docs)\n",
200
+ " print(\"RETRIEVED DOCS: \", len(docs))\n",
201
+ " return docs\n",
202
+ "\n",
203
+ "\n",
204
+ "def generate(query: str, ctx_docs: List[Document]) -> str:\n",
205
+ " context = \"\\n\".join([doc.page_content for doc in ctx_docs])\n",
206
+ " prompt = f\"\"\"Answer shortly to the user question according to the given context. Only answer if the context is given to you.\n",
207
+ " question: {query}\n",
208
+ " context: {context}\n",
209
+ "\"\"\"\n",
210
+ " output = model.invoke(prompt)\n",
211
+ " return output.content\n"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": null,
217
+ "id": "4fb1e93f",
218
+ "metadata": {},
219
+ "outputs": [],
220
+ "source": []
221
+ }
222
+ ],
223
+ "metadata": {
224
+ "kernelspec": {
225
+ "display_name": "hier-rag",
226
+ "language": "python",
227
+ "name": "python3"
228
+ },
229
+ "language_info": {
230
+ "codemirror_mode": {
231
+ "name": "ipython",
232
+ "version": 3
233
+ },
234
+ "file_extension": ".py",
235
+ "mimetype": "text/x-python",
236
+ "name": "python",
237
+ "nbconvert_exporter": "python",
238
+ "pygments_lexer": "ipython3",
239
+ "version": "3.13.3"
240
+ }
241
+ },
242
+ "nbformat": 4,
243
+ "nbformat_minor": 5
244
+ }
{core → src/core}/retrieval.py RENAMED
@@ -13,10 +13,12 @@ model = ChatOpenAI(model="gpt-5-nano")
13
 
14
  def reranker(query: str, docs: List[Document]) -> List[Document]:
15
  print(f"Retrieved {len(docs)} documents")
 
 
16
  retriever = BM25Retriever.from_documents(docs)
17
- result = retriever.invoke(query)
18
- print("RERANKER Result: ", len(result), result[0])
19
- return result
20
 
21
 
22
  def retrieval(
 
13
 
14
  def reranker(query: str, docs: List[Document]) -> List[Document]:
15
  print(f"Retrieved {len(docs)} documents")
16
+ if len(docs) <= 1:
17
+ return docs
18
  retriever = BM25Retriever.from_documents(docs)
19
+ docs = retriever.invoke(query)
20
+ print("RERANKER Result: ", len(docs))
21
+ return docs
22
 
23
 
24
  def retrieval(
{core → src/core}/synthetic_data.py RENAMED
The diff for this file is too large to render. See raw diff
 
{core → src/core}/utils.py RENAMED
@@ -7,7 +7,7 @@ def mask_pii(text: str) -> str:
7
  text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text)
8
 
9
  # Phone numbers
10
- text = re.sub(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', '[PHONE]', text)
11
 
12
  # Credit card numbers
13
  text = re.sub(r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', '[CREDIT_CARD]', text)
 
7
  text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text)
8
 
9
  # Phone numbers
10
+ text = re.sub(r'\b(?:\d{3}[-.]?\d{4}|\d{3}[-.]?\d{3}[-.]?\d{4})\b', '[PHONE]', text)
11
 
12
  # Credit card numbers
13
  text = re.sub(r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', '[CREDIT_CARD]', text)