akryldigital committed on
Commit
27e8dcc
·
verified ·
1 Parent(s): fa33a8f
Files changed (1) hide show
  1. app.py +235 -193
app.py CHANGED
@@ -3,6 +3,11 @@ Intelligent Audit Report Chatbot UI
3
  """
4
 
5
  import os
 
 
 
 
 
6
 
7
  import time
8
  import json
@@ -21,9 +26,21 @@ import plotly.express as px
21
  from langchain_core.messages import HumanMessage, AIMessage
22
 
23
 
24
- from src.agents import get_multi_agent_chatbot, get_smart_chatbot, get_gemini_chatbot
 
 
 
 
 
 
25
  from src.feedback import FeedbackManager
26
- from src.ui_components import get_custom_css, display_chunk_statistics_charts, display_chunk_statistics_table, extract_chunk_statistics
 
 
 
 
 
 
27
 
28
  from src.config.paths import (
29
  IS_DEPLOYED,
@@ -83,10 +100,11 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
83
  logger = logging.getLogger(__name__)
84
 
85
  # Log environment setup for debugging
86
- logger.info(f"📁 PROJECT_DIR: {PROJECT_DIR}")
87
- logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
88
- logger.info(f"🔧 OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
89
- logger.info(f"📁 HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
 
90
 
91
 
92
  # Page config
@@ -98,21 +116,22 @@ st.set_page_config(
98
  )
99
 
100
 
 
101
  import torch, sys
102
- try:
103
- cuda_ = torch.cuda.is_available()
104
- print("CUDA:", cuda_)
105
- if cuda_:
106
- if "gpu_check" not in st.session_state:
107
- st.write(f"Device: {torch.cuda.get_device_name(0)}")
108
- print("Device:", torch.cuda.get_device_name(0))
109
- except Exception as e:
110
- if "gpu_check" not in st.session_state:
111
- st.write(f"GPU check skipped: {e.__str__()}")
112
- traceback.print_exc()
113
- print("GPU check skipped:", e, file=sys.stderr)
114
- finally:
115
- st.session_state.gpu_check = True
116
 
117
 
118
  st.markdown(get_custom_css(), unsafe_allow_html=True)
@@ -130,6 +149,9 @@ def get_chatbot(version: str = "v1"):
130
  """Initialize and return the chatbot based on version"""
131
  if version == "beta":
132
  return get_gemini_chatbot()
 
 
 
133
  else:
134
  # Check environment variable for system type (v1)
135
  system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
@@ -209,7 +231,7 @@ def main():
209
  # Track RAG retrieval history for feedback
210
  if 'rag_retrieval_history' not in st.session_state:
211
  st.session_state.rag_retrieval_history = []
212
- # Version selection (v1 or beta)
213
  if 'chatbot_version' not in st.session_state:
214
  st.session_state.chatbot_version = "v1"
215
 
@@ -226,7 +248,9 @@ def main():
226
  try:
227
  # Different spinner messages for different versions
228
  if st.session_state.chatbot_version == "beta":
229
- spinner_msg = "🔄 Initializing Gemini FSA"
 
 
230
  else:
231
  spinner_msg = "🔄 Loading AI models and connecting to database..."
232
 
@@ -237,9 +261,14 @@ def main():
237
  print("✅ AI system ready!")
238
  except Exception as e:
239
  st.error(f"❌ Failed to initialize chatbot: {str(e)}")
240
- # Only show Gemini-specific error message for beta version
241
  if st.session_state.chatbot_version == "beta":
242
  st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
 
 
 
 
 
243
  else:
244
  st.error("Please check your configuration and ensure all required models and databases are accessible.")
245
  # Reset to v1 to prevent infinite loop
@@ -271,11 +300,11 @@ def main():
271
  st.markdown("<br>", unsafe_allow_html=True) # Add some spacing
272
  selected_version = st.radio(
273
  "**Version:**",
274
- options=["v1", "beta"],
275
- index=0 if st.session_state.chatbot_version == "v1" else 1,
276
  horizontal=True,
277
  key="version_selector",
278
- help="Select v1 (default RAG system) or beta (Gemini FSA)"
279
  )
280
 
281
  # Update version if changed
@@ -299,6 +328,8 @@ def main():
299
  # Show version info
300
  if st.session_state.chatbot_version == "beta":
301
  st.info("🔬 **Beta Mode**: Using Google Gemini FSA")
 
 
302
 
303
  # Session info
304
  duration = int(time.time() - st.session_state.session_start_time)
@@ -315,7 +346,7 @@ def main():
315
  # Sidebar for filters
316
  with st.sidebar:
317
  # Instructions section (collapsible)
318
- with st.expander("📖 How to Use", expanded=False):
319
  st.markdown("""
320
  #### 🎯 Using Filters
321
 
@@ -342,74 +373,73 @@ def main():
342
  For more detailed help, see the example questions at the bottom of the page.
343
  """)
344
 
345
- st.markdown("### 🔍 Search Filters")
346
- st.markdown("Select filters to narrow down your search. Leave empty to search all data.")
347
-
348
- st.markdown('<div class="filter-section">', unsafe_allow_html=True)
349
- st.markdown('<div class="filter-title">📄 Specific Reports (Filename Filter)</div>', unsafe_allow_html=True)
350
- st.markdown('<p style="font-size: 0.85em; color: #666;">⚠️ Selecting specific reports will ignore all other filters</p>', unsafe_allow_html=True)
351
- selected_filenames = st.multiselect(
352
- "Select specific reports:",
353
- options=filter_options.get('filenames', []),
354
- default=st.session_state.active_filters.get('filenames', []),
355
- key="filenames_filter",
356
- help="Choose specific reports to search. When enabled, all other filters are ignored."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  )
358
- st.markdown('</div>', unsafe_allow_html=True)
359
-
360
- # Determine if filename filter is active
361
- filename_mode = len(selected_filenames) > 0
362
- # Sources filter
363
- # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
364
- st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
365
- selected_sources = st.multiselect(
366
- "Select sources:",
367
- options=filter_options['sources'],
368
- default=st.session_state.active_filters['sources'],
369
- disabled = filename_mode,
370
- key="sources_filter",
371
- help="Choose which types of reports to search"
372
- )
373
- st.markdown('</div>', unsafe_allow_html=True)
374
-
375
- # Years filter
376
- # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
377
- st.markdown('<div class="filter-title">📅 Years</div>', unsafe_allow_html=True)
378
- selected_years = st.multiselect(
379
- "Select years:",
380
- options=filter_options['years'],
381
- default=st.session_state.active_filters['years'],
382
- disabled = filename_mode,
383
- key="years_filter",
384
- help="Choose which years to search"
385
- )
386
- st.markdown('</div>', unsafe_allow_html=True)
387
-
388
- # Districts filter
389
- # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
390
- st.markdown('<div class="filter-title">🏘️ Districts</div>', unsafe_allow_html=True)
391
- selected_districts = st.multiselect(
392
- "Select districts:",
393
- options=filter_options['districts'],
394
- default=st.session_state.active_filters['districts'],
395
- disabled = filename_mode,
396
- key="districts_filter",
397
- help="Choose which districts to search"
398
- )
399
- st.markdown('</div>', unsafe_allow_html=True)
400
 
401
- # Update active filters
402
  st.session_state.active_filters = {
403
  'sources': selected_sources if not filename_mode else [],
404
  'years': selected_years if not filename_mode else [],
405
  'districts': selected_districts if not filename_mode else [],
406
  'filenames': selected_filenames
407
  }
408
-
409
- # Clear filters button
410
- if st.button("🗑️ Clear All Filters", key="clear_filters_button"):
411
- st.session_state.active_filters = {'sources': [], 'years': [], 'districts': [], 'filenames': []}
412
- st.rerun()
413
 
414
  # Main content area with tabs
415
  tab1, tab2 = st.tabs(["💬 Chat", "📄 Retrieved Documents"])
@@ -593,7 +623,7 @@ def main():
593
  # PipelineResult object format
594
  sources = rag_result.sources
595
  elif isinstance(rag_result, dict) and 'sources' in rag_result:
596
- # Dictionary format from multi-agent system
597
  sources = rag_result['sources']
598
 
599
  # For Gemini, also check if we need to format sources from gemini_result
@@ -606,70 +636,88 @@ def main():
606
  elif hasattr(st.session_state.chatbot, '_format_gemini_sources'):
607
  sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
608
 
 
 
609
  if sources and len(sources) > 0:
610
- # Count unique filenames
611
- unique_filenames = set()
612
- for doc in sources:
613
- filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
614
- unique_filenames.add(filename)
615
-
616
- st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
617
- if len(unique_filenames) < len(sources):
618
- st.info(f"💡 **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
619
-
620
- # Extract and display statistics
621
- stats = extract_chunk_statistics(sources)
622
 
623
- # Show charts for 10+ results, tables for fewer
624
- if len(sources) >= 10:
625
- display_chunk_statistics_charts(stats, "Retrieval Statistics")
626
- # Also show tables below charts for detailed view
627
- st.markdown("---")
628
- display_chunk_statistics_table(stats, "Retrieval Distribution")
629
  else:
630
- display_chunk_statistics_table(stats, "Retrieval Distribution")
631
-
632
- st.markdown("---")
633
- st.markdown("### 📄 Document Details")
634
-
635
- for i, doc in enumerate(sources): # Show all documents
636
- # Get relevance score and ID if available
637
- metadata = getattr(doc, 'metadata', {})
638
- # Handle both standard RAG scores and Gemini scores
639
- score = metadata.get('reranked_score') or metadata.get('original_score') or metadata.get('score')
640
- chunk_id = metadata.get('_id') or metadata.get('chunk_id', 'Unknown')
641
- if score is not None:
642
- try:
643
- score_text = f" (Score: {float(score):.3f})"
644
- except (ValueError, TypeError):
645
- score_text = ""
 
 
 
 
646
  else:
647
- score_text = ""
648
- if chunk_id and chunk_id != 'Unknown':
649
- score_text += f" (ID: {str(chunk_id)[:8]}...)" if score_text else f" (ID: {str(chunk_id)[:8]}...)"
 
650
 
651
- with st.expander(f"📄 Document {i+1}: {getattr(doc, 'metadata', {}).get('filename', 'Unknown')[:50]}...{score_text}"):
652
- # Display document metadata with emojis
653
  metadata = getattr(doc, 'metadata', {})
654
- col1, col2, col3, col4 = st.columns([2, 1.5, 1, 1])
655
-
656
- with col1:
657
- st.write(f"📄 **File:** {metadata.get('filename', 'Unknown')}")
658
- with col2:
659
- st.write(f"🏛️ **Source:** {metadata.get('source', 'Unknown')}")
660
- with col3:
661
- st.write(f"📅 **Year:** {metadata.get('year', 'Unknown')}")
662
- with col4:
663
- # Display page number and chunk ID
664
- page = metadata.get('page_label', metadata.get('page', 'Unknown'))
665
- chunk_id = metadata.get('_id', 'Unknown')
666
- st.write(f"📖 **Page:** {page}")
667
- st.write(f"🆔 **ID:** {chunk_id}")
668
 
669
- # Display full content (no truncation)
670
- content = getattr(doc, 'page_content', 'No content available')
671
- st.write(f"**Full Content:**")
672
- st.text_area("Full Content", value=content, height=300, disabled=True, label_visibility="collapsed", key=f"preview_{i}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
  else:
674
  st.info("No documents were retrieved for the last query.")
675
  else:
@@ -1016,10 +1064,8 @@ def main():
1016
  if idx < len(st.session_state.rag_retrieval_history):
1017
  st.markdown("---")
1018
 
1019
- # Example Questions Section
1020
  st.markdown("---")
1021
- st.markdown("### 💡 Example Questions")
1022
- st.markdown("Click on any question below to use it, or modify the editable examples:")
1023
 
1024
  # Initialize example question state
1025
  if 'custom_question_1' not in st.session_state:
@@ -1027,35 +1073,41 @@ def main():
1027
  if 'custom_question_2' not in st.session_state:
1028
  st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
1029
 
1030
- # Question 1: Filename insights (fixed, clickable)
1031
- st.markdown("#### 📄 Question 1: List insights from a specific file")
1032
- col1, col2 = st.columns([3, 1])
1033
- with col1:
 
 
 
 
1034
  example_q1 = "List couple of insights from the filename."
1035
- st.markdown(f"**Example:** `{example_q1}`")
1036
- st.info("💡 **Filter to apply:** Select a Filename from the sidebar panel before asking this question.")
1037
- with col2:
1038
- if st.button("📋 Use This Question", key="use_example_1", use_container_width=True):
1039
- st.session_state.pending_question = example_q1
1040
- st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
1041
- st.rerun()
 
 
1042
 
1043
  st.markdown("---")
1044
 
1045
- # Questions 2 & 3: Editable examples
1046
- st.markdown("#### ✏️ Customizable Questions (Edit and use)")
 
1047
 
1048
- # Question 2
1049
- # st.markdown("**Question 2:**")
1050
- custom_q1 = st.text_area(
1051
- "Edit question 2:",
1052
- value=st.session_state.custom_question_1,
1053
- height=80,
1054
- key="edit_question_2",
1055
- help="Modify this question to fit your needs, then click 'Use This Question'"
1056
- )
1057
- col1, col2 = st.columns([1, 4])
1058
- with col1:
1059
  if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
1060
  if custom_q1.strip():
1061
  st.session_state.pending_question = custom_q1.strip()
@@ -1064,24 +1116,17 @@ def main():
1064
  st.rerun()
1065
  else:
1066
  st.warning("Please enter a question first!")
1067
- with col2:
1068
- st.caption("💡 Tip: Add specific details like dates, names, or amounts to get more precise answers")
1069
 
1070
- st.info("💡 **Filter to apply:** Select District(s) and Year(s) sidebar panel before asking this question.")
1071
-
1072
- st.markdown("---")
1073
-
1074
- # Question 3
1075
- # st.markdown("**Question 3:**")
1076
- custom_q2 = st.text_area(
1077
- "Edit question 3:",
1078
- value=st.session_state.custom_question_2,
1079
- height=80,
1080
- key="edit_question_3",
1081
- help="Modify this question to fit your needs, then click 'Use This Question'"
1082
- )
1083
- col1, col2 = st.columns([1, 4])
1084
- with col1:
1085
  if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
1086
  if custom_q2.strip():
1087
  st.session_state.pending_question = custom_q2.strip()
@@ -1090,8 +1135,6 @@ def main():
1090
  st.rerun()
1091
  else:
1092
  st.warning("Please enter a question first!")
1093
- with col2:
1094
- st.caption("💡 Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
1095
 
1096
 
1097
  # Store selected question for next render (handled in input section above)
@@ -1132,5 +1175,4 @@ if __name__ == "__main__":
1132
  print("=" * 80)
1133
  import sys
1134
  sys.exit(1)
1135
-
1136
- main()
 
3
  """
4
 
5
  import os
6
+ import warnings
7
+
8
+ # Silence Streamlit deprecation warnings (use_column_width -> use_container_width)
9
+ warnings.filterwarnings("ignore", message=".*use_column_width.*")
10
+ warnings.filterwarnings("ignore", category=DeprecationWarning, module="streamlit")
11
 
12
  import time
13
  import json
 
26
  from langchain_core.messages import HumanMessage, AIMessage
27
 
28
 
29
+ from src.agents import (
30
+ get_multi_agent_chatbot,
31
+ get_smart_chatbot,
32
+ get_gemini_chatbot,
33
+ get_visual_chatbot,
34
+ get_visual_multi_agent_chatbot
35
+ )
36
  from src.feedback import FeedbackManager
37
+ from src.ui_components import (
38
+ get_custom_css,
39
+ display_chunk_statistics_charts,
40
+ display_chunk_statistics_table,
41
+ extract_chunk_statistics,
42
+ display_visual_search_results
43
+ )
44
 
45
  from src.config.paths import (
46
  IS_DEPLOYED,
 
100
  logger = logging.getLogger(__name__)
101
 
102
  # Log environment setup for debugging
103
+ # Informational logs (commented out to reduce noise)
104
+ # logger.info(f"📁 PROJECT_DIR: {PROJECT_DIR}")
105
+ # logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
106
+ # logger.info(f"🔧 OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
107
+ # logger.info(f"📁 HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
108
 
109
 
110
  # Page config
 
116
  )
117
 
118
 
119
+ # GPU check - only log once at startup
120
  import torch, sys
121
+ if "gpu_check" not in st.session_state:
122
+ try:
123
+ cuda_ = torch.cuda.is_available()
124
+ mps_ = torch.backends.mps.is_available() if hasattr(torch.backends, 'mps') else False
125
+ if cuda_:
126
+ print(f"🎮 CUDA available: {torch.cuda.get_device_name(0)}")
127
+ elif mps_:
128
+ print("🍎 MPS (Apple Silicon) available")
129
+ else:
130
+ print("💻 CPU only (no GPU acceleration)")
131
+ except Exception as e:
132
+ print(f"⚠️ GPU check error: {e}", file=sys.stderr)
133
+ finally:
134
+ st.session_state.gpu_check = True
135
 
136
 
137
  st.markdown(get_custom_css(), unsafe_allow_html=True)
 
149
  """Initialize and return the chatbot based on version"""
150
  if version == "beta":
151
  return get_gemini_chatbot()
152
+ elif version == "visual":
153
+ # Use multi-agent architecture for visual mode (same sophisticated logic as v1)
154
+ return get_visual_multi_agent_chatbot()
155
  else:
156
  # Check environment variable for system type (v1)
157
  system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
 
231
  # Track RAG retrieval history for feedback
232
  if 'rag_retrieval_history' not in st.session_state:
233
  st.session_state.rag_retrieval_history = []
234
+ # Version selection (v1, beta, or visual)
235
  if 'chatbot_version' not in st.session_state:
236
  st.session_state.chatbot_version = "v1"
237
 
 
248
  try:
249
  # Different spinner messages for different versions
250
  if st.session_state.chatbot_version == "beta":
251
+ spinner_msg = "🔄 Initializing Gemini FSA..."
252
+ elif st.session_state.chatbot_version == "visual":
253
+ spinner_msg = "🎨 Initializing Visual Search ... This may take 20-30 seconds..."
254
  else:
255
  spinner_msg = "🔄 Loading AI models and connecting to database..."
256
 
 
261
  print("✅ AI system ready!")
262
  except Exception as e:
263
  st.error(f"❌ Failed to initialize chatbot: {str(e)}")
264
+ # Show version-specific error messages
265
  if st.session_state.chatbot_version == "beta":
266
  st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
267
+ elif st.session_state.chatbot_version == "visual":
268
+ st.error("Please check your environment variables (QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY for visual)")
269
+ with st.expander("🐛 Debug Info"):
270
+ import traceback
271
+ st.code(traceback.format_exc())
272
  else:
273
  st.error("Please check your configuration and ensure all required models and databases are accessible.")
274
  # Reset to v1 to prevent infinite loop
 
300
  st.markdown("<br>", unsafe_allow_html=True) # Add some spacing
301
  selected_version = st.radio(
302
  "**Version:**",
303
+ options=["v1", "visual", "beta"],
304
+ index=0 if st.session_state.chatbot_version == "v1" else (1 if st.session_state.chatbot_version == "visual" else 2),
305
  horizontal=True,
306
  key="version_selector",
307
+ help="Select v1 (default RAG), visual (ColPali visual search), or beta (Gemini FSA)"
308
  )
309
 
310
  # Update version if changed
 
328
  # Show version info
329
  if st.session_state.chatbot_version == "beta":
330
  st.info("🔬 **Beta Mode**: Using Google Gemini FSA")
331
+ elif st.session_state.chatbot_version == "visual":
332
+ st.info("🎨 **Visual Mode**: Using Visual Search (Multi-Modal Embeddings)")
333
 
334
  # Session info
335
  duration = int(time.time() - st.session_state.session_start_time)
 
346
  # Sidebar for filters
347
  with st.sidebar:
348
  # Instructions section (collapsible)
349
+ with st.expander("📖 How to Use", expanded=True):
350
  st.markdown("""
351
  #### 🎯 Using Filters
352
 
 
373
  For more detailed help, see the example questions at the bottom of the page.
374
  """)
375
 
376
+ # Filters in a collapsed expander by default
377
+ with st.expander("🔍 Search Filters", expanded=False):
378
+ st.caption("Select filters to narrow down your search. Leave empty to search all data.")
379
+
380
+ st.markdown('<div class="filter-section">', unsafe_allow_html=True)
381
+ st.markdown('<div class="filter-title">📄 Specific Reports (Filename Filter)</div>', unsafe_allow_html=True)
382
+ st.markdown('<p style="font-size: 0.85em; color: #666;">⚠️ Selecting specific reports will ignore all other filters</p>', unsafe_allow_html=True)
383
+ selected_filenames = st.multiselect(
384
+ "Select specific reports:",
385
+ options=filter_options.get('filenames', []),
386
+ default=st.session_state.active_filters.get('filenames', []),
387
+ key="filenames_filter",
388
+ help="Choose specific reports to search. When enabled, all other filters are ignored."
389
+ )
390
+ st.markdown('</div>', unsafe_allow_html=True)
391
+
392
+ # Determine if filename filter is active
393
+ filename_mode = len(selected_filenames) > 0
394
+
395
+ # Sources filter
396
+ st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
397
+ selected_sources = st.multiselect(
398
+ "Select sources:",
399
+ options=filter_options['sources'],
400
+ default=st.session_state.active_filters['sources'],
401
+ disabled = filename_mode,
402
+ key="sources_filter",
403
+ help="Choose which types of reports to search"
404
  )
405
+ st.markdown('</div>', unsafe_allow_html=True)
406
+
407
+ # Years filter
408
+ st.markdown('<div class="filter-title">📅 Years</div>', unsafe_allow_html=True)
409
+ selected_years = st.multiselect(
410
+ "Select years:",
411
+ options=filter_options['years'],
412
+ default=st.session_state.active_filters['years'],
413
+ disabled = filename_mode,
414
+ key="years_filter",
415
+ help="Choose which years to search"
416
+ )
417
+ st.markdown('</div>', unsafe_allow_html=True)
418
+
419
+ # Districts filter
420
+ st.markdown('<div class="filter-title">🏘️ Districts</div>', unsafe_allow_html=True)
421
+ selected_districts = st.multiselect(
422
+ "Select districts:",
423
+ options=filter_options['districts'],
424
+ default=st.session_state.active_filters['districts'],
425
+ disabled = filename_mode,
426
+ key="districts_filter",
427
+ help="Choose which districts to search"
428
+ )
429
+ st.markdown('</div>', unsafe_allow_html=True)
430
+
431
+ # Clear filters button
432
+ if st.button("🗑️ Clear All Filters", key="clear_filters_button"):
433
+ st.session_state.active_filters = {'sources': [], 'years': [], 'districts': [], 'filenames': []}
434
+ st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
435
 
436
+ # Update active filters (outside expander so it always runs)
437
  st.session_state.active_filters = {
438
  'sources': selected_sources if not filename_mode else [],
439
  'years': selected_years if not filename_mode else [],
440
  'districts': selected_districts if not filename_mode else [],
441
  'filenames': selected_filenames
442
  }
 
 
 
 
 
443
 
444
  # Main content area with tabs
445
  tab1, tab2 = st.tabs(["💬 Chat", "📄 Retrieved Documents"])
 
623
  # PipelineResult object format
624
  sources = rag_result.sources
625
  elif isinstance(rag_result, dict) and 'sources' in rag_result:
626
+ # Dictionary format from multi-agent system or visual search
627
  sources = rag_result['sources']
628
 
629
  # For Gemini, also check if we need to format sources from gemini_result
 
636
  elif hasattr(st.session_state.chatbot, '_format_gemini_sources'):
637
  sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
638
 
639
+ # Check if this is visual search results (has visual metadata)
640
+ is_visual_search = False
641
  if sources and len(sources) > 0:
642
+ first_doc_metadata = getattr(sources[0], 'metadata', {})
643
+ is_visual_search = 'num_tiles' in first_doc_metadata or 'num_visual_tokens' in first_doc_metadata
644
+
645
+ if sources and len(sources) > 0:
646
+ # Use visual display for visual search results
647
+ if is_visual_search and st.session_state.chatbot_version == "visual":
648
+ st.markdown("### 🎨 Visual Search Results")
 
 
 
 
 
649
 
650
+ display_visual_search_results(
651
+ sources=sources,
652
+ show_statistics=True,
653
+ show_images=True, # Show Cloudinary images
654
+ max_display=20
655
+ )
656
  else:
657
+ # Standard display for v1/beta results
658
+ # Count unique filenames
659
+ unique_filenames = set()
660
+ for doc in sources:
661
+ filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
662
+ unique_filenames.add(filename)
663
+
664
+ st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
665
+ if len(unique_filenames) < len(sources):
666
+ st.info(f"💡 **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
667
+
668
+ # Extract and display statistics
669
+ stats = extract_chunk_statistics(sources)
670
+
671
+ # Show charts for 10+ results, tables for fewer
672
+ if len(sources) >= 10:
673
+ display_chunk_statistics_charts(stats, "Retrieval Statistics")
674
+ # Also show tables below charts for detailed view
675
+ st.markdown("---")
676
+ display_chunk_statistics_table(stats, "Retrieval Distribution")
677
  else:
678
+ display_chunk_statistics_table(stats, "Retrieval Distribution")
679
+
680
+ st.markdown("---")
681
+ st.markdown("### 📄 Document Details")
682
 
683
+ for i, doc in enumerate(sources): # Show all documents
684
+ # Get relevance score and ID if available
685
  metadata = getattr(doc, 'metadata', {})
686
+ # Handle both standard RAG scores and Gemini scores
687
+ score = metadata.get('reranked_score') or metadata.get('original_score') or metadata.get('score')
688
+ chunk_id = metadata.get('_id') or metadata.get('chunk_id', 'Unknown')
689
+ if score is not None:
690
+ try:
691
+ score_text = f" (Score: {float(score):.3f})"
692
+ except (ValueError, TypeError):
693
+ score_text = ""
694
+ else:
695
+ score_text = ""
696
+ if chunk_id and chunk_id != 'Unknown':
697
+ score_text += f" (ID: {str(chunk_id)[:8]}...)" if score_text else f" (ID: {str(chunk_id)[:8]}...)"
 
 
698
 
699
+ with st.expander(f"📄 Document {i+1}: {getattr(doc, 'metadata', {}).get('filename', 'Unknown')[:50]}...{score_text}"):
700
+ # Display document metadata with emojis
701
+ metadata = getattr(doc, 'metadata', {})
702
+ col1, col2, col3, col4 = st.columns([2, 1.5, 1, 1])
703
+
704
+ with col1:
705
+ st.write(f"📄 **File:** {metadata.get('filename', 'Unknown')}")
706
+ with col2:
707
+ st.write(f"🏛️ **Source:** {metadata.get('source', 'Unknown')}")
708
+ with col3:
709
+ st.write(f"📅 **Year:** {metadata.get('year', 'Unknown')}")
710
+ with col4:
711
+ # Display page number and chunk ID
712
+ page = metadata.get('page_label', metadata.get('page', 'Unknown'))
713
+ chunk_id = metadata.get('_id', 'Unknown')
714
+ st.write(f"📖 **Page:** {page}")
715
+ st.write(f"🆔 **ID:** {chunk_id}")
716
+
717
+ # Display full content (no truncation)
718
+ content = getattr(doc, 'page_content', 'No content available')
719
+ st.write(f"**Full Content:**")
720
+ st.text_area("Full Content", value=content, height=300, disabled=True, label_visibility="collapsed", key=f"preview_{i}")
721
  else:
722
  st.info("No documents were retrieved for the last query.")
723
  else:
 
1064
  if idx < len(st.session_state.rag_retrieval_history):
1065
  st.markdown("---")
1066
 
1067
+ # Example Questions Section - Compact layout
1068
  st.markdown("---")
 
 
1069
 
1070
  # Initialize example question state
1071
  if 'custom_question_1' not in st.session_state:
 
1073
  if 'custom_question_2' not in st.session_state:
1074
  st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
1075
 
1076
+ # Row 1: Header on left, Question 1 (file insights) on right
1077
+ header_col, q1_col = st.columns([1, 2])
1078
+
1079
+ with header_col:
1080
+ st.markdown("### 💡 Example Questions")
1081
+ st.caption(" Click **Use ...** or edit")
1082
+
1083
+ with q1_col:
1084
  example_q1 = "List couple of insights from the filename."
1085
+ st.markdown("**📄 File Insights** _(select a file first)_")
1086
+ q1_inner1, q1_inner2 = st.columns([3, 1])
1087
+ with q1_inner1:
1088
+ st.code(example_q1, language=None)
1089
+ with q1_inner2:
1090
+ if st.button("📋 Use question !", key="use_example_1", use_container_width=True):
1091
+ st.session_state.pending_question = example_q1
1092
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
1093
+ st.rerun()
1094
 
1095
  st.markdown("---")
1096
 
1097
+ # Row 2: Questions 2 & 3 side by side
1098
+ st.markdown("#### ✏️ Customizable Questions")
1099
+ q_col1, q_col2 = st.columns(2)
1100
 
1101
+ # Question 2 - Left column (will trigger follow-up)
1102
+ with q_col1:
1103
+ st.caption("🔄 _This question will trigger follow-up prompts for year/district_")
1104
+ custom_q1 = st.text_area(
1105
+ "Question 2:",
1106
+ value=st.session_state.custom_question_1,
1107
+ height=100,
1108
+ key="edit_question_2",
1109
+ help="Modify this question to fit your needs"
1110
+ )
 
1111
  if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
1112
  if custom_q1.strip():
1113
  st.session_state.pending_question = custom_q1.strip()
 
1116
  st.rerun()
1117
  else:
1118
  st.warning("Please enter a question first!")
 
 
1119
 
1120
+ # Question 3 - Right column (has all info, no follow-up)
1121
+ with q_col2:
1122
+ st.caption("✅ _Complete question - has year & context, no follow-up needed_")
1123
+ custom_q2 = st.text_area(
1124
+ "Question 3:",
1125
+ value=st.session_state.custom_question_2,
1126
+ height=100,
1127
+ key="edit_question_3",
1128
+ help="Modify this question to fit your needs"
1129
+ )
 
 
 
 
 
1130
  if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
1131
  if custom_q2.strip():
1132
  st.session_state.pending_question = custom_q2.strip()
 
1135
  st.rerun()
1136
  else:
1137
  st.warning("Please enter a question first!")
 
 
1138
 
1139
 
1140
  # Store selected question for next render (handled in input section above)
 
1175
  print("=" * 80)
1176
  import sys
1177
  sys.exit(1)
1178
+ main()