Spaces:
Sleeping
Sleeping
update UI
Browse files
app.py
CHANGED
|
@@ -3,6 +3,11 @@ Intelligent Audit Report Chatbot UI
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
import time
|
| 8 |
import json
|
|
@@ -21,9 +26,21 @@ import plotly.express as px
|
|
| 21 |
from langchain_core.messages import HumanMessage, AIMessage
|
| 22 |
|
| 23 |
|
| 24 |
-
from src.agents import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
from src.feedback import FeedbackManager
|
| 26 |
-
from src.ui_components import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
from src.config.paths import (
|
| 29 |
IS_DEPLOYED,
|
|
@@ -83,10 +100,11 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
| 83 |
logger = logging.getLogger(__name__)
|
| 84 |
|
| 85 |
# Log environment setup for debugging
|
| 86 |
-
|
| 87 |
-
logger.info(f"
|
| 88 |
-
logger.info(f"
|
| 89 |
-
logger.info(f"
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
# Page config
|
|
@@ -98,21 +116,22 @@ st.set_page_config(
|
|
| 98 |
)
|
| 99 |
|
| 100 |
|
|
|
|
| 101 |
import torch, sys
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
if
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
finally:
|
| 115 |
-
|
| 116 |
|
| 117 |
|
| 118 |
st.markdown(get_custom_css(), unsafe_allow_html=True)
|
|
@@ -130,6 +149,9 @@ def get_chatbot(version: str = "v1"):
|
|
| 130 |
"""Initialize and return the chatbot based on version"""
|
| 131 |
if version == "beta":
|
| 132 |
return get_gemini_chatbot()
|
|
|
|
|
|
|
|
|
|
| 133 |
else:
|
| 134 |
# Check environment variable for system type (v1)
|
| 135 |
system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
|
|
@@ -209,7 +231,7 @@ def main():
|
|
| 209 |
# Track RAG retrieval history for feedback
|
| 210 |
if 'rag_retrieval_history' not in st.session_state:
|
| 211 |
st.session_state.rag_retrieval_history = []
|
| 212 |
-
# Version selection (v1 or
|
| 213 |
if 'chatbot_version' not in st.session_state:
|
| 214 |
st.session_state.chatbot_version = "v1"
|
| 215 |
|
|
@@ -226,7 +248,9 @@ def main():
|
|
| 226 |
try:
|
| 227 |
# Different spinner messages for different versions
|
| 228 |
if st.session_state.chatbot_version == "beta":
|
| 229 |
-
spinner_msg = "🔄 Initializing Gemini FSA"
|
|
|
|
|
|
|
| 230 |
else:
|
| 231 |
spinner_msg = "🔄 Loading AI models and connecting to database..."
|
| 232 |
|
|
@@ -237,9 +261,14 @@ def main():
|
|
| 237 |
print("✅ AI system ready!")
|
| 238 |
except Exception as e:
|
| 239 |
st.error(f"❌ Failed to initialize chatbot: {str(e)}")
|
| 240 |
-
#
|
| 241 |
if st.session_state.chatbot_version == "beta":
|
| 242 |
st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
else:
|
| 244 |
st.error("Please check your configuration and ensure all required models and databases are accessible.")
|
| 245 |
# Reset to v1 to prevent infinite loop
|
|
@@ -271,11 +300,11 @@ def main():
|
|
| 271 |
st.markdown("<br>", unsafe_allow_html=True) # Add some spacing
|
| 272 |
selected_version = st.radio(
|
| 273 |
"**Version:**",
|
| 274 |
-
options=["v1", "beta"],
|
| 275 |
-
index=0 if st.session_state.chatbot_version == "v1" else 1,
|
| 276 |
horizontal=True,
|
| 277 |
key="version_selector",
|
| 278 |
-
help="Select v1 (default RAG
|
| 279 |
)
|
| 280 |
|
| 281 |
# Update version if changed
|
|
@@ -299,6 +328,8 @@ def main():
|
|
| 299 |
# Show version info
|
| 300 |
if st.session_state.chatbot_version == "beta":
|
| 301 |
st.info("🔬 **Beta Mode**: Using Google Gemini FSA")
|
|
|
|
|
|
|
| 302 |
|
| 303 |
# Session info
|
| 304 |
duration = int(time.time() - st.session_state.session_start_time)
|
|
@@ -315,7 +346,7 @@ def main():
|
|
| 315 |
# Sidebar for filters
|
| 316 |
with st.sidebar:
|
| 317 |
# Instructions section (collapsible)
|
| 318 |
-
with st.expander("📖 How to Use", expanded=
|
| 319 |
st.markdown("""
|
| 320 |
#### 🎯 Using Filters
|
| 321 |
|
|
@@ -342,74 +373,73 @@ def main():
|
|
| 342 |
For more detailed help, see the example questions at the bottom of the page.
|
| 343 |
""")
|
| 344 |
|
| 345 |
-
|
| 346 |
-
st.
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
)
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
# Districts filter
|
| 389 |
-
# st.markdown('<div class="filter-section">', unsafe_allow_html=True)
|
| 390 |
-
st.markdown('<div class="filter-title">🏘️ Districts</div>', unsafe_allow_html=True)
|
| 391 |
-
selected_districts = st.multiselect(
|
| 392 |
-
"Select districts:",
|
| 393 |
-
options=filter_options['districts'],
|
| 394 |
-
default=st.session_state.active_filters['districts'],
|
| 395 |
-
disabled = filename_mode,
|
| 396 |
-
key="districts_filter",
|
| 397 |
-
help="Choose which districts to search"
|
| 398 |
-
)
|
| 399 |
-
st.markdown('</div>', unsafe_allow_html=True)
|
| 400 |
|
| 401 |
-
# Update active filters
|
| 402 |
st.session_state.active_filters = {
|
| 403 |
'sources': selected_sources if not filename_mode else [],
|
| 404 |
'years': selected_years if not filename_mode else [],
|
| 405 |
'districts': selected_districts if not filename_mode else [],
|
| 406 |
'filenames': selected_filenames
|
| 407 |
}
|
| 408 |
-
|
| 409 |
-
# Clear filters button
|
| 410 |
-
if st.button("🗑️ Clear All Filters", key="clear_filters_button"):
|
| 411 |
-
st.session_state.active_filters = {'sources': [], 'years': [], 'districts': [], 'filenames': []}
|
| 412 |
-
st.rerun()
|
| 413 |
|
| 414 |
# Main content area with tabs
|
| 415 |
tab1, tab2 = st.tabs(["💬 Chat", "📄 Retrieved Documents"])
|
|
@@ -593,7 +623,7 @@ def main():
|
|
| 593 |
# PipelineResult object format
|
| 594 |
sources = rag_result.sources
|
| 595 |
elif isinstance(rag_result, dict) and 'sources' in rag_result:
|
| 596 |
-
# Dictionary format from multi-agent system
|
| 597 |
sources = rag_result['sources']
|
| 598 |
|
| 599 |
# For Gemini, also check if we need to format sources from gemini_result
|
|
@@ -606,70 +636,88 @@ def main():
|
|
| 606 |
elif hasattr(st.session_state.chatbot, '_format_gemini_sources'):
|
| 607 |
sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
|
| 608 |
|
|
|
|
|
|
|
| 609 |
if sources and len(sources) > 0:
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
if len(unique_filenames) < len(sources):
|
| 618 |
-
st.info(f"💡 **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
|
| 619 |
-
|
| 620 |
-
# Extract and display statistics
|
| 621 |
-
stats = extract_chunk_statistics(sources)
|
| 622 |
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
else:
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
else:
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
|
|
|
| 650 |
|
| 651 |
-
|
| 652 |
-
#
|
| 653 |
metadata = getattr(doc, 'metadata', {})
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
chunk_id
|
| 666 |
-
st.write(f"📖 **Page:** {page}")
|
| 667 |
-
st.write(f"🆔 **ID:** {chunk_id}")
|
| 668 |
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
else:
|
| 674 |
st.info("No documents were retrieved for the last query.")
|
| 675 |
else:
|
|
@@ -1016,10 +1064,8 @@ def main():
|
|
| 1016 |
if idx < len(st.session_state.rag_retrieval_history):
|
| 1017 |
st.markdown("---")
|
| 1018 |
|
| 1019 |
-
# Example Questions Section
|
| 1020 |
st.markdown("---")
|
| 1021 |
-
st.markdown("### 💡 Example Questions")
|
| 1022 |
-
st.markdown("Click on any question below to use it, or modify the editable examples:")
|
| 1023 |
|
| 1024 |
# Initialize example question state
|
| 1025 |
if 'custom_question_1' not in st.session_state:
|
|
@@ -1027,35 +1073,41 @@ def main():
|
|
| 1027 |
if 'custom_question_2' not in st.session_state:
|
| 1028 |
st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
|
| 1029 |
|
| 1030 |
-
#
|
| 1031 |
-
st.
|
| 1032 |
-
|
| 1033 |
-
with
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1034 |
example_q1 = "List couple of insights from the filename."
|
| 1035 |
-
st.markdown(
|
| 1036 |
-
st.
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
|
|
|
|
|
|
| 1042 |
|
| 1043 |
st.markdown("---")
|
| 1044 |
|
| 1045 |
-
# Questions 2 & 3
|
| 1046 |
-
st.markdown("#### ✏️ Customizable Questions
|
|
|
|
| 1047 |
|
| 1048 |
-
# Question 2
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
with col1:
|
| 1059 |
if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
|
| 1060 |
if custom_q1.strip():
|
| 1061 |
st.session_state.pending_question = custom_q1.strip()
|
|
@@ -1064,24 +1116,17 @@ def main():
|
|
| 1064 |
st.rerun()
|
| 1065 |
else:
|
| 1066 |
st.warning("Please enter a question first!")
|
| 1067 |
-
with col2:
|
| 1068 |
-
st.caption("💡 Tip: Add specific details like dates, names, or amounts to get more precise answers")
|
| 1069 |
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
|
| 1076 |
-
|
| 1077 |
-
|
| 1078 |
-
|
| 1079 |
-
|
| 1080 |
-
key="edit_question_3",
|
| 1081 |
-
help="Modify this question to fit your needs, then click 'Use This Question'"
|
| 1082 |
-
)
|
| 1083 |
-
col1, col2 = st.columns([1, 4])
|
| 1084 |
-
with col1:
|
| 1085 |
if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
|
| 1086 |
if custom_q2.strip():
|
| 1087 |
st.session_state.pending_question = custom_q2.strip()
|
|
@@ -1090,8 +1135,6 @@ def main():
|
|
| 1090 |
st.rerun()
|
| 1091 |
else:
|
| 1092 |
st.warning("Please enter a question first!")
|
| 1093 |
-
with col2:
|
| 1094 |
-
st.caption("💡 Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
|
| 1095 |
|
| 1096 |
|
| 1097 |
# Store selected question for next render (handled in input section above)
|
|
@@ -1132,5 +1175,4 @@ if __name__ == "__main__":
|
|
| 1132 |
print("=" * 80)
|
| 1133 |
import sys
|
| 1134 |
sys.exit(1)
|
| 1135 |
-
|
| 1136 |
-
main()
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
import os
|
| 6 |
+
import warnings
|
| 7 |
+
|
| 8 |
+
# Silence Streamlit deprecation warnings (use_column_width -> use_container_width)
|
| 9 |
+
warnings.filterwarnings("ignore", message=".*use_column_width.*")
|
| 10 |
+
warnings.filterwarnings("ignore", category=DeprecationWarning, module="streamlit")
|
| 11 |
|
| 12 |
import time
|
| 13 |
import json
|
|
|
|
| 26 |
from langchain_core.messages import HumanMessage, AIMessage
|
| 27 |
|
| 28 |
|
| 29 |
+
from src.agents import (
|
| 30 |
+
get_multi_agent_chatbot,
|
| 31 |
+
get_smart_chatbot,
|
| 32 |
+
get_gemini_chatbot,
|
| 33 |
+
get_visual_chatbot,
|
| 34 |
+
get_visual_multi_agent_chatbot
|
| 35 |
+
)
|
| 36 |
from src.feedback import FeedbackManager
|
| 37 |
+
from src.ui_components import (
|
| 38 |
+
get_custom_css,
|
| 39 |
+
display_chunk_statistics_charts,
|
| 40 |
+
display_chunk_statistics_table,
|
| 41 |
+
extract_chunk_statistics,
|
| 42 |
+
display_visual_search_results
|
| 43 |
+
)
|
| 44 |
|
| 45 |
from src.config.paths import (
|
| 46 |
IS_DEPLOYED,
|
|
|
|
| 100 |
logger = logging.getLogger(__name__)
|
| 101 |
|
| 102 |
# Log environment setup for debugging
|
| 103 |
+
# Informational logs (commented out to reduce noise)
|
| 104 |
+
# logger.info(f"📁 PROJECT_DIR: {PROJECT_DIR}")
|
| 105 |
+
# logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
|
| 106 |
+
# logger.info(f"🔧 OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
|
| 107 |
+
# logger.info(f"📁 HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
|
| 108 |
|
| 109 |
|
| 110 |
# Page config
|
|
|
|
| 116 |
)
|
| 117 |
|
| 118 |
|
| 119 |
+
# GPU check - only log once at startup
|
| 120 |
import torch, sys
|
| 121 |
+
if "gpu_check" not in st.session_state:
|
| 122 |
+
try:
|
| 123 |
+
cuda_ = torch.cuda.is_available()
|
| 124 |
+
mps_ = torch.backends.mps.is_available() if hasattr(torch.backends, 'mps') else False
|
| 125 |
+
if cuda_:
|
| 126 |
+
print(f"🎮 CUDA available: {torch.cuda.get_device_name(0)}")
|
| 127 |
+
elif mps_:
|
| 128 |
+
print("🍎 MPS (Apple Silicon) available")
|
| 129 |
+
else:
|
| 130 |
+
print("💻 CPU only (no GPU acceleration)")
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"⚠️ GPU check error: {e}", file=sys.stderr)
|
| 133 |
+
finally:
|
| 134 |
+
st.session_state.gpu_check = True
|
| 135 |
|
| 136 |
|
| 137 |
st.markdown(get_custom_css(), unsafe_allow_html=True)
|
|
|
|
| 149 |
"""Initialize and return the chatbot based on version"""
|
| 150 |
if version == "beta":
|
| 151 |
return get_gemini_chatbot()
|
| 152 |
+
elif version == "visual":
|
| 153 |
+
# Use multi-agent architecture for visual mode (same sophisticated logic as v1)
|
| 154 |
+
return get_visual_multi_agent_chatbot()
|
| 155 |
else:
|
| 156 |
# Check environment variable for system type (v1)
|
| 157 |
system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
|
|
|
|
| 231 |
# Track RAG retrieval history for feedback
|
| 232 |
if 'rag_retrieval_history' not in st.session_state:
|
| 233 |
st.session_state.rag_retrieval_history = []
|
| 234 |
+
# Version selection (v1, beta, or visual)
|
| 235 |
if 'chatbot_version' not in st.session_state:
|
| 236 |
st.session_state.chatbot_version = "v1"
|
| 237 |
|
|
|
|
| 248 |
try:
|
| 249 |
# Different spinner messages for different versions
|
| 250 |
if st.session_state.chatbot_version == "beta":
|
| 251 |
+
spinner_msg = "🔄 Initializing Gemini FSA..."
|
| 252 |
+
elif st.session_state.chatbot_version == "visual":
|
| 253 |
+
spinner_msg = "🎨 Initializing Visual Search ... This may take 20-30 seconds..."
|
| 254 |
else:
|
| 255 |
spinner_msg = "🔄 Loading AI models and connecting to database..."
|
| 256 |
|
|
|
|
| 261 |
print("✅ AI system ready!")
|
| 262 |
except Exception as e:
|
| 263 |
st.error(f"❌ Failed to initialize chatbot: {str(e)}")
|
| 264 |
+
# Show version-specific error messages
|
| 265 |
if st.session_state.chatbot_version == "beta":
|
| 266 |
st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
|
| 267 |
+
elif st.session_state.chatbot_version == "visual":
|
| 268 |
+
st.error("Please check your environment variables (QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY for visual)")
|
| 269 |
+
with st.expander("🐛 Debug Info"):
|
| 270 |
+
import traceback
|
| 271 |
+
st.code(traceback.format_exc())
|
| 272 |
else:
|
| 273 |
st.error("Please check your configuration and ensure all required models and databases are accessible.")
|
| 274 |
# Reset to v1 to prevent infinite loop
|
|
|
|
| 300 |
st.markdown("<br>", unsafe_allow_html=True) # Add some spacing
|
| 301 |
selected_version = st.radio(
|
| 302 |
"**Version:**",
|
| 303 |
+
options=["v1", "visual", "beta"],
|
| 304 |
+
index=0 if st.session_state.chatbot_version == "v1" else (1 if st.session_state.chatbot_version == "visual" else 2),
|
| 305 |
horizontal=True,
|
| 306 |
key="version_selector",
|
| 307 |
+
help="Select v1 (default RAG), visual (ColPali visual search), or beta (Gemini FSA)"
|
| 308 |
)
|
| 309 |
|
| 310 |
# Update version if changed
|
|
|
|
| 328 |
# Show version info
|
| 329 |
if st.session_state.chatbot_version == "beta":
|
| 330 |
st.info("🔬 **Beta Mode**: Using Google Gemini FSA")
|
| 331 |
+
elif st.session_state.chatbot_version == "visual":
|
| 332 |
+
st.info("🎨 **Visual Mode**: Using Visual Search (Multi-Modal Embeddings)")
|
| 333 |
|
| 334 |
# Session info
|
| 335 |
duration = int(time.time() - st.session_state.session_start_time)
|
|
|
|
| 346 |
# Sidebar for filters
|
| 347 |
with st.sidebar:
|
| 348 |
# Instructions section (collapsible)
|
| 349 |
+
with st.expander("📖 How to Use", expanded=True):
|
| 350 |
st.markdown("""
|
| 351 |
#### 🎯 Using Filters
|
| 352 |
|
|
|
|
| 373 |
For more detailed help, see the example questions at the bottom of the page.
|
| 374 |
""")
|
| 375 |
|
| 376 |
+
# Filters in a collapsed expander by default
|
| 377 |
+
with st.expander("🔍 Search Filters", expanded=False):
|
| 378 |
+
st.caption("Select filters to narrow down your search. Leave empty to search all data.")
|
| 379 |
+
|
| 380 |
+
st.markdown('<div class="filter-section">', unsafe_allow_html=True)
|
| 381 |
+
st.markdown('<div class="filter-title">📄 Specific Reports (Filename Filter)</div>', unsafe_allow_html=True)
|
| 382 |
+
st.markdown('<p style="font-size: 0.85em; color: #666;">⚠️ Selecting specific reports will ignore all other filters</p>', unsafe_allow_html=True)
|
| 383 |
+
selected_filenames = st.multiselect(
|
| 384 |
+
"Select specific reports:",
|
| 385 |
+
options=filter_options.get('filenames', []),
|
| 386 |
+
default=st.session_state.active_filters.get('filenames', []),
|
| 387 |
+
key="filenames_filter",
|
| 388 |
+
help="Choose specific reports to search. When enabled, all other filters are ignored."
|
| 389 |
+
)
|
| 390 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 391 |
+
|
| 392 |
+
# Determine if filename filter is active
|
| 393 |
+
filename_mode = len(selected_filenames) > 0
|
| 394 |
+
|
| 395 |
+
# Sources filter
|
| 396 |
+
st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
|
| 397 |
+
selected_sources = st.multiselect(
|
| 398 |
+
"Select sources:",
|
| 399 |
+
options=filter_options['sources'],
|
| 400 |
+
default=st.session_state.active_filters['sources'],
|
| 401 |
+
disabled = filename_mode,
|
| 402 |
+
key="sources_filter",
|
| 403 |
+
help="Choose which types of reports to search"
|
| 404 |
)
|
| 405 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 406 |
+
|
| 407 |
+
# Years filter
|
| 408 |
+
st.markdown('<div class="filter-title">📅 Years</div>', unsafe_allow_html=True)
|
| 409 |
+
selected_years = st.multiselect(
|
| 410 |
+
"Select years:",
|
| 411 |
+
options=filter_options['years'],
|
| 412 |
+
default=st.session_state.active_filters['years'],
|
| 413 |
+
disabled = filename_mode,
|
| 414 |
+
key="years_filter",
|
| 415 |
+
help="Choose which years to search"
|
| 416 |
+
)
|
| 417 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 418 |
+
|
| 419 |
+
# Districts filter
|
| 420 |
+
st.markdown('<div class="filter-title">🏘️ Districts</div>', unsafe_allow_html=True)
|
| 421 |
+
selected_districts = st.multiselect(
|
| 422 |
+
"Select districts:",
|
| 423 |
+
options=filter_options['districts'],
|
| 424 |
+
default=st.session_state.active_filters['districts'],
|
| 425 |
+
disabled = filename_mode,
|
| 426 |
+
key="districts_filter",
|
| 427 |
+
help="Choose which districts to search"
|
| 428 |
+
)
|
| 429 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 430 |
+
|
| 431 |
+
# Clear filters button
|
| 432 |
+
if st.button("🗑️ Clear All Filters", key="clear_filters_button"):
|
| 433 |
+
st.session_state.active_filters = {'sources': [], 'years': [], 'districts': [], 'filenames': []}
|
| 434 |
+
st.rerun()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
|
| 436 |
+
# Update active filters (outside expander so it always runs)
|
| 437 |
st.session_state.active_filters = {
|
| 438 |
'sources': selected_sources if not filename_mode else [],
|
| 439 |
'years': selected_years if not filename_mode else [],
|
| 440 |
'districts': selected_districts if not filename_mode else [],
|
| 441 |
'filenames': selected_filenames
|
| 442 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
# Main content area with tabs
|
| 445 |
tab1, tab2 = st.tabs(["💬 Chat", "📄 Retrieved Documents"])
|
|
|
|
| 623 |
# PipelineResult object format
|
| 624 |
sources = rag_result.sources
|
| 625 |
elif isinstance(rag_result, dict) and 'sources' in rag_result:
|
| 626 |
+
# Dictionary format from multi-agent system or visual search
|
| 627 |
sources = rag_result['sources']
|
| 628 |
|
| 629 |
# For Gemini, also check if we need to format sources from gemini_result
|
|
|
|
| 636 |
elif hasattr(st.session_state.chatbot, '_format_gemini_sources'):
|
| 637 |
sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
|
| 638 |
|
| 639 |
+
# Check if this is visual search results (has visual metadata)
|
| 640 |
+
is_visual_search = False
|
| 641 |
if sources and len(sources) > 0:
|
| 642 |
+
first_doc_metadata = getattr(sources[0], 'metadata', {})
|
| 643 |
+
is_visual_search = 'num_tiles' in first_doc_metadata or 'num_visual_tokens' in first_doc_metadata
|
| 644 |
+
|
| 645 |
+
if sources and len(sources) > 0:
|
| 646 |
+
# Use visual display for visual search results
|
| 647 |
+
if is_visual_search and st.session_state.chatbot_version == "visual":
|
| 648 |
+
st.markdown("### 🎨 Visual Search Results")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
|
| 650 |
+
display_visual_search_results(
|
| 651 |
+
sources=sources,
|
| 652 |
+
show_statistics=True,
|
| 653 |
+
show_images=True, # Show Cloudinary images
|
| 654 |
+
max_display=20
|
| 655 |
+
)
|
| 656 |
else:
|
| 657 |
+
# Standard display for v1/beta results
|
| 658 |
+
# Count unique filenames
|
| 659 |
+
unique_filenames = set()
|
| 660 |
+
for doc in sources:
|
| 661 |
+
filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
|
| 662 |
+
unique_filenames.add(filename)
|
| 663 |
+
|
| 664 |
+
st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
|
| 665 |
+
if len(unique_filenames) < len(sources):
|
| 666 |
+
st.info(f"💡 **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
|
| 667 |
+
|
| 668 |
+
# Extract and display statistics
|
| 669 |
+
stats = extract_chunk_statistics(sources)
|
| 670 |
+
|
| 671 |
+
# Show charts for 10+ results, tables for fewer
|
| 672 |
+
if len(sources) >= 10:
|
| 673 |
+
display_chunk_statistics_charts(stats, "Retrieval Statistics")
|
| 674 |
+
# Also show tables below charts for detailed view
|
| 675 |
+
st.markdown("---")
|
| 676 |
+
display_chunk_statistics_table(stats, "Retrieval Distribution")
|
| 677 |
else:
|
| 678 |
+
display_chunk_statistics_table(stats, "Retrieval Distribution")
|
| 679 |
+
|
| 680 |
+
st.markdown("---")
|
| 681 |
+
st.markdown("### 📄 Document Details")
|
| 682 |
|
| 683 |
+
for i, doc in enumerate(sources): # Show all documents
|
| 684 |
+
# Get relevance score and ID if available
|
| 685 |
metadata = getattr(doc, 'metadata', {})
|
| 686 |
+
# Handle both standard RAG scores and Gemini scores
|
| 687 |
+
score = metadata.get('reranked_score') or metadata.get('original_score') or metadata.get('score')
|
| 688 |
+
chunk_id = metadata.get('_id') or metadata.get('chunk_id', 'Unknown')
|
| 689 |
+
if score is not None:
|
| 690 |
+
try:
|
| 691 |
+
score_text = f" (Score: {float(score):.3f})"
|
| 692 |
+
except (ValueError, TypeError):
|
| 693 |
+
score_text = ""
|
| 694 |
+
else:
|
| 695 |
+
score_text = ""
|
| 696 |
+
if chunk_id and chunk_id != 'Unknown':
|
| 697 |
+
score_text += f" (ID: {str(chunk_id)[:8]}...)" if score_text else f" (ID: {str(chunk_id)[:8]}...)"
|
|
|
|
|
|
|
| 698 |
|
| 699 |
+
with st.expander(f"📄 Document {i+1}: {getattr(doc, 'metadata', {}).get('filename', 'Unknown')[:50]}...{score_text}"):
|
| 700 |
+
# Display document metadata with emojis
|
| 701 |
+
metadata = getattr(doc, 'metadata', {})
|
| 702 |
+
col1, col2, col3, col4 = st.columns([2, 1.5, 1, 1])
|
| 703 |
+
|
| 704 |
+
with col1:
|
| 705 |
+
st.write(f"📄 **File:** {metadata.get('filename', 'Unknown')}")
|
| 706 |
+
with col2:
|
| 707 |
+
st.write(f"🏛️ **Source:** {metadata.get('source', 'Unknown')}")
|
| 708 |
+
with col3:
|
| 709 |
+
st.write(f"📅 **Year:** {metadata.get('year', 'Unknown')}")
|
| 710 |
+
with col4:
|
| 711 |
+
# Display page number and chunk ID
|
| 712 |
+
page = metadata.get('page_label', metadata.get('page', 'Unknown'))
|
| 713 |
+
chunk_id = metadata.get('_id', 'Unknown')
|
| 714 |
+
st.write(f"📖 **Page:** {page}")
|
| 715 |
+
st.write(f"🆔 **ID:** {chunk_id}")
|
| 716 |
+
|
| 717 |
+
# Display full content (no truncation)
|
| 718 |
+
content = getattr(doc, 'page_content', 'No content available')
|
| 719 |
+
st.write(f"**Full Content:**")
|
| 720 |
+
st.text_area("Full Content", value=content, height=300, disabled=True, label_visibility="collapsed", key=f"preview_{i}")
|
| 721 |
else:
|
| 722 |
st.info("No documents were retrieved for the last query.")
|
| 723 |
else:
|
|
|
|
| 1064 |
if idx < len(st.session_state.rag_retrieval_history):
|
| 1065 |
st.markdown("---")
|
| 1066 |
|
| 1067 |
+
# Example Questions Section - Compact layout
|
| 1068 |
st.markdown("---")
|
|
|
|
|
|
|
| 1069 |
|
| 1070 |
# Initialize example question state
|
| 1071 |
if 'custom_question_1' not in st.session_state:
|
|
|
|
| 1073 |
if 'custom_question_2' not in st.session_state:
|
| 1074 |
st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
|
| 1075 |
|
| 1076 |
+
# Row 1: Header on left, Question 1 (file insights) on right
|
| 1077 |
+
header_col, q1_col = st.columns([1, 2])
|
| 1078 |
+
|
| 1079 |
+
with header_col:
|
| 1080 |
+
st.markdown("### 💡 Example Questions")
|
| 1081 |
+
st.caption(" Click **Use ...** or edit")
|
| 1082 |
+
|
| 1083 |
+
with q1_col:
|
| 1084 |
example_q1 = "List couple of insights from the filename."
|
| 1085 |
+
st.markdown("**📄 File Insights** _(select a file first)_")
|
| 1086 |
+
q1_inner1, q1_inner2 = st.columns([3, 1])
|
| 1087 |
+
with q1_inner1:
|
| 1088 |
+
st.code(example_q1, language=None)
|
| 1089 |
+
with q1_inner2:
|
| 1090 |
+
if st.button("📋 Use question !", key="use_example_1", use_container_width=True):
|
| 1091 |
+
st.session_state.pending_question = example_q1
|
| 1092 |
+
st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
|
| 1093 |
+
st.rerun()
|
| 1094 |
|
| 1095 |
st.markdown("---")
|
| 1096 |
|
| 1097 |
+
# Row 2: Questions 2 & 3 side by side
|
| 1098 |
+
st.markdown("#### ✏️ Customizable Questions")
|
| 1099 |
+
q_col1, q_col2 = st.columns(2)
|
| 1100 |
|
| 1101 |
+
# Question 2 - Left column (will trigger follow-up)
|
| 1102 |
+
with q_col1:
|
| 1103 |
+
st.caption("🔄 _This question will trigger follow-up prompts for year/district_")
|
| 1104 |
+
custom_q1 = st.text_area(
|
| 1105 |
+
"Question 2:",
|
| 1106 |
+
value=st.session_state.custom_question_1,
|
| 1107 |
+
height=100,
|
| 1108 |
+
key="edit_question_2",
|
| 1109 |
+
help="Modify this question to fit your needs"
|
| 1110 |
+
)
|
|
|
|
| 1111 |
if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
|
| 1112 |
if custom_q1.strip():
|
| 1113 |
st.session_state.pending_question = custom_q1.strip()
|
|
|
|
| 1116 |
st.rerun()
|
| 1117 |
else:
|
| 1118 |
st.warning("Please enter a question first!")
|
|
|
|
|
|
|
| 1119 |
|
| 1120 |
+
# Question 3 - Right column (has all info, no follow-up)
|
| 1121 |
+
with q_col2:
|
| 1122 |
+
st.caption("✅ _Complete question - has year & context, no follow-up needed_")
|
| 1123 |
+
custom_q2 = st.text_area(
|
| 1124 |
+
"Question 3:",
|
| 1125 |
+
value=st.session_state.custom_question_2,
|
| 1126 |
+
height=100,
|
| 1127 |
+
key="edit_question_3",
|
| 1128 |
+
help="Modify this question to fit your needs"
|
| 1129 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1130 |
if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
|
| 1131 |
if custom_q2.strip():
|
| 1132 |
st.session_state.pending_question = custom_q2.strip()
|
|
|
|
| 1135 |
st.rerun()
|
| 1136 |
else:
|
| 1137 |
st.warning("Please enter a question first!")
|
|
|
|
|
|
|
| 1138 |
|
| 1139 |
|
| 1140 |
# Store selected question for next render (handled in input section above)
|
|
|
|
| 1175 |
print("=" * 80)
|
| 1176 |
import sys
|
| 1177 |
sys.exit(1)
|
| 1178 |
+
main()
|
|
|