akryldigital committed on
Commit
27e8dcc
·
verified ·
1 Parent(s): fa33a8f
Files changed (1) hide show
  1. app.py +235 -193
app.py CHANGED
@@ -3,6 +3,11 @@ Intelligent Audit Report Chatbot UI
3
  """
4
 
5
  import os
 
 
 
 
 
6
 
7
  import time
8
  import json
@@ -21,9 +26,21 @@ import plotly.express as px
21
  from langchain_core.messages import HumanMessage, AIMessage
22
 
23
 
24
- from src.agents import get_multi_agent_chatbot, get_smart_chatbot, get_gemini_chatbot
 
 
 
 
 
 
25
  from src.feedback import FeedbackManager
26
- from src.ui_components import get_custom_css, display_chunk_statistics_charts, display_chunk_statistics_table, extract_chunk_statistics
 
 
 
 
 
 
27
 
28
  from src.config.paths import (
29
  IS_DEPLOYED,
@@ -83,10 +100,11 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
83
  logger = logging.getLogger(__name__)
84
 
85
  # Log environment setup for debugging
86
- logger.info(f"📁 PROJECT_DIR: {PROJECT_DIR}")
87
- logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
88
- logger.info(f"🔧 OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
89
- logger.info(f"📁 HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
 
90
 
91
 
92
  # Page config
@@ -98,21 +116,22 @@ st.set_page_config(
98
  )
99
 
100
 
 
101
  import torch, sys
102
- try:
103
- cuda_ = torch.cuda.is_available()
104
- print("CUDA:", cuda_)
105
- if cuda_:
106
- if "gpu_check" not in st.session_state:
107
- st.write(f"Device: {torch.cuda.get_device_name(0)}")
108
- print("Device:", torch.cuda.get_device_name(0))
109
- except Exception as e:
110
- if "gpu_check" not in st.session_state:
111
- st.write(f"GPU check skipped: {e.__str__()}")
112
- traceback.print_exc()
113
- print("GPU check skipped:", e, file=sys.stderr)
114
- finally:
115
- st.session_state.gpu_check = True
116
 
117
 
118
  st.markdown(get_custom_css(), unsafe_allow_html=True)
@@ -130,6 +149,9 @@ def get_chatbot(version: str = "v1"):
130
  """Initialize and return the chatbot based on version"""
131
  if version == "beta":
132
  return get_gemini_chatbot()
 
 
 
133
  else:
134
  # Check environment variable for system type (v1)
135
  system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
@@ -209,7 +231,7 @@ def main():
209
  # Track RAG retrieval history for feedback
210
  if 'rag_retrieval_history' not in st.session_state:
211
  st.session_state.rag_retrieval_history = []
212
- # Version selection (v1 or beta)
213
  if 'chatbot_version' not in st.session_state:
214
  st.session_state.chatbot_version = "v1"
215
 
@@ -226,7 +248,9 @@ def main():
226
  try:
227
  # Different spinner messages for different versions
228
  if st.session_state.chatbot_version == "beta":
229
- spinner_msg = "🔄 Initializing Gemini FSA"
 
 
230
  else:
231
  spinner_msg = "🔄 Loading AI models and connecting to database..."
232
 
@@ -237,9 +261,14 @@ def main():
237
  print("✅ AI system ready!")
238
  except Exception as e:
239
  st.error(f"❌ Failed to initialize chatbot: {str(e)}")
240
- # Only show Gemini-specific error message for beta version
241
  if st.session_state.chatbot_version == "beta":
242
  st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
 
 
 
 
 
243
  else:
244
  st.error("Please check your configuration and ensure all required models and databases are accessible.")
245
  # Reset to v1 to prevent infinite loop
@@ -271,11 +300,11 @@ def main():
271
  st.markdown("<br>", unsafe_allow_html=True) # Add some spacing
272
  selected_version = st.radio(
273
  "**Version:**",
274
- options=["v1", "beta"],
275
- index=0 if st.session_state.chatbot_version == "v1" else 1,
276
  horizontal=True,
277
  key="version_selector",
278
- help="Select v1 (default RAG system) or beta (Gemini FSA)"
279
  )
280
 
281
  # Update version if changed
@@ -299,6 +328,8 @@ def main():
299
  # Show version info
300
  if st.session_state.chatbot_version == "beta":
301
  st.info("🔬 **Beta Mode**: Using Google Gemini FSA")
 
 
302
 
303
  # Session info
304
  duration = int(time.time() - st.session_state.session_start_time)
@@ -315,7 +346,7 @@ def main():
315
  # Sidebar for filters
316
  with st.sidebar:
317
  # Instructions section (collapsible)
318
- with st.expander("📖 How to Use", expanded=False):
319
  st.markdown("""
320
  #### 🎯 Using Filters
321
 
@@ -342,74 +373,73 @@ def main():
342
  For more detailed help, see the example questions at the bottom of the page.
343
  """)
344
 
345
- st.markdown("### 🔍 Search Filters")
346
- st.markdown("Select filters to narrow down your search. Leave empty to search all data.")
347
-
348
- st.markdown('<div class="filter-section">', unsafe_allow_html=True)
349
- st.markdown('<div class="filter-title">📄 Specific Reports (Filename Filter)</div>', unsafe_allow_html=True)
350
- st.markdown('<p style="font-size: 0.85em; color: #666;">⚠️ Selecting specific reports will ignore all other filters</p>', unsafe_allow_html=True)
351
- selected_filenames = st.multiselect(
352
- "Select specific reports:",
353
- options=filter_options.get('filenames', []),
354
- default=st.session_state.active_filters.get('filenames', []),
355
- key="filenames_filter",
356
- help="Choose specific reports to search. When enabled, all other filters are ignored."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  )
358
- st.markdown('</div>', unsafe_allow_html=True)
359
-
360
- # Determine if filename filter is active
361
- filename_mode = len(selected_filenames) > 0
362
- # Sources filter
363
- # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
364
- st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
365
- selected_sources = st.multiselect(
366
- "Select sources:",
367
- options=filter_options['sources'],
368
- default=st.session_state.active_filters['sources'],
369
- disabled = filename_mode,
370
- key="sources_filter",
371
- help="Choose which types of reports to search"
372
- )
373
- st.markdown('</div>', unsafe_allow_html=True)
374
-
375
- # Years filter
376
- # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
377
- st.markdown('<div class="filter-title">📅 Years</div>', unsafe_allow_html=True)
378
- selected_years = st.multiselect(
379
- "Select years:",
380
- options=filter_options['years'],
381
- default=st.session_state.active_filters['years'],
382
- disabled = filename_mode,
383
- key="years_filter",
384
- help="Choose which years to search"
385
- )
386
- st.markdown('</div>', unsafe_allow_html=True)
387
-
388
- # Districts filter
389
- # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
390
- st.markdown('<div class="filter-title">🏘️ Districts</div>', unsafe_allow_html=True)
391
- selected_districts = st.multiselect(
392
- "Select districts:",
393
- options=filter_options['districts'],
394
- default=st.session_state.active_filters['districts'],
395
- disabled = filename_mode,
396
- key="districts_filter",
397
- help="Choose which districts to search"
398
- )
399
- st.markdown('</div>', unsafe_allow_html=True)
400
 
401
- # Update active filters
402
  st.session_state.active_filters = {
403
  'sources': selected_sources if not filename_mode else [],
404
  'years': selected_years if not filename_mode else [],
405
  'districts': selected_districts if not filename_mode else [],
406
  'filenames': selected_filenames
407
  }
408
-
409
- # Clear filters button
410
- if st.button("🗑️ Clear All Filters", key="clear_filters_button"):
411
- st.session_state.active_filters = {'sources': [], 'years': [], 'districts': [], 'filenames': []}
412
- st.rerun()
413
 
414
  # Main content area with tabs
415
  tab1, tab2 = st.tabs(["💬 Chat", "📄 Retrieved Documents"])
@@ -593,7 +623,7 @@ def main():
593
  # PipelineResult object format
594
  sources = rag_result.sources
595
  elif isinstance(rag_result, dict) and 'sources' in rag_result:
596
- # Dictionary format from multi-agent system
597
  sources = rag_result['sources']
598
 
599
  # For Gemini, also check if we need to format sources from gemini_result
@@ -606,70 +636,88 @@ def main():
606
  elif hasattr(st.session_state.chatbot, '_format_gemini_sources'):
607
  sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
608
 
 
 
609
  if sources and len(sources) > 0:
610
- # Count unique filenames
611
- unique_filenames = set()
612
- for doc in sources:
613
- filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
614
- unique_filenames.add(filename)
615
-
616
- st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
617
- if len(unique_filenames) < len(sources):
618
- st.info(f"💡 **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
619
-
620
- # Extract and display statistics
621
- stats = extract_chunk_statistics(sources)
622
 
623
- # Show charts for 10+ results, tables for fewer
624
- if len(sources) >= 10:
625
- display_chunk_statistics_charts(stats, "Retrieval Statistics")
626
- # Also show tables below charts for detailed view
627
- st.markdown("---")
628
- display_chunk_statistics_table(stats, "Retrieval Distribution")
629
  else:
630
- display_chunk_statistics_table(stats, "Retrieval Distribution")
631
-
632
- st.markdown("---")
633
- st.markdown("### 📄 Document Details")
634
-
635
- for i, doc in enumerate(sources): # Show all documents
636
- # Get relevance score and ID if available
637
- metadata = getattr(doc, 'metadata', {})
638
- # Handle both standard RAG scores and Gemini scores
639
- score = metadata.get('reranked_score') or metadata.get('original_score') or metadata.get('score')
640
- chunk_id = metadata.get('_id') or metadata.get('chunk_id', 'Unknown')
641
- if score is not None:
642
- try:
643
- score_text = f" (Score: {float(score):.3f})"
644
- except (ValueError, TypeError):
645
- score_text = ""
 
 
 
 
646
  else:
647
- score_text = ""
648
- if chunk_id and chunk_id != 'Unknown':
649
- score_text += f" (ID: {str(chunk_id)[:8]}...)" if score_text else f" (ID: {str(chunk_id)[:8]}...)"
 
650
 
651
- with st.expander(f"📄 Document {i+1}: {getattr(doc, 'metadata', {}).get('filename', 'Unknown')[:50]}...{score_text}"):
652
- # Display document metadata with emojis
653
  metadata = getattr(doc, 'metadata', {})
654
- col1, col2, col3, col4 = st.columns([2, 1.5, 1, 1])
655
-
656
- with col1:
657
- st.write(f"📄 **File:** {metadata.get('filename', 'Unknown')}")
658
- with col2:
659
- st.write(f"🏛️ **Source:** {metadata.get('source', 'Unknown')}")
660
- with col3:
661
- st.write(f"📅 **Year:** {metadata.get('year', 'Unknown')}")
662
- with col4:
663
- # Display page number and chunk ID
664
- page = metadata.get('page_label', metadata.get('page', 'Unknown'))
665
- chunk_id = metadata.get('_id', 'Unknown')
666
- st.write(f"📖 **Page:** {page}")
667
- st.write(f"🆔 **ID:** {chunk_id}")
668
 
669
- # Display full content (no truncation)
670
- content = getattr(doc, 'page_content', 'No content available')
671
- st.write(f"**Full Content:**")
672
- st.text_area("Full Content", value=content, height=300, disabled=True, label_visibility="collapsed", key=f"preview_{i}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
  else:
674
  st.info("No documents were retrieved for the last query.")
675
  else:
@@ -1016,10 +1064,8 @@ def main():
1016
  if idx < len(st.session_state.rag_retrieval_history):
1017
  st.markdown("---")
1018
 
1019
- # Example Questions Section
1020
  st.markdown("---")
1021
- st.markdown("### 💡 Example Questions")
1022
- st.markdown("Click on any question below to use it, or modify the editable examples:")
1023
 
1024
  # Initialize example question state
1025
  if 'custom_question_1' not in st.session_state:
@@ -1027,35 +1073,41 @@ def main():
1027
  if 'custom_question_2' not in st.session_state:
1028
  st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
1029
 
1030
- # Question 1: Filename insights (fixed, clickable)
1031
- st.markdown("#### 📄 Question 1: List insights from a specific file")
1032
- col1, col2 = st.columns([3, 1])
1033
- with col1:
 
 
 
 
1034
  example_q1 = "List couple of insights from the filename."
1035
- st.markdown(f"**Example:** `{example_q1}`")
1036
- st.info("💡 **Filter to apply:** Select a Filename from the sidebar panel before asking this question.")
1037
- with col2:
1038
- if st.button("📋 Use This Question", key="use_example_1", use_container_width=True):
1039
- st.session_state.pending_question = example_q1
1040
- st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
1041
- st.rerun()
 
 
1042
 
1043
  st.markdown("---")
1044
 
1045
- # Questions 2 & 3: Editable examples
1046
- st.markdown("#### ✏️ Customizable Questions (Edit and use)")
 
1047
 
1048
- # Question 2
1049
- # st.markdown("**Question 2:**")
1050
- custom_q1 = st.text_area(
1051
- "Edit question 2:",
1052
- value=st.session_state.custom_question_1,
1053
- height=80,
1054
- key="edit_question_2",
1055
- help="Modify this question to fit your needs, then click 'Use This Question'"
1056
- )
1057
- col1, col2 = st.columns([1, 4])
1058
- with col1:
1059
  if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
1060
  if custom_q1.strip():
1061
  st.session_state.pending_question = custom_q1.strip()
@@ -1064,24 +1116,17 @@ def main():
1064
  st.rerun()
1065
  else:
1066
  st.warning("Please enter a question first!")
1067
- with col2:
1068
- st.caption("💡 Tip: Add specific details like dates, names, or amounts to get more precise answers")
1069
 
1070
- st.info("💡 **Filter to apply:** Select District(s) and Year(s) sidebar panel before asking this question.")
1071
-
1072
- st.markdown("---")
1073
-
1074
- # Question 3
1075
- # st.markdown("**Question 3:**")
1076
- custom_q2 = st.text_area(
1077
- "Edit question 3:",
1078
- value=st.session_state.custom_question_2,
1079
- height=80,
1080
- key="edit_question_3",
1081
- help="Modify this question to fit your needs, then click 'Use This Question'"
1082
- )
1083
- col1, col2 = st.columns([1, 4])
1084
- with col1:
1085
  if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
1086
  if custom_q2.strip():
1087
  st.session_state.pending_question = custom_q2.strip()
@@ -1090,8 +1135,6 @@ def main():
1090
  st.rerun()
1091
  else:
1092
  st.warning("Please enter a question first!")
1093
- with col2:
1094
- st.caption("💡 Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
1095
 
1096
 
1097
  # Store selected question for next render (handled in input section above)
@@ -1132,5 +1175,4 @@ if __name__ == "__main__":
1132
  print("=" * 80)
1133
  import sys
1134
  sys.exit(1)
1135
-
1136
- main()
 
3
  """
4
 
5
  import os
6
+ import warnings
7
+
8
+ # Silence Streamlit deprecation warnings (use_column_width -> use_container_width)
9
+ warnings.filterwarnings("ignore", message=".*use_column_width.*")
10
+ warnings.filterwarnings("ignore", category=DeprecationWarning, module="streamlit")
11
 
12
  import time
13
  import json
 
26
  from langchain_core.messages import HumanMessage, AIMessage
27
 
28
 
29
+ from src.agents import (
30
+ get_multi_agent_chatbot,
31
+ get_smart_chatbot,
32
+ get_gemini_chatbot,
33
+ get_visual_chatbot,
34
+ get_visual_multi_agent_chatbot
35
+ )
36
  from src.feedback import FeedbackManager
37
+ from src.ui_components import (
38
+ get_custom_css,
39
+ display_chunk_statistics_charts,
40
+ display_chunk_statistics_table,
41
+ extract_chunk_statistics,
42
+ display_visual_search_results
43
+ )
44
 
45
  from src.config.paths import (
46
  IS_DEPLOYED,
 
100
  logger = logging.getLogger(__name__)
101
 
102
  # Log environment setup for debugging
103
+ # Informational logs (commented out to reduce noise)
104
+ # logger.info(f"📁 PROJECT_DIR: {PROJECT_DIR}")
105
+ # logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
106
+ # logger.info(f"🔧 OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
107
+ # logger.info(f"📁 HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
108
 
109
 
110
  # Page config
 
116
  )
117
 
118
 
119
+ # GPU check - only log once at startup
120
  import torch, sys
121
+ if "gpu_check" not in st.session_state:
122
+ try:
123
+ cuda_ = torch.cuda.is_available()
124
+ mps_ = torch.backends.mps.is_available() if hasattr(torch.backends, 'mps') else False
125
+ if cuda_:
126
+ print(f"🎮 CUDA available: {torch.cuda.get_device_name(0)}")
127
+ elif mps_:
128
+ print("🍎 MPS (Apple Silicon) available")
129
+ else:
130
+ print("💻 CPU only (no GPU acceleration)")
131
+ except Exception as e:
132
+ print(f"⚠️ GPU check error: {e}", file=sys.stderr)
133
+ finally:
134
+ st.session_state.gpu_check = True
135
 
136
 
137
  st.markdown(get_custom_css(), unsafe_allow_html=True)
 
149
  """Initialize and return the chatbot based on version"""
150
  if version == "beta":
151
  return get_gemini_chatbot()
152
+ elif version == "visual":
153
+ # Use multi-agent architecture for visual mode (same sophisticated logic as v1)
154
+ return get_visual_multi_agent_chatbot()
155
  else:
156
  # Check environment variable for system type (v1)
157
  system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
 
231
  # Track RAG retrieval history for feedback
232
  if 'rag_retrieval_history' not in st.session_state:
233
  st.session_state.rag_retrieval_history = []
234
+ # Version selection (v1, beta, or visual)
235
  if 'chatbot_version' not in st.session_state:
236
  st.session_state.chatbot_version = "v1"
237
 
 
248
  try:
249
  # Different spinner messages for different versions
250
  if st.session_state.chatbot_version == "beta":
251
+ spinner_msg = "🔄 Initializing Gemini FSA..."
252
+ elif st.session_state.chatbot_version == "visual":
253
+ spinner_msg = "🎨 Initializing Visual Search ... This may take 20-30 seconds..."
254
  else:
255
  spinner_msg = "🔄 Loading AI models and connecting to database..."
256
 
 
261
  print("✅ AI system ready!")
262
  except Exception as e:
263
  st.error(f"❌ Failed to initialize chatbot: {str(e)}")
264
+ # Show version-specific error messages
265
  if st.session_state.chatbot_version == "beta":
266
  st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
267
+ elif st.session_state.chatbot_version == "visual":
268
+ st.error("Please check your environment variables (QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY for visual)")
269
+ with st.expander("🐛 Debug Info"):
270
+ import traceback
271
+ st.code(traceback.format_exc())
272
  else:
273
  st.error("Please check your configuration and ensure all required models and databases are accessible.")
274
  # Reset to v1 to prevent infinite loop
 
300
  st.markdown("<br>", unsafe_allow_html=True) # Add some spacing
301
  selected_version = st.radio(
302
  "**Version:**",
303
+ options=["v1", "visual", "beta"],
304
+ index=0 if st.session_state.chatbot_version == "v1" else (1 if st.session_state.chatbot_version == "visual" else 2),
305
  horizontal=True,
306
  key="version_selector",
307
+ help="Select v1 (default RAG), visual (ColPali visual search), or beta (Gemini FSA)"
308
  )
309
 
310
  # Update version if changed
 
328
  # Show version info
329
  if st.session_state.chatbot_version == "beta":
330
  st.info("🔬 **Beta Mode**: Using Google Gemini FSA")
331
+ elif st.session_state.chatbot_version == "visual":
332
+ st.info("🎨 **Visual Mode**: Using Visual Search (Multi-Modal Embeddings)")
333
 
334
  # Session info
335
  duration = int(time.time() - st.session_state.session_start_time)
 
346
  # Sidebar for filters
347
  with st.sidebar:
348
  # Instructions section (collapsible)
349
+ with st.expander("📖 How to Use", expanded=True):
350
  st.markdown("""
351
  #### 🎯 Using Filters
352
 
 
373
  For more detailed help, see the example questions at the bottom of the page.
374
  """)
375
 
376
+ # Filters in a collapsed expander by default
377
+ with st.expander("🔍 Search Filters", expanded=False):
378
+ st.caption("Select filters to narrow down your search. Leave empty to search all data.")
379
+
380
+ st.markdown('<div class="filter-section">', unsafe_allow_html=True)
381
+ st.markdown('<div class="filter-title">📄 Specific Reports (Filename Filter)</div>', unsafe_allow_html=True)
382
+ st.markdown('<p style="font-size: 0.85em; color: #666;">⚠️ Selecting specific reports will ignore all other filters</p>', unsafe_allow_html=True)
383
+ selected_filenames = st.multiselect(
384
+ "Select specific reports:",
385
+ options=filter_options.get('filenames', []),
386
+ default=st.session_state.active_filters.get('filenames', []),
387
+ key="filenames_filter",
388
+ help="Choose specific reports to search. When enabled, all other filters are ignored."
389
+ )
390
+ st.markdown('</div>', unsafe_allow_html=True)
391
+
392
+ # Determine if filename filter is active
393
+ filename_mode = len(selected_filenames) > 0
394
+
395
+ # Sources filter
396
+ st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
397
+ selected_sources = st.multiselect(
398
+ "Select sources:",
399
+ options=filter_options['sources'],
400
+ default=st.session_state.active_filters['sources'],
401
+ disabled = filename_mode,
402
+ key="sources_filter",
403
+ help="Choose which types of reports to search"
404
  )
405
+ st.markdown('</div>', unsafe_allow_html=True)
406
+
407
+ # Years filter
408
+ st.markdown('<div class="filter-title">📅 Years</div>', unsafe_allow_html=True)
409
+ selected_years = st.multiselect(
410
+ "Select years:",
411
+ options=filter_options['years'],
412
+ default=st.session_state.active_filters['years'],
413
+ disabled = filename_mode,
414
+ key="years_filter",
415
+ help="Choose which years to search"
416
+ )
417
+ st.markdown('</div>', unsafe_allow_html=True)
418
+
419
+ # Districts filter
420
+ st.markdown('<div class="filter-title">🏘️ Districts</div>', unsafe_allow_html=True)
421
+ selected_districts = st.multiselect(
422
+ "Select districts:",
423
+ options=filter_options['districts'],
424
+ default=st.session_state.active_filters['districts'],
425
+ disabled = filename_mode,
426
+ key="districts_filter",
427
+ help="Choose which districts to search"
428
+ )
429
+ st.markdown('</div>', unsafe_allow_html=True)
430
+
431
+ # Clear filters button
432
+ if st.button("🗑️ Clear All Filters", key="clear_filters_button"):
433
+ st.session_state.active_filters = {'sources': [], 'years': [], 'districts': [], 'filenames': []}
434
+ st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
435
 
436
+ # Update active filters (outside expander so it always runs)
437
  st.session_state.active_filters = {
438
  'sources': selected_sources if not filename_mode else [],
439
  'years': selected_years if not filename_mode else [],
440
  'districts': selected_districts if not filename_mode else [],
441
  'filenames': selected_filenames
442
  }
 
 
 
 
 
443
 
444
  # Main content area with tabs
445
  tab1, tab2 = st.tabs(["💬 Chat", "📄 Retrieved Documents"])
 
623
  # PipelineResult object format
624
  sources = rag_result.sources
625
  elif isinstance(rag_result, dict) and 'sources' in rag_result:
626
+ # Dictionary format from multi-agent system or visual search
627
  sources = rag_result['sources']
628
 
629
  # For Gemini, also check if we need to format sources from gemini_result
 
636
  elif hasattr(st.session_state.chatbot, '_format_gemini_sources'):
637
  sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
638
 
639
+ # Check if this is visual search results (has visual metadata)
640
+ is_visual_search = False
641
  if sources and len(sources) > 0:
642
+ first_doc_metadata = getattr(sources[0], 'metadata', {})
643
+ is_visual_search = 'num_tiles' in first_doc_metadata or 'num_visual_tokens' in first_doc_metadata
644
+
645
+ if sources and len(sources) > 0:
646
+ # Use visual display for visual search results
647
+ if is_visual_search and st.session_state.chatbot_version == "visual":
648
+ st.markdown("### 🎨 Visual Search Results")
 
 
 
 
 
649
 
650
+ display_visual_search_results(
651
+ sources=sources,
652
+ show_statistics=True,
653
+ show_images=True, # Show Cloudinary images
654
+ max_display=20
655
+ )
656
  else:
657
+ # Standard display for v1/beta results
658
+ # Count unique filenames
659
+ unique_filenames = set()
660
+ for doc in sources:
661
+ filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
662
+ unique_filenames.add(filename)
663
+
664
+ st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
665
+ if len(unique_filenames) < len(sources):
666
+ st.info(f"💡 **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
667
+
668
+ # Extract and display statistics
669
+ stats = extract_chunk_statistics(sources)
670
+
671
+ # Show charts for 10+ results, tables for fewer
672
+ if len(sources) >= 10:
673
+ display_chunk_statistics_charts(stats, "Retrieval Statistics")
674
+ # Also show tables below charts for detailed view
675
+ st.markdown("---")
676
+ display_chunk_statistics_table(stats, "Retrieval Distribution")
677
  else:
678
+ display_chunk_statistics_table(stats, "Retrieval Distribution")
679
+
680
+ st.markdown("---")
681
+ st.markdown("### 📄 Document Details")
682
 
683
+ for i, doc in enumerate(sources): # Show all documents
684
+ # Get relevance score and ID if available
685
  metadata = getattr(doc, 'metadata', {})
686
+ # Handle both standard RAG scores and Gemini scores
687
+ score = metadata.get('reranked_score') or metadata.get('original_score') or metadata.get('score')
688
+ chunk_id = metadata.get('_id') or metadata.get('chunk_id', 'Unknown')
689
+ if score is not None:
690
+ try:
691
+ score_text = f" (Score: {float(score):.3f})"
692
+ except (ValueError, TypeError):
693
+ score_text = ""
694
+ else:
695
+ score_text = ""
696
+ if chunk_id and chunk_id != 'Unknown':
697
+ score_text += f" (ID: {str(chunk_id)[:8]}...)" if score_text else f" (ID: {str(chunk_id)[:8]}...)"
 
 
698
 
699
+ with st.expander(f"📄 Document {i+1}: {getattr(doc, 'metadata', {}).get('filename', 'Unknown')[:50]}...{score_text}"):
700
+ # Display document metadata with emojis
701
+ metadata = getattr(doc, 'metadata', {})
702
+ col1, col2, col3, col4 = st.columns([2, 1.5, 1, 1])
703
+
704
+ with col1:
705
+ st.write(f"📄 **File:** {metadata.get('filename', 'Unknown')}")
706
+ with col2:
707
+ st.write(f"🏛️ **Source:** {metadata.get('source', 'Unknown')}")
708
+ with col3:
709
+ st.write(f"📅 **Year:** {metadata.get('year', 'Unknown')}")
710
+ with col4:
711
+ # Display page number and chunk ID
712
+ page = metadata.get('page_label', metadata.get('page', 'Unknown'))
713
+ chunk_id = metadata.get('_id', 'Unknown')
714
+ st.write(f"📖 **Page:** {page}")
715
+ st.write(f"🆔 **ID:** {chunk_id}")
716
+
717
+ # Display full content (no truncation)
718
+ content = getattr(doc, 'page_content', 'No content available')
719
+ st.write(f"**Full Content:**")
720
+ st.text_area("Full Content", value=content, height=300, disabled=True, label_visibility="collapsed", key=f"preview_{i}")
721
  else:
722
  st.info("No documents were retrieved for the last query.")
723
  else:
 
1064
  if idx < len(st.session_state.rag_retrieval_history):
1065
  st.markdown("---")
1066
 
1067
+ # Example Questions Section - Compact layout
1068
  st.markdown("---")
 
 
1069
 
1070
  # Initialize example question state
1071
  if 'custom_question_1' not in st.session_state:
 
1073
  if 'custom_question_2' not in st.session_state:
1074
  st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
1075
 
1076
+ # Row 1: Header on left, Question 1 (file insights) on right
1077
+ header_col, q1_col = st.columns([1, 2])
1078
+
1079
+ with header_col:
1080
+ st.markdown("### 💡 Example Questions")
1081
+ st.caption(" Click **Use ...** or edit")
1082
+
1083
+ with q1_col:
1084
  example_q1 = "List couple of insights from the filename."
1085
+ st.markdown("**📄 File Insights** _(select a file first)_")
1086
+ q1_inner1, q1_inner2 = st.columns([3, 1])
1087
+ with q1_inner1:
1088
+ st.code(example_q1, language=None)
1089
+ with q1_inner2:
1090
+ if st.button("📋 Use question !", key="use_example_1", use_container_width=True):
1091
+ st.session_state.pending_question = example_q1
1092
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
1093
+ st.rerun()
1094
 
1095
  st.markdown("---")
1096
 
1097
+ # Row 2: Questions 2 & 3 side by side
1098
+ st.markdown("#### ✏️ Customizable Questions")
1099
+ q_col1, q_col2 = st.columns(2)
1100
 
1101
+ # Question 2 - Left column (will trigger follow-up)
1102
+ with q_col1:
1103
+ st.caption("🔄 _This question will trigger follow-up prompts for year/district_")
1104
+ custom_q1 = st.text_area(
1105
+ "Question 2:",
1106
+ value=st.session_state.custom_question_1,
1107
+ height=100,
1108
+ key="edit_question_2",
1109
+ help="Modify this question to fit your needs"
1110
+ )
 
1111
  if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
1112
  if custom_q1.strip():
1113
  st.session_state.pending_question = custom_q1.strip()
 
1116
  st.rerun()
1117
  else:
1118
  st.warning("Please enter a question first!")
 
 
1119
 
1120
+ # Question 3 - Right column (has all info, no follow-up)
1121
+ with q_col2:
1122
+ st.caption("✅ _Complete question - has year & context, no follow-up needed_")
1123
+ custom_q2 = st.text_area(
1124
+ "Question 3:",
1125
+ value=st.session_state.custom_question_2,
1126
+ height=100,
1127
+ key="edit_question_3",
1128
+ help="Modify this question to fit your needs"
1129
+ )
 
 
 
 
 
1130
  if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
1131
  if custom_q2.strip():
1132
  st.session_state.pending_question = custom_q2.strip()
 
1135
  st.rerun()
1136
  else:
1137
  st.warning("Please enter a question first!")
 
 
1138
 
1139
 
1140
  # Store selected question for next render (handled in input section above)
 
1175
  print("=" * 80)
1176
  import sys
1177
  sys.exit(1)
1178
+ main()