Spaces:

jmzlx
/

dd-poc

Sleeping

App Files Community

Juan Salas committed on Sep 16, 2025

Commit

1ececa6

1 Parent(s): 52ef528

Fixed agent citations and progress tracking

Browse files

Files changed (7) hide show

app/ai/citation_manager.py +24 -19
app/handlers/ai_handler.py +5 -65
app/ui/session_manager.py +2 -1
app/ui/tabs/company_analysis_tab.py +39 -47
app/ui/tabs/qa_tab.py +21 -5
app/ui/tabs/questions_tab.py +0 -5
app/ui/ui_components.py +77 -154

app/ai/citation_manager.py CHANGED Viewed

@@ -46,12 +46,8 @@ class CitationManager:
     def format_report_with_citations(self, report_text: str, tool_citations: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, List[Dict[str, Any]]]:
         """Format report text with inline download links instead of numbered citations"""
-        # DEBUG: Log input
-        logger.info(f"FORMAT_REPORT_WITH_CITATIONS input: tool_citations keys={list(tool_citations.keys())}, total_citations={sum(len(citations) for citations in tool_citations.values())}")
         # Process citations from tools
         for tool_name, citations_list in tool_citations.items():
-            logger.info(f"Processing {len(citations_list)} citations from tool {tool_name}")
             for citation in citations_list:
                 self.add_citation(citation)
@@ -71,21 +67,42 @@ class CitationManager:
             # We'll use a simple format that can be processed by Streamlit
             inline_link = self._create_inline_download_link(clean_doc_name, doc_path, doc_id)
-            # Map both full and clean names to the same inline link
             doc_replacements[doc_name] = inline_link
             doc_replacements[clean_doc_name] = inline_link
         # Sort by longest document name first to avoid partial matches
         sorted_docs = sorted(doc_replacements.keys(), key=len, reverse=True)
         for doc_name in sorted_docs:
             inline_link = doc_replacements[doc_name]
             # Simple string replacement for {Document Name} format
             citation_marker = f"{{{doc_name}}}"
-            # Replace all instances of the citation marker with inline link
             formatted_text = formatted_text.replace(citation_marker, inline_link)
         # For compatibility, still return citation list (but it won't be used for bottom section)
         citation_list = []
@@ -101,9 +118,6 @@ class CitationManager:
         citation_list.sort(key=lambda x: x['id'])
-        # DEBUG: Log output
-        logger.info(f"FORMAT_REPORT_WITH_CITATIONS output: formatted_text with inline links, citation_list={len(citation_list)} items for compatibility")
         return formatted_text, citation_list
     def _create_inline_download_link(self, clean_name: str, doc_path: str, doc_id: str) -> str:
@@ -202,7 +216,6 @@ def extract_tool_citations(tools: List[Any]) -> Dict[str, List[Dict[str, Any]]]:
         if citations:
             all_citations[tool_name] = citations
     return all_citations
@@ -212,13 +225,8 @@ def create_comprehensive_report(agent_output: str, tools: List[Any], report_type
     # Extract citations from tools
     tool_citations = extract_tool_citations(tools)
-    # Debug logging
-    logger.info(f"Extracted tool citations: {len(tool_citations)} tools with citations")
     total_citations = sum(len(citations) for citations in tool_citations.values())
-    for tool_name, citations_list in tool_citations.items():
-        logger.info(f"Tool {tool_name}: {len(citations_list)} citations")
-        for i, citation in enumerate(citations_list[:2]):  # Log first 2 for debugging
-            logger.info(f"  Citation {i+1}: {citation.get('name', 'No name')} - {citation.get('excerpt', 'No excerpt')[:50]}...")
     # Create citation manager
     citation_manager = CitationManager()
@@ -243,7 +251,4 @@ def create_comprehensive_report(agent_output: str, tools: List[Any], report_type
         'total_count': total_citations
     }
-    # DEBUG: Log exactly what we're returning
-    logger.info(f"CREATE_COMPREHENSIVE_REPORT returning: final_report={final_report is not None} ({len(final_report) if final_report else 0} chars), citation_info={citation_info}")
     return final_report, citation_info

     def format_report_with_citations(self, report_text: str, tool_citations: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, List[Dict[str, Any]]]:
         """Format report text with inline download links instead of numbered citations"""
         # Process citations from tools
         for tool_name, citations_list in tool_citations.items():
             for citation in citations_list:
                 self.add_citation(citation)
             # We'll use a simple format that can be processed by Streamlit
             inline_link = self._create_inline_download_link(clean_doc_name, doc_path, doc_id)
+            # Map ALL possible variations to the same inline link for more flexible matching
             doc_replacements[doc_name] = inline_link
             doc_replacements[clean_doc_name] = inline_link
+            # Also map common variations
+            # Remove common prefixes/suffixes from file names
+            base_name = doc_name
+            if '.' in base_name:
+                base_name = base_name.split('.')[0]  # Everything before first dot
+            if base_name != doc_name and base_name != clean_doc_name:
+                doc_replacements[base_name] = inline_link
+            # Also handle path-based names (just the filename part)
+            from pathlib import Path
+            if doc_path:
+                path_filename = Path(doc_path).name
+                path_clean = path_filename.replace('.pdf', '').replace('.docx', '').replace('.doc', '')
+                if path_clean not in doc_replacements:
+                    doc_replacements[path_clean] = inline_link
         # Sort by longest document name first to avoid partial matches
         sorted_docs = sorted(doc_replacements.keys(), key=len, reverse=True)
+        replacements_made = 0
         for doc_name in sorted_docs:
             inline_link = doc_replacements[doc_name]
             # Simple string replacement for {Document Name} format
             citation_marker = f"{{{doc_name}}}"
+            # Count and replace
+            before_count = formatted_text.count(citation_marker)
             formatted_text = formatted_text.replace(citation_marker, inline_link)
+            actual_replacements = before_count - formatted_text.count(citation_marker)
+            replacements_made += actual_replacements
         # For compatibility, still return citation list (but it won't be used for bottom section)
         citation_list = []
         citation_list.sort(key=lambda x: x['id'])
         return formatted_text, citation_list
     def _create_inline_download_link(self, clean_name: str, doc_path: str, doc_id: str) -> str:
         if citations:
             all_citations[tool_name] = citations
     return all_citations
     # Extract citations from tools
     tool_citations = extract_tool_citations(tools)
+    # Calculate total citations
     total_citations = sum(len(citations) for citations in tool_citations.values())
     # Create citation manager
     citation_manager = CitationManager()
         'total_count': total_citations
     }
     return final_report, citation_info

app/handlers/ai_handler.py CHANGED Viewed

@@ -483,13 +483,9 @@ IMPORTANT: You must provide a FINAL ANALYSIS REPORT in proper format, not just t
 Your final response should be a complete, well-structured report following the format specified in your instructions."""
-            # Run the comprehensive ReAct agent with progress tracking
             logger.info(f"Starting ReAct AI Agent for comprehensive due diligence analysis...")
-            # Add progress indicator for user
-            progress_placeholder = st.empty()
-            progress_placeholder.info("🧠 **AI Agent Starting:** Initializing comprehensive analysis tools...")
             # Configure recursion limit and other settings
             config = {
                 "recursion_limit": 25,  # Allow enough steps for 8-10 tool calls + comprehensive synthesis
@@ -498,66 +494,14 @@ Your final response should be a complete, well-structured report following the f
                 }
             }
-            # Update progress
-            progress_placeholder.info("🔍 **AI Agent Working:** Analyzing documents and gathering intelligence...")
             result = agent.invoke({
                 "messages": [HumanMessage(content=analysis_request)]
             }, config=config)
-            # Final progress update
-            progress_placeholder.info("📊 **AI Agent Finalizing:** Synthesizing findings and generating report...")
-            # Clear progress indicator
-            progress_placeholder.empty()
-            # Debug: Log the complete result structure
-            logger.info(f"ReAct agent result type: {type(result)}")
-            logger.info(f"ReAct agent result keys: {result.keys() if isinstance(result, dict) else 'Not a dict'}")
-            # Extract the agent's final response with enhanced debugging
             agent_output = ""
             if result and "messages" in result:
-                logger.info(f"Found {len(result['messages'])} messages in result")
-                # Log all messages for debugging with more detail
-                for i, message in enumerate(result["messages"]):
-                    msg_type = type(message).__name__
-                    has_content = hasattr(message, 'content')
-                    # Handle both string and list content types for debugging
-                    content_text = ""
-                    content_length = 0
-                    if has_content and message.content:
-                        if isinstance(message.content, list):
-                            # If content is a list, extract text parts for logging
-                            text_parts = []
-                            for item in message.content:
-                                if isinstance(item, dict) and 'text' in item:
-                                    text_parts.append(item['text'])
-                                elif isinstance(item, str):
-                                    text_parts.append(item)
-                            content_text = ' '.join(text_parts)
-                        else:
-                            content_text = str(message.content)
-                        content_length = len(content_text)
-                    logger.info(f"Message {i}: Type={msg_type}, Length={content_length}")
-                    if content_text:
-                        content_preview = content_text[:150]
-                        logger.info(f"Message {i} preview: {content_preview}...")
-                        # Check if this looks like a final report
-                        if (content_length > 500 and
-                            ('# Company Analysis' in content_text or '## Executive Summary' in content_text)):
-                            logger.info(f"Message {i} appears to be a FINAL REPORT")
-                        elif 'Analysis - ' in content_text[:50]:
-                            logger.info(f"Message {i} appears to be TOOL OUTPUT")
-                        elif content_text.startswith('I '):
-                            logger.info(f"Message {i} appears to be REASONING")
                 # Get the final analysis report (not tool outputs)
                 final_report = None
@@ -638,12 +582,8 @@ Your final response should be a complete, well-structured report following the f
                 agent_output, tools, report_type
             )
-            logger.info(f"ReAct agent analysis completed with agent output: {len(agent_output)} characters")
-            logger.info(f"Formatted report length: {len(formatted_report)} characters")
-            logger.info(f"Citation info: {citation_info}")
-            # DEBUG: Log exactly what we're about to return
-            logger.info(f"GENERATE_REACT_REPORT about to return: formatted_report={formatted_report is not None} ({len(formatted_report) if formatted_report else 0} chars), citation_info={citation_info}")
             return formatted_report, citation_info

 Your final response should be a complete, well-structured report following the format specified in your instructions."""
+            # Run the comprehensive ReAct agent
             logger.info(f"Starting ReAct AI Agent for comprehensive due diligence analysis...")
             # Configure recursion limit and other settings
             config = {
                 "recursion_limit": 25,  # Allow enough steps for 8-10 tool calls + comprehensive synthesis
                 }
             }
             result = agent.invoke({
                 "messages": [HumanMessage(content=analysis_request)]
             }, config=config)
+            # Extract the agent's final response
             agent_output = ""
             if result and "messages" in result:
+                logger.debug(f"Processing {len(result['messages'])} messages from ReAct agent")
                 # Get the final analysis report (not tool outputs)
                 final_report = None
                 agent_output, tools, report_type
             )
+            logger.info(f"ReAct agent analysis completed: {len(agent_output)} chars → {len(formatted_report)} chars formatted")
+            logger.debug(f"Citation count: {citation_info.get('total_count', 0)} citations")
             return formatted_report, citation_info

app/ui/session_manager.py CHANGED Viewed

@@ -50,6 +50,7 @@ class SessionManager:
     overview_summary = SessionProperty("")
     strategic_summary = SessionProperty("")
     strategic_company_summary = SessionProperty("")
     # Note: Citations are now inline in the strategic_company_summary content
     # User selections
@@ -109,7 +110,7 @@ class SessionManager:
         """Reset analysis results and cached data for fresh analysis."""
         self.overview_summary = ""
         self.strategic_summary = ""
-        # Note: strategic_company_summary and citations are preserved across document reprocessing
         # They are only cleared when explicitly generating new company analysis
         self.checklist_results = {}
         self.question_answers = {}

     overview_summary = SessionProperty("")
     strategic_summary = SessionProperty("")
     strategic_company_summary = SessionProperty("")
+    strategic_company_citations = SessionProperty([])  # CRITICAL FIX: Make citations persistent across reloads
     # Note: Citations are now inline in the strategic_company_summary content
     # User selections
         """Reset analysis results and cached data for fresh analysis."""
         self.overview_summary = ""
         self.strategic_summary = ""
+        # Note: strategic_company_summary and strategic_company_citations are preserved across document reprocessing
         # They are only cleared when explicitly generating new company analysis
         self.checklist_results = {}
         self.question_answers = {}

app/ui/tabs/company_analysis_tab.py CHANGED Viewed

@@ -10,7 +10,7 @@ import streamlit as st
 from typing import List, Dict, Any
 from app.ui.tabs.tab_base import TabBase
-from app.ui.ui_components import status_message, progress_status_tracker
 from app.core.logging import logger
@@ -51,27 +51,35 @@ class CompanyAnalysisTab(TabBase):
         self._set_processing_active(True)
         try:
-            # STEP 1: Prepare comprehensive context by auto-running missing analyses
-            with st.spinner("🔄 Preparing analysis context..."):
-                self._prepare_comprehensive_context()
-            # STEP 2: Generate comprehensive analysis with all available context
-            analysis_progress = progress_status_tracker()
-            analysis_steps = [
                 "Initialize AI agent",
                 "Analyze documents",
                 "Generate report",
                 "Validate citations"
             ]
-            analysis_progress.initialize(analysis_steps, "🤖 AI Agent Analysis")
             # Use vdr_store for proper vector store access
             data_room_name = getattr(self.session, 'vdr_store', None) or self._get_data_room_name()
-            analysis_progress.start_step(0, "🤖 Booting up AI ReAct Agent with advanced reasoning...")
-            analysis_progress.complete_step(0, f"🎯 AI Agent ready - targeting {data_room_name}")
-            analysis_progress.start_step(1, "🧠 AI Agent reading documents, extracting insights, reasoning about findings...")
             # Note: This step will run for the longest time, so we keep it in progress
             # Use comprehensive ReAct agent with full prepared context
@@ -84,27 +92,13 @@ class CompanyAnalysisTab(TabBase):
                 project_info={'company_name': data_room_name, 'data_room_path': self.session.data_room_path}
             )
-            analysis_progress.complete_step(1, "Document analysis completed")
-            analysis_progress.start_step(2, "Generating report...")
-            analysis_progress.complete_step(2, f"Report generated ({len(report_content) if report_content else 0} chars)")
-            analysis_progress.start_step(3, "Validating citations...")
-            # DEBUG: Log what was actually returned
-            logger.info(f"RETURNED from generate_react_report: report_content={report_content is not None} ({len(report_content) if report_content else 0} chars), citation_info={citation_info}")
-            # Validate that we have citations (they're now inline in the report)
-            if not citation_info.get('has_citations', False):
-                analysis_progress.error_step(3, "No citations found in analysis")
-                logger.error("CRITICAL: No citations found in ReAct agent analysis")
-                raise ValueError("Company analysis must include citations from source documents. No citations were found in the agent's analysis.")
-            analysis_progress.complete_step(3, f"Citations validated: {len(citation_info.get('citations', []))} sources")
             # Store comprehensive analysis and citation info for rendering
             self.session.strategic_company_summary = report_content
-            # Store citation info separately for download functionality
-            setattr(self.session, 'strategic_company_citations', citation_info.get('citations', []))
             status_message("✅ Company analysis completed successfully!", "success")
             st.rerun()
@@ -115,24 +109,21 @@ class CompanyAnalysisTab(TabBase):
             # Always reset processing state
             self._set_processing_active(False)
-    def _prepare_comprehensive_context(self):
         """Prepare comprehensive context by auto-running missing analyses and vectorizing results"""
-        # Initialize progress tracker
-        progress_tracker = progress_status_tracker()
-        # Define all steps for better progress visualization
-        steps = [
-            "Verify data room processing",
-            "Check vector store availability",
-            "Validate session data",
-            "Check strategy context",
-            "Run checklist analysis",
-            "Run Q&A analysis",
-            "Vectorize analysis results"
-        ]
-        progress_tracker.initialize(steps, "🔄 Preparing Analysis Context")
         try:
             # STEP 1: Verify data room is processed
@@ -440,6 +431,7 @@ Key Finding: {answer[:200]}...
             logger.info(f"Rendering company analysis content: {len(content)} characters")
             logger.info(f"Available citations for download: {len(citations)}")
             # Import the simple clickable file rendering function
             from app.ui.ui_components import render_content_with_clickable_citations

 from typing import List, Dict, Any
 from app.ui.tabs.tab_base import TabBase
+from app.ui.ui_components import status_message, ProgressTracker
 from app.core.logging import logger
         self._set_processing_active(True)
         try:
+            # Create single unified progress tracker for entire process
+            unified_progress = ProgressTracker()
+            all_steps = [
+                "Verify data room processing",
+                "Check vector store availability",
+                "Validate session data",
+                "Check strategy context",
+                "Run checklist analysis",
+                "Run Q&A analysis",
+                "Vectorize analysis results",
                 "Initialize AI agent",
                 "Analyze documents",
                 "Generate report",
                 "Validate citations"
             ]
+            unified_progress.initialize(all_steps, "🔄 Comprehensive Analysis")
+            # STEP 1: Prepare comprehensive context by auto-running missing analyses
+            self._prepare_comprehensive_context(unified_progress)
+            # STEP 2: Generate comprehensive analysis with all available context
             # Use vdr_store for proper vector store access
             data_room_name = getattr(self.session, 'vdr_store', None) or self._get_data_room_name()
+            unified_progress.start_step(7, "🤖 Booting up AI ReAct Agent with advanced reasoning...")
+            unified_progress.complete_step(7, f"🎯 AI Agent ready - targeting {data_room_name}")
+            unified_progress.start_step(8, "🧠 AI Agent reading documents, extracting insights, reasoning about findings...")
             # Note: This step will run for the longest time, so we keep it in progress
             # Use comprehensive ReAct agent with full prepared context
                 project_info={'company_name': data_room_name, 'data_room_path': self.session.data_room_path}
             )
             # Store comprehensive analysis and citation info for rendering
             self.session.strategic_company_summary = report_content
+            # Store citation info for download functionality
+            citations_to_store = citation_info.get('citations', []) if citation_info else []
+            self.session.strategic_company_citations = citations_to_store
             status_message("✅ Company analysis completed successfully!", "success")
             st.rerun()
             # Always reset processing state
             self._set_processing_active(False)
+    def _prepare_comprehensive_context(self, progress_tracker=None):
         """Prepare comprehensive context by auto-running missing analyses and vectorizing results"""
+        if progress_tracker is None:
+            # If no progress tracker provided, create a simple one for standalone usage
+            progress_tracker = ProgressTracker()
+            progress_tracker.initialize([
+                "Verify data room processing",
+                "Check vector store availability",
+                "Validate session data",
+                "Check strategy context",
+                "Run checklist analysis",
+                "Run Q&A analysis",
+                "Vectorize analysis results"
+            ], "🔄 Preparing Analysis Context")
         try:
             # STEP 1: Verify data room is processed
             logger.info(f"Rendering company analysis content: {len(content)} characters")
             logger.info(f"Available citations for download: {len(citations)}")
             # Import the simple clickable file rendering function
             from app.ui.ui_components import render_content_with_clickable_citations

app/ui/tabs/qa_tab.py CHANGED Viewed

@@ -145,17 +145,33 @@ class QATab:
         # Display source documents with download buttons in collapsed expanders
         for i, result in enumerate(results[:3], 1):
-            doc_source = result.get('source', 'Unknown')
-            citation = result.get('citation', '')
-            doc_title = f"{i}. {doc_source} ({citation})" if citation else f"{i}. {doc_source}"
             # Use expander to show documents collapsed by default
             with st.expander(f"📄 {doc_title}", expanded=False):
                 col1, col2 = st.columns([5, 1])
                 with col1:
-                    text_content = result.get('text', '')
                     excerpt = text_content[:500] + "..." if len(text_content) > 500 else text_content
-                    st.markdown(f"\"{excerpt}\"")
                 with col2:
                     # Only show one download button

         # Display source documents with download buttons in collapsed expanders
         for i, result in enumerate(results[:3], 1):
+            # Handle different result formats defensively
+            if not isinstance(result, dict):
+                # Result might be malformed - show debug info
+                st.error(f"Debug - Result {i} is not a dict: {type(result)} = {result}")
+                continue
+            doc_source = result.get('source', result.get('name', 'Unknown Document'))
+            score = result.get('score', 0.0)
+            citation = f"Score: {score:.3f}"
+            doc_title = f"{i}. {doc_source} ({citation})"
             # Use expander to show documents collapsed by default
             with st.expander(f"📄 {doc_title}", expanded=False):
                 col1, col2 = st.columns([5, 1])
                 with col1:
+                    # Handle different result formats defensively
+                    text_content = result.get('text', result.get('document', result.get('content', '')))
+                    if not text_content and isinstance(result, dict):
+                        # Debug: show the actual structure if text is missing
+                        st.error(f"Debug - Result structure: {list(result.keys())}")
+                        text_content = str(result.get('page_content', result))
                     excerpt = text_content[:500] + "..." if len(text_content) > 500 else text_content
+                    if excerpt:
+                        st.markdown(f"\"{excerpt}\"")
+                    else:
+                        st.warning("No text content available for this document")
                 with col2:
                     # Only show one download button

app/ui/tabs/questions_tab.py CHANGED Viewed

@@ -99,7 +99,6 @@ class QuestionsTab:
                     from pathlib import Path
                     # Step 1: Load pre-parsed questions (no LLM needed)
-                    st.info("📋 Loading pre-parsed questions...")
                     # Extract filename from questions path
                     if hasattr(self.session, 'questions_path') and self.session.questions_path:
@@ -109,17 +108,13 @@ class QuestionsTab:
                     questions = load_prebuilt_questions(questions_filename)
                     self.session.questions = questions
-                    st.info(f"Found {len(questions)} questions to process")
                     # Step 2: Use pre-built FAISS index
-                    st.info("🔍 Setting up document search...")
                     if not document_processor.vector_store:
                         raise ValueError("No pre-built FAISS index loaded. Please ensure data room is processed first.")
                     vector_store = document_processor.vector_store
                     # Step 3: Process questions with batch processing
-                    st.info("🤖 **AI Agent Processing:** Running batch analysis with ReAct reasoning...")
-                    st.info("🧠 **Agent Status:** Using concurrent processing for faster results...")
                     question_answers = search_and_analyze(
                         questions,

                     from pathlib import Path
                     # Step 1: Load pre-parsed questions (no LLM needed)
                     # Extract filename from questions path
                     if hasattr(self.session, 'questions_path') and self.session.questions_path:
                     questions = load_prebuilt_questions(questions_filename)
                     self.session.questions = questions
                     # Step 2: Use pre-built FAISS index
                     if not document_processor.vector_store:
                         raise ValueError("No pre-built FAISS index loaded. Please ensure data room is processed first.")
                     vector_store = document_processor.vector_store
                     # Step 3: Process questions with batch processing
                     question_answers = search_and_analyze(
                         questions,

app/ui/ui_components.py CHANGED Viewed

@@ -34,36 +34,21 @@ def _resolve_document_path(doc_path: str) -> Optional[Path]:
     if path_obj.is_absolute():
         return path_obj if path_obj.exists() else None
-    # For relative paths, try to resolve against the data room path
-    data_room_path = getattr(st.session_state, 'data_room_path', None)
-    if data_room_path:
-        resolved_path = Path(data_room_path) / path_obj
-        if resolved_path.exists():
-            return resolved_path
-    # Fallback: try relative to data directory
-    data_dir = Path('data')
-    fallback_path = data_dir / path_obj
-    if fallback_path.exists():
-        return fallback_path
-    # Enhanced search: Look in the currently selected data room only
-    # This handles cases where files like "company-profile.pdf" are stored with just filename
-    # but should only be resolved within the current data room context
-    # Try using the data room path from session state
-    current_data_room = getattr(st.session_state, 'data_room_path', None)
-    if current_data_room and Path(current_data_room).exists():
-        potential_path = Path(current_data_room) / path_obj
-        if potential_path.exists():
-            return potential_path
-    # Also check for selected_data_room_path as fallback
-    selected_data_room = getattr(st.session_state, 'selected_data_room_path', None)
-    if selected_data_room and Path(selected_data_room).exists():
-        potential_path = Path(selected_data_room) / path_obj
-        if potential_path.exists():
-            return potential_path
     # Last resort: check if original path exists as-is
     if path_obj.exists():
@@ -284,16 +269,6 @@ def status_message(message: str, message_type: str = "info"):
         st.info(message)
-def progress_indicator():
-    """
-    Create a progress indicator placeholder.
-    Returns:
-        A context manager for progress indication
-    """
-    return st.empty()
 def compact_status_display(status_items: list, title: str = "Status"):
     """
     Display a compact list of status items with minimal vertical spacing.
@@ -344,16 +319,6 @@ def compact_status_display(status_items: list, title: str = "Status"):
             """, unsafe_allow_html=True)
-def progress_status_tracker():
-    """
-    Create a progress status tracker that can be updated dynamically.
-    Returns:
-        A class instance that can track and update progress
-    """
-    return ProgressTracker()
 class ProgressTracker:
     """A class to track and display progress with real indicators"""
@@ -367,7 +332,7 @@ class ProgressTracker:
     def initialize(self, steps: list, title: str = "Progress"):
         """Initialize progress tracker with steps"""
         self.status_items = [
-            {'message': step, 'status': 'pending', 'icon': '⏳'}
             for step in steps
         ]
         self.total_steps = len(steps)
@@ -378,7 +343,6 @@ class ProgressTracker:
         """Mark a step as in progress"""
         if step_index < len(self.status_items):
             self.status_items[step_index]['status'] = 'in_progress'
-            self.status_items[step_index]['icon'] = '🔄'
             if message:
                 self.status_items[step_index]['message'] = message
             self.current_step = step_index
@@ -388,7 +352,6 @@ class ProgressTracker:
         """Mark a step as completed"""
         if step_index < len(self.status_items):
             self.status_items[step_index]['status'] = 'completed'
-            self.status_items[step_index]['icon'] = '✅'
             if message:
                 self.status_items[step_index]['message'] = message
             self._render()
@@ -397,11 +360,11 @@ class ProgressTracker:
         """Mark a step as error"""
         if step_index < len(self.status_items):
             self.status_items[step_index]['status'] = 'error'
-            self.status_items[step_index]['icon'] = '❌'
             if message:
                 self.status_items[step_index]['message'] = message
             self._render()
     def _render(self, title: str = "Progress"):
         """Internal method to render current progress"""
         with self.container.container():
@@ -515,59 +478,41 @@ def render_file_selector(directory: str, file_type: str, key_suffix: str, icon:
 # ERROR HANDLING COMPONENTS - Standardized error message patterns
 # =============================================================================
-def display_generation_error(operation_type: str, error: Exception = None):
     """
-    Display a standardized error message for generation failures.
     Args:
-        operation_type: Type of operation that failed (e.g., "question analysis", "checklist analysis")
         error: The exception that occurred (optional)
     """
     if error:
-        st.error(f"❌ Failed to generate {operation_type}: {str(error)}")
     else:
-        st.error(f"❌ Failed to generate {operation_type}")
-def display_processing_error(operation_type: str, error: Exception = None):
-    """
-    Display a standardized error message for processing failures.
-    Args:
-        operation_type: Type of operation that failed (e.g., "question", "data room")
-        error: The exception that occurred (optional)
-    """
-    if error:
-        st.error(f"❌ Failed to process {operation_type}: {str(error)}")
-    else:
-        st.error(f"❌ Failed to process {operation_type}")
-def display_initialization_error(component_type: str, error: Exception = None):
-    """
-    Display a standardized error message for initialization failures.
-    Args:
-        component_type: Type of component that failed to initialize (e.g., "document processor")
-        error: The exception that occurred (optional)
-    """
-    if error:
-        st.error(f"❌ Failed to initialize {component_type}: {str(error)}")
-    else:
-        st.error(f"❌ Failed to initialize {component_type}")
 def display_download_error(error: Exception = None):
-    """
-    Display a standardized error message for download failures.
-    Args:
-        error: The exception that occurred (optional)
-    """
-    if error:
-        st.error(f"❌ Download failed: {str(error)}")
-    else:
-        st.error("❌ Download failed")
 # =============================================================================
@@ -629,21 +574,9 @@ def render_checklist_results(results: dict, relevancy_threshold: float):
                             col1, col2 = st.columns([4, 1])
                             with col1:
-                                resolved_path = _resolve_document_path(doc_path)
-                                if resolved_path and resolved_path.exists():
-                                    try:
-                                        with open(resolved_path, 'rb') as f:
-                                            st.download_button(
-                                                f"📄 {doc_name}",
-                                                data=f.read(),
-                                                file_name=resolved_path.name,
-                                                mime="application/octet-stream",
-                                                key=f"download_{hash(doc_path) % 10000}_{item_idx}"
-                                            )
-                                    except Exception:
-                                        st.write(f"📄 {doc_name} (unavailable)")
-                                else:
-                                    st.write(f"📄 {doc_name} (unavailable)")
                             with col2:
                                 st.caption(f"{score:.3f}")
                     else:
@@ -667,7 +600,7 @@ def render_question_results(answers: dict):
         answer = answer_data.get('answer', 'No answer available')
         sources = answer_data.get('sources', [])
-        with st.expander(f"**{question}**", expanded=True):
             if answer:
                 st.markdown(f"**Answer:** {answer}")
@@ -680,55 +613,64 @@ def render_question_results(answers: dict):
                     col1, col2 = st.columns([4, 1])
                     with col1:
-                        resolved_path = _resolve_document_path(doc_path)
-                        if resolved_path and resolved_path.exists():
-                            try:
-                                with open(resolved_path, 'rb') as f:
-                                    st.download_button(
-                                        f"📄 {doc_name}",
-                                        data=f.read(),
-                                        file_name=resolved_path.name,
-                                        mime="application/octet-stream",
-                                        key=f"q_download_{hash(doc_path) % 10000}_{i}"
-                                    )
-                            except Exception:
-                                st.write(f"📄 {doc_name} (unavailable)")
-                        else:
-                            st.write(f"📄 {doc_name} (unavailable)")
                     with col2:
                         st.caption(f"{score:.3f}")
-def create_document_link(doc_path: str, doc_name: str, doc_title: str, unique_key: str):
     """
-    Create a download link for a document.
     Args:
         doc_path: Path to the document file (can be relative or absolute)
         doc_name: Display name for the document
-        doc_title: Title for the document
         unique_key: Unique key for the download button
     """
-    import streamlit as st
-    from pathlib import Path
-    # Resolve the path - handle both relative and absolute paths
     resolved_path = _resolve_document_path(doc_path)
     if resolved_path and resolved_path.exists():
         try:
             with open(resolved_path, 'rb') as f:
                 st.download_button(
-                    f"📄 {doc_title}",
                     data=f.read(),
                     file_name=resolved_path.name,
-                    mime="application/octet-stream",
-                    key=f"link_{unique_key}"
                 )
-        except Exception as e:
-            st.error(f"Error reading document: {doc_name}")
     else:
-        st.write(f"📄 {doc_title} (unavailable)")
 def render_content_with_clickable_citations(content: str, citations: List[Dict[str, Any]]):
@@ -771,30 +713,11 @@ def render_content_with_clickable_citations(content: str, citations: List[Dict[s
                 cols = st.columns(len(mentioned_docs))
                 for i, (doc_name, doc_path) in enumerate(mentioned_docs):
                     with cols[i]:
-                        _render_simple_download_button(doc_name, doc_path, f"para_{para_idx}_{i}")
         else:
             st.markdown("")
-def _render_simple_download_button(doc_name: str, doc_path: str, unique_key: str):
-    """Simple inline download button"""
-    resolved_path = _resolve_document_path(doc_path)
-    if resolved_path and resolved_path.exists():
-        try:
-            with open(resolved_path, 'rb') as f:
-                st.download_button(
-                    label=f"📄 {doc_name}",
-                    data=f.read(),
-                    file_name=resolved_path.name,
-                    mime="application/pdf" if doc_path.lower().endswith('.pdf') else "application/octet-stream",
-                    key=f"simple_download_{unique_key}",
-                    help=f"Download: {doc_name}"
-                )
-        except Exception:
-            st.caption(f"📄 {doc_name} (unavailable)")
 # =============================================================================
 # GENERATE/REGENERATE BUTTON COMPONENTS - Common 2-column button layout
 # =============================================================================

     if path_obj.is_absolute():
         return path_obj if path_obj.exists() else None
+    # For relative paths, try different resolution strategies
+    resolution_paths = [
+        # Try current data room path
+        getattr(st.session_state, 'data_room_path', None),
+        # Try selected data room path as fallback
+        getattr(st.session_state, 'selected_data_room_path', None),
+        # Try relative to data directory
+        str(Path('data'))
+    ]
+    for base_path in resolution_paths:
+        if base_path and Path(base_path).exists():
+            potential_path = Path(base_path) / path_obj
+            if potential_path.exists():
+                return potential_path
     # Last resort: check if original path exists as-is
     if path_obj.exists():
         st.info(message)
 def compact_status_display(status_items: list, title: str = "Status"):
     """
     Display a compact list of status items with minimal vertical spacing.
             """, unsafe_allow_html=True)
 class ProgressTracker:
     """A class to track and display progress with real indicators"""
     def initialize(self, steps: list, title: str = "Progress"):
         """Initialize progress tracker with steps"""
         self.status_items = [
+            {'message': step, 'status': 'pending'}
             for step in steps
         ]
         self.total_steps = len(steps)
         """Mark a step as in progress"""
         if step_index < len(self.status_items):
             self.status_items[step_index]['status'] = 'in_progress'
             if message:
                 self.status_items[step_index]['message'] = message
             self.current_step = step_index
         """Mark a step as completed"""
         if step_index < len(self.status_items):
             self.status_items[step_index]['status'] = 'completed'
             if message:
                 self.status_items[step_index]['message'] = message
             self._render()
         """Mark a step as error"""
         if step_index < len(self.status_items):
             self.status_items[step_index]['status'] = 'error'
             if message:
                 self.status_items[step_index]['message'] = message
             self._render()
     def _render(self, title: str = "Progress"):
         """Internal method to render current progress"""
         with self.container.container():
 # ERROR HANDLING COMPONENTS - Standardized error message patterns
 # =============================================================================
+def display_error(operation_type: str, action: str = "process", error: Exception = None, icon: str = "❌"):
     """
+    Display a standardized error message for various operation failures.
     Args:
+        operation_type: Type of operation that failed (e.g., "question analysis", "data room", "document processor")
+        action: Action that failed (e.g., "generate", "process", "initialize", "download")
         error: The exception that occurred (optional)
+        icon: Icon to display with error message (default: "❌")
     """
     if error:
+        st.error(f"{icon} Failed to {action} {operation_type}: {str(error)}")
     else:
+        st.error(f"{icon} Failed to {action} {operation_type}")
+# Convenience functions for backward compatibility
+def display_generation_error(operation_type: str, error: Exception = None):
+    """DEPRECATED: Use display_error with action='generate' instead."""
+    display_error(operation_type, "generate", error)
+def display_processing_error(operation_type: str, error: Exception = None):
+    """DEPRECATED: Use display_error with action='process' instead."""
+    display_error(operation_type, "process", error)
+def display_initialization_error(component_type: str, error: Exception = None):
+    """DEPRECATED: Use display_error with action='initialize' instead."""
+    display_error(component_type, "initialize", error)
 def display_download_error(error: Exception = None):
+    """DEPRECATED: Use display_error("download", action="complete", error=error) instead."""
+    display_error("download", "complete", error)
 # =============================================================================
                             col1, col2 = st.columns([4, 1])
                             with col1:
+                                render_document_download_button(
+                                    doc_path, doc_name, f"checklist_{hash(doc_path) % 10000}_{item_idx}"
+                                )
                             with col2:
                                 st.caption(f"{score:.3f}")
                     else:
         answer = answer_data.get('answer', 'No answer available')
         sources = answer_data.get('sources', [])
+        with st.expander(f"**{question}**", expanded=False):
             if answer:
                 st.markdown(f"**Answer:** {answer}")
                     col1, col2 = st.columns([4, 1])
                     with col1:
+                        render_document_download_button(
+                            doc_path, doc_name, f"question_{hash(doc_path) % 10000}_{i}"
+                        )
                     with col2:
                         st.caption(f"{score:.3f}")
+def render_document_download_button(doc_path: str, doc_name: str, unique_key: str,
+                                   label_prefix: str = "📄", show_unavailable: bool = True,
+                                   button_help: str = None) -> bool:
     """
+    Render a download button for a document, falling back to an "(unavailable)" note when the file cannot be resolved or read.
     Args:
         doc_path: Path to the document file (can be relative or absolute)
         doc_name: Display name for the document
         unique_key: Unique key for the download button
+        label_prefix: Prefix for the button label (default: "📄")
+        show_unavailable: Whether to show unavailable documents as text/caption
+        button_help: Optional help text for the button
+    Returns:
+        True if document was available and button was rendered, False otherwise
     """
     resolved_path = _resolve_document_path(doc_path)
     if resolved_path and resolved_path.exists():
         try:
             with open(resolved_path, 'rb') as f:
+                # Only .pdf paths get a specific MIME type; every other extension is served as a generic binary download
+                mime_type = "application/pdf" if doc_path.lower().endswith('.pdf') else "application/octet-stream"
                 st.download_button(
+                    label=f"{label_prefix} {doc_name}",
                     data=f.read(),
                     file_name=resolved_path.name,
+                    mime=mime_type,
+                    key=f"download_{unique_key}",
+                    help=button_help or f"Download: {doc_name}"
                 )
+                return True
+        except Exception:
+            if show_unavailable:
+                st.caption(f"{label_prefix} {doc_name} (unavailable)")
+            return False
     else:
+        if show_unavailable:
+            st.write(f"{label_prefix} {doc_name} (unavailable)")
+        return False
+def create_document_link(doc_path: str, doc_name: str, doc_title: str, unique_key: str):
+    """
+    Create a download link for a document.
+    DEPRECATED: Use render_document_download_button instead (doc_title is used as the button label; doc_name is ignored).
+    """
+    render_document_download_button(doc_path, doc_title, unique_key, "📄", True)
 def render_content_with_clickable_citations(content: str, citations: List[Dict[str, Any]]):
                 cols = st.columns(len(mentioned_docs))
                 for i, (doc_name, doc_path) in enumerate(mentioned_docs):
                     with cols[i]:
+                        render_document_download_button(doc_path, doc_name, f"para_{para_idx}_{i}")
         else:
             st.markdown("")
 # =============================================================================
 # GENERATE/REGENERATE BUTTON COMPONENTS - Common 2-column button layout
 # =============================================================================