Juan Salas committed on
Commit
1ececa6
·
1 Parent(s): 52ef528

Fixed agent citations and progress tracking

Browse files
app/ai/citation_manager.py CHANGED
@@ -46,12 +46,8 @@ class CitationManager:
46
  def format_report_with_citations(self, report_text: str, tool_citations: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, List[Dict[str, Any]]]:
47
  """Format report text with inline download links instead of numbered citations"""
48
 
49
- # DEBUG: Log input
50
- logger.info(f"FORMAT_REPORT_WITH_CITATIONS input: tool_citations keys={list(tool_citations.keys())}, total_citations={sum(len(citations) for citations in tool_citations.values())}")
51
-
52
  # Process citations from tools
53
  for tool_name, citations_list in tool_citations.items():
54
- logger.info(f"Processing {len(citations_list)} citations from tool {tool_name}")
55
  for citation in citations_list:
56
  self.add_citation(citation)
57
 
@@ -71,21 +67,42 @@ class CitationManager:
71
  # We'll use a simple format that can be processed by Streamlit
72
  inline_link = self._create_inline_download_link(clean_doc_name, doc_path, doc_id)
73
 
74
- # Map both full and clean names to the same inline link
75
  doc_replacements[doc_name] = inline_link
76
  doc_replacements[clean_doc_name] = inline_link
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  # Sort by longest document name first to avoid partial matches
79
  sorted_docs = sorted(doc_replacements.keys(), key=len, reverse=True)
80
 
 
81
  for doc_name in sorted_docs:
82
  inline_link = doc_replacements[doc_name]
83
 
84
  # Simple string replacement for {Document Name} format
85
  citation_marker = f"{{{doc_name}}}"
86
 
87
- # Replace all instances of the citation marker with inline link
 
88
  formatted_text = formatted_text.replace(citation_marker, inline_link)
 
 
 
89
 
90
  # For compatibility, still return citation list (but it won't be used for bottom section)
91
  citation_list = []
@@ -101,9 +118,6 @@ class CitationManager:
101
 
102
  citation_list.sort(key=lambda x: x['id'])
103
 
104
- # DEBUG: Log output
105
- logger.info(f"FORMAT_REPORT_WITH_CITATIONS output: formatted_text with inline links, citation_list={len(citation_list)} items for compatibility")
106
-
107
  return formatted_text, citation_list
108
 
109
  def _create_inline_download_link(self, clean_name: str, doc_path: str, doc_id: str) -> str:
@@ -202,7 +216,6 @@ def extract_tool_citations(tools: List[Any]) -> Dict[str, List[Dict[str, Any]]]:
202
 
203
  if citations:
204
  all_citations[tool_name] = citations
205
-
206
  return all_citations
207
 
208
 
@@ -212,13 +225,8 @@ def create_comprehensive_report(agent_output: str, tools: List[Any], report_type
212
  # Extract citations from tools
213
  tool_citations = extract_tool_citations(tools)
214
 
215
- # Debug logging
216
- logger.info(f"Extracted tool citations: {len(tool_citations)} tools with citations")
217
  total_citations = sum(len(citations) for citations in tool_citations.values())
218
- for tool_name, citations_list in tool_citations.items():
219
- logger.info(f"Tool {tool_name}: {len(citations_list)} citations")
220
- for i, citation in enumerate(citations_list[:2]): # Log first 2 for debugging
221
- logger.info(f" Citation {i+1}: {citation.get('name', 'No name')} - {citation.get('excerpt', 'No excerpt')[:50]}...")
222
 
223
  # Create citation manager
224
  citation_manager = CitationManager()
@@ -243,7 +251,4 @@ def create_comprehensive_report(agent_output: str, tools: List[Any], report_type
243
  'total_count': total_citations
244
  }
245
 
246
- # DEBUG: Log exactly what we're returning
247
- logger.info(f"CREATE_COMPREHENSIVE_REPORT returning: final_report={final_report is not None} ({len(final_report) if final_report else 0} chars), citation_info={citation_info}")
248
-
249
  return final_report, citation_info
 
46
  def format_report_with_citations(self, report_text: str, tool_citations: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, List[Dict[str, Any]]]:
47
  """Format report text with inline download links instead of numbered citations"""
48
 
 
 
 
49
  # Process citations from tools
50
  for tool_name, citations_list in tool_citations.items():
 
51
  for citation in citations_list:
52
  self.add_citation(citation)
53
 
 
67
  # We'll use a simple format that can be processed by Streamlit
68
  inline_link = self._create_inline_download_link(clean_doc_name, doc_path, doc_id)
69
 
70
+ # Map ALL possible variations to the same inline link for more flexible matching
71
  doc_replacements[doc_name] = inline_link
72
  doc_replacements[clean_doc_name] = inline_link
73
+
74
+ # Also map common variations
75
+ # Remove common prefixes/suffixes from file names
76
+ base_name = doc_name
77
+ if '.' in base_name:
78
+ base_name = base_name.split('.')[0] # Everything before first dot
79
+ if base_name != doc_name and base_name != clean_doc_name:
80
+ doc_replacements[base_name] = inline_link
81
+
82
+ # Also handle path-based names (just the filename part)
83
+ from pathlib import Path
84
+ if doc_path:
85
+ path_filename = Path(doc_path).name
86
+ path_clean = path_filename.replace('.pdf', '').replace('.docx', '').replace('.doc', '')
87
+ if path_clean not in doc_replacements:
88
+ doc_replacements[path_clean] = inline_link
89
 
90
  # Sort by longest document name first to avoid partial matches
91
  sorted_docs = sorted(doc_replacements.keys(), key=len, reverse=True)
92
 
93
+ replacements_made = 0
94
  for doc_name in sorted_docs:
95
  inline_link = doc_replacements[doc_name]
96
 
97
  # Simple string replacement for {Document Name} format
98
  citation_marker = f"{{{doc_name}}}"
99
 
100
+ # Count and replace
101
+ before_count = formatted_text.count(citation_marker)
102
  formatted_text = formatted_text.replace(citation_marker, inline_link)
103
+ actual_replacements = before_count - formatted_text.count(citation_marker)
104
+ replacements_made += actual_replacements
105
+
106
 
107
  # For compatibility, still return citation list (but it won't be used for bottom section)
108
  citation_list = []
 
118
 
119
  citation_list.sort(key=lambda x: x['id'])
120
 
 
 
 
121
  return formatted_text, citation_list
122
 
123
  def _create_inline_download_link(self, clean_name: str, doc_path: str, doc_id: str) -> str:
 
216
 
217
  if citations:
218
  all_citations[tool_name] = citations
 
219
  return all_citations
220
 
221
 
 
225
  # Extract citations from tools
226
  tool_citations = extract_tool_citations(tools)
227
 
228
+ # Calculate total citations
 
229
  total_citations = sum(len(citations) for citations in tool_citations.values())
 
 
 
 
230
 
231
  # Create citation manager
232
  citation_manager = CitationManager()
 
251
  'total_count': total_citations
252
  }
253
 
 
 
 
254
  return final_report, citation_info
app/handlers/ai_handler.py CHANGED
@@ -483,13 +483,9 @@ IMPORTANT: You must provide a FINAL ANALYSIS REPORT in proper format, not just t
483
 
484
  Your final response should be a complete, well-structured report following the format specified in your instructions."""
485
 
486
- # Run the comprehensive ReAct agent with progress tracking
487
  logger.info(f"Starting ReAct AI Agent for comprehensive due diligence analysis...")
488
 
489
- # Add progress indicator for user
490
- progress_placeholder = st.empty()
491
- progress_placeholder.info("🧠 **AI Agent Starting:** Initializing comprehensive analysis tools...")
492
-
493
  # Configure recursion limit and other settings
494
  config = {
495
  "recursion_limit": 25, # Allow enough steps for 8-10 tool calls + comprehensive synthesis
@@ -498,66 +494,14 @@ Your final response should be a complete, well-structured report following the f
498
  }
499
  }
500
 
501
- # Update progress
502
- progress_placeholder.info("🔍 **AI Agent Working:** Analyzing documents and gathering intelligence...")
503
-
504
  result = agent.invoke({
505
  "messages": [HumanMessage(content=analysis_request)]
506
  }, config=config)
507
 
508
- # Final progress update
509
- progress_placeholder.info("📊 **AI Agent Finalizing:** Synthesizing findings and generating report...")
510
-
511
- # Clear progress indicator
512
- progress_placeholder.empty()
513
-
514
- # Debug: Log the complete result structure
515
- logger.info(f"ReAct agent result type: {type(result)}")
516
- logger.info(f"ReAct agent result keys: {result.keys() if isinstance(result, dict) else 'Not a dict'}")
517
-
518
- # Extract the agent's final response with enhanced debugging
519
  agent_output = ""
520
  if result and "messages" in result:
521
- logger.info(f"Found {len(result['messages'])} messages in result")
522
-
523
- # Log all messages for debugging with more detail
524
- for i, message in enumerate(result["messages"]):
525
- msg_type = type(message).__name__
526
- has_content = hasattr(message, 'content')
527
-
528
- # Handle both string and list content types for debugging
529
- content_text = ""
530
- content_length = 0
531
-
532
- if has_content and message.content:
533
- if isinstance(message.content, list):
534
- # If content is a list, extract text parts for logging
535
- text_parts = []
536
- for item in message.content:
537
- if isinstance(item, dict) and 'text' in item:
538
- text_parts.append(item['text'])
539
- elif isinstance(item, str):
540
- text_parts.append(item)
541
- content_text = ' '.join(text_parts)
542
- else:
543
- content_text = str(message.content)
544
-
545
- content_length = len(content_text)
546
-
547
- logger.info(f"Message {i}: Type={msg_type}, Length={content_length}")
548
-
549
- if content_text:
550
- content_preview = content_text[:150]
551
- logger.info(f"Message {i} preview: {content_preview}...")
552
-
553
- # Check if this looks like a final report
554
- if (content_length > 500 and
555
- ('# Company Analysis' in content_text or '## Executive Summary' in content_text)):
556
- logger.info(f"Message {i} appears to be a FINAL REPORT")
557
- elif 'Analysis - ' in content_text[:50]:
558
- logger.info(f"Message {i} appears to be TOOL OUTPUT")
559
- elif content_text.startswith('I '):
560
- logger.info(f"Message {i} appears to be REASONING")
561
 
562
  # Get the final analysis report (not tool outputs)
563
  final_report = None
@@ -638,12 +582,8 @@ Your final response should be a complete, well-structured report following the f
638
  agent_output, tools, report_type
639
  )
640
 
641
- logger.info(f"ReAct agent analysis completed with agent output: {len(agent_output)} characters")
642
- logger.info(f"Formatted report length: {len(formatted_report)} characters")
643
- logger.info(f"Citation info: {citation_info}")
644
-
645
- # DEBUG: Log exactly what we're about to return
646
- logger.info(f"GENERATE_REACT_REPORT about to return: formatted_report={formatted_report is not None} ({len(formatted_report) if formatted_report else 0} chars), citation_info={citation_info}")
647
 
648
  return formatted_report, citation_info
649
 
 
483
 
484
  Your final response should be a complete, well-structured report following the format specified in your instructions."""
485
 
486
+ # Run the comprehensive ReAct agent
487
  logger.info(f"Starting ReAct AI Agent for comprehensive due diligence analysis...")
488
 
 
 
 
 
489
  # Configure recursion limit and other settings
490
  config = {
491
  "recursion_limit": 25, # Allow enough steps for 8-10 tool calls + comprehensive synthesis
 
494
  }
495
  }
496
 
 
 
 
497
  result = agent.invoke({
498
  "messages": [HumanMessage(content=analysis_request)]
499
  }, config=config)
500
 
501
+ # Extract the agent's final response
 
 
 
 
 
 
 
 
 
 
502
  agent_output = ""
503
  if result and "messages" in result:
504
+ logger.debug(f"Processing {len(result['messages'])} messages from ReAct agent")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
 
506
  # Get the final analysis report (not tool outputs)
507
  final_report = None
 
582
  agent_output, tools, report_type
583
  )
584
 
585
+ logger.info(f"ReAct agent analysis completed: {len(agent_output)} chars {len(formatted_report)} chars formatted")
586
+ logger.debug(f"Citation count: {citation_info.get('total_count', 0)} citations")
 
 
 
 
587
 
588
  return formatted_report, citation_info
589
 
app/ui/session_manager.py CHANGED
@@ -50,6 +50,7 @@ class SessionManager:
50
  overview_summary = SessionProperty("")
51
  strategic_summary = SessionProperty("")
52
  strategic_company_summary = SessionProperty("")
 
53
  # Note: Citations are now inline in the strategic_company_summary content
54
 
55
  # User selections
@@ -109,7 +110,7 @@ class SessionManager:
109
  """Reset analysis results and cached data for fresh analysis."""
110
  self.overview_summary = ""
111
  self.strategic_summary = ""
112
- # Note: strategic_company_summary and citations are preserved across document reprocessing
113
  # They are only cleared when explicitly generating new company analysis
114
  self.checklist_results = {}
115
  self.question_answers = {}
 
50
  overview_summary = SessionProperty("")
51
  strategic_summary = SessionProperty("")
52
  strategic_company_summary = SessionProperty("")
53
+ strategic_company_citations = SessionProperty([]) # CRITICAL FIX: Make citations persistent across reloads
54
  # Note: Citations are now inline in the strategic_company_summary content
55
 
56
  # User selections
 
110
  """Reset analysis results and cached data for fresh analysis."""
111
  self.overview_summary = ""
112
  self.strategic_summary = ""
113
+ # Note: strategic_company_summary and strategic_company_citations are preserved across document reprocessing
114
  # They are only cleared when explicitly generating new company analysis
115
  self.checklist_results = {}
116
  self.question_answers = {}
app/ui/tabs/company_analysis_tab.py CHANGED
@@ -10,7 +10,7 @@ import streamlit as st
10
  from typing import List, Dict, Any
11
 
12
  from app.ui.tabs.tab_base import TabBase
13
- from app.ui.ui_components import status_message, progress_status_tracker
14
  from app.core.logging import logger
15
 
16
 
@@ -51,27 +51,35 @@ class CompanyAnalysisTab(TabBase):
51
  self._set_processing_active(True)
52
 
53
  try:
54
- # STEP 1: Prepare comprehensive context by auto-running missing analyses
55
- with st.spinner("🔄 Preparing analysis context..."):
56
- self._prepare_comprehensive_context()
57
-
58
- # STEP 2: Generate comprehensive analysis with all available context
59
- analysis_progress = progress_status_tracker()
60
- analysis_steps = [
 
 
 
61
  "Initialize AI agent",
62
  "Analyze documents",
63
  "Generate report",
64
  "Validate citations"
65
  ]
66
- analysis_progress.initialize(analysis_steps, "🤖 AI Agent Analysis")
 
 
 
 
 
67
 
68
  # Use vdr_store for proper vector store access
69
  data_room_name = getattr(self.session, 'vdr_store', None) or self._get_data_room_name()
70
 
71
- analysis_progress.start_step(0, "🤖 Booting up AI ReAct Agent with advanced reasoning...")
72
- analysis_progress.complete_step(0, f"🎯 AI Agent ready - targeting {data_room_name}")
73
 
74
- analysis_progress.start_step(1, "🧠 AI Agent reading documents, extracting insights, reasoning about findings...")
75
  # Note: This step will run for the longest time, so we keep it in progress
76
 
77
  # Use comprehensive ReAct agent with full prepared context
@@ -84,27 +92,13 @@ class CompanyAnalysisTab(TabBase):
84
  project_info={'company_name': data_room_name, 'data_room_path': self.session.data_room_path}
85
  )
86
 
87
- analysis_progress.complete_step(1, "Document analysis completed")
88
- analysis_progress.start_step(2, "Generating report...")
89
- analysis_progress.complete_step(2, f"Report generated ({len(report_content) if report_content else 0} chars)")
90
-
91
- analysis_progress.start_step(3, "Validating citations...")
92
-
93
- # DEBUG: Log what was actually returned
94
- logger.info(f"RETURNED from generate_react_report: report_content={report_content is not None} ({len(report_content) if report_content else 0} chars), citation_info={citation_info}")
95
-
96
- # Validate that we have citations (they're now inline in the report)
97
- if not citation_info.get('has_citations', False):
98
- analysis_progress.error_step(3, "No citations found in analysis")
99
- logger.error("CRITICAL: No citations found in ReAct agent analysis")
100
- raise ValueError("Company analysis must include citations from source documents. No citations were found in the agent's analysis.")
101
-
102
- analysis_progress.complete_step(3, f"Citations validated: {len(citation_info.get('citations', []))} sources")
103
-
104
  # Store comprehensive analysis and citation info for rendering
105
  self.session.strategic_company_summary = report_content
106
- # Store citation info separately for download functionality
107
- setattr(self.session, 'strategic_company_citations', citation_info.get('citations', []))
 
 
 
108
  status_message("✅ Company analysis completed successfully!", "success")
109
  st.rerun()
110
 
@@ -115,24 +109,21 @@ class CompanyAnalysisTab(TabBase):
115
  # Always reset processing state
116
  self._set_processing_active(False)
117
 
118
- def _prepare_comprehensive_context(self):
119
  """Prepare comprehensive context by auto-running missing analyses and vectorizing results"""
120
 
121
- # Initialize progress tracker
122
- progress_tracker = progress_status_tracker()
123
-
124
- # Define all steps for better progress visualization
125
- steps = [
126
- "Verify data room processing",
127
- "Check vector store availability",
128
- "Validate session data",
129
- "Check strategy context",
130
- "Run checklist analysis",
131
- "Run Q&A analysis",
132
- "Vectorize analysis results"
133
- ]
134
-
135
- progress_tracker.initialize(steps, "🔄 Preparing Analysis Context")
136
 
137
  try:
138
  # STEP 1: Verify data room is processed
@@ -440,6 +431,7 @@ Key Finding: {answer[:200]}...
440
  logger.info(f"Rendering company analysis content: {len(content)} characters")
441
  logger.info(f"Available citations for download: {len(citations)}")
442
 
 
443
  # Import the simple clickable file rendering function
444
  from app.ui.ui_components import render_content_with_clickable_citations
445
 
 
10
  from typing import List, Dict, Any
11
 
12
  from app.ui.tabs.tab_base import TabBase
13
+ from app.ui.ui_components import status_message, ProgressTracker
14
  from app.core.logging import logger
15
 
16
 
 
51
  self._set_processing_active(True)
52
 
53
  try:
54
+ # Create single unified progress tracker for entire process
55
+ unified_progress = ProgressTracker()
56
+ all_steps = [
57
+ "Verify data room processing",
58
+ "Check vector store availability",
59
+ "Validate session data",
60
+ "Check strategy context",
61
+ "Run checklist analysis",
62
+ "Run Q&A analysis",
63
+ "Vectorize analysis results",
64
  "Initialize AI agent",
65
  "Analyze documents",
66
  "Generate report",
67
  "Validate citations"
68
  ]
69
+ unified_progress.initialize(all_steps, "🔄 Comprehensive Analysis")
70
+
71
+ # STEP 1: Prepare comprehensive context by auto-running missing analyses
72
+ self._prepare_comprehensive_context(unified_progress)
73
+
74
+ # STEP 2: Generate comprehensive analysis with all available context
75
 
76
  # Use vdr_store for proper vector store access
77
  data_room_name = getattr(self.session, 'vdr_store', None) or self._get_data_room_name()
78
 
79
+ unified_progress.start_step(7, "🤖 Booting up AI ReAct Agent with advanced reasoning...")
80
+ unified_progress.complete_step(7, f"🎯 AI Agent ready - targeting {data_room_name}")
81
 
82
+ unified_progress.start_step(8, "🧠 AI Agent reading documents, extracting insights, reasoning about findings...")
83
  # Note: This step will run for the longest time, so we keep it in progress
84
 
85
  # Use comprehensive ReAct agent with full prepared context
 
92
  project_info={'company_name': data_room_name, 'data_room_path': self.session.data_room_path}
93
  )
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # Store comprehensive analysis and citation info for rendering
96
  self.session.strategic_company_summary = report_content
97
+
98
+ # Store citation info for download functionality
99
+ citations_to_store = citation_info.get('citations', []) if citation_info else []
100
+ self.session.strategic_company_citations = citations_to_store
101
+
102
  status_message("✅ Company analysis completed successfully!", "success")
103
  st.rerun()
104
 
 
109
  # Always reset processing state
110
  self._set_processing_active(False)
111
 
112
+ def _prepare_comprehensive_context(self, progress_tracker=None):
113
  """Prepare comprehensive context by auto-running missing analyses and vectorizing results"""
114
 
115
+ if progress_tracker is None:
116
+ # If no progress tracker provided, create a simple one for standalone usage
117
+ progress_tracker = ProgressTracker()
118
+ progress_tracker.initialize([
119
+ "Verify data room processing",
120
+ "Check vector store availability",
121
+ "Validate session data",
122
+ "Check strategy context",
123
+ "Run checklist analysis",
124
+ "Run Q&A analysis",
125
+ "Vectorize analysis results"
126
+ ], "🔄 Preparing Analysis Context")
 
 
 
127
 
128
  try:
129
  # STEP 1: Verify data room is processed
 
431
  logger.info(f"Rendering company analysis content: {len(content)} characters")
432
  logger.info(f"Available citations for download: {len(citations)}")
433
 
434
+
435
  # Import the simple clickable file rendering function
436
  from app.ui.ui_components import render_content_with_clickable_citations
437
 
app/ui/tabs/qa_tab.py CHANGED
@@ -145,17 +145,33 @@ class QATab:
145
 
146
  # Display source documents with download buttons in collapsed expanders
147
  for i, result in enumerate(results[:3], 1):
148
- doc_source = result.get('source', 'Unknown')
149
- citation = result.get('citation', '')
150
- doc_title = f"{i}. {doc_source} ({citation})" if citation else f"{i}. {doc_source}"
 
 
 
 
 
 
 
151
 
152
  # Use expander to show documents collapsed by default
153
  with st.expander(f"📄 {doc_title}", expanded=False):
154
  col1, col2 = st.columns([5, 1])
155
  with col1:
156
- text_content = result.get('text', '')
 
 
 
 
 
 
157
  excerpt = text_content[:500] + "..." if len(text_content) > 500 else text_content
158
- st.markdown(f"\"{excerpt}\"")
 
 
 
159
 
160
  with col2:
161
  # Only show one download button
 
145
 
146
  # Display source documents with download buttons in collapsed expanders
147
  for i, result in enumerate(results[:3], 1):
148
+ # Handle different result formats defensively
149
+ if not isinstance(result, dict):
150
+ # Result might be malformed - show debug info
151
+ st.error(f"Debug - Result {i} is not a dict: {type(result)} = {result}")
152
+ continue
153
+
154
+ doc_source = result.get('source', result.get('name', 'Unknown Document'))
155
+ score = result.get('score', 0.0)
156
+ citation = f"Score: {score:.3f}"
157
+ doc_title = f"{i}. {doc_source} ({citation})"
158
 
159
  # Use expander to show documents collapsed by default
160
  with st.expander(f"📄 {doc_title}", expanded=False):
161
  col1, col2 = st.columns([5, 1])
162
  with col1:
163
+ # Handle different result formats defensively
164
+ text_content = result.get('text', result.get('document', result.get('content', '')))
165
+ if not text_content and isinstance(result, dict):
166
+ # Debug: show the actual structure if text is missing
167
+ st.error(f"Debug - Result structure: {list(result.keys())}")
168
+ text_content = str(result.get('page_content', result))
169
+
170
  excerpt = text_content[:500] + "..." if len(text_content) > 500 else text_content
171
+ if excerpt:
172
+ st.markdown(f"\"{excerpt}\"")
173
+ else:
174
+ st.warning("No text content available for this document")
175
 
176
  with col2:
177
  # Only show one download button
app/ui/tabs/questions_tab.py CHANGED
@@ -99,7 +99,6 @@ class QuestionsTab:
99
  from pathlib import Path
100
 
101
  # Step 1: Load pre-parsed questions (no LLM needed)
102
- st.info("📋 Loading pre-parsed questions...")
103
 
104
  # Extract filename from questions path
105
  if hasattr(self.session, 'questions_path') and self.session.questions_path:
@@ -109,17 +108,13 @@ class QuestionsTab:
109
 
110
  questions = load_prebuilt_questions(questions_filename)
111
  self.session.questions = questions
112
- st.info(f"Found {len(questions)} questions to process")
113
 
114
  # Step 2: Use pre-built FAISS index
115
- st.info("🔍 Setting up document search...")
116
  if not document_processor.vector_store:
117
  raise ValueError("No pre-built FAISS index loaded. Please ensure data room is processed first.")
118
  vector_store = document_processor.vector_store
119
 
120
  # Step 3: Process questions with batch processing
121
- st.info("🤖 **AI Agent Processing:** Running batch analysis with ReAct reasoning...")
122
- st.info("🧠 **Agent Status:** Using concurrent processing for faster results...")
123
 
124
  question_answers = search_and_analyze(
125
  questions,
 
99
  from pathlib import Path
100
 
101
  # Step 1: Load pre-parsed questions (no LLM needed)
 
102
 
103
  # Extract filename from questions path
104
  if hasattr(self.session, 'questions_path') and self.session.questions_path:
 
108
 
109
  questions = load_prebuilt_questions(questions_filename)
110
  self.session.questions = questions
 
111
 
112
  # Step 2: Use pre-built FAISS index
 
113
  if not document_processor.vector_store:
114
  raise ValueError("No pre-built FAISS index loaded. Please ensure data room is processed first.")
115
  vector_store = document_processor.vector_store
116
 
117
  # Step 3: Process questions with batch processing
 
 
118
 
119
  question_answers = search_and_analyze(
120
  questions,
app/ui/ui_components.py CHANGED
@@ -34,36 +34,21 @@ def _resolve_document_path(doc_path: str) -> Optional[Path]:
34
  if path_obj.is_absolute():
35
  return path_obj if path_obj.exists() else None
36
 
37
- # For relative paths, try to resolve against the data room path
38
- data_room_path = getattr(st.session_state, 'data_room_path', None)
39
- if data_room_path:
40
- resolved_path = Path(data_room_path) / path_obj
41
- if resolved_path.exists():
42
- return resolved_path
43
-
44
- # Fallback: try relative to data directory
45
- data_dir = Path('data')
46
- fallback_path = data_dir / path_obj
47
- if fallback_path.exists():
48
- return fallback_path
49
-
50
- # Enhanced search: Look in the currently selected data room only
51
- # This handles cases where files like "company-profile.pdf" are stored with just filename
52
- # but should only be resolved within the current data room context
53
-
54
- # Try using the data room path from session state
55
- current_data_room = getattr(st.session_state, 'data_room_path', None)
56
- if current_data_room and Path(current_data_room).exists():
57
- potential_path = Path(current_data_room) / path_obj
58
- if potential_path.exists():
59
- return potential_path
60
 
61
- # Also check for selected_data_room_path as fallback
62
- selected_data_room = getattr(st.session_state, 'selected_data_room_path', None)
63
- if selected_data_room and Path(selected_data_room).exists():
64
- potential_path = Path(selected_data_room) / path_obj
65
- if potential_path.exists():
66
- return potential_path
67
 
68
  # Last resort: check if original path exists as-is
69
  if path_obj.exists():
@@ -284,16 +269,6 @@ def status_message(message: str, message_type: str = "info"):
284
  st.info(message)
285
 
286
 
287
- def progress_indicator():
288
- """
289
- Create a progress indicator placeholder.
290
-
291
- Returns:
292
- A context manager for progress indication
293
- """
294
- return st.empty()
295
-
296
-
297
  def compact_status_display(status_items: list, title: str = "Status"):
298
  """
299
  Display a compact list of status items with minimal vertical spacing.
@@ -344,16 +319,6 @@ def compact_status_display(status_items: list, title: str = "Status"):
344
  """, unsafe_allow_html=True)
345
 
346
 
347
- def progress_status_tracker():
348
- """
349
- Create a progress status tracker that can be updated dynamically.
350
-
351
- Returns:
352
- A class instance that can track and update progress
353
- """
354
- return ProgressTracker()
355
-
356
-
357
  class ProgressTracker:
358
  """A class to track and display progress with real indicators"""
359
 
@@ -367,7 +332,7 @@ class ProgressTracker:
367
  def initialize(self, steps: list, title: str = "Progress"):
368
  """Initialize progress tracker with steps"""
369
  self.status_items = [
370
- {'message': step, 'status': 'pending', 'icon': '⏳'}
371
  for step in steps
372
  ]
373
  self.total_steps = len(steps)
@@ -378,7 +343,6 @@ class ProgressTracker:
378
  """Mark a step as in progress"""
379
  if step_index < len(self.status_items):
380
  self.status_items[step_index]['status'] = 'in_progress'
381
- self.status_items[step_index]['icon'] = '🔄'
382
  if message:
383
  self.status_items[step_index]['message'] = message
384
  self.current_step = step_index
@@ -388,7 +352,6 @@ class ProgressTracker:
388
  """Mark a step as completed"""
389
  if step_index < len(self.status_items):
390
  self.status_items[step_index]['status'] = 'completed'
391
- self.status_items[step_index]['icon'] = '✅'
392
  if message:
393
  self.status_items[step_index]['message'] = message
394
  self._render()
@@ -397,11 +360,11 @@ class ProgressTracker:
397
  """Mark a step as error"""
398
  if step_index < len(self.status_items):
399
  self.status_items[step_index]['status'] = 'error'
400
- self.status_items[step_index]['icon'] = '❌'
401
  if message:
402
  self.status_items[step_index]['message'] = message
403
  self._render()
404
 
 
405
  def _render(self, title: str = "Progress"):
406
  """Internal method to render current progress"""
407
  with self.container.container():
@@ -515,59 +478,41 @@ def render_file_selector(directory: str, file_type: str, key_suffix: str, icon:
515
  # ERROR HANDLING COMPONENTS - Standardized error message patterns
516
  # =============================================================================
517
 
518
- def display_generation_error(operation_type: str, error: Exception = None):
519
  """
520
- Display a standardized error message for generation failures.
521
 
522
  Args:
523
- operation_type: Type of operation that failed (e.g., "question analysis", "checklist analysis")
 
524
  error: The exception that occurred (optional)
 
525
  """
526
  if error:
527
- st.error(f" Failed to generate {operation_type}: {str(error)}")
528
  else:
529
- st.error(f" Failed to generate {operation_type}")
530
 
531
 
532
- def display_processing_error(operation_type: str, error: Exception = None):
533
- """
534
- Display a standardized error message for processing failures.
 
535
 
536
- Args:
537
- operation_type: Type of operation that failed (e.g., "question", "data room")
538
- error: The exception that occurred (optional)
539
- """
540
- if error:
541
- st.error(f"❌ Failed to process {operation_type}: {str(error)}")
542
- else:
543
- st.error(f"❌ Failed to process {operation_type}")
544
 
 
 
 
545
 
546
- def display_initialization_error(component_type: str, error: Exception = None):
547
- """
548
- Display a standardized error message for initialization failures.
549
 
550
- Args:
551
- component_type: Type of component that failed to initialize (e.g., "document processor")
552
- error: The exception that occurred (optional)
553
- """
554
- if error:
555
- st.error(f"❌ Failed to initialize {component_type}: {str(error)}")
556
- else:
557
- st.error(f"❌ Failed to initialize {component_type}")
558
 
559
 
560
  def display_download_error(error: Exception = None):
561
- """
562
- Display a standardized error message for download failures.
563
-
564
- Args:
565
- error: The exception that occurred (optional)
566
- """
567
- if error:
568
- st.error(f"❌ Download failed: {str(error)}")
569
- else:
570
- st.error("❌ Download failed")
571
 
572
 
573
  # =============================================================================
@@ -629,21 +574,9 @@ def render_checklist_results(results: dict, relevancy_threshold: float):
629
 
630
  col1, col2 = st.columns([4, 1])
631
  with col1:
632
- resolved_path = _resolve_document_path(doc_path)
633
- if resolved_path and resolved_path.exists():
634
- try:
635
- with open(resolved_path, 'rb') as f:
636
- st.download_button(
637
- f"📄 {doc_name}",
638
- data=f.read(),
639
- file_name=resolved_path.name,
640
- mime="application/octet-stream",
641
- key=f"download_{hash(doc_path) % 10000}_{item_idx}"
642
- )
643
- except Exception:
644
- st.write(f"📄 {doc_name} (unavailable)")
645
- else:
646
- st.write(f"📄 {doc_name} (unavailable)")
647
  with col2:
648
  st.caption(f"{score:.3f}")
649
  else:
@@ -667,7 +600,7 @@ def render_question_results(answers: dict):
667
  answer = answer_data.get('answer', 'No answer available')
668
  sources = answer_data.get('sources', [])
669
 
670
- with st.expander(f"**{question}**", expanded=True):
671
  if answer:
672
  st.markdown(f"**Answer:** {answer}")
673
 
@@ -680,55 +613,64 @@ def render_question_results(answers: dict):
680
 
681
  col1, col2 = st.columns([4, 1])
682
  with col1:
683
- resolved_path = _resolve_document_path(doc_path)
684
- if resolved_path and resolved_path.exists():
685
- try:
686
- with open(resolved_path, 'rb') as f:
687
- st.download_button(
688
- f"📄 {doc_name}",
689
- data=f.read(),
690
- file_name=resolved_path.name,
691
- mime="application/octet-stream",
692
- key=f"q_download_{hash(doc_path) % 10000}_{i}"
693
- )
694
- except Exception:
695
- st.write(f"📄 {doc_name} (unavailable)")
696
- else:
697
- st.write(f"📄 {doc_name} (unavailable)")
698
  with col2:
699
  st.caption(f"{score:.3f}")
700
 
701
 
702
- def create_document_link(doc_path: str, doc_name: str, doc_title: str, unique_key: str):
 
 
703
  """
704
- Create a download link for a document.
705
 
706
  Args:
707
  doc_path: Path to the document file (can be relative or absolute)
708
  doc_name: Display name for the document
709
- doc_title: Title for the document
710
  unique_key: Unique key for the download button
 
 
 
 
 
 
711
  """
712
- import streamlit as st
713
- from pathlib import Path
714
-
715
- # Resolve the path - handle both relative and absolute paths
716
  resolved_path = _resolve_document_path(doc_path)
717
 
718
  if resolved_path and resolved_path.exists():
719
  try:
720
  with open(resolved_path, 'rb') as f:
 
 
 
721
  st.download_button(
722
- f"📄 {doc_title}",
723
  data=f.read(),
724
  file_name=resolved_path.name,
725
- mime="application/octet-stream",
726
- key=f"link_{unique_key}"
 
727
  )
728
- except Exception as e:
729
- st.error(f"Error reading document: {doc_name}")
 
 
 
730
  else:
731
- st.write(f"📄 {doc_title} (unavailable)")
 
 
 
 
 
 
 
 
 
 
 
732
 
733
 
734
  def render_content_with_clickable_citations(content: str, citations: List[Dict[str, Any]]):
@@ -771,30 +713,11 @@ def render_content_with_clickable_citations(content: str, citations: List[Dict[s
771
  cols = st.columns(len(mentioned_docs))
772
  for i, (doc_name, doc_path) in enumerate(mentioned_docs):
773
  with cols[i]:
774
- _render_simple_download_button(doc_name, doc_path, f"para_{para_idx}_{i}")
775
  else:
776
  st.markdown("")
777
 
778
 
779
- def _render_simple_download_button(doc_name: str, doc_path: str, unique_key: str):
780
- """Simple inline download button"""
781
- resolved_path = _resolve_document_path(doc_path)
782
-
783
- if resolved_path and resolved_path.exists():
784
- try:
785
- with open(resolved_path, 'rb') as f:
786
- st.download_button(
787
- label=f"📄 {doc_name}",
788
- data=f.read(),
789
- file_name=resolved_path.name,
790
- mime="application/pdf" if doc_path.lower().endswith('.pdf') else "application/octet-stream",
791
- key=f"simple_download_{unique_key}",
792
- help=f"Download: {doc_name}"
793
- )
794
- except Exception:
795
- st.caption(f"📄 {doc_name} (unavailable)")
796
-
797
-
798
  # =============================================================================
799
  # GENERATE/REGENERATE BUTTON COMPONENTS - Common 2-column button layout
800
  # =============================================================================
 
34
  if path_obj.is_absolute():
35
  return path_obj if path_obj.exists() else None
36
 
37
+ # For relative paths, try different resolution strategies
38
+ resolution_paths = [
39
+ # Try current data room path
40
+ getattr(st.session_state, 'data_room_path', None),
41
+ # Try selected data room path as fallback
42
+ getattr(st.session_state, 'selected_data_room_path', None),
43
+ # Try relative to data directory
44
+ str(Path('data'))
45
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ for base_path in resolution_paths:
48
+ if base_path and Path(base_path).exists():
49
+ potential_path = Path(base_path) / path_obj
50
+ if potential_path.exists():
51
+ return potential_path
 
52
 
53
  # Last resort: check if original path exists as-is
54
  if path_obj.exists():
 
269
  st.info(message)
270
 
271
 
 
 
 
 
 
 
 
 
 
 
272
  def compact_status_display(status_items: list, title: str = "Status"):
273
  """
274
  Display a compact list of status items with minimal vertical spacing.
 
319
  """, unsafe_allow_html=True)
320
 
321
 
 
 
 
 
 
 
 
 
 
 
322
  class ProgressTracker:
323
  """A class to track and display progress with real indicators"""
324
 
 
332
  def initialize(self, steps: list, title: str = "Progress"):
333
  """Initialize progress tracker with steps"""
334
  self.status_items = [
335
+ {'message': step, 'status': 'pending'}
336
  for step in steps
337
  ]
338
  self.total_steps = len(steps)
 
343
  """Mark a step as in progress"""
344
  if step_index < len(self.status_items):
345
  self.status_items[step_index]['status'] = 'in_progress'
 
346
  if message:
347
  self.status_items[step_index]['message'] = message
348
  self.current_step = step_index
 
352
  """Mark a step as completed"""
353
  if step_index < len(self.status_items):
354
  self.status_items[step_index]['status'] = 'completed'
 
355
  if message:
356
  self.status_items[step_index]['message'] = message
357
  self._render()
 
360
  """Mark a step as error"""
361
  if step_index < len(self.status_items):
362
  self.status_items[step_index]['status'] = 'error'
 
363
  if message:
364
  self.status_items[step_index]['message'] = message
365
  self._render()
366
 
367
+
368
  def _render(self, title: str = "Progress"):
369
  """Internal method to render current progress"""
370
  with self.container.container():
 
478
  # ERROR HANDLING COMPONENTS - Standardized error message patterns
479
  # =============================================================================
480
 
481
def display_error(operation_type: str, action: str = "process", error: Exception = None, icon: str = "❌"):
    """
    Show a uniformly formatted failure message through ``st.error``.

    Args:
        operation_type: What the failed action applied to (e.g., "question analysis", "data room")
        action: Verb describing what failed (e.g., "generate", "process", "initialize")
        error: Exception whose text is appended after a colon; left out when falsy
        icon: Leading icon for the message (default: "❌")
    """
    # Pick the message variant first so the Streamlit call appears exactly once.
    message = (
        f"{icon} Failed to {action} {operation_type}: {str(error)}"
        if error
        else f"{icon} Failed to {action} {operation_type}"
    )
    st.error(message)
495
 
496
 
497
+ # Convenience functions for backward compatibility
498
def display_generation_error(operation_type: str, error: Exception = None):
    """DEPRECATED shim: forwards to display_error with the action "generate"."""
    display_error(operation_type, action="generate", error=error)
501
 
 
 
 
 
 
 
 
 
502
 
503
def display_processing_error(operation_type: str, error: Exception = None):
    """DEPRECATED shim: forwards to display_error with the action "process"."""
    display_error(operation_type, action="process", error=error)
506
 
 
 
 
507
 
508
def display_initialization_error(component_type: str, error: Exception = None):
    """DEPRECATED shim: forwards to display_error with the action "initialize"."""
    display_error(component_type, action="initialize", error=error)
 
 
 
 
 
511
 
512
 
513
  def display_download_error(error: Exception = None):
514
+ """DEPRECATED: Use display_error("download", action="complete") instead."""
515
+ display_error("download", "complete", error)
 
 
 
 
 
 
 
 
516
 
517
 
518
  # =============================================================================
 
574
 
575
  col1, col2 = st.columns([4, 1])
576
  with col1:
577
+ render_document_download_button(
578
+ doc_path, doc_name, f"checklist_{hash(doc_path) % 10000}_{item_idx}"
579
+ )
 
 
 
 
 
 
 
 
 
 
 
 
580
  with col2:
581
  st.caption(f"{score:.3f}")
582
  else:
 
600
  answer = answer_data.get('answer', 'No answer available')
601
  sources = answer_data.get('sources', [])
602
 
603
+ with st.expander(f"**{question}**", expanded=False):
604
  if answer:
605
  st.markdown(f"**Answer:** {answer}")
606
 
 
613
 
614
  col1, col2 = st.columns([4, 1])
615
  with col1:
616
+ render_document_download_button(
617
+ doc_path, doc_name, f"question_{hash(doc_path) % 10000}_{i}"
618
+ )
 
 
 
 
 
 
 
 
 
 
 
 
619
  with col2:
620
  st.caption(f"{score:.3f}")
621
 
622
 
623
def render_document_download_button(doc_path: str, doc_name: str, unique_key: str,
                                    label_prefix: str = "📄", show_unavailable: bool = True,
                                    button_help: str = None) -> bool:
    """
    Render a Streamlit download button for a document, or an "unavailable" notice.

    Args:
        doc_path: Path to the document file (can be relative or absolute)
        doc_name: Display name for the document
        unique_key: Unique key for the download button (prefixed with "download_")
        label_prefix: Prefix for the button label (default: "📄")
        show_unavailable: Whether to show a "(unavailable)" notice when the
            document cannot be offered for download
        button_help: Optional help text for the button; defaults to
            "Download: <doc_name>"

    Returns:
        True if the document was available and the button was rendered, False otherwise
    """
    # One notice for every failure mode, so a missing file and an unreadable
    # file look the same to the user.
    unavailable_text = f"{label_prefix} {doc_name} (unavailable)"

    resolved_path = _resolve_document_path(doc_path)

    if resolved_path and resolved_path.exists():
        try:
            # Derive the MIME type from the resolved file rather than the raw
            # argument, so a non-str doc_path cannot raise here.
            if resolved_path.suffix.lower() == ".pdf":
                mime_type = "application/pdf"
            else:
                mime_type = "application/octet-stream"

            # Read the bytes up front so the file is closed before the widget
            # is rendered.
            with open(resolved_path, 'rb') as f:
                file_bytes = f.read()

            st.download_button(
                label=f"{label_prefix} {doc_name}",
                data=file_bytes,
                file_name=resolved_path.name,
                mime=mime_type,
                key=f"download_{unique_key}",
                help=button_help or f"Download: {doc_name}"
            )
            return True
        except Exception:
            # Deliberate best effort: any failure while reading or rendering
            # degrades to the "unavailable" notice instead of breaking the page.
            if show_unavailable:
                st.write(unavailable_text)
            return False

    if show_unavailable:
        st.write(unavailable_text)
    return False
665
+
666
+
667
def create_document_link(doc_path: str, doc_name: str, doc_title: str, unique_key: str):
    """
    Render a download button for a document, labelled with ``doc_title``.

    DEPRECATED: Use render_document_download_button instead.

    ``doc_name`` is accepted but not used; the title is what gets displayed.
    """
    render_document_download_button(
        doc_path,
        doc_title,
        unique_key,
        label_prefix="📄",
        show_unavailable=True,
    )
674
 
675
 
676
  def render_content_with_clickable_citations(content: str, citations: List[Dict[str, Any]]):
 
713
  cols = st.columns(len(mentioned_docs))
714
  for i, (doc_name, doc_path) in enumerate(mentioned_docs):
715
  with cols[i]:
716
+ render_document_download_button(doc_path, doc_name, f"para_{para_idx}_{i}")
717
  else:
718
  st.markdown("")
719
 
720
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
721
  # =============================================================================
722
  # GENERATE/REGENERATE BUTTON COMPONENTS - Common 2-column button layout
723
  # =============================================================================