Juan Salas commited on
Commit ·
1ececa6
1
Parent(s): 52ef528
Fixed agent citations and progress tracking
Browse files- app/ai/citation_manager.py +24 -19
- app/handlers/ai_handler.py +5 -65
- app/ui/session_manager.py +2 -1
- app/ui/tabs/company_analysis_tab.py +39 -47
- app/ui/tabs/qa_tab.py +21 -5
- app/ui/tabs/questions_tab.py +0 -5
- app/ui/ui_components.py +77 -154
app/ai/citation_manager.py
CHANGED
|
@@ -46,12 +46,8 @@ class CitationManager:
|
|
| 46 |
def format_report_with_citations(self, report_text: str, tool_citations: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, List[Dict[str, Any]]]:
|
| 47 |
"""Format report text with inline download links instead of numbered citations"""
|
| 48 |
|
| 49 |
-
# DEBUG: Log input
|
| 50 |
-
logger.info(f"FORMAT_REPORT_WITH_CITATIONS input: tool_citations keys={list(tool_citations.keys())}, total_citations={sum(len(citations) for citations in tool_citations.values())}")
|
| 51 |
-
|
| 52 |
# Process citations from tools
|
| 53 |
for tool_name, citations_list in tool_citations.items():
|
| 54 |
-
logger.info(f"Processing {len(citations_list)} citations from tool {tool_name}")
|
| 55 |
for citation in citations_list:
|
| 56 |
self.add_citation(citation)
|
| 57 |
|
|
@@ -71,21 +67,42 @@ class CitationManager:
|
|
| 71 |
# We'll use a simple format that can be processed by Streamlit
|
| 72 |
inline_link = self._create_inline_download_link(clean_doc_name, doc_path, doc_id)
|
| 73 |
|
| 74 |
-
# Map
|
| 75 |
doc_replacements[doc_name] = inline_link
|
| 76 |
doc_replacements[clean_doc_name] = inline_link
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
# Sort by longest document name first to avoid partial matches
|
| 79 |
sorted_docs = sorted(doc_replacements.keys(), key=len, reverse=True)
|
| 80 |
|
|
|
|
| 81 |
for doc_name in sorted_docs:
|
| 82 |
inline_link = doc_replacements[doc_name]
|
| 83 |
|
| 84 |
# Simple string replacement for {Document Name} format
|
| 85 |
citation_marker = f"{{{doc_name}}}"
|
| 86 |
|
| 87 |
-
#
|
|
|
|
| 88 |
formatted_text = formatted_text.replace(citation_marker, inline_link)
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
# For compatibility, still return citation list (but it won't be used for bottom section)
|
| 91 |
citation_list = []
|
|
@@ -101,9 +118,6 @@ class CitationManager:
|
|
| 101 |
|
| 102 |
citation_list.sort(key=lambda x: x['id'])
|
| 103 |
|
| 104 |
-
# DEBUG: Log output
|
| 105 |
-
logger.info(f"FORMAT_REPORT_WITH_CITATIONS output: formatted_text with inline links, citation_list={len(citation_list)} items for compatibility")
|
| 106 |
-
|
| 107 |
return formatted_text, citation_list
|
| 108 |
|
| 109 |
def _create_inline_download_link(self, clean_name: str, doc_path: str, doc_id: str) -> str:
|
|
@@ -202,7 +216,6 @@ def extract_tool_citations(tools: List[Any]) -> Dict[str, List[Dict[str, Any]]]:
|
|
| 202 |
|
| 203 |
if citations:
|
| 204 |
all_citations[tool_name] = citations
|
| 205 |
-
|
| 206 |
return all_citations
|
| 207 |
|
| 208 |
|
|
@@ -212,13 +225,8 @@ def create_comprehensive_report(agent_output: str, tools: List[Any], report_type
|
|
| 212 |
# Extract citations from tools
|
| 213 |
tool_citations = extract_tool_citations(tools)
|
| 214 |
|
| 215 |
-
#
|
| 216 |
-
logger.info(f"Extracted tool citations: {len(tool_citations)} tools with citations")
|
| 217 |
total_citations = sum(len(citations) for citations in tool_citations.values())
|
| 218 |
-
for tool_name, citations_list in tool_citations.items():
|
| 219 |
-
logger.info(f"Tool {tool_name}: {len(citations_list)} citations")
|
| 220 |
-
for i, citation in enumerate(citations_list[:2]): # Log first 2 for debugging
|
| 221 |
-
logger.info(f" Citation {i+1}: {citation.get('name', 'No name')} - {citation.get('excerpt', 'No excerpt')[:50]}...")
|
| 222 |
|
| 223 |
# Create citation manager
|
| 224 |
citation_manager = CitationManager()
|
|
@@ -243,7 +251,4 @@ def create_comprehensive_report(agent_output: str, tools: List[Any], report_type
|
|
| 243 |
'total_count': total_citations
|
| 244 |
}
|
| 245 |
|
| 246 |
-
# DEBUG: Log exactly what we're returning
|
| 247 |
-
logger.info(f"CREATE_COMPREHENSIVE_REPORT returning: final_report={final_report is not None} ({len(final_report) if final_report else 0} chars), citation_info={citation_info}")
|
| 248 |
-
|
| 249 |
return final_report, citation_info
|
|
|
|
| 46 |
def format_report_with_citations(self, report_text: str, tool_citations: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, List[Dict[str, Any]]]:
|
| 47 |
"""Format report text with inline download links instead of numbered citations"""
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
# Process citations from tools
|
| 50 |
for tool_name, citations_list in tool_citations.items():
|
|
|
|
| 51 |
for citation in citations_list:
|
| 52 |
self.add_citation(citation)
|
| 53 |
|
|
|
|
| 67 |
# We'll use a simple format that can be processed by Streamlit
|
| 68 |
inline_link = self._create_inline_download_link(clean_doc_name, doc_path, doc_id)
|
| 69 |
|
| 70 |
+
# Map ALL possible variations to the same inline link for more flexible matching
|
| 71 |
doc_replacements[doc_name] = inline_link
|
| 72 |
doc_replacements[clean_doc_name] = inline_link
|
| 73 |
+
|
| 74 |
+
# Also map common variations
|
| 75 |
+
# Remove common prefixes/suffixes from file names
|
| 76 |
+
base_name = doc_name
|
| 77 |
+
if '.' in base_name:
|
| 78 |
+
base_name = base_name.split('.')[0] # Everything before first dot
|
| 79 |
+
if base_name != doc_name and base_name != clean_doc_name:
|
| 80 |
+
doc_replacements[base_name] = inline_link
|
| 81 |
+
|
| 82 |
+
# Also handle path-based names (just the filename part)
|
| 83 |
+
from pathlib import Path
|
| 84 |
+
if doc_path:
|
| 85 |
+
path_filename = Path(doc_path).name
|
| 86 |
+
path_clean = path_filename.replace('.pdf', '').replace('.docx', '').replace('.doc', '')
|
| 87 |
+
if path_clean not in doc_replacements:
|
| 88 |
+
doc_replacements[path_clean] = inline_link
|
| 89 |
|
| 90 |
# Sort by longest document name first to avoid partial matches
|
| 91 |
sorted_docs = sorted(doc_replacements.keys(), key=len, reverse=True)
|
| 92 |
|
| 93 |
+
replacements_made = 0
|
| 94 |
for doc_name in sorted_docs:
|
| 95 |
inline_link = doc_replacements[doc_name]
|
| 96 |
|
| 97 |
# Simple string replacement for {Document Name} format
|
| 98 |
citation_marker = f"{{{doc_name}}}"
|
| 99 |
|
| 100 |
+
# Count and replace
|
| 101 |
+
before_count = formatted_text.count(citation_marker)
|
| 102 |
formatted_text = formatted_text.replace(citation_marker, inline_link)
|
| 103 |
+
actual_replacements = before_count - formatted_text.count(citation_marker)
|
| 104 |
+
replacements_made += actual_replacements
|
| 105 |
+
|
| 106 |
|
| 107 |
# For compatibility, still return citation list (but it won't be used for bottom section)
|
| 108 |
citation_list = []
|
|
|
|
| 118 |
|
| 119 |
citation_list.sort(key=lambda x: x['id'])
|
| 120 |
|
|
|
|
|
|
|
|
|
|
| 121 |
return formatted_text, citation_list
|
| 122 |
|
| 123 |
def _create_inline_download_link(self, clean_name: str, doc_path: str, doc_id: str) -> str:
|
|
|
|
| 216 |
|
| 217 |
if citations:
|
| 218 |
all_citations[tool_name] = citations
|
|
|
|
| 219 |
return all_citations
|
| 220 |
|
| 221 |
|
|
|
|
| 225 |
# Extract citations from tools
|
| 226 |
tool_citations = extract_tool_citations(tools)
|
| 227 |
|
| 228 |
+
# Calculate total citations
|
|
|
|
| 229 |
total_citations = sum(len(citations) for citations in tool_citations.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
# Create citation manager
|
| 232 |
citation_manager = CitationManager()
|
|
|
|
| 251 |
'total_count': total_citations
|
| 252 |
}
|
| 253 |
|
|
|
|
|
|
|
|
|
|
| 254 |
return final_report, citation_info
|
app/handlers/ai_handler.py
CHANGED
|
@@ -483,13 +483,9 @@ IMPORTANT: You must provide a FINAL ANALYSIS REPORT in proper format, not just t
|
|
| 483 |
|
| 484 |
Your final response should be a complete, well-structured report following the format specified in your instructions."""
|
| 485 |
|
| 486 |
-
# Run the comprehensive ReAct agent
|
| 487 |
logger.info(f"Starting ReAct AI Agent for comprehensive due diligence analysis...")
|
| 488 |
|
| 489 |
-
# Add progress indicator for user
|
| 490 |
-
progress_placeholder = st.empty()
|
| 491 |
-
progress_placeholder.info("🧠 **AI Agent Starting:** Initializing comprehensive analysis tools...")
|
| 492 |
-
|
| 493 |
# Configure recursion limit and other settings
|
| 494 |
config = {
|
| 495 |
"recursion_limit": 25, # Allow enough steps for 8-10 tool calls + comprehensive synthesis
|
|
@@ -498,66 +494,14 @@ Your final response should be a complete, well-structured report following the f
|
|
| 498 |
}
|
| 499 |
}
|
| 500 |
|
| 501 |
-
# Update progress
|
| 502 |
-
progress_placeholder.info("🔍 **AI Agent Working:** Analyzing documents and gathering intelligence...")
|
| 503 |
-
|
| 504 |
result = agent.invoke({
|
| 505 |
"messages": [HumanMessage(content=analysis_request)]
|
| 506 |
}, config=config)
|
| 507 |
|
| 508 |
-
#
|
| 509 |
-
progress_placeholder.info("📊 **AI Agent Finalizing:** Synthesizing findings and generating report...")
|
| 510 |
-
|
| 511 |
-
# Clear progress indicator
|
| 512 |
-
progress_placeholder.empty()
|
| 513 |
-
|
| 514 |
-
# Debug: Log the complete result structure
|
| 515 |
-
logger.info(f"ReAct agent result type: {type(result)}")
|
| 516 |
-
logger.info(f"ReAct agent result keys: {result.keys() if isinstance(result, dict) else 'Not a dict'}")
|
| 517 |
-
|
| 518 |
-
# Extract the agent's final response with enhanced debugging
|
| 519 |
agent_output = ""
|
| 520 |
if result and "messages" in result:
|
| 521 |
-
logger.
|
| 522 |
-
|
| 523 |
-
# Log all messages for debugging with more detail
|
| 524 |
-
for i, message in enumerate(result["messages"]):
|
| 525 |
-
msg_type = type(message).__name__
|
| 526 |
-
has_content = hasattr(message, 'content')
|
| 527 |
-
|
| 528 |
-
# Handle both string and list content types for debugging
|
| 529 |
-
content_text = ""
|
| 530 |
-
content_length = 0
|
| 531 |
-
|
| 532 |
-
if has_content and message.content:
|
| 533 |
-
if isinstance(message.content, list):
|
| 534 |
-
# If content is a list, extract text parts for logging
|
| 535 |
-
text_parts = []
|
| 536 |
-
for item in message.content:
|
| 537 |
-
if isinstance(item, dict) and 'text' in item:
|
| 538 |
-
text_parts.append(item['text'])
|
| 539 |
-
elif isinstance(item, str):
|
| 540 |
-
text_parts.append(item)
|
| 541 |
-
content_text = ' '.join(text_parts)
|
| 542 |
-
else:
|
| 543 |
-
content_text = str(message.content)
|
| 544 |
-
|
| 545 |
-
content_length = len(content_text)
|
| 546 |
-
|
| 547 |
-
logger.info(f"Message {i}: Type={msg_type}, Length={content_length}")
|
| 548 |
-
|
| 549 |
-
if content_text:
|
| 550 |
-
content_preview = content_text[:150]
|
| 551 |
-
logger.info(f"Message {i} preview: {content_preview}...")
|
| 552 |
-
|
| 553 |
-
# Check if this looks like a final report
|
| 554 |
-
if (content_length > 500 and
|
| 555 |
-
('# Company Analysis' in content_text or '## Executive Summary' in content_text)):
|
| 556 |
-
logger.info(f"Message {i} appears to be a FINAL REPORT")
|
| 557 |
-
elif 'Analysis - ' in content_text[:50]:
|
| 558 |
-
logger.info(f"Message {i} appears to be TOOL OUTPUT")
|
| 559 |
-
elif content_text.startswith('I '):
|
| 560 |
-
logger.info(f"Message {i} appears to be REASONING")
|
| 561 |
|
| 562 |
# Get the final analysis report (not tool outputs)
|
| 563 |
final_report = None
|
|
@@ -638,12 +582,8 @@ Your final response should be a complete, well-structured report following the f
|
|
| 638 |
agent_output, tools, report_type
|
| 639 |
)
|
| 640 |
|
| 641 |
-
logger.info(f"ReAct agent analysis completed
|
| 642 |
-
logger.
|
| 643 |
-
logger.info(f"Citation info: {citation_info}")
|
| 644 |
-
|
| 645 |
-
# DEBUG: Log exactly what we're about to return
|
| 646 |
-
logger.info(f"GENERATE_REACT_REPORT about to return: formatted_report={formatted_report is not None} ({len(formatted_report) if formatted_report else 0} chars), citation_info={citation_info}")
|
| 647 |
|
| 648 |
return formatted_report, citation_info
|
| 649 |
|
|
|
|
| 483 |
|
| 484 |
Your final response should be a complete, well-structured report following the format specified in your instructions."""
|
| 485 |
|
| 486 |
+
# Run the comprehensive ReAct agent
|
| 487 |
logger.info(f"Starting ReAct AI Agent for comprehensive due diligence analysis...")
|
| 488 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
# Configure recursion limit and other settings
|
| 490 |
config = {
|
| 491 |
"recursion_limit": 25, # Allow enough steps for 8-10 tool calls + comprehensive synthesis
|
|
|
|
| 494 |
}
|
| 495 |
}
|
| 496 |
|
|
|
|
|
|
|
|
|
|
| 497 |
result = agent.invoke({
|
| 498 |
"messages": [HumanMessage(content=analysis_request)]
|
| 499 |
}, config=config)
|
| 500 |
|
| 501 |
+
# Extract the agent's final response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
agent_output = ""
|
| 503 |
if result and "messages" in result:
|
| 504 |
+
logger.debug(f"Processing {len(result['messages'])} messages from ReAct agent")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
|
| 506 |
# Get the final analysis report (not tool outputs)
|
| 507 |
final_report = None
|
|
|
|
| 582 |
agent_output, tools, report_type
|
| 583 |
)
|
| 584 |
|
| 585 |
+
logger.info(f"ReAct agent analysis completed: {len(agent_output)} chars → {len(formatted_report)} chars formatted")
|
| 586 |
+
logger.debug(f"Citation count: {citation_info.get('total_count', 0)} citations")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
|
| 588 |
return formatted_report, citation_info
|
| 589 |
|
app/ui/session_manager.py
CHANGED
|
@@ -50,6 +50,7 @@ class SessionManager:
|
|
| 50 |
overview_summary = SessionProperty("")
|
| 51 |
strategic_summary = SessionProperty("")
|
| 52 |
strategic_company_summary = SessionProperty("")
|
|
|
|
| 53 |
# Note: Citations are now inline in the strategic_company_summary content
|
| 54 |
|
| 55 |
# User selections
|
|
@@ -109,7 +110,7 @@ class SessionManager:
|
|
| 109 |
"""Reset analysis results and cached data for fresh analysis."""
|
| 110 |
self.overview_summary = ""
|
| 111 |
self.strategic_summary = ""
|
| 112 |
-
# Note: strategic_company_summary and
|
| 113 |
# They are only cleared when explicitly generating new company analysis
|
| 114 |
self.checklist_results = {}
|
| 115 |
self.question_answers = {}
|
|
|
|
| 50 |
overview_summary = SessionProperty("")
|
| 51 |
strategic_summary = SessionProperty("")
|
| 52 |
strategic_company_summary = SessionProperty("")
|
| 53 |
+
strategic_company_citations = SessionProperty([]) # CRITICAL FIX: Make citations persistent across reloads
|
| 54 |
# Note: Citations are now inline in the strategic_company_summary content
|
| 55 |
|
| 56 |
# User selections
|
|
|
|
| 110 |
"""Reset analysis results and cached data for fresh analysis."""
|
| 111 |
self.overview_summary = ""
|
| 112 |
self.strategic_summary = ""
|
| 113 |
+
# Note: strategic_company_summary and strategic_company_citations are preserved across document reprocessing
|
| 114 |
# They are only cleared when explicitly generating new company analysis
|
| 115 |
self.checklist_results = {}
|
| 116 |
self.question_answers = {}
|
app/ui/tabs/company_analysis_tab.py
CHANGED
|
@@ -10,7 +10,7 @@ import streamlit as st
|
|
| 10 |
from typing import List, Dict, Any
|
| 11 |
|
| 12 |
from app.ui.tabs.tab_base import TabBase
|
| 13 |
-
from app.ui.ui_components import status_message,
|
| 14 |
from app.core.logging import logger
|
| 15 |
|
| 16 |
|
|
@@ -51,27 +51,35 @@ class CompanyAnalysisTab(TabBase):
|
|
| 51 |
self._set_processing_active(True)
|
| 52 |
|
| 53 |
try:
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
| 61 |
"Initialize AI agent",
|
| 62 |
"Analyze documents",
|
| 63 |
"Generate report",
|
| 64 |
"Validate citations"
|
| 65 |
]
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# Use vdr_store for proper vector store access
|
| 69 |
data_room_name = getattr(self.session, 'vdr_store', None) or self._get_data_room_name()
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
|
| 74 |
-
|
| 75 |
# Note: This step will run for the longest time, so we keep it in progress
|
| 76 |
|
| 77 |
# Use comprehensive ReAct agent with full prepared context
|
|
@@ -84,27 +92,13 @@ class CompanyAnalysisTab(TabBase):
|
|
| 84 |
project_info={'company_name': data_room_name, 'data_room_path': self.session.data_room_path}
|
| 85 |
)
|
| 86 |
|
| 87 |
-
analysis_progress.complete_step(1, "Document analysis completed")
|
| 88 |
-
analysis_progress.start_step(2, "Generating report...")
|
| 89 |
-
analysis_progress.complete_step(2, f"Report generated ({len(report_content) if report_content else 0} chars)")
|
| 90 |
-
|
| 91 |
-
analysis_progress.start_step(3, "Validating citations...")
|
| 92 |
-
|
| 93 |
-
# DEBUG: Log what was actually returned
|
| 94 |
-
logger.info(f"RETURNED from generate_react_report: report_content={report_content is not None} ({len(report_content) if report_content else 0} chars), citation_info={citation_info}")
|
| 95 |
-
|
| 96 |
-
# Validate that we have citations (they're now inline in the report)
|
| 97 |
-
if not citation_info.get('has_citations', False):
|
| 98 |
-
analysis_progress.error_step(3, "No citations found in analysis")
|
| 99 |
-
logger.error("CRITICAL: No citations found in ReAct agent analysis")
|
| 100 |
-
raise ValueError("Company analysis must include citations from source documents. No citations were found in the agent's analysis.")
|
| 101 |
-
|
| 102 |
-
analysis_progress.complete_step(3, f"Citations validated: {len(citation_info.get('citations', []))} sources")
|
| 103 |
-
|
| 104 |
# Store comprehensive analysis and citation info for rendering
|
| 105 |
self.session.strategic_company_summary = report_content
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
| 108 |
status_message("✅ Company analysis completed successfully!", "success")
|
| 109 |
st.rerun()
|
| 110 |
|
|
@@ -115,24 +109,21 @@ class CompanyAnalysisTab(TabBase):
|
|
| 115 |
# Always reset processing state
|
| 116 |
self._set_processing_active(False)
|
| 117 |
|
| 118 |
-
def _prepare_comprehensive_context(self):
|
| 119 |
"""Prepare comprehensive context by auto-running missing analyses and vectorizing results"""
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
"
|
| 133 |
-
]
|
| 134 |
-
|
| 135 |
-
progress_tracker.initialize(steps, "🔄 Preparing Analysis Context")
|
| 136 |
|
| 137 |
try:
|
| 138 |
# STEP 1: Verify data room is processed
|
|
@@ -440,6 +431,7 @@ Key Finding: {answer[:200]}...
|
|
| 440 |
logger.info(f"Rendering company analysis content: {len(content)} characters")
|
| 441 |
logger.info(f"Available citations for download: {len(citations)}")
|
| 442 |
|
|
|
|
| 443 |
# Import the simple clickable file rendering function
|
| 444 |
from app.ui.ui_components import render_content_with_clickable_citations
|
| 445 |
|
|
|
|
| 10 |
from typing import List, Dict, Any
|
| 11 |
|
| 12 |
from app.ui.tabs.tab_base import TabBase
|
| 13 |
+
from app.ui.ui_components import status_message, ProgressTracker
|
| 14 |
from app.core.logging import logger
|
| 15 |
|
| 16 |
|
|
|
|
| 51 |
self._set_processing_active(True)
|
| 52 |
|
| 53 |
try:
|
| 54 |
+
# Create single unified progress tracker for entire process
|
| 55 |
+
unified_progress = ProgressTracker()
|
| 56 |
+
all_steps = [
|
| 57 |
+
"Verify data room processing",
|
| 58 |
+
"Check vector store availability",
|
| 59 |
+
"Validate session data",
|
| 60 |
+
"Check strategy context",
|
| 61 |
+
"Run checklist analysis",
|
| 62 |
+
"Run Q&A analysis",
|
| 63 |
+
"Vectorize analysis results",
|
| 64 |
"Initialize AI agent",
|
| 65 |
"Analyze documents",
|
| 66 |
"Generate report",
|
| 67 |
"Validate citations"
|
| 68 |
]
|
| 69 |
+
unified_progress.initialize(all_steps, "🔄 Comprehensive Analysis")
|
| 70 |
+
|
| 71 |
+
# STEP 1: Prepare comprehensive context by auto-running missing analyses
|
| 72 |
+
self._prepare_comprehensive_context(unified_progress)
|
| 73 |
+
|
| 74 |
+
# STEP 2: Generate comprehensive analysis with all available context
|
| 75 |
|
| 76 |
# Use vdr_store for proper vector store access
|
| 77 |
data_room_name = getattr(self.session, 'vdr_store', None) or self._get_data_room_name()
|
| 78 |
|
| 79 |
+
unified_progress.start_step(7, "🤖 Booting up AI ReAct Agent with advanced reasoning...")
|
| 80 |
+
unified_progress.complete_step(7, f"🎯 AI Agent ready - targeting {data_room_name}")
|
| 81 |
|
| 82 |
+
unified_progress.start_step(8, "🧠 AI Agent reading documents, extracting insights, reasoning about findings...")
|
| 83 |
# Note: This step will run for the longest time, so we keep it in progress
|
| 84 |
|
| 85 |
# Use comprehensive ReAct agent with full prepared context
|
|
|
|
| 92 |
project_info={'company_name': data_room_name, 'data_room_path': self.session.data_room_path}
|
| 93 |
)
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
# Store comprehensive analysis and citation info for rendering
|
| 96 |
self.session.strategic_company_summary = report_content
|
| 97 |
+
|
| 98 |
+
# Store citation info for download functionality
|
| 99 |
+
citations_to_store = citation_info.get('citations', []) if citation_info else []
|
| 100 |
+
self.session.strategic_company_citations = citations_to_store
|
| 101 |
+
|
| 102 |
status_message("✅ Company analysis completed successfully!", "success")
|
| 103 |
st.rerun()
|
| 104 |
|
|
|
|
| 109 |
# Always reset processing state
|
| 110 |
self._set_processing_active(False)
|
| 111 |
|
| 112 |
+
def _prepare_comprehensive_context(self, progress_tracker=None):
|
| 113 |
"""Prepare comprehensive context by auto-running missing analyses and vectorizing results"""
|
| 114 |
|
| 115 |
+
if progress_tracker is None:
|
| 116 |
+
# If no progress tracker provided, create a simple one for standalone usage
|
| 117 |
+
progress_tracker = ProgressTracker()
|
| 118 |
+
progress_tracker.initialize([
|
| 119 |
+
"Verify data room processing",
|
| 120 |
+
"Check vector store availability",
|
| 121 |
+
"Validate session data",
|
| 122 |
+
"Check strategy context",
|
| 123 |
+
"Run checklist analysis",
|
| 124 |
+
"Run Q&A analysis",
|
| 125 |
+
"Vectorize analysis results"
|
| 126 |
+
], "🔄 Preparing Analysis Context")
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
try:
|
| 129 |
# STEP 1: Verify data room is processed
|
|
|
|
| 431 |
logger.info(f"Rendering company analysis content: {len(content)} characters")
|
| 432 |
logger.info(f"Available citations for download: {len(citations)}")
|
| 433 |
|
| 434 |
+
|
| 435 |
# Import the simple clickable file rendering function
|
| 436 |
from app.ui.ui_components import render_content_with_clickable_citations
|
| 437 |
|
app/ui/tabs/qa_tab.py
CHANGED
|
@@ -145,17 +145,33 @@ class QATab:
|
|
| 145 |
|
| 146 |
# Display source documents with download buttons in collapsed expanders
|
| 147 |
for i, result in enumerate(results[:3], 1):
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
# Use expander to show documents collapsed by default
|
| 153 |
with st.expander(f"📄 {doc_title}", expanded=False):
|
| 154 |
col1, col2 = st.columns([5, 1])
|
| 155 |
with col1:
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
excerpt = text_content[:500] + "..." if len(text_content) > 500 else text_content
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
with col2:
|
| 161 |
# Only show one download button
|
|
|
|
| 145 |
|
| 146 |
# Display source documents with download buttons in collapsed expanders
|
| 147 |
for i, result in enumerate(results[:3], 1):
|
| 148 |
+
# Handle different result formats defensively
|
| 149 |
+
if not isinstance(result, dict):
|
| 150 |
+
# Result might be malformed - show debug info
|
| 151 |
+
st.error(f"Debug - Result {i} is not a dict: {type(result)} = {result}")
|
| 152 |
+
continue
|
| 153 |
+
|
| 154 |
+
doc_source = result.get('source', result.get('name', 'Unknown Document'))
|
| 155 |
+
score = result.get('score', 0.0)
|
| 156 |
+
citation = f"Score: {score:.3f}"
|
| 157 |
+
doc_title = f"{i}. {doc_source} ({citation})"
|
| 158 |
|
| 159 |
# Use expander to show documents collapsed by default
|
| 160 |
with st.expander(f"📄 {doc_title}", expanded=False):
|
| 161 |
col1, col2 = st.columns([5, 1])
|
| 162 |
with col1:
|
| 163 |
+
# Handle different result formats defensively
|
| 164 |
+
text_content = result.get('text', result.get('document', result.get('content', '')))
|
| 165 |
+
if not text_content and isinstance(result, dict):
|
| 166 |
+
# Debug: show the actual structure if text is missing
|
| 167 |
+
st.error(f"Debug - Result structure: {list(result.keys())}")
|
| 168 |
+
text_content = str(result.get('page_content', result))
|
| 169 |
+
|
| 170 |
excerpt = text_content[:500] + "..." if len(text_content) > 500 else text_content
|
| 171 |
+
if excerpt:
|
| 172 |
+
st.markdown(f"\"{excerpt}\"")
|
| 173 |
+
else:
|
| 174 |
+
st.warning("No text content available for this document")
|
| 175 |
|
| 176 |
with col2:
|
| 177 |
# Only show one download button
|
app/ui/tabs/questions_tab.py
CHANGED
|
@@ -99,7 +99,6 @@ class QuestionsTab:
|
|
| 99 |
from pathlib import Path
|
| 100 |
|
| 101 |
# Step 1: Load pre-parsed questions (no LLM needed)
|
| 102 |
-
st.info("📋 Loading pre-parsed questions...")
|
| 103 |
|
| 104 |
# Extract filename from questions path
|
| 105 |
if hasattr(self.session, 'questions_path') and self.session.questions_path:
|
|
@@ -109,17 +108,13 @@ class QuestionsTab:
|
|
| 109 |
|
| 110 |
questions = load_prebuilt_questions(questions_filename)
|
| 111 |
self.session.questions = questions
|
| 112 |
-
st.info(f"Found {len(questions)} questions to process")
|
| 113 |
|
| 114 |
# Step 2: Use pre-built FAISS index
|
| 115 |
-
st.info("🔍 Setting up document search...")
|
| 116 |
if not document_processor.vector_store:
|
| 117 |
raise ValueError("No pre-built FAISS index loaded. Please ensure data room is processed first.")
|
| 118 |
vector_store = document_processor.vector_store
|
| 119 |
|
| 120 |
# Step 3: Process questions with batch processing
|
| 121 |
-
st.info("🤖 **AI Agent Processing:** Running batch analysis with ReAct reasoning...")
|
| 122 |
-
st.info("🧠 **Agent Status:** Using concurrent processing for faster results...")
|
| 123 |
|
| 124 |
question_answers = search_and_analyze(
|
| 125 |
questions,
|
|
|
|
| 99 |
from pathlib import Path
|
| 100 |
|
| 101 |
# Step 1: Load pre-parsed questions (no LLM needed)
|
|
|
|
| 102 |
|
| 103 |
# Extract filename from questions path
|
| 104 |
if hasattr(self.session, 'questions_path') and self.session.questions_path:
|
|
|
|
| 108 |
|
| 109 |
questions = load_prebuilt_questions(questions_filename)
|
| 110 |
self.session.questions = questions
|
|
|
|
| 111 |
|
| 112 |
# Step 2: Use pre-built FAISS index
|
|
|
|
| 113 |
if not document_processor.vector_store:
|
| 114 |
raise ValueError("No pre-built FAISS index loaded. Please ensure data room is processed first.")
|
| 115 |
vector_store = document_processor.vector_store
|
| 116 |
|
| 117 |
# Step 3: Process questions with batch processing
|
|
|
|
|
|
|
| 118 |
|
| 119 |
question_answers = search_and_analyze(
|
| 120 |
questions,
|
app/ui/ui_components.py
CHANGED
|
@@ -34,36 +34,21 @@ def _resolve_document_path(doc_path: str) -> Optional[Path]:
|
|
| 34 |
if path_obj.is_absolute():
|
| 35 |
return path_obj if path_obj.exists() else None
|
| 36 |
|
| 37 |
-
# For relative paths, try
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
fallback_path = data_dir / path_obj
|
| 47 |
-
if fallback_path.exists():
|
| 48 |
-
return fallback_path
|
| 49 |
-
|
| 50 |
-
# Enhanced search: Look in the currently selected data room only
|
| 51 |
-
# This handles cases where files like "company-profile.pdf" are stored with just filename
|
| 52 |
-
# but should only be resolved within the current data room context
|
| 53 |
-
|
| 54 |
-
# Try using the data room path from session state
|
| 55 |
-
current_data_room = getattr(st.session_state, 'data_room_path', None)
|
| 56 |
-
if current_data_room and Path(current_data_room).exists():
|
| 57 |
-
potential_path = Path(current_data_room) / path_obj
|
| 58 |
-
if potential_path.exists():
|
| 59 |
-
return potential_path
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
return potential_path
|
| 67 |
|
| 68 |
# Last resort: check if original path exists as-is
|
| 69 |
if path_obj.exists():
|
|
@@ -284,16 +269,6 @@ def status_message(message: str, message_type: str = "info"):
|
|
| 284 |
st.info(message)
|
| 285 |
|
| 286 |
|
| 287 |
-
def progress_indicator():
|
| 288 |
-
"""
|
| 289 |
-
Create a progress indicator placeholder.
|
| 290 |
-
|
| 291 |
-
Returns:
|
| 292 |
-
A context manager for progress indication
|
| 293 |
-
"""
|
| 294 |
-
return st.empty()
|
| 295 |
-
|
| 296 |
-
|
| 297 |
def compact_status_display(status_items: list, title: str = "Status"):
|
| 298 |
"""
|
| 299 |
Display a compact list of status items with minimal vertical spacing.
|
|
@@ -344,16 +319,6 @@ def compact_status_display(status_items: list, title: str = "Status"):
|
|
| 344 |
""", unsafe_allow_html=True)
|
| 345 |
|
| 346 |
|
| 347 |
-
def progress_status_tracker():
|
| 348 |
-
"""
|
| 349 |
-
Create a progress status tracker that can be updated dynamically.
|
| 350 |
-
|
| 351 |
-
Returns:
|
| 352 |
-
A class instance that can track and update progress
|
| 353 |
-
"""
|
| 354 |
-
return ProgressTracker()
|
| 355 |
-
|
| 356 |
-
|
| 357 |
class ProgressTracker:
|
| 358 |
"""A class to track and display progress with real indicators"""
|
| 359 |
|
|
@@ -367,7 +332,7 @@ class ProgressTracker:
|
|
| 367 |
def initialize(self, steps: list, title: str = "Progress"):
|
| 368 |
"""Initialize progress tracker with steps"""
|
| 369 |
self.status_items = [
|
| 370 |
-
{'message': step, 'status': 'pending'
|
| 371 |
for step in steps
|
| 372 |
]
|
| 373 |
self.total_steps = len(steps)
|
|
@@ -378,7 +343,6 @@ class ProgressTracker:
|
|
| 378 |
"""Mark a step as in progress"""
|
| 379 |
if step_index < len(self.status_items):
|
| 380 |
self.status_items[step_index]['status'] = 'in_progress'
|
| 381 |
-
self.status_items[step_index]['icon'] = '🔄'
|
| 382 |
if message:
|
| 383 |
self.status_items[step_index]['message'] = message
|
| 384 |
self.current_step = step_index
|
|
@@ -388,7 +352,6 @@ class ProgressTracker:
|
|
| 388 |
"""Mark a step as completed"""
|
| 389 |
if step_index < len(self.status_items):
|
| 390 |
self.status_items[step_index]['status'] = 'completed'
|
| 391 |
-
self.status_items[step_index]['icon'] = '✅'
|
| 392 |
if message:
|
| 393 |
self.status_items[step_index]['message'] = message
|
| 394 |
self._render()
|
|
@@ -397,11 +360,11 @@ class ProgressTracker:
|
|
| 397 |
"""Mark a step as error"""
|
| 398 |
if step_index < len(self.status_items):
|
| 399 |
self.status_items[step_index]['status'] = 'error'
|
| 400 |
-
self.status_items[step_index]['icon'] = '❌'
|
| 401 |
if message:
|
| 402 |
self.status_items[step_index]['message'] = message
|
| 403 |
self._render()
|
| 404 |
|
|
|
|
| 405 |
def _render(self, title: str = "Progress"):
|
| 406 |
"""Internal method to render current progress"""
|
| 407 |
with self.container.container():
|
|
@@ -515,59 +478,41 @@ def render_file_selector(directory: str, file_type: str, key_suffix: str, icon:
|
|
| 515 |
# ERROR HANDLING COMPONENTS - Standardized error message patterns
|
| 516 |
# =============================================================================
|
| 517 |
|
| 518 |
-
def
|
| 519 |
"""
|
| 520 |
-
Display a standardized error message for
|
| 521 |
|
| 522 |
Args:
|
| 523 |
-
operation_type: Type of operation that failed (e.g., "question analysis", "
|
|
|
|
| 524 |
error: The exception that occurred (optional)
|
|
|
|
| 525 |
"""
|
| 526 |
if error:
|
| 527 |
-
st.error(f"
|
| 528 |
else:
|
| 529 |
-
st.error(f"
|
| 530 |
|
| 531 |
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
|
|
|
| 535 |
|
| 536 |
-
Args:
|
| 537 |
-
operation_type: Type of operation that failed (e.g., "question", "data room")
|
| 538 |
-
error: The exception that occurred (optional)
|
| 539 |
-
"""
|
| 540 |
-
if error:
|
| 541 |
-
st.error(f"❌ Failed to process {operation_type}: {str(error)}")
|
| 542 |
-
else:
|
| 543 |
-
st.error(f"❌ Failed to process {operation_type}")
|
| 544 |
|
|
|
|
|
|
|
|
|
|
| 545 |
|
| 546 |
-
def display_initialization_error(component_type: str, error: Exception = None):
|
| 547 |
-
"""
|
| 548 |
-
Display a standardized error message for initialization failures.
|
| 549 |
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
"""
|
| 554 |
-
if error:
|
| 555 |
-
st.error(f"❌ Failed to initialize {component_type}: {str(error)}")
|
| 556 |
-
else:
|
| 557 |
-
st.error(f"❌ Failed to initialize {component_type}")
|
| 558 |
|
| 559 |
|
| 560 |
def display_download_error(error: Exception = None):
|
| 561 |
-
"""
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
Args:
|
| 565 |
-
error: The exception that occurred (optional)
|
| 566 |
-
"""
|
| 567 |
-
if error:
|
| 568 |
-
st.error(f"❌ Download failed: {str(error)}")
|
| 569 |
-
else:
|
| 570 |
-
st.error("❌ Download failed")
|
| 571 |
|
| 572 |
|
| 573 |
# =============================================================================
|
|
@@ -629,21 +574,9 @@ def render_checklist_results(results: dict, relevancy_threshold: float):
|
|
| 629 |
|
| 630 |
col1, col2 = st.columns([4, 1])
|
| 631 |
with col1:
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
with open(resolved_path, 'rb') as f:
|
| 636 |
-
st.download_button(
|
| 637 |
-
f"📄 {doc_name}",
|
| 638 |
-
data=f.read(),
|
| 639 |
-
file_name=resolved_path.name,
|
| 640 |
-
mime="application/octet-stream",
|
| 641 |
-
key=f"download_{hash(doc_path) % 10000}_{item_idx}"
|
| 642 |
-
)
|
| 643 |
-
except Exception:
|
| 644 |
-
st.write(f"📄 {doc_name} (unavailable)")
|
| 645 |
-
else:
|
| 646 |
-
st.write(f"📄 {doc_name} (unavailable)")
|
| 647 |
with col2:
|
| 648 |
st.caption(f"{score:.3f}")
|
| 649 |
else:
|
|
@@ -667,7 +600,7 @@ def render_question_results(answers: dict):
|
|
| 667 |
answer = answer_data.get('answer', 'No answer available')
|
| 668 |
sources = answer_data.get('sources', [])
|
| 669 |
|
| 670 |
-
with st.expander(f"**{question}**", expanded=
|
| 671 |
if answer:
|
| 672 |
st.markdown(f"**Answer:** {answer}")
|
| 673 |
|
|
@@ -680,55 +613,64 @@ def render_question_results(answers: dict):
|
|
| 680 |
|
| 681 |
col1, col2 = st.columns([4, 1])
|
| 682 |
with col1:
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
with open(resolved_path, 'rb') as f:
|
| 687 |
-
st.download_button(
|
| 688 |
-
f"📄 {doc_name}",
|
| 689 |
-
data=f.read(),
|
| 690 |
-
file_name=resolved_path.name,
|
| 691 |
-
mime="application/octet-stream",
|
| 692 |
-
key=f"q_download_{hash(doc_path) % 10000}_{i}"
|
| 693 |
-
)
|
| 694 |
-
except Exception:
|
| 695 |
-
st.write(f"📄 {doc_name} (unavailable)")
|
| 696 |
-
else:
|
| 697 |
-
st.write(f"📄 {doc_name} (unavailable)")
|
| 698 |
with col2:
|
| 699 |
st.caption(f"{score:.3f}")
|
| 700 |
|
| 701 |
|
| 702 |
-
def
|
|
|
|
|
|
|
| 703 |
"""
|
| 704 |
-
|
| 705 |
|
| 706 |
Args:
|
| 707 |
doc_path: Path to the document file (can be relative or absolute)
|
| 708 |
doc_name: Display name for the document
|
| 709 |
-
doc_title: Title for the document
|
| 710 |
unique_key: Unique key for the download button
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 711 |
"""
|
| 712 |
-
import streamlit as st
|
| 713 |
-
from pathlib import Path
|
| 714 |
-
|
| 715 |
-
# Resolve the path - handle both relative and absolute paths
|
| 716 |
resolved_path = _resolve_document_path(doc_path)
|
| 717 |
|
| 718 |
if resolved_path and resolved_path.exists():
|
| 719 |
try:
|
| 720 |
with open(resolved_path, 'rb') as f:
|
|
|
|
|
|
|
|
|
|
| 721 |
st.download_button(
|
| 722 |
-
f"
|
| 723 |
data=f.read(),
|
| 724 |
file_name=resolved_path.name,
|
| 725 |
-
mime=
|
| 726 |
-
key=f"
|
|
|
|
| 727 |
)
|
| 728 |
-
|
| 729 |
-
|
|
|
|
|
|
|
|
|
|
| 730 |
else:
|
| 731 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 732 |
|
| 733 |
|
| 734 |
def render_content_with_clickable_citations(content: str, citations: List[Dict[str, Any]]):
|
|
@@ -771,30 +713,11 @@ def render_content_with_clickable_citations(content: str, citations: List[Dict[s
|
|
| 771 |
cols = st.columns(len(mentioned_docs))
|
| 772 |
for i, (doc_name, doc_path) in enumerate(mentioned_docs):
|
| 773 |
with cols[i]:
|
| 774 |
-
|
| 775 |
else:
|
| 776 |
st.markdown("")
|
| 777 |
|
| 778 |
|
| 779 |
-
def _render_simple_download_button(doc_name: str, doc_path: str, unique_key: str):
|
| 780 |
-
"""Simple inline download button"""
|
| 781 |
-
resolved_path = _resolve_document_path(doc_path)
|
| 782 |
-
|
| 783 |
-
if resolved_path and resolved_path.exists():
|
| 784 |
-
try:
|
| 785 |
-
with open(resolved_path, 'rb') as f:
|
| 786 |
-
st.download_button(
|
| 787 |
-
label=f"📄 {doc_name}",
|
| 788 |
-
data=f.read(),
|
| 789 |
-
file_name=resolved_path.name,
|
| 790 |
-
mime="application/pdf" if doc_path.lower().endswith('.pdf') else "application/octet-stream",
|
| 791 |
-
key=f"simple_download_{unique_key}",
|
| 792 |
-
help=f"Download: {doc_name}"
|
| 793 |
-
)
|
| 794 |
-
except Exception:
|
| 795 |
-
st.caption(f"📄 {doc_name} (unavailable)")
|
| 796 |
-
|
| 797 |
-
|
| 798 |
# =============================================================================
|
| 799 |
# GENERATE/REGENERATE BUTTON COMPONENTS - Common 2-column button layout
|
| 800 |
# =============================================================================
|
|
|
|
| 34 |
if path_obj.is_absolute():
|
| 35 |
return path_obj if path_obj.exists() else None
|
| 36 |
|
| 37 |
+
# For relative paths, try different resolution strategies
|
| 38 |
+
resolution_paths = [
|
| 39 |
+
# Try current data room path
|
| 40 |
+
getattr(st.session_state, 'data_room_path', None),
|
| 41 |
+
# Try selected data room path as fallback
|
| 42 |
+
getattr(st.session_state, 'selected_data_room_path', None),
|
| 43 |
+
# Try relative to data directory
|
| 44 |
+
str(Path('data'))
|
| 45 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
for base_path in resolution_paths:
|
| 48 |
+
if base_path and Path(base_path).exists():
|
| 49 |
+
potential_path = Path(base_path) / path_obj
|
| 50 |
+
if potential_path.exists():
|
| 51 |
+
return potential_path
|
|
|
|
| 52 |
|
| 53 |
# Last resort: check if original path exists as-is
|
| 54 |
if path_obj.exists():
|
|
|
|
| 269 |
st.info(message)
|
| 270 |
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
def compact_status_display(status_items: list, title: str = "Status"):
|
| 273 |
"""
|
| 274 |
Display a compact list of status items with minimal vertical spacing.
|
|
|
|
| 319 |
""", unsafe_allow_html=True)
|
| 320 |
|
| 321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
class ProgressTracker:
|
| 323 |
"""A class to track and display progress with real indicators"""
|
| 324 |
|
|
|
|
| 332 |
def initialize(self, steps: list, title: str = "Progress"):
|
| 333 |
"""Initialize progress tracker with steps"""
|
| 334 |
self.status_items = [
|
| 335 |
+
{'message': step, 'status': 'pending'}
|
| 336 |
for step in steps
|
| 337 |
]
|
| 338 |
self.total_steps = len(steps)
|
|
|
|
| 343 |
"""Mark a step as in progress"""
|
| 344 |
if step_index < len(self.status_items):
|
| 345 |
self.status_items[step_index]['status'] = 'in_progress'
|
|
|
|
| 346 |
if message:
|
| 347 |
self.status_items[step_index]['message'] = message
|
| 348 |
self.current_step = step_index
|
|
|
|
| 352 |
"""Mark a step as completed"""
|
| 353 |
if step_index < len(self.status_items):
|
| 354 |
self.status_items[step_index]['status'] = 'completed'
|
|
|
|
| 355 |
if message:
|
| 356 |
self.status_items[step_index]['message'] = message
|
| 357 |
self._render()
|
|
|
|
| 360 |
"""Mark a step as error"""
|
| 361 |
if step_index < len(self.status_items):
|
| 362 |
self.status_items[step_index]['status'] = 'error'
|
|
|
|
| 363 |
if message:
|
| 364 |
self.status_items[step_index]['message'] = message
|
| 365 |
self._render()
|
| 366 |
|
| 367 |
+
|
| 368 |
def _render(self, title: str = "Progress"):
|
| 369 |
"""Internal method to render current progress"""
|
| 370 |
with self.container.container():
|
|
|
|
| 478 |
# ERROR HANDLING COMPONENTS - Standardized error message patterns
|
| 479 |
# =============================================================================
|
| 480 |
|
| 481 |
+
def display_error(operation_type: str, action: str = "process", error: Exception = None, icon: str = "❌"):
    """
    Show a standardized Streamlit error banner for a failed operation.

    Args:
        operation_type: What failed, e.g. "question analysis", "data room",
            "document processor".
        action: The verb that failed, e.g. "generate", "process",
            "initialize", "download" (default: "process").
        error: The exception that triggered the failure, if any; its string
            form is appended to the message.
        icon: Leading icon for the banner (default: "❌").
    """
    # Build the message once, then append the exception detail when present.
    message = f"{icon} Failed to {action} {operation_type}"
    if error:
        message = f"{message}: {str(error)}"
    st.error(message)
|
| 495 |
|
| 496 |
|
| 497 |
+
# Convenience functions for backward compatibility
|
| 498 |
+
def display_generation_error(operation_type: str, error: Exception = None):
    """
    Backward-compatible shim for generation failures.

    DEPRECATED: Use display_error with action='generate' instead.

    Args:
        operation_type: What failed to generate (e.g. "question analysis").
        error: The exception that occurred (optional).
    """
    display_error(operation_type, "generate", error)
|
| 501 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
|
| 503 |
+
def display_processing_error(operation_type: str, error: Exception = None):
    """
    Backward-compatible shim for processing failures.

    DEPRECATED: Use display_error with action='process' instead.

    Args:
        operation_type: What failed to process (e.g. "question", "data room").
        error: The exception that occurred (optional).
    """
    display_error(operation_type, "process", error)
|
| 506 |
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
+
def display_initialization_error(component_type: str, error: Exception = None):
    """
    Backward-compatible shim for initialization failures.

    DEPRECATED: Use display_error with action='initialize' instead.

    Args:
        component_type: The component that failed to initialize.
        error: The exception that occurred (optional).
    """
    display_error(component_type, "initialize", error)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
|
| 513 |
def display_download_error(error: Exception = None):
    """
    Backward-compatible shim for download failures.

    DEPRECATED: Use display_error with action='download' instead.

    Args:
        error: The exception that occurred (optional).

    NOTE(review): this now renders "Failed to complete download" instead of
    the legacy "Download failed" wording — confirm the message change is
    intentional for existing callers.
    """
    display_error("download", "complete", error)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
|
| 517 |
|
| 518 |
# =============================================================================
|
|
|
|
| 574 |
|
| 575 |
col1, col2 = st.columns([4, 1])
|
| 576 |
with col1:
|
| 577 |
+
render_document_download_button(
|
| 578 |
+
doc_path, doc_name, f"checklist_{hash(doc_path) % 10000}_{item_idx}"
|
| 579 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
with col2:
|
| 581 |
st.caption(f"{score:.3f}")
|
| 582 |
else:
|
|
|
|
| 600 |
answer = answer_data.get('answer', 'No answer available')
|
| 601 |
sources = answer_data.get('sources', [])
|
| 602 |
|
| 603 |
+
with st.expander(f"**{question}**", expanded=False):
|
| 604 |
if answer:
|
| 605 |
st.markdown(f"**Answer:** {answer}")
|
| 606 |
|
|
|
|
| 613 |
|
| 614 |
col1, col2 = st.columns([4, 1])
|
| 615 |
with col1:
|
| 616 |
+
render_document_download_button(
|
| 617 |
+
doc_path, doc_name, f"question_{hash(doc_path) % 10000}_{i}"
|
| 618 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
with col2:
|
| 620 |
st.caption(f"{score:.3f}")
|
| 621 |
|
| 622 |
|
| 623 |
+
def render_document_download_button(doc_path: str, doc_name: str, unique_key: str,
                                    label_prefix: str = "📄", show_unavailable: bool = True,
                                    button_help: str = None) -> bool:
    """
    Consolidated function to render a document download button.

    Args:
        doc_path: Path to the document file (can be relative or absolute)
        doc_name: Display name for the document
        unique_key: Unique key for the download button
        label_prefix: Prefix for the button label (default: "📄")
        show_unavailable: Whether to show unavailable documents as text/caption
        button_help: Optional help text for the button

    Returns:
        True if document was available and button was rendered, False otherwise
    """
    import mimetypes

    resolved_path = _resolve_document_path(doc_path)

    if resolved_path and resolved_path.exists():
        try:
            # Read the bytes eagerly so the button payload survives reruns
            # and the file handle is released immediately.
            with open(resolved_path, 'rb') as f:
                file_bytes = f.read()

            # Guess the MIME type from the resolved file's actual name
            # (covers PDF, Office docs, images, ...); fall back to a generic
            # binary type for unknown extensions.
            mime_type, _ = mimetypes.guess_type(resolved_path.name)
            if mime_type is None:
                mime_type = "application/octet-stream"

            st.download_button(
                label=f"{label_prefix} {doc_name}",
                data=file_bytes,
                file_name=resolved_path.name,
                mime=mime_type,
                key=f"download_{unique_key}",
                help=button_help or f"Download: {doc_name}"
            )
            return True
        except Exception:
            # File vanished or could not be read: degrade gracefully.
            if show_unavailable:
                st.caption(f"{label_prefix} {doc_name} (unavailable)")
            return False
    else:
        if show_unavailable:
            st.write(f"{label_prefix} {doc_name} (unavailable)")
        return False
|
| 665 |
+
|
| 666 |
+
|
| 667 |
+
def create_document_link(doc_path: str, doc_name: str, doc_title: str, unique_key: str):
    """
    Create a download link for a document.

    DEPRECATED: Use render_document_download_button instead.

    Args:
        doc_path: Path to the document file (relative or absolute).
        doc_name: Name of the document (unused; kept for signature compatibility).
        doc_title: Title shown as the button label.
        unique_key: Unique key for the download button.
    """
    # Delegate to the consolidated renderer; the title serves as the
    # visible label, and unavailable files are still reported as text.
    render_document_download_button(doc_path, doc_title, unique_key, "📄", True)
|
| 674 |
|
| 675 |
|
| 676 |
def render_content_with_clickable_citations(content: str, citations: List[Dict[str, Any]]):
|
|
|
|
| 713 |
cols = st.columns(len(mentioned_docs))
|
| 714 |
for i, (doc_name, doc_path) in enumerate(mentioned_docs):
|
| 715 |
with cols[i]:
|
| 716 |
+
render_document_download_button(doc_path, doc_name, f"para_{para_idx}_{i}")
|
| 717 |
else:
|
| 718 |
st.markdown("")
|
| 719 |
|
| 720 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 721 |
# =============================================================================
|
| 722 |
# GENERATE/REGENERATE BUTTON COMPONENTS - Common 2-column button layout
|
| 723 |
# =============================================================================
|