Spaces:

sukhrobnurali
/

financial-document-analyzer

Runtime error

App Files Files Community

financial-document-analyzer / app.py

sukhrobnurali

Bug fixes

76cdde2 about 2 months ago

raw

history blame contribute delete

11.6 kB

	"""
	Intelligent Investment Screener
	A RAG-based application for analyzing company financial reports against investment criteria.
	"""

	import html
	import json
	import os
	import tempfile
	from pathlib import Path

	import streamlit as st
	from dotenv import load_dotenv

	from criteria import CRITERIA_OPTIONS
	from document_processor import InvestmentDocumentProcessor

	# Load environment variables (python-dotenv) before anything reads them;
	# main() later looks up OPENAI_API_KEY via os.getenv.
	load_dotenv()

	# Page config: browser tab title, tab icon, and wide layout.
	# NOTE(review): Streamlit has historically required set_page_config to run
	# before any other Streamlit command, so keep this ahead of the CSS
	# st.markdown call below - confirm against the pinned Streamlit version.
	st.set_page_config(
	page_title="Investment Screener",
	page_icon="📊",
	layout="wide"
	)

	# Custom CSS, injected once as a raw <style> element (hence unsafe_allow_html).
	# Classes referenced elsewhere in this file:
	#   .main-header / .sub-header  - page title and subtitle written by main()
	#   .pass-badge / .fail-badge   - overall verdict in display_analysis_result()
	#   .metric-card                - per-metric heading in display_metric_card()
	#   .citation-full              - expanded citation text in display_citation()
	# .citation-preview is defined here but not referenced by the code in this file.
	# The div[data-testid="stExpander"] rules restyle expanders with a gradient
	# background and white text; they target Streamlit's generated markup.
	# NOTE(review): those selectors depend on Streamlit's internal DOM attributes -
	# confirm they still match after a Streamlit upgrade.
	st.markdown("""
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: bold;
	margin-bottom: 0.5rem;
	}
	.sub-header {
	font-size: 1.2rem;
	color: #666;
	margin-bottom: 2rem;
	}
	.pass-badge {
	background-color: #28a745;
	color: white;
	padding: 0.5rem 1rem;
	border-radius: 0.5rem;
	font-weight: bold;
	display: inline-block;
	margin: 0.5rem 0;
	}
	.fail-badge {
	background-color: #dc3545;
	color: white;
	padding: 0.5rem 1rem;
	border-radius: 0.5rem;
	font-weight: bold;
	display: inline-block;
	margin: 0.5rem 0;
	}
	div[data-testid="stExpander"] {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	border-radius: 12px;
	border: none;
	box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	margin: 0.75rem 0;
	}
	div[data-testid="stExpander"]:hover {
	box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
	}
	div[data-testid="stExpander"] summary {
	background: transparent !important;
	color: white !important;
	font-weight: 500;
	padding: 1rem 1.25rem;
	}
	div[data-testid="stExpander"] summary:hover {
	background: transparent !important;
	}
	div[data-testid="stExpander"][open] summary {
	background: transparent !important;
	}
	div[data-testid="stExpander"] div[role="button"] {
	background: transparent !important;
	}
	div[data-testid="stExpander"] div[role="button"] p {
	color: white !important;
	font-size: 0.95rem;
	}
	div[data-testid="stExpander"] > div > div {
	padding: 0 1.25rem 1.25rem 1.25rem;
	color: white;
	}
	.citation-preview {
	color: white;
	font-size: 0.9rem;
	line-height: 1.6;
	opacity: 0.95;
	margin-top: 0.5rem;
	}
	.citation-full {
	color: white;
	font-size: 0.9rem;
	line-height: 1.6;
	opacity: 0.95;
	}
	.metric-card {
	background-color: #ffffff;
	padding: 1.5rem;
	border-radius: 0.5rem;
	border: 1px solid #e0e0e0;
	margin: 1rem 0;
	color: #212529;
	}
	</style>
	""", unsafe_allow_html=True)


	def initialize_session_state():
	    """Seed ``st.session_state`` with the keys this app relies on.

	    Keys that already exist are left alone, so values set on an earlier
	    rerun are not overwritten.
	    """
	    initial_values = (
	        ('processor', None),
	        ('analysis_result', None),
	        ('document_loaded', False),
	        ('current_file_name', None),
	    )
	    for state_key, initial in initial_values:
	        if state_key not in st.session_state:
	            st.session_state[state_key] = initial


	def display_criteria_rules(criteria):
	    """Show every screening rule of ``criteria`` under a "Screening Rules" subheader.

	    Each entry of ``criteria['rules']`` is rendered as a "name: description"
	    line followed by a caption with its threshold.
	    """
	    st.subheader("Screening Rules")
	    for entry in criteria['rules']:
	        headline = f"{entry['name']}: {entry['description']}"
	        st.markdown(headline)
	        threshold_note = f"Threshold: {entry['threshold']}"
	        st.caption(threshold_note)


	def display_analysis_result(result, criteria_name):
	    """Render an analysis result: verdict badge, summary, metric cards, citations.

	    Args:
	        result: Dict-like analysis output. 'overall_pass' selects the badge,
	            'summary' (if present) is shown in an info box, every other
	            dict-valued entry that is not a bookkeeping key is drawn as a
	            metric card, and up to five entries of 'citations' are listed.
	        criteria_name: Accepted for the caller's convenience; not used here.
	    """
	    st.markdown("---")
	    st.markdown("## Analysis Results")

	    # Overall verdict badge
	    if result.get('overall_pass', False):
	        badge_html = '<div class="pass-badge">✓ PASSED - Investment Compatible</div>'
	    else:
	        badge_html = '<div class="fail-badge">✗ FAILED - Does Not Meet Criteria</div>'
	    st.markdown(badge_html, unsafe_allow_html=True)

	    # Summary
	    if 'summary' in result:
	        st.markdown("### Summary")
	        st.info(result['summary'])

	    # Everything except these bookkeeping keys is treated as a metric
	    bookkeeping_keys = ('overall_pass', 'summary', 'citations',
	                        'source_nodes_count', 'parse_error', 'raw_response')
	    for field, payload in result.items():
	        if field in bookkeeping_keys:
	            continue
	        if not isinstance(payload, dict):
	            continue
	        display_metric_card(field, payload)

	    # Citations section (skipped when the key is missing or the list is empty)
	    sources = result.get('citations')
	    if sources:
	        st.markdown("### 📚 Citations & Sources")
	        section_count = result.get('source_nodes_count', 0)
	        st.caption(f"Analysis based on {section_count} relevant document sections")

	        top_sources = sources[:5]  # only the first five citations are shown
	        for source in top_sources:
	            display_citation(source)


	def display_metric_card(metric_name, metric_data):
	    """Render one metric: coloured status heading, detail lines, citation.

	    Args:
	        metric_name: Metric key; shown title-cased with underscores replaced
	            by spaces.
	        metric_data: Dict of metric fields. The status is read from 'pass',
	            falling back to 'compliant' and then 'disclosed'; a missing or
	            falsy status is displayed as a failure. 'page' and 'location',
	            when present, are shown as the citation instead of as details.

	    Values that end up inside markup rendered with ``unsafe_allow_html=True``
	    are HTML-escaped first: they originate from the analysis output, so they
	    must not be able to inject tags into the page.
	    """
	    # Format metric name (escaped because it is placed inside raw HTML below)
	    formatted_name = html.escape(metric_name.replace('_', ' ').title())

	    # Determine pass/fail
	    passed = metric_data.get('pass', metric_data.get('compliant', metric_data.get('disclosed', None)))

	    # Build display
	    status_icon = "✓" if passed else "✗"
	    status_color = "green" if passed else "red"

	    # NOTE(review): each st.markdown call presumably renders as its own
	    # element, so this opening <div> likely does not wrap the detail lines
	    # written below - confirm in the rendered page.
	    st.markdown(f"""
	    <div class="metric-card">
	    <h4 style="color: {status_color};">{status_icon} {formatted_name}</h4>
	    """, unsafe_allow_html=True)

	    # Display metric details (plain markdown, so Streamlit escapes any HTML)
	    for key, value in metric_data.items():
	        if key in ('pass', 'page', 'location'):
	            continue
	        if isinstance(value, bool):
	            value = "Yes" if value else "No"
	        st.markdown(f"{key.replace('_', ' ').title()}: {value}")

	    # Citation info
	    if 'page' in metric_data and 'location' in metric_data:
	        # str() first: the page may be a number, and html.escape needs text.
	        page_html = html.escape(str(metric_data['page']))
	        location_html = html.escape(str(metric_data['location']))
	        st.markdown(f"""
	        <div style="margin-top: 1rem; padding: 0.5rem; background-color: #e7f3ff; border-radius: 0.25rem;">
	        📄 <strong>Found on Page {page_html}</strong><br>
	        📍 Section: {location_html}
	        </div>
	        """, unsafe_allow_html=True)
	    elif 'page' in metric_data:
	        st.markdown(f"📄 Page {metric_data['page']}")

	    st.markdown("</div>", unsafe_allow_html=True)


	def display_citation(citation):
	    """Display one citation as an expander.

	    Args:
	        citation: Dict with 'page', 'score' (formatted as a percentage),
	            'text_preview', and - when 'is_truncated' is true - 'full_text'.

	    The expander label carries the page, match score and preview. The body
	    shows the full text only when the preview was truncated.
	    """
	    # Create expander with page, score, and preview in the header
	    header = f"📄 Page {citation['page']} • ⭐ {citation['score']:.0%} Match\n\n{citation['text_preview']}"

	    with st.expander(header, expanded=False):
	        if citation.get('is_truncated', False):
	            # The text comes from the uploaded document and is placed inside
	            # raw HTML, so escape it to keep stray '<' / '&' (or markup in the
	            # PDF) from being interpreted as tags.
	            full_text = html.escape(str(citation['full_text']))
	            st.markdown(f"<div class='citation-full'>{full_text}</div>",
	                        unsafe_allow_html=True)
	        else:
	            # Preview and full text are the same, so there is nothing extra to show
	            st.caption("(Full text shown above)")


	def main():
	    """Run the Streamlit page.

	    Flow on every script run:
	      1. Seed session state and draw the page header.
	      2. Sidebar: obtain the OpenAI API key (environment first, otherwise a
	         password input; the run stops until one is given) and let the user
	         pick an entry of CRITERIA_OPTIONS.
	      3. Left column: accept a PDF upload. A file whose name differs from the
	         stored one resets the session state; a document that is not loaded
	         yet is written to a temporary file, indexed, and kept in
	         ``st.session_state.processor``.
	      4. Right column: run the analysis on demand and store the result.
	      5. Show the stored result, if any.

	    The temporary PDF is only created when indexing is actually needed and is
	    removed in a ``finally`` block, so it is also cleaned up when loading
	    fails and the run is halted with ``st.stop()``.
	    """
	    initialize_session_state()

	    # Header
	    st.markdown('<div class="main-header">📊 Intelligent Investment Screener</div>',
	                unsafe_allow_html=True)
	    st.markdown('<div class="sub-header">AI-powered financial document analysis with citations</div>',
	                unsafe_allow_html=True)

	    # Sidebar
	    with st.sidebar:
	        st.markdown("## Configuration")

	        # API Key input: prefer the environment, fall back to asking the user
	        api_key = os.getenv('OPENAI_API_KEY', '')
	        if not api_key:
	            api_key = st.text_input(
	                "OpenAI API Key",
	                type="password",
	                help="Get your API key at https://platform.openai.com/api-keys"
	            )

	        if not api_key:
	            st.warning("Please enter your OpenAI API key to continue.")
	            st.stop()

	        # Criteria selection
	        st.markdown("## Screening Criteria")
	        selected_criteria_name = st.selectbox(
	            "Select Investment Strategy",
	            options=list(CRITERIA_OPTIONS.keys())
	        )

	        criteria = CRITERIA_OPTIONS[selected_criteria_name]

	        with st.expander("View Criteria Details"):
	            st.markdown(f"{criteria['name']}")
	            st.caption(criteria['description'])
	            display_criteria_rules(criteria)

	        st.markdown("---")
	        st.markdown("### About")
	        st.caption("""
	        This tool uses RAG (Retrieval-Augmented Generation) to analyze
	        financial documents against specific investment criteria.
	        All findings include page citations for verification.
	        """)

	    # Main content
	    col1, col2 = st.columns([1, 1])

	    with col1:
	        st.markdown("### Upload Document")
	        uploaded_file = st.file_uploader(
	            "Upload Annual Report or 10-K Filing (PDF)",
	            type=['pdf'],
	            help="Upload a company's annual report or SEC 10-K filing"
	        )

	        if uploaded_file is not None:
	            # Check if file has changed
	            # NOTE(review): the comparison is by file name only, so a different
	            # file uploaded under the same name is not re-indexed.
	            file_changed = (uploaded_file.name != st.session_state.current_file_name)

	            if file_changed:
	                # Reset session state for new file
	                st.session_state.current_file_name = uploaded_file.name
	                st.session_state.document_loaded = False
	                st.session_state.analysis_result = None
	                st.session_state.processor = None

	            # Load document if not already loaded
	            if not st.session_state.document_loaded:
	                tmp_path = None
	                try:
	                    # Save to temp file; the processor is given a path, not bytes
	                    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
	                        tmp_path = tmp_file.name
	                        tmp_file.write(uploaded_file.getvalue())

	                    with st.spinner("Loading and indexing document..."):
	                        try:
	                            processor = InvestmentDocumentProcessor(api_key)
	                            processor.load_pdf(tmp_path)
	                            st.session_state.processor = processor
	                            st.session_state.document_loaded = True

	                            # Show document info
	                            doc_info = processor.get_document_summary()
	                            st.success(f"✓ Document loaded: {doc_info['num_pages']} pages")

	                        except Exception as e:
	                            st.error(f"Error loading document: {str(e)}")
	                            st.stop()
	                finally:
	                    # Clean up temp file, including when st.stop() halts the run
	                    if tmp_path is not None:
	                        Path(tmp_path).unlink(missing_ok=True)

	    with col2:
	        st.markdown("### Analysis")

	        if st.session_state.document_loaded:
	            if st.button("🔍 Analyze Document", type="primary", use_container_width=True):
	                with st.spinner(f"Analyzing against {selected_criteria_name} criteria..."):
	                    try:
	                        result = st.session_state.processor.analyze_with_criteria(
	                            criteria['analysis_prompt']
	                        )
	                        st.session_state.analysis_result = result

	                    except Exception as e:
	                        st.error(f"Analysis error: {str(e)}")
	                        st.exception(e)

	        else:
	            st.info("Upload a PDF document to begin analysis")

	    # Display results (kept in session state so they survive reruns)
	    if st.session_state.analysis_result is not None:
	        display_analysis_result(st.session_state.analysis_result, selected_criteria_name)


	# Script entry point: build the page only when this file is executed as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	    main()