"""
Intelligent Investment Screener
A RAG-based application for analyzing company financial reports against investment criteria.
"""
import html
import json
import os
import tempfile
from pathlib import Path

import streamlit as st
from dotenv import load_dotenv

from criteria import CRITERIA_OPTIONS
from document_processor import InvestmentDocumentProcessor
# Read settings from a local .env file into the process environment
# (main() later looks up OPENAI_API_KEY via os.getenv).
load_dotenv()

# Browser-tab title, icon and wide layout for the whole app.
st.set_page_config(page_title="Investment Screener", page_icon="📊", layout="wide")

# Custom CSS hook. The payload is currently just a newline, so no styles are
# injected; unsafe_allow_html stays on so markup can be added here.
st.markdown("\n", unsafe_allow_html=True)
def initialize_session_state():
    """Seed ``st.session_state`` with the keys this app relies on.

    Keys that already exist are left untouched, so values survive Streamlit
    reruns; only missing keys receive their initial value.
    """
    initial_values = {
        'processor': None,           # document processor for the loaded PDF
        'analysis_result': None,     # result of the most recent analysis
        'document_loaded': False,    # True once a PDF has been indexed
        'current_file_name': None,   # name of the upload currently tracked
    }
    for state_key, initial in initial_values.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = initial
def display_criteria_rules(criteria):
    """Render each rule of ``criteria`` under a "Screening Rules" heading.

    Args:
        criteria: Mapping with a ``'rules'`` list; each rule is a mapping
            providing ``'name'``, ``'description'`` and ``'threshold'``.
    """
    st.subheader("Screening Rules")
    for entry in criteria['rules']:
        rule_name = entry['name']
        rule_description = entry['description']
        st.markdown(f"**{rule_name}**: {rule_description}")
        st.caption(f"Threshold: {entry['threshold']}")
def display_analysis_result(result, criteria_name):
    """Render the analysis report: verdict, summary, metric cards, citations.

    Args:
        result: Mapping holding the analysis output. Keys read here are
            ``overall_pass``, ``summary``, ``citations`` and
            ``source_nodes_count``. Every other dict-valued entry (except
            ``parse_error`` / ``raw_response``) is rendered as a metric card.
        criteria_name: Name of the selected strategy. Kept for interface
            compatibility; the rendering below does not use it.
    """
    # Bookkeeping keys that must never be rendered as metric cards.
    metadata_keys = {
        'overall_pass', 'summary', 'citations',
        'source_nodes_count', 'parse_error', 'raw_response',
    }

    st.markdown("---")
    st.markdown("## Analysis Results")

    # Overall pass/fail banner. The literals were previously split across
    # physical lines (a syntax error); the same text is kept, with the line
    # breaks spelled as explicit "\n" escapes.
    if result.get('overall_pass', False):
        st.markdown('\n✓ PASSED - Investment Compatible\n', unsafe_allow_html=True)
    else:
        st.markdown('✗ FAILED - Does Not Meet Criteria\n', unsafe_allow_html=True)

    # Summary
    if 'summary' in result:
        st.markdown("### Summary")
        st.info(result['summary'])

    # One card per metric; non-dict values are skipped because a card needs
    # keyed fields to display.
    for metric_name, metric_data in result.items():
        if metric_name in metadata_keys or not isinstance(metric_data, dict):
            continue
        display_metric_card(metric_name, metric_data)

    # Citations section (skipped when the key is missing or the list is empty)
    citations = result.get('citations')
    if citations:
        st.markdown("### 📚 Citations & Sources")
        st.caption(
            f"Analysis based on {result.get('source_nodes_count', 0)} relevant document sections"
        )
        for citation in citations[:5]:  # Show top 5 citations
            display_citation(citation)
def display_metric_card(metric_name, metric_data):
    """Render one metric: status header, detail fields, and page citation.

    Args:
        metric_name: Result key for the metric (e.g. ``debt_ratio``); shown
            with underscores replaced by spaces and title-cased.
        metric_data: Mapping of the metric's fields. The status is taken from
            the first of ``pass`` / ``compliant`` / ``disclosed`` that is
            present; ``page`` and ``location`` feed the citation line.
    """
    # Format metric name
    formatted_name = metric_name.replace('_', ' ').title()

    # First status key that is present wins; missing status counts as failed.
    passed = next(
        (metric_data[key] for key in ('pass', 'compliant', 'disclosed') if key in metric_data),
        None,
    )
    status_icon = "✓" if passed else "✗"

    # The header is rendered with unsafe_allow_html=True and the name comes
    # from analysis output, so escape it to keep stray markup inert.
    st.markdown(
        f"\n{status_icon} {html.escape(formatted_name)}\n",
        unsafe_allow_html=True,
    )

    # Display metric details (status and citation fields are shown separately)
    for key, value in metric_data.items():
        if key in ('pass', 'page', 'location'):
            continue
        if isinstance(value, bool):
            value = "Yes" if value else "No"
        st.markdown(f"**{key.replace('_', ' ').title()}**: {value}")

    # Citation info
    if 'page' in metric_data and 'location' in metric_data:
        # Escaped for the same reason as the header: raw-HTML rendering is on.
        page = html.escape(str(metric_data['page']))
        location = html.escape(str(metric_data['location']))
        st.markdown(
            f"\n📄 Found on Page {page}\n📍 Section: {location}\n",
            unsafe_allow_html=True,
        )
    elif 'page' in metric_data:
        st.markdown(f"📄 **Page {metric_data['page']}**")

    # Trailing element after the card. The literal was previously split
    # across two physical lines (a syntax error); the content is unchanged.
    st.markdown("\n", unsafe_allow_html=True)
def display_citation(citation):
    """Render one citation as a collapsed expander.

    Args:
        citation: Mapping with ``page``, ``score`` (formatted as a whole
            percentage), ``text_preview`` and, when ``is_truncated`` is
            truthy, ``full_text``.
    """
    # Page, match score and the preview text all go in the expander label.
    header = (
        f"📄 Page {citation['page']} • ⭐ {citation['score']:.0%} Match"
        f"\n\n{citation['text_preview']}"
    )
    with st.expander(header, expanded=False):
        if citation.get('is_truncated', False):
            # The full text is document content rendered with
            # unsafe_allow_html=True, so escape it; otherwise markup inside
            # the uploaded file would be injected into the page.
            st.markdown(
                f"{html.escape(str(citation['full_text']))}\n",
                unsafe_allow_html=True,
            )
        else:
            # Preview already is the full text, so avoid repeating it.
            st.caption("(Full text shown above)")
def _load_uploaded_document(uploaded_file, api_key):
    """Index ``uploaded_file`` into session state via a temporary PDF copy.

    The temporary file is removed in a ``finally`` clause, so it is cleaned up
    even when loading fails and ``st.stop()`` halts the script.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.
        api_key: OpenAI API key handed to ``InvestmentDocumentProcessor``.
    """
    # delete=False keeps the path valid after the handle is closed, because
    # load_pdf is given the path rather than the open file.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_path = tmp_file.name

    try:
        with st.spinner("Loading and indexing document..."):
            processor = InvestmentDocumentProcessor(api_key)
            processor.load_pdf(tmp_path)
            st.session_state.processor = processor
            st.session_state.document_loaded = True
            # Show document info
            doc_info = processor.get_document_summary()
            st.success(f"✓ Document loaded: {doc_info['num_pages']} pages")
    except Exception as e:
        st.error(f"Error loading document: {str(e)}")
        st.stop()
    finally:
        # Clean up temp file on success and on failure alike.
        Path(tmp_path).unlink(missing_ok=True)


def main():
    """Run the app: sidebar configuration, PDF upload, analysis, results.

    The sidebar collects the OpenAI API key (from the environment or a text
    input) and the screening strategy. The left column handles the upload and
    indexing, the right column triggers the analysis, and the stored result
    is rendered below.
    """
    initialize_session_state()

    # Header. The title literal was previously split across physical lines
    # (a syntax error); the text is unchanged.
    st.markdown('📊 Intelligent Investment Screener\n', unsafe_allow_html=True)
    st.markdown('', unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.markdown("## Configuration")

        # Prefer the key from the environment; otherwise ask the user.
        api_key = os.getenv('OPENAI_API_KEY', '')
        if not api_key:
            api_key = st.text_input(
                "OpenAI API Key",
                type="password",
                help="Get your API key at https://platform.openai.com/api-keys",
            )
        if not api_key:
            st.warning("Please enter your OpenAI API key to continue.")
            st.stop()

        # Criteria selection
        st.markdown("## Screening Criteria")
        selected_criteria_name = st.selectbox(
            "Select Investment Strategy",
            options=list(CRITERIA_OPTIONS.keys()),
        )
        criteria = CRITERIA_OPTIONS[selected_criteria_name]
        with st.expander("View Criteria Details"):
            st.markdown(f"**{criteria['name']}**")
            st.caption(criteria['description'])
            display_criteria_rules(criteria)

        st.markdown("---")
        st.markdown("### About")
        st.caption("""
This tool uses RAG (Retrieval-Augmented Generation) to analyze
financial documents against specific investment criteria.
All findings include page citations for verification.
""")

    # Main content
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("### Upload Document")
        uploaded_file = st.file_uploader(
            "Upload Annual Report or 10-K Filing (PDF)",
            type=['pdf'],
            help="Upload a company's annual report or SEC 10-K filing",
        )
        if uploaded_file is not None:
            # A different file name means a new document: drop all state
            # derived from the previous one.
            if uploaded_file.name != st.session_state.current_file_name:
                st.session_state.current_file_name = uploaded_file.name
                st.session_state.document_loaded = False
                st.session_state.analysis_result = None
                st.session_state.processor = None

            # Only write the temp copy and index when a load is needed, so
            # reruns with an already-loaded document do no file I/O.
            if not st.session_state.document_loaded:
                _load_uploaded_document(uploaded_file, api_key)

    with col2:
        st.markdown("### Analysis")
        if st.session_state.document_loaded:
            if st.button("🔍 Analyze Document", type="primary", use_container_width=True):
                with st.spinner(f"Analyzing against {selected_criteria_name} criteria..."):
                    try:
                        result = st.session_state.processor.analyze_with_criteria(
                            criteria['analysis_prompt']
                        )
                        st.session_state.analysis_result = result
                    except Exception as e:
                        st.error(f"Analysis error: {str(e)}")
                        st.exception(e)
        else:
            st.info("Upload a PDF document to begin analysis")

    # Display results (kept in session state, so they survive reruns)
    if st.session_state.analysis_result is not None:
        display_analysis_result(st.session_state.analysis_result, selected_criteria_name)
# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()