sukhrobnurali's picture
Bug fixes
76cdde2
"""
Intelligent Investment Screener
A RAG-based application for analyzing company financial reports against investment criteria.
"""
import streamlit as st
import os
import json
import tempfile
from pathlib import Path
from dotenv import load_dotenv
from document_processor import InvestmentDocumentProcessor
from criteria import CRITERIA_OPTIONS
# Load environment variables
# load_dotenv is imported from the `dotenv` package; it runs before main(),
# which later reads OPENAI_API_KEY through os.getenv (presumably so a local
# .env file can supply the key — confirm against deployment setup).
load_dotenv()
# Page config
# NOTE(review): Streamlit has traditionally required set_page_config to be the
# first st.* command executed in a script — keep this call ahead of the CSS
# injection below; confirm against the Streamlit version in use.
st.set_page_config(
    page_title="Investment Screener",
    page_icon="πŸ“Š",
    layout="wide"
)
# Custom CSS
# Executed at module level: a single <style> element is pushed into the page
# through st.markdown(..., unsafe_allow_html=True).
# Class selectors and where the code visible in this file uses them:
#   .main-header / .sub-header  -> title and tagline emitted by main()
#   .pass-badge / .fail-badge   -> overall verdict in display_analysis_result()
#   .metric-card                -> card markup emitted by display_metric_card()
#   .citation-full              -> expanded text in display_citation()
#   .citation-preview           -> not referenced by any code visible here
# The div[data-testid="stExpander"] selectors are not scoped to a container,
# so they apply to every expander on the page (the sidebar "View Criteria
# Details" expander as well as the citation expanders).
# NOTE(review): these rules depend on Streamlit's generated DOM structure and
# data-testid attributes — verify them after a Streamlit upgrade.
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
margin-bottom: 0.5rem;
}
.sub-header {
font-size: 1.2rem;
color: #666;
margin-bottom: 2rem;
}
.pass-badge {
background-color: #28a745;
color: white;
padding: 0.5rem 1rem;
border-radius: 0.5rem;
font-weight: bold;
display: inline-block;
margin: 0.5rem 0;
}
.fail-badge {
background-color: #dc3545;
color: white;
padding: 0.5rem 1rem;
border-radius: 0.5rem;
font-weight: bold;
display: inline-block;
margin: 0.5rem 0;
}
div[data-testid="stExpander"] {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border-radius: 12px;
border: none;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
margin: 0.75rem 0;
}
div[data-testid="stExpander"]:hover {
box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
}
div[data-testid="stExpander"] summary {
background: transparent !important;
color: white !important;
font-weight: 500;
padding: 1rem 1.25rem;
}
div[data-testid="stExpander"] summary:hover {
background: transparent !important;
}
div[data-testid="stExpander"][open] summary {
background: transparent !important;
}
div[data-testid="stExpander"] div[role="button"] {
background: transparent !important;
}
div[data-testid="stExpander"] div[role="button"] p {
color: white !important;
font-size: 0.95rem;
}
div[data-testid="stExpander"] > div > div {
padding: 0 1.25rem 1.25rem 1.25rem;
color: white;
}
.citation-preview {
color: white;
font-size: 0.9rem;
line-height: 1.6;
opacity: 0.95;
margin-top: 0.5rem;
}
.citation-full {
color: white;
font-size: 0.9rem;
line-height: 1.6;
opacity: 0.95;
}
.metric-card {
background-color: #ffffff;
padding: 1.5rem;
border-radius: 0.5rem;
border: 1px solid #e0e0e0;
margin: 1rem 0;
color: #212529;
}
</style>
""", unsafe_allow_html=True)
def initialize_session_state():
    """Seed ``st.session_state`` with the keys the rest of the app reads.

    A key is written only when it is not already present, so values stored by
    an earlier run of the script are left untouched.
    """
    initial_values = (
        ('processor', None),
        ('analysis_result', None),
        ('document_loaded', False),
        ('current_file_name', None),
    )
    for state_key, initial in initial_values:
        if state_key not in st.session_state:
            st.session_state[state_key] = initial
def display_criteria_rules(criteria):
    """Render each rule of a criteria set under a "Screening Rules" heading.

    Args:
        criteria: Mapping indexed by ``'rules'``; every rule is in turn
            indexed by ``'name'``, ``'description'`` and ``'threshold'``.
    """
    st.subheader("Screening Rules")
    for rule in criteria['rules']:
        # Headline first, then the threshold as a caption beneath it.
        headline = "**{}**: {}".format(rule['name'], rule['description'])
        st.markdown(headline)
        threshold_note = "Threshold: {}".format(rule['threshold'])
        st.caption(threshold_note)
def display_analysis_result(result, criteria_name):
    """Render a full analysis result: verdict, summary, metric cards, citations.

    Args:
        result: Dict-like analysis output. ``'overall_pass'`` selects the
            badge, ``'summary'`` (if present) is shown in an info box, every
            remaining dict-valued entry is rendered as a metric card, and
            ``'citations'`` (if non-empty) is listed at the bottom.
        criteria_name: Name of the selected criteria set. Accepted for the
            caller's convenience; not used by the rendering below.
    """
    st.markdown("---")
    st.markdown("## Analysis Results")

    # Overall verdict badge
    if result.get('overall_pass', False):
        badge_html = '<div class="pass-badge">βœ“ PASSED - Investment Compatible</div>'
    else:
        badge_html = '<div class="fail-badge">βœ— FAILED - Does Not Meet Criteria</div>'
    st.markdown(badge_html, unsafe_allow_html=True)

    # Free-text summary
    if 'summary' in result:
        st.markdown("### Summary")
        st.info(result['summary'])

    # Everything that is not bookkeeping and holds a dict is treated as a metric.
    bookkeeping_keys = {
        'overall_pass', 'summary', 'citations',
        'source_nodes_count', 'parse_error', 'raw_response',
    }
    for field_name, field_value in result.items():
        if field_name in bookkeeping_keys:
            continue
        if isinstance(field_value, dict):
            display_metric_card(field_name, field_value)

    # Citations section (skipped entirely when absent or empty)
    citations = result.get('citations')
    if not citations:
        return
    st.markdown("### πŸ“š Citations & Sources")
    st.caption(f"Analysis based on {result.get('source_nodes_count', 0)} relevant document sections")
    for citation in citations[:5]:  # only the first five are shown
        display_citation(citation)
def display_metric_card(metric_name, metric_data):
    """Render one metric as a single, self-contained HTML card.

    The whole card is emitted through ONE ``st.markdown`` call. Streamlit
    renders each ``st.markdown`` call as its own element, so a ``<div>``
    opened in one call and closed in a later one never wraps the content in
    between; building the markup in one piece lets ``.metric-card`` actually
    enclose the details.

    Every interpolated value is HTML-escaped because the card is rendered with
    ``unsafe_allow_html=True`` and the values originate from the analysis
    output rather than from this file.

    Args:
        metric_name: Result key of the metric, e.g. ``'debt_ratio'``; shown
            title-cased with underscores replaced by spaces.
        metric_data: Dict of metric details. ``'pass'``, then ``'compliant'``,
            then ``'disclosed'`` is consulted for the status; ``'page'`` and
            ``'location'`` feed the citation footer; all keys other than
            ``'pass'``, ``'page'`` and ``'location'`` are listed as details.
    """
    import html  # stdlib; imported locally so the block stands on its own

    def as_html(raw):
        # Escape markup characters and keep line breaks without leaving blank
        # lines inside the HTML fragment.
        return "<br>".join(html.escape(str(raw)).splitlines())

    # Format metric name
    formatted_name = metric_name.replace('_', ' ').title()

    # Determine pass/fail. A metric carrying none of the three flags yields
    # None and is therefore displayed with the failing icon/colour.
    passed = metric_data.get('pass', metric_data.get('compliant', metric_data.get('disclosed', None)))
    status_icon = "βœ“" if passed else "βœ—"
    status_color = "green" if passed else "red"

    card_parts = [
        '<div class="metric-card">',
        f'<h4 style="color: {status_color};">{status_icon} {as_html(formatted_name)}</h4>',
    ]

    # Metric details
    for key, value in metric_data.items():
        if key in ('pass', 'page', 'location'):
            continue
        if isinstance(value, bool):
            value = "Yes" if value else "No"
        label = as_html(key.replace('_', ' ').title())
        card_parts.append(f"<p><strong>{label}</strong>: {as_html(value)}</p>")

    # Citation info: full footer when both page and location are known,
    # page-only line otherwise; nothing when the page is missing.
    if 'page' in metric_data:
        page_html = as_html(metric_data['page'])
        if 'location' in metric_data:
            location_html = as_html(metric_data['location'])
            card_parts.append(
                '<div style="margin-top: 1rem; padding: 0.5rem; '
                'background-color: #e7f3ff; border-radius: 0.25rem;">'
                f'πŸ“„ <strong>Found on Page {page_html}</strong><br>'
                f'πŸ“ Section: {location_html}'
                '</div>'
            )
        else:
            card_parts.append(f"<p>πŸ“„ <strong>Page {page_html}</strong></p>")

    card_parts.append('</div>')
    # No blank lines and no leading indentation, so Markdown treats the
    # fragment as one HTML block.
    st.markdown("\n".join(card_parts), unsafe_allow_html=True)
def display_citation(citation):
    """Render one citation as a collapsed Streamlit expander.

    The expander label carries the page number, the match score formatted as
    a whole-number percentage, and the text preview. When the citation is
    flagged as truncated, the expander body shows the full text; otherwise it
    only notes that the label already contains everything.

    The full text is HTML-escaped before being placed inside the
    ``citation-full`` div: it is rendered with ``unsafe_allow_html=True`` and
    comes from the citation data rather than from this file, so any ``<`` /
    ``&`` in it must not be interpreted as markup.

    Args:
        citation: Dict indexed by ``'page'``, ``'score'`` (must support the
            ``.0%`` format spec) and ``'text_preview'``; ``'is_truncated'`` is
            optional and ``'full_text'`` is read only when it is truthy.
    """
    import html  # stdlib; imported locally so the block stands on its own

    # Create expander with page, score, and preview in the header
    header = f"πŸ“„ Page {citation['page']} β€’ ⭐ {citation['score']:.0%} Match\n\n{citation['text_preview']}"
    with st.expander(header, expanded=False):
        if citation.get('is_truncated', False):
            # Show only the full text when expanded (no duplicate preview)
            safe_text = html.escape(str(citation['full_text']))
            st.markdown(f"<div class='citation-full'>{safe_text}</div>",
                        unsafe_allow_html=True)
        else:
            # Preview and full text are the same, so show nothing extra
            st.caption("(Full text shown above)")
def _write_temp_pdf(uploaded_file):
    """Copy an upload's bytes into a closed temporary ``.pdf`` and return its path.

    The file is created with ``delete=False`` so it still exists after being
    closed and can be reopened by path; the caller is responsible for removing
    it. If the write itself fails, the partially written file is removed
    before the exception propagates.
    """
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
    tmp_path = Path(tmp_file.name)
    try:
        with tmp_file:
            tmp_file.write(uploaded_file.getvalue())
    except BaseException:
        tmp_path.unlink(missing_ok=True)
        raise
    return tmp_path


def main():
    """Main application.

    Flow:
      1. Initialise session state and draw the page header.
      2. Sidebar: obtain the OpenAI API key (environment first, text input as
         fallback; the script stops until one is available), let the user pick
         a criteria set from ``CRITERIA_OPTIONS`` and show its rules.
      3. Left column: accept a PDF upload. A file whose name differs from the
         one remembered in session state resets the loaded document and any
         previous analysis, then the PDF is indexed.
      4. Right column: run the analysis on demand and store the result in
         session state.
      5. Below the columns: render the stored result, if any.

    The temporary copy of the upload is written only when the document
    actually has to be indexed, and it is removed in a ``finally`` clause so
    it is cleaned up even when loading fails and ``st.stop()`` aborts the
    script run.
    """
    initialize_session_state()

    # Header
    st.markdown('<div class="main-header">πŸ“Š Intelligent Investment Screener</div>',
                unsafe_allow_html=True)
    st.markdown('<div class="sub-header">AI-powered financial document analysis with citations</div>',
                unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.markdown("## Configuration")

        # API key: the environment wins, otherwise ask the user
        api_key = os.getenv('OPENAI_API_KEY', '')
        if not api_key:
            api_key = st.text_input(
                "OpenAI API Key",
                type="password",
                help="Get your API key at https://platform.openai.com/api-keys"
            )
        if not api_key:
            st.warning("Please enter your OpenAI API key to continue.")
            st.stop()

        # Criteria selection
        st.markdown("## Screening Criteria")
        selected_criteria_name = st.selectbox(
            "Select Investment Strategy",
            options=list(CRITERIA_OPTIONS.keys())
        )
        criteria = CRITERIA_OPTIONS[selected_criteria_name]
        with st.expander("View Criteria Details"):
            st.markdown(f"**{criteria['name']}**")
            st.caption(criteria['description'])
            display_criteria_rules(criteria)

        st.markdown("---")
        st.markdown("### About")
        st.caption("""
This tool uses RAG (Retrieval-Augmented Generation) to analyze
financial documents against specific investment criteria.
All findings include page citations for verification.
""")

    # Main content
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("### Upload Document")
        uploaded_file = st.file_uploader(
            "Upload Annual Report or 10-K Filing (PDF)",
            type=['pdf'],
            help="Upload a company's annual report or SEC 10-K filing"
        )

        if uploaded_file is not None:
            # A different file name means a new document: drop everything
            # derived from the previous one.
            if uploaded_file.name != st.session_state.current_file_name:
                st.session_state.current_file_name = uploaded_file.name
                st.session_state.document_loaded = False
                st.session_state.analysis_result = None
                st.session_state.processor = None

            # Load document if not already loaded. The temp file is created
            # only on this path, so reruns with an indexed document no longer
            # write (and delete) a throwaway copy of the PDF.
            if not st.session_state.document_loaded:
                tmp_path = _write_temp_pdf(uploaded_file)
                with st.spinner("Loading and indexing document..."):
                    try:
                        processor = InvestmentDocumentProcessor(api_key)
                        processor.load_pdf(str(tmp_path))
                        st.session_state.processor = processor
                        st.session_state.document_loaded = True

                        # Show document info
                        doc_info = processor.get_document_summary()
                        st.success(f"βœ“ Document loaded: {doc_info['num_pages']} pages")
                    except Exception as e:
                        st.error(f"Error loading document: {str(e)}")
                        st.stop()
                    finally:
                        # Runs on success, on failure, and when st.stop()
                        # aborts the run from the except branch above.
                        tmp_path.unlink(missing_ok=True)

    with col2:
        st.markdown("### Analysis")
        if st.session_state.document_loaded:
            if st.button("πŸ” Analyze Document", type="primary", use_container_width=True):
                with st.spinner(f"Analyzing against {selected_criteria_name} criteria..."):
                    try:
                        result = st.session_state.processor.analyze_with_criteria(
                            criteria['analysis_prompt']
                        )
                        st.session_state.analysis_result = result
                    except Exception as e:
                        st.error(f"Analysis error: {str(e)}")
                        st.exception(e)
        else:
            st.info("Upload a PDF document to begin analysis")

    # Display results (kept in session state, so they survive reruns)
    if st.session_state.analysis_result is not None:
        display_analysis_result(st.session_state.analysis_result, selected_criteria_name)
# Script entry point: main() is called only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()