""" Intelligent Investment Screener A RAG-based application for analyzing company financial reports against investment criteria. """ import streamlit as st import os import json import tempfile from pathlib import Path from dotenv import load_dotenv from document_processor import InvestmentDocumentProcessor from criteria import CRITERIA_OPTIONS # Load environment variables load_dotenv() # Page config st.set_page_config( page_title="Investment Screener", page_icon="📊", layout="wide" ) # Custom CSS st.markdown(""" """, unsafe_allow_html=True) def initialize_session_state(): """Initialize Streamlit session state variables.""" if 'processor' not in st.session_state: st.session_state.processor = None if 'analysis_result' not in st.session_state: st.session_state.analysis_result = None if 'document_loaded' not in st.session_state: st.session_state.document_loaded = False if 'current_file_name' not in st.session_state: st.session_state.current_file_name = None def display_criteria_rules(criteria): """Display the rules for selected criteria.""" st.subheader("Screening Rules") for rule in criteria['rules']: st.markdown(f"**{rule['name']}**: {rule['description']}") st.caption(f"Threshold: {rule['threshold']}") def display_analysis_result(result, criteria_name): """Display analysis results with citations.""" st.markdown("---") st.markdown("## Analysis Results") # Overall pass/fail overall_pass = result.get('overall_pass', False) if overall_pass: st.markdown('

✓ PASSED - Investment Compatible

', unsafe_allow_html=True) else: st.markdown('

✗ FAILED - Does Not Meet Criteria

', unsafe_allow_html=True) # Summary if 'summary' in result: st.markdown("### Summary") st.info(result['summary']) # Remove metadata fields for display metrics = {k: v for k, v in result.items() if k not in ['overall_pass', 'summary', 'citations', 'source_nodes_count', 'parse_error', 'raw_response']} for metric_name, metric_data in metrics.items(): if isinstance(metric_data, dict): display_metric_card(metric_name, metric_data) # Citations section if 'citations' in result and result['citations']: st.markdown("### 📚 Citations & Sources") st.caption(f"Analysis based on {result.get('source_nodes_count', 0)} relevant document sections") for citation in result['citations'][:5]: # Show top 5 citations display_citation(citation) def display_metric_card(metric_name, metric_data): """Display a single metric card with citation.""" # Format metric name formatted_name = metric_name.replace('_', ' ').title() # Determine pass/fail passed = metric_data.get('pass', metric_data.get('compliant', metric_data.get('disclosed', None))) # Build display status_icon = "✓" if passed else "✗" status_color = "green" if passed else "red" st.markdown(f"""

{status_icon} {formatted_name}

""", unsafe_allow_html=True) # Display metric details for key, value in metric_data.items(): if key not in ['pass', 'page', 'location']: if isinstance(value, bool): value = "Yes" if value else "No" st.markdown(f"**{key.replace('_', ' ').title()}**: {value}") # Citation info if 'page' in metric_data and 'location' in metric_data: st.markdown(f"""

📄 Found on Page {metric_data['page']}
📍 Section: {metric_data['location']}

""", unsafe_allow_html=True) elif 'page' in metric_data: st.markdown(f"📄 **Page {metric_data['page']}**") st.markdown("

", unsafe_allow_html=True) def display_citation(citation): """Display a modern expandable citation card using Streamlit.""" # Create expander with page, score, and preview in the header header = f"📄 Page {citation['page']} • ⭐ {citation['score']:.0%} Match\n\n{citation['text_preview']}" with st.expander(header, expanded=False): # Show only full text when expanded (no duplicate preview) if citation.get('is_truncated', False): st.markdown(f"

{citation['full_text']}

", unsafe_allow_html=True) else: # If not truncated, preview and full text are the same, so show nothing extra st.caption("(Full text shown above)") def main(): """Main application.""" initialize_session_state() # Header st.markdown('

📊 Intelligent Investment Screener

', unsafe_allow_html=True) st.markdown('

AI-powered financial document analysis with citations

', unsafe_allow_html=True) # Sidebar with st.sidebar: st.markdown("## Configuration") # API Key input api_key = os.getenv('OPENAI_API_KEY', '') if not api_key: api_key = st.text_input( "OpenAI API Key", type="password", help="Get your API key at https://platform.openai.com/api-keys" ) if not api_key: st.warning("Please enter your OpenAI API key to continue.") st.stop() # Criteria selection st.markdown("## Screening Criteria") selected_criteria_name = st.selectbox( "Select Investment Strategy", options=list(CRITERIA_OPTIONS.keys()) ) criteria = CRITERIA_OPTIONS[selected_criteria_name] with st.expander("View Criteria Details"): st.markdown(f"**{criteria['name']}**") st.caption(criteria['description']) display_criteria_rules(criteria) st.markdown("---") st.markdown("### About") st.caption(""" This tool uses RAG (Retrieval-Augmented Generation) to analyze financial documents against specific investment criteria. All findings include page citations for verification. """) # Main content col1, col2 = st.columns([1, 1]) with col1: st.markdown("### Upload Document") uploaded_file = st.file_uploader( "Upload Annual Report or 10-K Filing (PDF)", type=['pdf'], help="Upload a company's annual report or SEC 10-K filing" ) if uploaded_file is not None: # Check if file has changed file_changed = (uploaded_file.name != st.session_state.current_file_name) if file_changed: # Reset session state for new file st.session_state.current_file_name = uploaded_file.name st.session_state.document_loaded = False st.session_state.analysis_result = None st.session_state.processor = None # Save to temp file with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file: tmp_file.write(uploaded_file.getvalue()) tmp_path = tmp_file.name # Load document if not already loaded if not st.session_state.document_loaded: with st.spinner("Loading and indexing document..."): try: processor = InvestmentDocumentProcessor(api_key) processor.load_pdf(tmp_path) st.session_state.processor = processor st.session_state.document_loaded = True # Show document info doc_info = processor.get_document_summary() st.success(f"✓ Document loaded: {doc_info['num_pages']} pages") except Exception as e: st.error(f"Error loading document: {str(e)}") st.stop() # Clean up temp file Path(tmp_path).unlink(missing_ok=True) with col2: st.markdown("### Analysis") if st.session_state.document_loaded: if st.button("🔍 Analyze Document", type="primary", use_container_width=True): with st.spinner(f"Analyzing against {selected_criteria_name} criteria..."): try: result = st.session_state.processor.analyze_with_criteria( criteria['analysis_prompt'] ) st.session_state.analysis_result = result except Exception as e: st.error(f"Analysis error: {str(e)}") st.exception(e) else: st.info("Upload a PDF document to begin analysis") # Display results if st.session_state.analysis_result is not None: display_analysis_result(st.session_state.analysis_result, selected_criteria_name) if __name__ == "__main__": main()