RFP_Analyzer_Agent

Paused

File size: 10,372 Bytes

# app.py
import os
import sqlite3
import tempfile
import traceback
from pathlib import Path
from datetime import datetime
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader  # Add this import
from components.chat import display_chat_interface, ensure_embeddings_initialized
from utils.database import (
    display_vector_store_info, 
    handle_document_upload,
    create_connection,
    create_tables,
    insert_document  # Make sure this is imported
)
from utils.persistence import PersistenceManager
from utils.document_chunker import DocumentChunker
from backend import get_embeddings_model, initialize_qa_system, initialize_faiss
import time

def initialize_database():
    """Open the SQLite database once per session and make sure its tables exist.

    The connection is cached in ``st.session_state.db_conn`` so that later
    Streamlit reruns reuse it instead of reconnecting.

    Returns:
        bool: True when a connection is available in session state. False when
        the data directory, the database file, or the connection could not be
        set up; the failure is reported with ``st.error`` instead of raised,
        matching how ``initialize_embeddings`` reports its own failures.
    """
    try:
        if 'db_conn' in st.session_state:
            return True

        data_dir = "data"
        # exist_ok avoids the check-then-create race of testing for the
        # directory first and creating it afterwards.
        os.makedirs(data_dir, exist_ok=True)
        db_path = os.path.join(data_dir, 'rfp_analysis.db')

        # Touch the file in append mode: creates it if missing and proves the
        # location is writable before the path is handed to create_connection.
        with open(db_path, 'a'):
            pass

        conn = create_connection(db_path)
        if conn is None:
            st.error("Failed to open a database connection.")
            return False

        create_tables(conn)
        st.session_state.db_conn = conn
        return True

    except OSError as exc:
        # Directory creation or the writability probe failed.
        st.error(f"Error preparing database file: {exc}")
        return False
    except Exception as exc:
        # Boundary handler: callers only expect a boolean, never an exception.
        st.error(f"Error initializing database: {exc}")
        return False

def initialize_embeddings():
    """Load the embeddings model into session state if it is not there yet.

    Returns:
        bool: True when ``st.session_state.embeddings`` is already set or was
        set by this call; False when ``get_embeddings_model`` returned None or
        raised. Both failure cases are reported through ``st.error``.
    """
    try:
        # Already loaded on an earlier run of the script: nothing to do.
        if 'embeddings' in st.session_state:
            return True

        with st.spinner("Initializing embeddings model..."):
            model = get_embeddings_model()
            loaded = model is not None
            if loaded:
                st.session_state.embeddings = model
            else:
                st.error("Failed to initialize embeddings model.")
        return loaded
    except Exception as exc:
        st.error(f"Error initializing embeddings: {str(exc)}")
        return False

def display_header():
    """Render the page header: logo in a 1-wide column, title in a 4-wide column.

    If ``img/logo.png`` does not exist, an error message is shown in the logo
    column instead of the image.
    """
    logo_col, title_col = st.columns([1, 4])

    with logo_col:
        if not os.path.exists("img/logo.png"):
            st.error("Logo not found at img/logo.png")
        else:
            st.image("img/logo.png", width=100)

    with title_col:
        st.title("Synaptyx.AI - RFP Analysis Agent")
        
def display_example_questions():
    """Build the sample RFP-analysis prompts.

    Despite the ``display_`` prefix, nothing is rendered here: the function
    only returns the prompts and leaves presentation to the caller.

    Returns:
        list[str]: A new list of ten prompts, each an emoji followed by a
        single space and the question text.
    """
    prompts = (
        ("📊", "Summarize the main points of the document"),
        ("📝", "Draft a 'Why Us' section based on the document"),
        ("🎯", "Extract key success metrics and outcomes"),
        ("💡", "What are the innovative solutions mentioned?"),
        ("🤝", "Analyze the partnership benefits described"),
        ("📈", "What are the key performance requirements?"),
        ("💰", "Extract budget and pricing information"),
        ("📅", "What are the important deadlines and milestones?"),
        ("⚡", "Identify the technical requirements"),
        ("🔍", "What are the evaluation criteria?"),
    )
    return [f"{icon} {text}" for icon, text in prompts]
def _extract_pdf_text(uploaded_file):
    """Return the text of an uploaded PDF, pages joined with newlines.

    The upload is written to a temporary ``.pdf`` file so PyPDFLoader can be
    given a path. The temporary file is closed before it is loaded (whether an
    open NamedTemporaryFile can be opened a second time by name is
    platform-dependent) and is removed even when loading fails.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(uploaded_file.getvalue())
        # Leaving the `with` block closed (and therefore flushed) the file.
        pages = PyPDFLoader(tmp_path).load()
        return "\n".join(page.page_content for page in pages)
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # real outcome (success or the original exception).
                pass


def handle_document_upload(uploaded_files, persistence):
    """Ingest uploaded PDFs and build the vector store and QA system from them.

    NOTE: this definition shadows the ``handle_document_upload`` name imported
    from ``utils.database`` at the top of the file.

    For each upload the text is extracted and stored via ``insert_document``
    (using ``st.session_state.db_conn``). All documents are then chunked, the
    chunks and the FAISS store are saved through ``persistence`` under a
    timestamp-based session id, and a QA system is initialized on the store.
    On success ``st.session_state`` receives ``vector_store``, ``qa_system``
    and ``current_session_id``. Reads ``st.session_state.embeddings``.

    Args:
        uploaded_files: Sequence of uploaded files; each must expose ``name``
            and ``getvalue()``.
        persistence: Object providing ``save_chunks(chunks, metadatas,
            session_id)`` and ``save_vector_store(vector_store, session_id)``.

    Returns:
        bool: True when every step succeeded; False otherwise, in which case
        the error has been shown with ``st.error``. Never raises.
    """
    # An empty batch used to surface as a "division by zero" message; report
    # it explicitly instead.
    if not uploaded_files:
        st.error("Error processing documents: no documents were provided")
        return False

    try:
        # Progress indicators
        progress = st.progress(0)
        status = st.empty()

        chunker = DocumentChunker(
            chunk_size=1000,
            chunk_overlap=200,
            max_tokens_per_chunk=2000
        )

        total = len(uploaded_files)
        document_pairs = []
        for position, uploaded in enumerate(uploaded_files, start=1):
            status.text(f"Processing document {position}/{total}: {uploaded.name}")

            content = _extract_pdf_text(uploaded)

            # A falsy id is treated as a failed insert.
            doc_id = insert_document(st.session_state.db_conn, uploaded.name, content)
            if not doc_id:
                raise RuntimeError(f"Failed to store document: {uploaded.name}")

            document_pairs.append((content, uploaded.name))

            # Derived from the position rather than accumulated, so float
            # rounding can never push the value past 100.
            progress.progress(int(position * 100 / total))

        status.text("Chunking documents...")
        chunks, chunk_metadatas = chunker.process_documents(document_pairs)

        # Session id has one-second resolution.
        session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        persistence.save_chunks(chunks, chunk_metadatas, session_id)

        status.text("Creating vector embeddings...")
        vector_store = initialize_faiss(st.session_state.embeddings, chunks, chunk_metadatas)
        if not vector_store:
            raise RuntimeError("Failed to initialize vector store")

        persistence.save_vector_store(vector_store, session_id)

        status.text("Setting up QA system...")
        qa_system = initialize_qa_system(vector_store)
        if not qa_system:
            raise RuntimeError("Failed to initialize QA system")

        # Publish results only after every step succeeded.
        st.session_state.vector_store = vector_store
        st.session_state.qa_system = qa_system
        st.session_state.current_session_id = session_id

        progress.progress(100)
        status.empty()

        return True

    except Exception as e:
        # Boundary handler: the caller only checks the boolean result.
        st.error(f"Error processing documents: {str(e)}")
        return False
        
def main():
    """Run the app: page setup, sidebar document manager, then chat or welcome view.

    Stops early (after showing an error) if the database or the embeddings
    model cannot be initialized. Uploaded PDFs are processed when the current
    selection differs from ``st.session_state.processed_files``; on success
    ``chat_ready`` is set and the script is rerun.
    """
    st.set_page_config(layout="wide", page_title="SYNAPTYX - RFP Analysis Agent")

    # CSS injected as raw HTML to position the logo above the sidebar nav.
    sidebar_logo_css = """
        <style>
        [data-testid="stSidebarNav"] {
            background-image: url('img/logo.png');
            background-repeat: no-repeat;
            background-position: 20px 20px;
            background-size: 150px auto;
            padding-top: 120px;
        }
        </style>
        """
    st.markdown(sidebar_logo_css, unsafe_allow_html=True)

    # Both initializers return False on failure; nothing below works without them.
    if not initialize_database():
        st.error("Failed to initialize database. Please contact support.")
        return

    if not initialize_embeddings():
        st.error("Failed to initialize embeddings model. Please try refreshing the page.")
        return

    state = st.session_state

    # UI flag: False shows the welcome screen, True shows the chat interface.
    if 'chat_ready' not in state:
        state.chat_ready = False

    with st.sidebar:
        st.title("📚 Document Manager")

        st.header("Upload Documents", anchor=False)
        uploaded_files = st.file_uploader(
            "Upload PDF documents",
            type=["pdf"],
            accept_multiple_files=True,
            help="Limit 200MB per file • PDF",
        )

        # Only process when there are uploads and they differ from the batch
        # recorded after the last successful run.
        if uploaded_files and (
            'processed_files' not in state
            or uploaded_files != state.processed_files
        ):
            try:
                with st.spinner("Processing documents..."):
                    if 'persistence' not in state:
                        state.persistence = PersistenceManager()

                    processed_ok = handle_document_upload(
                        uploaded_files=uploaded_files,
                        persistence=state.persistence,
                    )

                    if not processed_ok:
                        st.error("Failed to process documents. Please try again.")
                    else:
                        state.processed_files = uploaded_files
                        state.chat_ready = True
                        st.success("Documents processed successfully!")
                        # Presumably gives the success message time to be
                        # seen before the rerun redraws the page.
                        time.sleep(1)
                        st.rerun()
            except Exception as exc:
                st.error(f"Error during document processing: {str(exc)}")
                st.error(traceback.format_exc())

        # Knowledge base status
        if state.get('vector_store'):
            st.success("✅ Documents ready for analysis")
            display_vector_store_info()

        # Names of the files currently selected in the uploader
        if uploaded_files:
            st.subheader("📑 Uploaded Documents")
            for uploaded in uploaded_files:
                st.write(f"• {uploaded.name}")

    # Main area
    if state.chat_ready:
        display_chat_interface()
    else:
        st.title("🤖 SYNAPTYX - RFP Analysis Agent")
        st.markdown("### Welcome to your AI-powered RFP analysis assistant!")

        steps_col, examples_col = st.columns(2)
        with steps_col:
            st.markdown("""
            #### Getting Started:
            1. Upload your RFP documents using the sidebar
            2. Wait for the processing to complete
            3. Start chatting with your documents!
            """)

        with examples_col:
            st.markdown("#### Example Questions You Can Ask:")
            for example in display_example_questions():
                st.markdown(f"{example}")

# Call main() only when this file is executed as the main module, not when it
# is imported.
if __name__ == "__main__":
    main()