cryogenic22's picture
Update app.py
30e7c2f verified
# app.py
import os
import sqlite3
import tempfile
import traceback
from pathlib import Path
from datetime import datetime
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader # Add this import
from components.chat import display_chat_interface, ensure_embeddings_initialized
from utils.database import (
display_vector_store_info,
handle_document_upload,
create_connection,
create_tables,
insert_document # Make sure this is imported
)
from utils.persistence import PersistenceManager
from utils.document_chunker import DocumentChunker
from backend import get_embeddings_model, initialize_qa_system, initialize_faiss
import time
def initialize_database():
    """Create the SQLite connection and tables once per Streamlit session.

    The connection is cached in ``st.session_state.db_conn``; if that key is
    already present the function returns immediately without touching disk.

    Returns:
        bool: True when a connection is available in session state; False
        when the data directory, database file, connection, or tables could
        not be set up. Exceptions are reported in the UI rather than being
        swallowed silently.
    """
    try:
        if 'db_conn' in st.session_state:
            return True

        data_dir = "data"
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(data_dir, exist_ok=True)
        db_path = os.path.join(data_dir, 'rfp_analysis.db')

        # Append mode creates the file if it is missing and fails if it is
        # not writable, without truncating an existing database.
        with open(db_path, 'a'):
            pass

        conn = create_connection(db_path)
        if conn is None:
            return False

        create_tables(conn)
        st.session_state.db_conn = conn
        return True
    except Exception as e:
        # Surface the cause, matching how initialize_embeddings reports
        # its failures, instead of returning False with no explanation.
        st.error(f"Error initializing database: {str(e)}")
        return False
def initialize_embeddings():
    """Load the embeddings model into session state if it is not there yet.

    Returns:
        bool: True if ``st.session_state.embeddings`` is populated (either
        already, or by this call). False if ``get_embeddings_model()``
        returned None or anything raised; in both cases an error message is
        shown in the UI.
    """
    try:
        # Nothing to do when an earlier run already stored the model.
        if 'embeddings' in st.session_state:
            return True

        with st.spinner("Initializing embeddings model..."):
            model = get_embeddings_model()
            if model is None:
                st.error("Failed to initialize embeddings model.")
                return False
            st.session_state.embeddings = model
            return True
    except Exception as exc:
        st.error(f"Error initializing embeddings: {exc!s}")
        return False
def display_header():
    """Render the page header: logo in a narrow column, title beside it.

    Shows an error in the logo column when ``img/logo.png`` does not exist.
    """
    logo_col, title_col = st.columns([1, 4])
    logo_path = "img/logo.png"

    with logo_col:
        if not os.path.exists(logo_path):
            st.error("Logo not found at img/logo.png")
        else:
            st.image(logo_path, width=100)

    with title_col:
        st.title("Synaptyx.AI - RFP Analysis Agent")
def display_example_questions():
    """Return the example RFP-analysis prompts shown on the welcome screen.

    Despite the name, nothing is rendered here; the caller decides how to
    display the returned strings. A new list is built on every call.
    """
    prompts = (
        "📊 Summarize the main points of the document",
        "📝 Draft a 'Why Us' section based on the document",
        "🎯 Extract key success metrics and outcomes",
        "💡 What are the innovative solutions mentioned?",
        "🤝 Analyze the partnership benefits described",
        "📈 What are the key performance requirements?",
        "💰 Extract budget and pricing information",
        "📅 What are the important deadlines and milestones?",
        "⚡ Identify the technical requirements",
        "🔍 What are the evaluation criteria?",
    )
    return list(prompts)
def _extract_pdf_text(uploaded_file):
    """Return the text of an uploaded PDF, with pages joined by newlines.

    The upload's bytes are written to a temporary ``.pdf`` file because
    PyPDFLoader is given a filesystem path. The temporary file is always
    removed, including when writing or loading raises.
    """
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
    try:
        # Close the handle before loading: this flushes the data and avoids
        # reading/unlinking a file that is still open for writing.
        with tmp_file:
            tmp_file.write(uploaded_file.getvalue())
        pages = PyPDFLoader(tmp_file.name).load()
    finally:
        os.unlink(tmp_file.name)
    return "\n".join(page.page_content for page in pages)


def handle_document_upload(uploaded_files, persistence):
    """Ingest uploaded PDFs and build the vector store and QA system.

    For each upload the text is extracted and stored via ``insert_document``;
    all documents are then chunked, the chunks and the FAISS vector store are
    saved through ``persistence`` under a timestamp-based session id, and the
    vector store, QA system and session id are placed in ``st.session_state``.

    NOTE: this definition shadows the ``handle_document_upload`` imported
    from ``utils.database`` at the top of the file.

    Args:
        uploaded_files: Sized iterable of uploads exposing ``.name`` and
            ``.getvalue()``.
        persistence: Object providing ``save_chunks`` and
            ``save_vector_store``.

    Returns:
        bool: True on success. False on any failure (including an empty
        ``uploaded_files``); the error is shown in the UI.
    """
    # Guard explicitly: without it the progress arithmetic fails with an
    # unhelpful "division by zero" message.
    if not uploaded_files:
        st.error("Error processing documents: no files were provided")
        return False

    try:
        progress = st.progress(0)
        status = st.empty()

        chunker = DocumentChunker(
            chunk_size=1000,
            chunk_overlap=200,
            max_tokens_per_chunk=2000
        )

        total = len(uploaded_files)
        document_pairs = []
        for position, file in enumerate(uploaded_files, start=1):
            status.text(f"Processing document {position}/{total}: {file.name}")

            content = _extract_pdf_text(file)

            doc_id = insert_document(st.session_state.db_conn, file.name, content)
            if not doc_id:
                raise Exception(f"Failed to store document: {file.name}")
            document_pairs.append((content, file.name))

            # Integer arithmetic avoids accumulating float rounding error.
            progress.progress(position * 100 // total)

        status.text("Chunking documents...")
        chunks, chunk_metadatas = chunker.process_documents(document_pairs)

        session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        persistence.save_chunks(chunks, chunk_metadatas, session_id)

        status.text("Creating vector embeddings...")
        vector_store = initialize_faiss(st.session_state.embeddings, chunks, chunk_metadatas)
        if not vector_store:
            raise Exception("Failed to initialize vector store")
        persistence.save_vector_store(vector_store, session_id)

        status.text("Setting up QA system...")
        qa_system = initialize_qa_system(vector_store)
        if not qa_system:
            raise Exception("Failed to initialize QA system")

        st.session_state.vector_store = vector_store
        st.session_state.qa_system = qa_system
        st.session_state.current_session_id = session_id

        progress.progress(100)
        status.empty()
        return True
    except Exception as e:
        st.error(f"Error processing documents: {str(e)}")
        return False
def _process_uploads(uploaded_files):
    """Run the upload pipeline for a new file selection and report the result.

    On success the selection is remembered in session state, chat is enabled
    and the script is re-run; on failure an error is shown instead.
    """
    try:
        with st.spinner("Processing documents..."):
            # The persistence manager is created lazily, once per session.
            if 'persistence' not in st.session_state:
                st.session_state.persistence = PersistenceManager()

            processed_ok = handle_document_upload(
                uploaded_files=uploaded_files,
                persistence=st.session_state.persistence,
            )

            if not processed_ok:
                st.error("Failed to process documents. Please try again.")
            else:
                st.session_state.processed_files = uploaded_files
                st.session_state.chat_ready = True
                st.success("Documents processed successfully!")
                # Leave the success message visible briefly before re-running.
                time.sleep(1)
                st.rerun()
    except Exception as exc:
        st.error(f"Error during document processing: {exc!s}")
        st.error(traceback.format_exc())


def _render_welcome_screen():
    """Show the landing view: getting-started steps and example questions."""
    st.title("🤖 SYNAPTYX - RFP Analysis Agent")
    st.markdown("### Welcome to your AI-powered RFP analysis assistant!")

    steps_col, examples_col = st.columns(2)
    with steps_col:
        st.markdown("""
#### Getting Started:
1. Upload your RFP documents using the sidebar
2. Wait for the processing to complete
3. Start chatting with your documents!
""")
    with examples_col:
        st.markdown("#### Example Questions You Can Ask:")
        for question in display_example_questions():
            st.markdown(question)


def main():
    """Entry point of the Streamlit app.

    Configures the page, initializes the database and embeddings (stopping
    with an error if either fails), renders the document-manager sidebar,
    and then shows either the welcome screen or the chat interface depending
    on ``st.session_state.chat_ready``.
    """
    st.set_page_config(layout="wide", page_title="SYNAPTYX - RFP Analysis Agent")

    # CSS that positions the logo above the sidebar navigation.
    sidebar_logo_css = """
<style>
[data-testid="stSidebarNav"] {
background-image: url('img/logo.png');
background-repeat: no-repeat;
background-position: 20px 20px;
background-size: 150px auto;
padding-top: 120px;
}
</style>
"""
    st.markdown(sidebar_logo_css, unsafe_allow_html=True)

    # Both prerequisites must be available before any UI is rendered.
    if not initialize_database():
        st.error("Failed to initialize database. Please contact support.")
        return
    if not initialize_embeddings():
        st.error("Failed to initialize embeddings model. Please try refreshing the page.")
        return

    if 'chat_ready' not in st.session_state:
        st.session_state.chat_ready = False

    with st.sidebar:
        st.title("📚 Document Manager")

        st.header("Upload Documents", anchor=False)
        uploaded_files = st.file_uploader(
            "Upload PDF documents",
            type=['pdf'],
            accept_multiple_files=True,
            help="Limit 200MB per file • PDF"
        )

        # Only process when the selection differs from what was last handled.
        if uploaded_files and (
            'processed_files' not in st.session_state
            or uploaded_files != st.session_state.processed_files
        ):
            _process_uploads(uploaded_files)

        # Knowledge base status
        if st.session_state.get('vector_store'):
            st.success("✅ Documents ready for analysis")
            display_vector_store_info()

        # Document list
        if uploaded_files:
            st.subheader("📑 Uploaded Documents")
            for doc in uploaded_files:
                st.write(f"• {doc.name}")

    # Main area: chat once documents are ready, welcome screen otherwise.
    if st.session_state.chat_ready:
        display_chat_interface()
    else:
        _render_welcome_screen()
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()