Spaces:
Paused
Paused
File size: 10,372 Bytes
420a617 71ba388 e557000 71ba388 aa01ea5 30e7c2f e557000 c350675 30e7c2f c350675 aa01ea5 e557000 aa01ea5 25c34ad c350675 26bd6f5 71ba388 c350675 71ba388 26bd6f5 71ba388 e3bf196 420a617 26bd6f5 5f090f7 acec4a7 a251126 5f439e5 25c34ad 56b316d cc2b215 420a617 c350675 26bd6f5 c350675 420a617 c350675 25c34ad 420a617 25c34ad 299df73 25c34ad a251126 56b316d 25c34ad cc2b215 25c34ad acec4a7 56b316d cc2b215 25c34ad c3ceb0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
# app.py
import os
import sqlite3
import tempfile
import traceback
from pathlib import Path
from datetime import datetime
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader # Add this import
from components.chat import display_chat_interface, ensure_embeddings_initialized
from utils.database import (
display_vector_store_info,
handle_document_upload,
create_connection,
create_tables,
insert_document # Make sure this is imported
)
from utils.persistence import PersistenceManager
from utils.document_chunker import DocumentChunker
from backend import get_embeddings_model, initialize_qa_system, initialize_faiss
import time
def initialize_database():
    """Initialize the database connection and tables for this session.

    The connection is created once and stored in
    ``st.session_state.db_conn``; when that key already exists the function
    returns True without doing any work.

    Returns:
        bool: True when a connection is available in session state, False
        when the database file could not be opened for writing, the
        connection could not be created, or any other error occurred.
    """
    # Function-scope import: the module has no file-level logger.
    import logging

    logger = logging.getLogger(__name__)

    try:
        if 'db_conn' in st.session_state:
            return True

        data_dir = "data"
        # exist_ok avoids the check-then-create race when two sessions start
        # at the same time and both find the directory missing.
        os.makedirs(data_dir, exist_ok=True)
        db_path = os.path.join(data_dir, 'rfp_analysis.db')

        # Append mode creates the file if it is missing and proves it is
        # writable, without truncating an existing database.
        try:
            with open(db_path, 'a'):
                pass
        except IOError:
            logger.exception("Database file %s is not writable", db_path)
            return False

        conn = create_connection(db_path)
        if conn is None:
            logger.error("create_connection returned None for %s", db_path)
            return False

        create_tables(conn)
        st.session_state.db_conn = conn
        return True
    except Exception:
        # Callers only see a boolean, so record the cause before returning.
        logger.exception("Database initialization failed")
        return False
def initialize_embeddings():
    """Load the embeddings model into session state if it is not there yet.

    Returns:
        bool: True when ``st.session_state.embeddings`` is available, False
        when the model could not be obtained (an error is shown in the UI).
    """
    try:
        # Already loaded for this session: nothing to do.
        if 'embeddings' in st.session_state:
            return True

        with st.spinner("Initializing embeddings model..."):
            model = get_embeddings_model()
            if model is None:
                st.error("Failed to initialize embeddings model.")
                return False
            st.session_state.embeddings = model
            return True
    except Exception as exc:
        st.error(f"Error initializing embeddings: {str(exc)}")
        return False
def display_header():
    """Render the page header: logo in a narrow column, title beside it.

    When ``img/logo.png`` does not exist, an error notice is shown in the
    logo column instead of the image.
    """
    logo_path = "img/logo.png"
    # 1:4 width ratio between the logo column and the title column.
    logo_column, title_column = st.columns([1, 4])

    with logo_column:
        if not os.path.exists(logo_path):
            st.error("Logo not found at img/logo.png")
        else:
            st.image(logo_path, width=100)

    with title_column:
        st.title("Synaptyx.AI - RFP Analysis Agent")
def display_example_questions():
    """Return the example questions shown on the welcome screen.

    Despite the ``display_`` prefix, this function renders nothing; it only
    returns the list and leaves rendering to the caller.

    Returns:
        list[str]: Ten example prompts for RFP analysis.
    """
    # NOTE(review): the leading glyphs look like mis-decoded emoji; they are
    # kept exactly as found - confirm against the upstream file before
    # restoring them.
    return [
        "π Summarize the main points of the document",
        "π Draft a 'Why Us' section based on the document",
        "π― Extract key success metrics and outcomes",
        "π‘ What are the innovative solutions mentioned?",
        "π€ Analyze the partnership benefits described",
        "π What are the key performance requirements?",
        "π° Extract budget and pricing information",
        # This literal was split across two lines (an unterminated string);
        # it is rejoined here so the module parses.
        "π What are the important deadlines and milestones?",
        "β‘ Identify the technical requirements",
        "π What are the evaluation criteria?",
    ]
def _load_pdf_text(file):
    """Return the text of an uploaded PDF, pages joined with newlines.

    The upload is written to a temporary ``.pdf`` file because PyPDFLoader
    is given a file path. The temporary file is always removed, including
    when writing or parsing fails.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(file.getvalue())
        # The file is closed before loading so every byte is on disk and the
        # path can be reopened by the loader.
        pages = PyPDFLoader(tmp_path).load()
        return "\n".join(page.page_content for page in pages)
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)


def handle_document_upload(uploaded_files, persistence):
    """Process uploaded PDFs into a vector store and QA system.

    For each file the text is extracted and stored through
    ``insert_document``. All documents are then chunked, the chunks and the
    vector store are saved through ``persistence`` under a new session id,
    and ``vector_store``, ``qa_system`` and ``current_session_id`` are
    written to ``st.session_state``.

    NOTE: this definition shadows the ``handle_document_upload`` name
    imported from ``utils.database`` at the top of the file.

    Args:
        uploaded_files: Uploaded file objects exposing ``name`` and
            ``getvalue()``.
        persistence: Object providing ``save_chunks`` and
            ``save_vector_store``.

    Returns:
        bool: True on success. On any failure an error is shown in the UI
        and False is returned.
    """
    try:
        # Fail with a clear message instead of a ZeroDivisionError.
        if not uploaded_files:
            raise ValueError("No documents were provided")

        # Initialize progress indicators
        progress = st.progress(0)
        status = st.empty()

        # Initialize document chunker
        chunker = DocumentChunker(
            chunk_size=1000,
            chunk_overlap=200,
            max_tokens_per_chunk=2000
        )

        total = len(uploaded_files)
        document_pairs = []

        for idx, file in enumerate(uploaded_files):
            status.text(f"Processing document {idx + 1}/{total}: {file.name}")

            content = _load_pdf_text(file)

            # Store in database
            doc_id = insert_document(st.session_state.db_conn, file.name, content)
            if not doc_id:
                raise Exception(f"Failed to store document: {file.name}")

            document_pairs.append((content, file.name))

            # Derive the percentage from the index so float accumulation
            # cannot drift; clamp to the 0-100 range st.progress accepts.
            progress.progress(min(100, int((idx + 1) * 100 / total)))

        # Process documents with chunker
        status.text("Chunking documents...")
        chunks, chunk_metadatas = chunker.process_documents(document_pairs)

        # Generate session ID (timestamp with one-second resolution)
        session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Save chunks
        persistence.save_chunks(chunks, chunk_metadatas, session_id)

        # Initialize vector store
        status.text("Creating vector embeddings...")
        vector_store = initialize_faiss(st.session_state.embeddings, chunks, chunk_metadatas)
        if not vector_store:
            raise Exception("Failed to initialize vector store")

        # Save vector store
        persistence.save_vector_store(vector_store, session_id)

        # Initialize QA system
        status.text("Setting up QA system...")
        qa_system = initialize_qa_system(vector_store)
        if not qa_system:
            raise Exception("Failed to initialize QA system")

        # Update session state
        st.session_state.vector_store = vector_store
        st.session_state.qa_system = qa_system
        st.session_state.current_session_id = session_id

        progress.progress(100)
        status.empty()
        return True
    except Exception as e:
        st.error(f"Error processing documents: {str(e)}")
        return False
def main():
    """Run the Streamlit app: page setup, sidebar uploads, then chat.

    Flow:
        1. Configure the page and inject the sidebar logo CSS.
        2. Initialize the database and embeddings; stop with an error
           message if either fails.
        3. In the sidebar, accept PDF uploads and process them when the
           uploaded set differs from ``st.session_state.processed_files``.
        4. Show the welcome screen until ``st.session_state.chat_ready`` is
           set, then show the chat interface.

    NOTE(review): several UI strings begin with glyphs that look like
    mis-decoded emoji; they are kept exactly as found - confirm against the
    upstream file before restoring them.
    """
    # Set up the page configuration
    st.set_page_config(layout="wide", page_title="SYNAPTYX - RFP Analysis Agent")

    # Custom CSS for logo positioning
    st.markdown("""
<style>
[data-testid="stSidebarNav"] {
background-image: url('img/logo.png');
background-repeat: no-repeat;
background-position: 20px 20px;
background-size: 150px auto;
padding-top: 120px;
}
</style>
""", unsafe_allow_html=True)

    # Initialize database and embeddings
    if not initialize_database():
        st.error("Failed to initialize database. Please contact support.")
        return
    if not initialize_embeddings():
        st.error("Failed to initialize embeddings model. Please try refreshing the page.")
        return

    # Initialize session state for UI control
    if 'chat_ready' not in st.session_state:
        st.session_state.chat_ready = False

    # Sidebar for document management
    with st.sidebar:
        st.title("π Document Manager")

        # Upload Section
        st.header("Upload Documents", anchor=False)
        uploaded_files = st.file_uploader(
            "Upload PDF documents",
            type=['pdf'],
            accept_multiple_files=True,
            help="Limit 200MB per file β’ PDF"
        )

        # Process uploads only when the uploaded set has changed since the
        # last successful run, so reruns do not reprocess the same files.
        if uploaded_files:
            if 'processed_files' not in st.session_state or uploaded_files != st.session_state.processed_files:
                try:
                    with st.spinner("Processing documents..."):
                        # Initialize components first
                        if 'persistence' not in st.session_state:
                            st.session_state.persistence = PersistenceManager()

                        # Process documents
                        success = handle_document_upload(
                            uploaded_files=uploaded_files,
                            persistence=st.session_state.persistence  # Pass persistence manager as parameter
                        )

                        if success:
                            st.session_state.processed_files = uploaded_files
                            st.session_state.chat_ready = True
                            st.success("Documents processed successfully!")
                            # Pause so the success message is visible before
                            # the rerun redraws the page.
                            time.sleep(1)
                            st.rerun()
                        else:
                            st.error("Failed to process documents. Please try again.")
                except Exception as e:
                    st.error(f"Error during document processing: {str(e)}")
                    st.error(traceback.format_exc())

        # Knowledge Base Status
        if st.session_state.get('vector_store'):
            # This literal was split across two lines (an unterminated
            # string); it is rejoined here so the module parses.
            st.success("β Documents ready for analysis")
            display_vector_store_info()

        # Document List
        if uploaded_files:
            st.subheader("π Uploaded Documents")
            for doc in uploaded_files:
                st.write(f"β’ {doc.name}")

    # Main chat area
    if not st.session_state.chat_ready:
        # Welcome screen
        st.title("π€ SYNAPTYX - RFP Analysis Agent")
        st.markdown("### Welcome to your AI-powered RFP analysis assistant!")

        col1, col2 = st.columns(2)
        with col1:
            st.markdown("""
#### Getting Started:
1. Upload your RFP documents using the sidebar
2. Wait for the processing to complete
3. Start chatting with your documents!
""")
        with col2:
            st.markdown("#### Example Questions You Can Ask:")
            examples = display_example_questions()
            for example in examples:
                st.markdown(f"{example}")
    else:
        # Clean chat interface
        display_chat_interface()
# Script entry point: run the app only when executed directly, not on import.
if __name__ == "__main__":
    main()