"""Streamlit front end for a PDF question-answering (RAG) chatbot.

The sidebar lets the user upload PDFs, which are saved under ``DATA_DIR`` and
ingested into the vector store, or wipe all stored data. The main pane is a
chat interface that sends each prompt to the chain kept in
``st.session_state.chain``.
"""

import logging
import os
import shutil
import traceback

import streamlit as st

from rag_pipeline import load_retriever, build_rag_chain
from ingest import load_documents, split_documents, create_vector_store, DATA_DIR

logger = logging.getLogger(__name__)

# Directory checked before loading the retriever and removed on "clear".
CHROMA_DIR = "chroma_db"


def _vector_store_exists() -> bool:
    """Return True when ``CHROMA_DIR`` is a non-empty directory."""
    return os.path.isdir(CHROMA_DIR) and bool(os.listdir(CHROMA_DIR))


def _build_chain():
    """Load the retriever and wrap it in a RAG chain."""
    return build_rag_chain(load_retriever())


def _try_init_chain() -> None:
    """Best-effort initialisation of ``st.session_state.chain``.

    The retriever is only loaded when the vector store directory already
    holds data (the original code did this to avoid SQLite locking on a
    missing database). Failures are logged rather than raised so the UI
    still renders; the chain simply stays ``None``.
    """
    if not _vector_store_exists():
        return
    try:
        st.session_state.chain = _build_chain()
    except Exception:
        # Deliberately non-fatal, but no longer silent.
        logger.exception("Could not initialise the RAG chain")


def _safe_pdf_name(raw_name: str, fallback_stem: str = "document") -> str:
    """Return a filesystem-safe ``.pdf`` file name derived from *raw_name*.

    Only alphanumerics, spaces, dots, underscores and hyphens are kept, and a
    ``.pdf`` suffix is appended when missing. If nothing meaningful is left
    before the extension (e.g. the name consisted solely of stripped
    characters), *fallback_stem* is used instead of producing ``".pdf"``.
    """
    cleaned = "".join(c for c in raw_name if c.isalnum() or c in " ._-")
    if not cleaned.lower().endswith(".pdf"):
        cleaned += ".pdf"
    stem = cleaned[: -len(".pdf")]
    if not stem.strip(" ."):
        cleaned = f"{fallback_stem}.pdf"
    return cleaned


def _ingest_uploads(uploaded_files) -> None:
    """Save the uploaded PDFs to ``DATA_DIR``, rebuild the store and chain."""
    os.makedirs(DATA_DIR, exist_ok=True)

    for index, uploaded in enumerate(uploaded_files, start=1):
        target_name = _safe_pdf_name(
            uploaded.name, fallback_stem=f"document_{index}"
        )
        target_path = os.path.join(DATA_DIR, target_name)
        with open(target_path, "wb") as out:
            out.write(uploaded.getbuffer())

    # Ingest everything currently in DATA_DIR.
    docs = load_documents(DATA_DIR)
    chunks = split_documents(docs)
    create_vector_store(chunks)

    # Reinitialize the chain so it sees the new store.
    st.session_state.chain = _build_chain()


# Page config
st.set_page_config(page_title="RAG Chatbot", page_icon="🤖", layout="wide")

# Initialize session state for chat history and chain
if "messages" not in st.session_state:
    st.session_state.messages = []

if "chain" not in st.session_state:
    st.session_state.chain = None
    _try_init_chain()

# Sidebar for document upload and data management
with st.sidebar:
    st.title("📁 Upload PDFs")
    st.markdown("Upload your PDFs")

    uploaded_files = st.file_uploader(
        "Choose PDF files",
        type="pdf",
        accept_multiple_files=True,
        label_visibility="collapsed",
    )

    if st.button("Process Documents"):
        if uploaded_files:
            with st.spinner("Processing documents..."):
                try:
                    _ingest_uploads(uploaded_files)
                    st.success("Documents processed successfully!")
                except Exception as e:
                    # UI boundary: surface the full error to the user.
                    st.error(f"Error during ingestion: {str(e)}")
                    st.code(traceback.format_exc())
        else:
            st.warning("Please upload at least one PDF.")

    st.markdown("---")
    st.title("🗑️ Manage Data")

    if st.button("Clear All Documents"):
        with st.spinner("Clearing database and files..."):
            try:
                if os.path.exists(DATA_DIR):
                    shutil.rmtree(DATA_DIR)
                if os.path.exists(CHROMA_DIR):
                    shutil.rmtree(CHROMA_DIR)
            except OSError as e:
                st.error(f"Error clearing data: {str(e)}")
            else:
                # Reset session state only after the files are really gone.
                st.session_state.chain = None
                st.session_state.messages = []
                st.success("All documents and chat history cleared!")
                # Kept outside the try body so the rerun signal can never be
                # mistaken for a deletion failure.
                st.rerun()

# Main chat interface
# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("Ask something..."):
    # Display user message and add it to history
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Generate response
    with st.chat_message("assistant"):
        if st.session_state.chain is None:
            # Try to initialize one last time (same guard as at startup).
            _try_init_chain()

        if st.session_state.chain is None:
            response = "Pipeline is not initialized. Please upload a document first."
            st.markdown(response)
        else:
            with st.spinner("Thinking..."):
                try:
                    response = st.session_state.chain.invoke(prompt)
                    st.markdown(response)
                except Exception as e:
                    response = f"Error: {str(e)}"
                    st.error(response)

    # Add to history
    st.session_state.messages.append({"role": "assistant", "content": response})