"""
Streamlit UI for the Agentic RAG System.

- Default URL ingestion
- Sidebar PDF upload
- Incremental indexing
- Question answering with sources
"""
|
|
| import streamlit as st |
| from pathlib import Path |
| import sys |
| import time |
| import os |
|
|
| |
| |
| |
| sys.path.append(str(Path(__file__).parent)) |
|
|
| |
| |
| |
| from src.config.config import Config |
| from src.document_ingestion.document_processor import DocumentProcessor |
| from src.vectorstore.vectorstore import VectorStore |
| from src.graph_builder.graph_builder import GraphBuilder |
|
|
| |
| |
| |
# Page chrome: browser-tab title/icon and a centered single-column layout.
# (Titles below contain mojibake emoji from a bad encoding round-trip —
# presumably these were originally emoji like a robot/magnifier; TODO confirm.)
st.set_page_config(
    page_title="π€ Agentic RAG Search",
    page_icon="π",
    layout="centered"
)

# Inject CSS so every Streamlit button is full-width, green, and bold.
st.markdown(
    """
    <style>
    .stButton > button {
        width: 100%;
        background-color: #4CAF50;
        color: white;
        font-weight: bold;
    }
    </style>
    """,
    unsafe_allow_html=True
)
|
|
| |
| |
| |
def init_session_state():
    """Seed every session-state key the app reads, if not already present.

    Idempotent across Streamlit reruns: existing values are never
    overwritten, so conversation history survives widget interactions.
    """
    defaults = {
        "rag_system": None,        # GraphBuilder once initialization succeeds
        "initialized": False,      # True after a successful bootstrap
        "history": [],             # list of {question, answer, time} dicts
        "processed_files": [],     # names of PDFs already indexed
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
|
|
| |
| |
| |
@st.cache_resource
def initialize_rag():
    """Build the RAG pipeline from the configured default URLs.

    Cached by Streamlit (`st.cache_resource`), so the expensive ingestion
    and indexing run only once per server process.

    Returns:
        (graph_builder, vector_store, doc_processor, chunk_count) on
        success, or (None, None, None, 0) after reporting the failure
        in the UI.
    """
    try:
        model = Config.get_llm()
        processor = DocumentProcessor(
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=Config.CHUNK_OVERLAP,
        )
        store = VectorStore()

        # Ingest + chunk the default documents, then build the index.
        chunks = processor.process_urls(Config.DEFAULT_URLS)
        store.create_vectorstore(chunks)

        # Wire retriever and LLM into the agent graph.
        builder = GraphBuilder(retriever=store.get_retriever(), llm=model)
        builder.build()

        return builder, store, processor, len(chunks)

    except Exception as e:
        # Top-level UI boundary: surface any setup failure to the user
        # instead of crashing the page.
        st.error(f"Initialization failed: {str(e)}")
        return None, None, None, 0
|
|
| |
| |
| |
def main():
    """Top-level Streamlit page.

    Bootstraps the cached RAG system, offers a sidebar PDF uploader for
    incremental indexing, and serves the question/answer form plus a
    short history of recent searches.
    """
    init_session_state()

    st.title("π Agentic RAG Document Search")
    st.markdown("Ask questions over default docs or uploaded PDFs")

    _bootstrap_system()
    _handle_pdf_uploads()

    st.markdown("---")

    _render_search()
    _render_history()


def _bootstrap_system():
    """Load the cached RAG system once and stash its handles in session state."""
    if st.session_state.initialized:
        return

    with st.spinner("Loading RAG system..."):
        rag_system, vector_store, doc_processor, num_chunks = initialize_rag()

    if rag_system:
        st.session_state.rag_system = rag_system
        st.session_state.vector_store = vector_store
        st.session_state.doc_processor = doc_processor
        st.session_state.initialized = True
        # FIX: this single-quoted f-string was broken across two source
        # lines (a syntax error); rejoined into one literal.
        st.success(f"β System ready! ({num_chunks} chunks indexed)")


def _handle_pdf_uploads():
    """Sidebar uploader: persist newly uploaded PDFs and index them.

    Re-indexes only when the set of uploaded file names differs from the
    set already processed this session.
    """
    st.sidebar.header("π Upload Project PDFs")

    uploaded_files = st.sidebar.file_uploader(
        "Upload PDF documents",
        type="pdf",
        accept_multiple_files=True
    )
    if not uploaded_files:
        return

    # FIX: if initialization failed, doc_processor/vector_store were never
    # stored in session state and the code below raised AttributeError.
    if not st.session_state.initialized:
        st.sidebar.warning("System is not ready yet; PDFs cannot be indexed.")
        return

    uploaded_names = {f.name for f in uploaded_files}
    if set(st.session_state.processed_files) == uploaded_names:
        return  # nothing new to index

    with st.spinner("Analyzing uploaded PDFs..."):
        # Uploaded files live in memory; write them out so the PDF
        # loader can read them from disk.
        temp_dir = "temp"
        os.makedirs(temp_dir, exist_ok=True)

        paths = []
        for f in uploaded_files:
            path = os.path.join(temp_dir, f.name)
            with open(path, "wb") as out:
                out.write(f.getbuffer())
            paths.append(path)

        documents = st.session_state.doc_processor.process_pdfs(paths)

        # Incremental indexing: add to the existing vector store.
        st.session_state.vector_store.add_documents(documents)
        st.session_state.processed_files = list(uploaded_names)

    st.sidebar.success("π PDFs indexed successfully!")


def _render_search():
    """Question form: run the graph, show the answer, sources, and timing."""
    with st.form("search_form"):
        question = st.text_input(
            "Enter your question:",
            placeholder="Ask something about the documents..."
        )
        submit = st.form_submit_button("π Search")

    if not (submit and question):
        return
    if not st.session_state.rag_system:
        return

    with st.spinner("Searching..."):
        start_time = time.time()
        result = st.session_state.rag_system.run(question)
        elapsed_time = time.time() - start_time

        st.session_state.history.append(
            {
                "question": question,
                "answer": result["answer"],
                "time": elapsed_time,
            }
        )

        st.markdown("### π‘ Answer")
        st.success(result["answer"])

        # Show a short preview of each retrieved chunk for transparency.
        with st.expander("π Source Documents"):
            for i, doc in enumerate(result["retrieved_docs"], 1):
                st.text_area(
                    f"Document {i}",
                    doc.page_content[:300] + "...",
                    height=100,
                    disabled=True,
                )

        st.caption(f"β±οΈ Response time: {elapsed_time:.2f} seconds")


def _render_history():
    """Show the three most recent Q&A pairs, newest first."""
    if not st.session_state.history:
        return

    st.markdown("---")
    st.markdown("### π Recent Searches")

    for item in reversed(st.session_state.history[-3:]):
        st.markdown(f"**Q:** {item['question']}")
        st.markdown(f"**A:** {item['answer'][:200]}...")
        st.caption(f"Time: {item['time']:.2f}s")
|
|
| |
| |
| |
# Script entry point: launch the Streamlit app (run via `streamlit run`).
if __name__ == "__main__":
    main()
|
|