Spaces:

Dinesh310
/

demo2

Sleeping

App Files Files Community

Dinesh310 committed 28 days ago

Commit

d143793

verified ·

1 Parent(s): 028a330

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +113 -196

streamlit_app.py CHANGED Viewed

@@ -1,238 +1,155 @@
-"""
-Streamlit UI for Agentic RAG System
-- Default URL ingestion
-- Sidebar PDF upload
-- Incremental indexing
-- Question answering with sources
-"""
 import streamlit as st
 from pathlib import Path
 import sys
 import time
-import os
-# -------------------------------------------------
-# Path setup
-# -------------------------------------------------
 sys.path.append(str(Path(__file__).parent))
-# -------------------------------------------------
-# Project imports
-# -------------------------------------------------
 from src.config.config import Config
 from src.document_ingestion.document_processor import DocumentProcessor
 from src.vectorstore.vectorstore import VectorStore
 from src.graph_builder.graph_builder import GraphBuilder
-# -------------------------------------------------
 # Page configuration
-# -------------------------------------------------
 st.set_page_config(
-    page_title="🤖 Agentic RAG Search",
-    page_icon="🔍",
-    layout="centered"
 )
-# -------------------------------------------------
-# Simple CSS
-# -------------------------------------------------
-st.markdown(
-    """
     <style>
-    .stButton > button {
-        width: 100%;
-        background-color: #4CAF50;
-        color: white;
-        font-weight: bold;
-    }
     </style>
-    """,
-    unsafe_allow_html=True
-)
-# -------------------------------------------------
-# Session state initialization
-# -------------------------------------------------
 def init_session_state():
-    if "rag_system" not in st.session_state:
         st.session_state.rag_system = None
-    if "initialized" not in st.session_state:
-        st.session_state.initialized = False
-    if "history" not in st.session_state:
-        st.session_state.history = []
-    if "processed_files" not in st.session_state:
         st.session_state.processed_files = []
-# -------------------------------------------------
-# RAG system initialization (cached)
-# -------------------------------------------------
-@st.cache_resource
-def initialize_rag():
-    """
-    Initializes RAG using default URLs.
-    This runs ONLY once due to caching.
-    """
     try:
-        llm = Config.get_llm()
         doc_processor = DocumentProcessor(
             chunk_size=Config.CHUNK_SIZE,
             chunk_overlap=Config.CHUNK_OVERLAP
         )
         vector_store = VectorStore()
-        # Load default URLs
-        urls = Config.DEFAULT_URLS
-        documents = doc_processor.process_urls(urls)
-        # Create vector store
-        vector_store.create_vectorstore(documents)
-        # Build agentic graph
         graph_builder = GraphBuilder(
             retriever=vector_store.get_retriever(),
-            llm=llm
         )
         graph_builder.build()
-        return graph_builder, vector_store, doc_processor, len(documents)
     except Exception as e:
-        st.error(f"Initialization failed: {str(e)}")
-        return None, None, None, 0
-# -------------------------------------------------
-# Main app
-# -------------------------------------------------
 def main():
     init_session_state()
-    # -------------------------------
-    # Title
-    # -------------------------------
-    st.title("🔍 Agentic RAG Document Search")
-    # st.markdown("Ask questions over default docs or uploaded PDFs")
-    # -------------------------------
-    # Initialize RAG system
-    # -------------------------------
-    if not st.session_state.initialized:
-        with st.spinner("Loading RAG system..."):
-            rag_system, vector_store, doc_processor, num_chunks = initialize_rag()
-            if rag_system:
-                st.session_state.rag_system = rag_system
-                st.session_state.vector_store = vector_store
-                st.session_state.doc_processor = doc_processor
-                st.session_state.initialized = True
-                st.success(f"✅ System ready! ({num_chunks} chunks indexed)")
-    # -------------------------------------------------
-    # Sidebar: PDF Upload
-    # -------------------------------------------------
-    st.sidebar.header("📄 Upload Project PDFs")
-    uploaded_files = st.sidebar.file_uploader(
-        "Upload PDF documents",
-        type="pdf",
-        accept_multiple_files=True
-    )
-    if uploaded_files:
-        uploaded_names = {f.name for f in uploaded_files}
-        if (
-            not st.session_state.processed_files
-            or set(st.session_state.processed_files) != uploaded_names
-        ):
-            with st.spinner("Analyzing uploaded PDFs..."):
-                temp_dir = "temp"
-                os.makedirs(temp_dir, exist_ok=True)
-                paths = []
-                for f in uploaded_files:
-                    path = os.path.join(temp_dir, f.name)
-                    with open(path, "wb") as out:
-                        out.write(f.getbuffer())
-                    paths.append(path)
-                # Process PDFs
-                documents = st.session_state.doc_processor.process_pdfs(paths)
-                # Add to existing vector store
-                st.session_state.vector_store.add_documents(documents)
-                # Update processed file list
-                st.session_state.processed_files = list(uploaded_names)
-            st.sidebar.success("📚 PDFs indexed successfully!")
-    st.markdown("---")
-    # -------------------------------------------------
-    # Query input
-    # -------------------------------------------------
-    with st.form("search_form"):
-        question = st.text_input(
-            "Enter your question:",
-            placeholder="Ask something about the documents..."
         )
-        submit = st.form_submit_button("🔍 Search")
-    # -------------------------------------------------
-    # Query processing
-    # -------------------------------------------------
-    if submit and question:
-        if st.session_state.rag_system:
-            with st.spinner("Searching..."):
-                start_time = time.time()
-                result = st.session_state.rag_system.run(question)
-                elapsed_time = time.time() - start_time
-                # Save history
-                st.session_state.history.append(
-                    {
-                        "question": question,
-                        "answer": result["answer"],
-                        "time": elapsed_time,
-                    }
-                )
-                # Display answer
-                st.markdown("### 💡 Answer")
-                st.success(result["answer"])
-                # Show retrieved documents
-                with st.expander("📄 Source Documents"):
-                    for i, doc in enumerate(result["retrieved_docs"], 1):
-                        st.text_area(
-                            f"Document {i}",
-                            doc.page_content[:300] + "...",
-                            height=100,
-                            disabled=True,
-                        )
-                st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")
-    # -------------------------------------------------
-    # Search history
-    # -------------------------------------------------
     if st.session_state.history:
         st.markdown("---")
-        st.markdown("### 📜 Recent Searches")
-        for item in reversed(st.session_state.history[-3:]):
-            st.markdown(f"**Q:** {item['question']}")
-            st.markdown(f"**A:** {item['answer'][:200]}...")
-            st.caption(f"Time: {item['time']:.2f}s")
-# -------------------------------------------------
-# Entry point
-# -------------------------------------------------
 if __name__ == "__main__":
-    main()

 import streamlit as st
 from pathlib import Path
 import sys
 import time
+# Add this file's directory to sys.path so the `src` package can be imported
 sys.path.append(str(Path(__file__).parent))
 from src.config.config import Config
 from src.document_ingestion.document_processor import DocumentProcessor
 from src.vectorstore.vectorstore import VectorStore
 from src.graph_builder.graph_builder import GraphBuilder
 # Page configuration: title, icon and layout passed to st.set_page_config.
 _PAGE_SETTINGS = {
     "page_title": "🤖 PDF Agentic RAG",
     "page_icon": "📄",
     "layout": "wide",
 }
 st.set_page_config(**_PAGE_SETTINGS)
 # Custom CSS for a cleaner look: spacing above alerts and full-width,
 # rounded, taller buttons.
 _CUSTOM_CSS = """
     <style>
     .stAlert { margin-top: 1rem; }
     .stButton > button { width: 100%; border-radius: 5px; height: 3em; }
     </style>
 """
 st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
 def init_session_state():
     """Seed ``st.session_state`` with the keys the app reads.

     Keys that are already present are left untouched; missing keys are
     created with their initial value (``None`` for the RAG system, empty
     lists for the processed-file names and the search history).
     """
     # Built inside the function so the list defaults are fresh objects on
     # every call rather than shared module-level lists.
     initial_values = {
         'rag_system': None,
         'processed_files': [],
         'history': [],
     }
     for key, value in initial_values.items():
         if key not in st.session_state:
             st.session_state[key] = value
def process_new_documents(uploaded_files):
    """Index the uploaded PDFs and build a RAG graph over them.

    Each upload is written to a private temporary directory, chunked with
    ``DocumentProcessor``, and the combined chunks are loaded into a fresh
    ``VectorStore`` whose retriever backs a newly built ``GraphBuilder``.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            and ``getvalue()`` (as returned by ``st.file_uploader``).

    Returns:
        ``(graph_builder, num_chunks)`` on success, or ``(None, 0)`` when
        nothing could be indexed or any step raised; in that case the reason
        is reported with ``st.error``.
    """
    import tempfile  # local import: only this function needs it

    try:
        doc_processor = DocumentProcessor(
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=Config.CHUNK_OVERLAP,
        )

        # 1. Process PDFs into chunks.
        all_docs = []
        # A throwaway directory keeps uploads out of the working directory and
        # is removed even when processing raises, so no temp files leak.
        with tempfile.TemporaryDirectory() as temp_dir:
            for uploaded_file in uploaded_files:
                # Keep only the final path component so a crafted upload name
                # cannot address a location outside the temporary directory.
                safe_name = Path(uploaded_file.name).name
                if safe_name in ("", ".."):
                    safe_name = "upload.pdf"
                temp_path = Path(temp_dir) / safe_name
                temp_path.write_bytes(uploaded_file.getvalue())
                # NOTE(review): assumes DocumentProcessor.process_pdf(path)
                # returns the chunks for a single file -- confirm against the
                # actual DocumentProcessor API.
                all_docs.extend(doc_processor.process_pdf(str(temp_path)))

        # Building an index from zero chunks cannot yield a usable retriever;
        # report it explicitly instead of surfacing a backend-specific error.
        if not all_docs:
            st.error(
                "Error processing documents: "
                "no content could be extracted from the uploaded PDFs."
            )
            return None, 0

        # 2. Initialize components.
        vector_store = VectorStore()
        vector_store.create_vectorstore(all_docs)

        # 3. Build the graph.
        graph_builder = GraphBuilder(
            retriever=vector_store.get_retriever(),
            llm=Config.get_llm(),
        )
        graph_builder.build()
        return graph_builder, len(all_docs)
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        st.error(f"Error processing documents: {str(e)}")
        return None, 0
 def main():
     """Render the app: sidebar upload/indexing, search form, and history."""
     init_session_state()
     _render_sidebar()

     st.title("🔍 Agentic RAG Search")
     if not st.session_state.rag_system:
         st.info("👈 Please upload and process PDF documents in the sidebar to start searching.")
         return

     _render_search()
     _render_history()


 def _render_sidebar():
     """Draw the upload widget and rebuild the RAG system when requested."""
     with st.sidebar:
         st.title("📁 Document Management")
         uploaded_files = st.file_uploader(
             "Upload PDF documents",
             type="pdf",
             accept_multiple_files=True,
         )
         if st.button("🚀 Process Documents"):
             if not uploaded_files:
                 # Tell the user why nothing happened instead of ignoring the click.
                 st.warning("Please upload at least one PDF before processing.")
             else:
                 with st.spinner("Analyzing PDFs and building index..."):
                     rag_system, num_chunks = process_new_documents(uploaded_files)
                 if rag_system:
                     st.session_state.rag_system = rag_system
                     st.session_state.processed_files = [f.name for f in uploaded_files]
                     st.success(f"Indexed {len(uploaded_files)} files ({num_chunks} chunks)")

         if st.session_state.processed_files:
             st.markdown("---")
             st.markdown("**Currently Loaded:**")
             for file_name in st.session_state.processed_files:
                 st.caption(f"✅ {file_name}")


 def _render_search():
     """Show the question form and answer a question when it is submitted."""
     # Streamlit re-executes the script on every interaction and a text box
     # keeps its value, so gating on the text alone would re-run the query and
     # append a duplicate history entry on each unrelated rerun. A form limits
     # execution to an explicit submit (button click or Enter in the field).
     with st.form("search_form"):
         question = st.text_input("Ask a question about your documents:")
         submit = st.form_submit_button("Search")

     if not submit:
         return
     question = question.strip()
     if not question:
         st.warning("Please enter a question.")
         return

     with st.spinner("Agent is thinking..."):
         start_time = time.time()
         try:
             # Execute the RAG pipeline.
             result = st.session_state.rag_system.run(question)
         except Exception as e:
             # UI boundary: report the failure rather than crashing the page.
             st.error(f"Search failed: {str(e)}")
             return
         elapsed_time = time.time() - start_time

     st.session_state.history.append({
         'question': question,
         'answer': result['answer'],
         'time': elapsed_time
     })

     st.markdown("### 💡 Answer")
     st.write(result['answer'])
     with st.expander("📄 View Source Context"):
         for i, doc in enumerate(result.get('retrieved_docs', []), 1):
             st.markdown(f"**Source {i}:**")
             st.info(doc.page_content)


 def _render_history():
     """List earlier questions, newest first, each in its own expander."""
     if not st.session_state.history:
         return
     st.markdown("---")
     st.subheader("📜 Search History")
     for item in reversed(st.session_state.history):
         with st.expander(f"Q: {item['question']}"):
             st.write(item['answer'])
             st.caption(f"Response time: {item['time']:.2f}s")


 if __name__ == "__main__":
     main()