Spaces:

Dinesh310
/

demo2

Sleeping

App Files Files Community

Dinesh310 committed about 1 month ago

Commit

c00effc

verified ·

1 Parent(s): a9cc27c

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +238 -151

streamlit_app.py CHANGED Viewed

@@ -1,151 +1,238 @@
-"""Streamlit UI for Agentic RAG System - Simplified Version"""
-import streamlit as st
-from pathlib import Path
-import sys
-import time
-# Add src to path
-sys.path.append(str(Path(__file__).parent))
-from src.config.config import Config
-from src.document_ingestion.document_processor import DocumentProcessor
-from src.vectorstore.vectorstore import VectorStore
-from src.graph_builder.graph_builder import GraphBuilder
-# Page configuration
-st.set_page_config(
-    page_title="🤖 RAG Search",
-    page_icon="🔍",
-    layout="centered"
-)
-# Simple CSS
-st.markdown("""
-    <style>
-    .stButton > button {
-        width: 100%;
-        background-color: #4CAF50;
-        color: white;
-        font-weight: bold;
-    }
-    </style>
-""", unsafe_allow_html=True)
-def init_session_state():
-    """Initialize session state variables"""
-    if 'rag_system' not in st.session_state:
-        st.session_state.rag_system = None
-    if 'initialized' not in st.session_state:
-        st.session_state.initialized = False
-    if 'history' not in st.session_state:
-        st.session_state.history = []
-@st.cache_resource
-def initialize_rag():
-    """Initialize the RAG system (cached)"""
-    try:
-        # Initialize components
-        llm = Config.get_llm()
-        doc_processor = DocumentProcessor(
-            chunk_size=Config.CHUNK_SIZE,
-            chunk_overlap=Config.CHUNK_OVERLAP
-        )
-        vector_store = VectorStore()
-        # Use default URLs
-        urls = Config.DEFAULT_URLS
-        # Process documents
-        documents = doc_processor.process_urls(urls)
-        # Create vector store
-        vector_store.create_vectorstore(documents)
-        # Build graph
-        graph_builder = GraphBuilder(
-            retriever=vector_store.get_retriever(),
-            llm=llm
-        )
-        graph_builder.build()
-        return graph_builder, len(documents)
-    except Exception as e:
-        st.error(f"Failed to initialize: {str(e)}")
-        return None, 0
-def main():
-    """Main application"""
-    init_session_state()
-    # Title
-    st.title("🔍 RAG Document Search")
-    st.markdown("Ask questions about the loaded documents")
-    # Initialize system
-    if not st.session_state.initialized:
-        with st.spinner("Loading system..."):
-            rag_system, num_chunks = initialize_rag()
-            if rag_system:
-                st.session_state.rag_system = rag_system
-                st.session_state.initialized = True
-                st.success(f"✅ System ready! ({num_chunks} document chunks loaded)")
-    st.markdown("---")
-    # Search interface
-    with st.form("search_form"):
-        question = st.text_input(
-            "Enter your question:",
-            placeholder="What would you like to know?"
-        )
-        submit = st.form_submit_button("🔍 Search")
-    # Process search
-    if submit and question:
-        if st.session_state.rag_system:
-            with st.spinner("Searching..."):
-                start_time = time.time()
-                # Get answer
-                result = st.session_state.rag_system.run(question)
-                elapsed_time = time.time() - start_time
-                # Add to history
-                st.session_state.history.append({
-                    'question': question,
-                    'answer': result['answer'],
-                    'time': elapsed_time
-                })
-                # Display answer
-                st.markdown("### 💡 Answer")
-                st.success(result['answer'])
-                # Show retrieved docs in expander
-                with st.expander("📄 Source Documents"):
-                    for i, doc in enumerate(result['retrieved_docs'], 1):
-                        st.text_area(
-                            f"Document {i}",
-                            doc.page_content[:300] + "...",
-                            height=100,
-                            disabled=True
-                        )
-                st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")
-    # Show history
-    if st.session_state.history:
-        st.markdown("---")
-        st.markdown("### 📜 Recent Searches")
-        for item in reversed(st.session_state.history[-3:]):  # Show last 3
-            with st.container():
-                st.markdown(f"**Q:** {item['question']}")
-                st.markdown(f"**A:** {item['answer'][:200]}...")
-                st.caption(f"Time: {item['time']:.2f}s")
-                st.markdown("")
-if __name__ == "__main__":
-    main()

+"""
+Streamlit UI for Agentic RAG System
+- Default URL ingestion
+- Sidebar PDF upload
+- Incremental indexing
+- Question answering with sources
+"""
+import streamlit as st
+from pathlib import Path
+import sys
+import time
+import os
+# -------------------------------------------------
+# Path setup
+# -------------------------------------------------
+# Streamlit re-executes this script on every interaction inside the same
+# process; guard the append so sys.path does not accumulate duplicates.
+_APP_DIR = str(Path(__file__).parent)
+if _APP_DIR not in sys.path:
+    sys.path.append(_APP_DIR)
+# -------------------------------------------------
+# Project imports
+# -------------------------------------------------
+from src.config.config import Config
+from src.document_ingestion.document_processor import DocumentProcessor
+from src.vectorstore.vectorstore import VectorStore
+from src.graph_builder.graph_builder import GraphBuilder
+# -------------------------------------------------
+# Page configuration
+# -------------------------------------------------
+# Page title, icon and layout for the app.
+st.set_page_config(page_title="🤖 Agentic RAG Search", page_icon="🔍", layout="centered")
+# -------------------------------------------------
+# Simple CSS
+# -------------------------------------------------
+# Style sheet for buttons: full width, green background, bold white text.
+_BUTTON_CSS = """
+    <style>
+    .stButton > button {
+        width: 100%;
+        background-color: #4CAF50;
+        color: white;
+        font-weight: bold;
+    }
+    </style>
+    """
+# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
+st.markdown(_BUTTON_CSS, unsafe_allow_html=True)
+# -------------------------------------------------
+# Session state initialization
+# -------------------------------------------------
+def init_session_state():
+    """Seed ``st.session_state`` with defaults for keys that are not set yet."""
+    # Built on every call so each session receives its own fresh list objects.
+    defaults = {
+        "rag_system": None,
+        "initialized": False,
+        "history": [],
+        "processed_files": [],
+    }
+    for key, value in defaults.items():
+        # Existing values are left untouched.
+        if key not in st.session_state:
+            st.session_state[key] = value
+# -------------------------------------------------
+# RAG system initialization (cached)
+# -------------------------------------------------
+@st.cache_resource
+def _build_rag_components():
+    """
+    Build the LLM, document processor, vector store and agentic graph
+    from the default URLs.
+    Cached via ``st.cache_resource``. Exceptions are deliberately NOT caught
+    here: a call that raises stores nothing in the cache, so the next call
+    attempts the build again instead of replaying a cached failure.
+    Returns:
+        Tuple ``(graph_builder, vector_store, doc_processor, num_documents)``.
+    """
+    llm = Config.get_llm()
+    doc_processor = DocumentProcessor(
+        chunk_size=Config.CHUNK_SIZE,
+        chunk_overlap=Config.CHUNK_OVERLAP,
+    )
+    vector_store = VectorStore()
+    # Load default URLs
+    documents = doc_processor.process_urls(Config.DEFAULT_URLS)
+    # Create vector store
+    vector_store.create_vectorstore(documents)
+    # Build agentic graph
+    graph_builder = GraphBuilder(
+        retriever=vector_store.get_retriever(),
+        llm=llm,
+    )
+    graph_builder.build()
+    return graph_builder, vector_store, doc_processor, len(documents)
+def initialize_rag():
+    """
+    Initializes RAG using default URLs.
+    A successful build runs only once (it is cached); a failed build is
+    reported in the UI and retried on the next call.
+    Returns:
+        ``(graph_builder, vector_store, doc_processor, num_documents)`` on
+        success, or ``(None, None, None, 0)`` if initialization failed.
+    """
+    try:
+        return _build_rag_components()
+    except Exception as e:
+        # Top-level UI boundary: show the failure instead of crashing the page.
+        st.error(f"Initialization failed: {e}")
+        return None, None, None, 0
+# initialize_rag used to be the cached function itself; keep its cache-reset
+# hook available for any caller that relies on initialize_rag.clear().
+initialize_rag.clear = _build_rag_components.clear
+# -------------------------------------------------
+# Main app
+# -------------------------------------------------
+def _index_new_pdfs(uploaded_files):
+    """
+    Index the uploaded PDFs that have not been indexed in this session yet.
+    Expects ``doc_processor`` and ``vector_store`` to be present in
+    ``st.session_state`` (they are stored there once initialization succeeds).
+    Args:
+        uploaded_files: Files returned by ``st.sidebar.file_uploader``.
+    Returns:
+        Number of PDFs newly added to the vector store (0 if none were new).
+    """
+    already_indexed = set(st.session_state.processed_files)
+    # Key by bare file name: this drops any directory part of the
+    # client-supplied name (nothing is written outside temp_dir) and
+    # collapses duplicate names, the last upload winning.
+    pending = {}
+    for f in uploaded_files:
+        name = os.path.basename(f.name.replace("\\", "/"))
+        if name in ("", ".", "..") or name in already_indexed:
+            continue
+        pending[name] = f
+    if not pending:
+        return 0
+    with st.spinner("Analyzing uploaded PDFs..."):
+        temp_dir = "temp"
+        os.makedirs(temp_dir, exist_ok=True)
+        paths = []
+        for name, f in pending.items():
+            path = os.path.join(temp_dir, name)
+            with open(path, "wb") as out:
+                out.write(f.getbuffer())
+            paths.append(path)
+        # Process PDFs
+        documents = st.session_state.doc_processor.process_pdfs(paths)
+        # Add to existing vector store
+        st.session_state.vector_store.add_documents(documents)
+        # Accumulate names so files indexed earlier are never indexed again,
+        # even when the set of uploaded files changes later.
+        # NOTE(review): processed_files is per session while the vector store
+        # comes from a cached resource; confirm whether de-duplication across
+        # sessions is needed.
+        st.session_state.processed_files = sorted(already_indexed | set(pending))
+    return len(pending)
+def main():
+    """
+    Render the app: initialize the RAG system, index PDFs uploaded in the
+    sidebar, answer the submitted question and list the recent searches.
+    """
+    init_session_state()
+    # -------------------------------
+    # Title
+    # -------------------------------
+    st.title("🔍 Agentic RAG Document Search")
+    st.markdown("Ask questions over default docs or uploaded PDFs")
+    # -------------------------------
+    # Initialize RAG system
+    # -------------------------------
+    if not st.session_state.initialized:
+        with st.spinner("Loading RAG system..."):
+            rag_system, vector_store, doc_processor, num_chunks = initialize_rag()
+            if rag_system:
+                st.session_state.rag_system = rag_system
+                st.session_state.vector_store = vector_store
+                st.session_state.doc_processor = doc_processor
+                st.session_state.initialized = True
+                st.success(f"✅ System ready! ({num_chunks} chunks indexed)")
+    # -------------------------------------------------
+    # Sidebar: PDF Upload
+    # -------------------------------------------------
+    st.sidebar.header("📄 Upload Project PDFs")
+    uploaded_files = st.sidebar.file_uploader(
+        "Upload PDF documents",
+        type="pdf",
+        accept_multiple_files=True
+    )
+    if uploaded_files:
+        if not st.session_state.initialized:
+            # doc_processor / vector_store are only stored after a successful
+            # initialization; without them the upload cannot be indexed.
+            st.sidebar.error("RAG system is not initialized; PDFs cannot be indexed.")
+        elif _index_new_pdfs(uploaded_files):
+            st.sidebar.success("📚 PDFs indexed successfully!")
+    st.markdown("---")
+    # -------------------------------------------------
+    # Query input
+    # -------------------------------------------------
+    with st.form("search_form"):
+        question = st.text_input(
+            "Enter your question:",
+            placeholder="Ask something about the documents..."
+        )
+        submit = st.form_submit_button("🔍 Search")
+    # -------------------------------------------------
+    # Query processing
+    # -------------------------------------------------
+    if submit and question:
+        if st.session_state.rag_system:
+            with st.spinner("Searching..."):
+                start_time = time.time()
+                result = st.session_state.rag_system.run(question)
+                elapsed_time = time.time() - start_time
+                # Save history
+                st.session_state.history.append(
+                    {
+                        "question": question,
+                        "answer": result["answer"],
+                        "time": elapsed_time,
+                    }
+                )
+                # Display answer
+                st.markdown("### 💡 Answer")
+                st.success(result["answer"])
+                # Show retrieved documents
+                with st.expander("📄 Source Documents"):
+                    for i, doc in enumerate(result["retrieved_docs"], 1):
+                        st.text_area(
+                            f"Document {i}",
+                            doc.page_content[:300] + "...",
+                            height=100,
+                            disabled=True,
+                        )
+                st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")
+        else:
+            # Previously the question was dropped without any feedback.
+            st.error("RAG system is not initialized, so the question cannot be answered.")
+    # -------------------------------------------------
+    # Search history
+    # -------------------------------------------------
+    if st.session_state.history:
+        st.markdown("---")
+        st.markdown("### 📜 Recent Searches")
+        # Newest first, limited to the last three entries.
+        for item in reversed(st.session_state.history[-3:]):
+            st.markdown(f"**Q:** {item['question']}")
+            st.markdown(f"**A:** {item['answer'][:200]}...")
+            st.caption(f"Time: {item['time']:.2f}s")
+# -------------------------------------------------
+# Entry point
+# -------------------------------------------------
+# Start the UI only when this file is executed as the main script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()