Spaces:

Dinesh310
/

demo2

Sleeping

App Files Files Community

Dinesh310 committed on Jan 24

Commit

383ee63

verified ·

1 Parent(s): 8586611

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +118 -166

streamlit_app.py CHANGED Viewed

@@ -1,12 +1,11 @@
 import streamlit as st
 from pathlib import Path
 import sys
-import os
-import hashlib
-# -------------------------------------------------
-# Path setup
-# -------------------------------------------------
 sys.path.append(str(Path(__file__).parent))
 from src.config.config import Config
@@ -14,186 +13,139 @@ from src.document_ingestion.document_processor import DocumentProcessor
 from src.vectorstore.vectorstore import VectorStore
 from src.graph_builder.graph_builder import GraphBuilder
-# -------------------------------------------------
-# Page config
-# -------------------------------------------------
 st.set_page_config(
-    page_title="Agentic PDF RAG",
-    page_icon="🧠",
-    layout="wide"
 )
-# -------------------------------------------------
-# Styles
-# -------------------------------------------------
 st.markdown("""
-<style>
-.stChatMessage { border-radius: 10px; margin-bottom: 10px; }
-.stSidebar { background-color: #f8f9fa; }
-</style>
 """, unsafe_allow_html=True)
-# -------------------------------------------------
-# Session state
-# -------------------------------------------------
 def init_session_state():
-    defaults = {
-        "rag_system": None,
-        "messages": [
-            {"role": "assistant", "content": "👋 Upload one or more PDFs from the sidebar and start chatting across them."}
-        ],
-        "processed_files": [],
-        "kb_hash": None
-    }
-    for k, v in defaults.items():
-        if k not in st.session_state:
-            st.session_state[k] = v
-# -------------------------------------------------
-# Helpers
-# -------------------------------------------------
-def compute_files_hash(files):
-    """Prevents rebuilding KB for same PDFs"""
-    hasher = hashlib.md5()
-    for f in files:
-        hasher.update(f.name.encode())
-        hasher.update(f.getvalue())
-    return hasher.hexdigest()
-def process_documents(uploaded_files):
-    """
-    Ingests multiple PDFs into ONE knowledge base
-    """
     try:
-        processor = DocumentProcessor(
             chunk_size=Config.CHUNK_SIZE,
             chunk_overlap=Config.CHUNK_OVERLAP
         )
-        temp_dir = Path("temp_uploads")
-        temp_dir.mkdir(exist_ok=True)
-        all_docs = []
-        for file in uploaded_files:
-            temp_path = temp_dir / file.name
-            with open(temp_path, "wb") as f:
-                f.write(file.getvalue())
-            docs = processor.process_pdf(str(temp_path))
-            # Ensure filename metadata exists
-            for d in docs:
-                d.metadata["source"] = file.name
-            all_docs.extend(docs)
-            os.remove(temp_path)
-        if not all_docs:
-            return None, 0
         vector_store = VectorStore()
-        vector_store.create_vectorstore(all_docs)
-        graph = GraphBuilder(
             retriever=vector_store.get_retriever(),
-            llm=Config.get_llm()
         )
-        graph.build()
-        return graph, len(all_docs)
     except Exception as e:
-        st.error(f"Ingestion failed: {e}")
         return None, 0
-# -------------------------------------------------
-# Main app
-# -------------------------------------------------
 def main():
     init_session_state()
-    # ---------------- Sidebar ----------------
-    with st.sidebar:
-        st.header("📄 Document Ingestion")
-        uploaded_files = st.file_uploader(
-            "Upload PDF files",
-            type="pdf",
-            accept_multiple_files=True
         )
-        if st.button("🛠️ Build Knowledge Base", type="primary"):
-            if not uploaded_files:
-                st.warning("Upload at least one PDF.")
-            else:
-                new_hash = compute_files_hash(uploaded_files)
-                if new_hash == st.session_state.kb_hash:
-                    st.info("Knowledge base already built for these PDFs.")
-                else:
-                    with st.spinner("Indexing PDFs and building agent graph..."):
-                        rag, chunks = process_documents(uploaded_files)
-                        if rag:
-                            st.session_state.rag_system = rag
-                            st.session_state.processed_files = [f.name for f in uploaded_files]
-                            st.session_state.kb_hash = new_hash
-                            msg = (
-                                f"✅ Knowledge base ready!\n\n"
-                                f"Indexed **{chunks} chunks** from:\n"
-                                + "\n".join(f"- {f}" for f in st.session_state.processed_files)
-                            )
-                            st.session_state.messages.append({"role": "assistant", "content": msg})
-                            st.rerun()
-        if st.session_state.processed_files:
-            st.markdown("---")
-            st.subheader("📚 Loaded PDFs")
-            for f in st.session_state.processed_files:
-                st.caption(f"✔ {f}")
-            if st.button("🧹 Clear Chat"):
-                st.session_state.messages = [
-                    {"role": "assistant", "content": "Chat cleared. Ask anything about the loaded PDFs!"}
-                ]
-                st.rerun()
-    # ---------------- Main Chat ----------------
-    st.title("🔍 Agentic Multi-PDF Chat")
-    st.caption("Ask questions across all uploaded documents")
-    for msg in st.session_state.messages:
-        with st.chat_message(msg["role"]):
-            st.markdown(msg["content"])
-    if prompt := st.chat_input("Ask a question across all PDFs..."):
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        st.chat_message("user").markdown(prompt)
-        if not st.session_state.rag_system:
-            st.warning("Build the knowledge base first.")
-            return
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking across documents..."):
-                result = st.session_state.rag_system.run(prompt)
-                answer = result.get("answer", "No clear answer found.")
-                st.markdown(answer)
-                if result.get("retrieved_docs"):
-                    with st.expander("📌 Sources"):
-                        for i, doc in enumerate(result["retrieved_docs"], 1):
-                            st.markdown(
-                                f"**{i}. {doc.metadata.get('source', 'Unknown')} "
-                                f"(Page {doc.metadata.get('page', 'N/A')})**"
-                            )
-                            st.info(doc.page_content[:400] + "...")
-                st.session_state.messages.append({"role": "assistant", "content": answer})
-# -------------------------------------------------
 if __name__ == "__main__":
-    main()

+"""Streamlit UI for Agentic RAG System - Simplified Version"""
 import streamlit as st
 from pathlib import Path
 import sys
+import time
+# Add src to path
 sys.path.append(str(Path(__file__).parent))
 from src.config.config import Config
 from src.vectorstore.vectorstore import VectorStore
 from src.graph_builder.graph_builder import GraphBuilder
+# Page configuration: browser-tab title and icon, with a centered layout.
 st.set_page_config(
+    page_title="🤖 RAG Search",
+    page_icon="🔍",
+    layout="centered"
 )
+# Inject CSS so buttons inside `.stButton` containers render full-width,
+# green, and bold. unsafe_allow_html=True is passed so the <style> tag is
+# emitted as HTML instead of being shown as escaped text.
 st.markdown("""
+    <style>
+    .stButton > button {
+        width: 100%;
+        background-color: #4CAF50;
+        color: white;
+        font-weight: bold;
+    }
+    </style>
 """, unsafe_allow_html=True)
 def init_session_state():
+    """Seed ``st.session_state`` with a default for every key not yet set.
+
+    Keys that already hold a value are left untouched.
+    """
+    defaults = {
+        'rag_system': None,    # object returned by initialize_rag()
+        'initialized': False,  # set to True by main() after a successful load
+        'history': [],         # past searches, oldest first
+    }
+    for key, value in defaults.items():
+        if key not in st.session_state:
+            st.session_state[key] = value
+@st.cache_resource
+def _build_rag_system():
+    """Build the RAG pipeline; the result is cached by ``st.cache_resource``.
+
+    Errors are deliberately not caught here. The cache stores whatever this
+    function returns, so returning a failure value would cache the failure
+    and later calls would never retry. Raising stores nothing.
+
+    Returns:
+        tuple: ``(graph_builder, num_documents)`` where ``graph_builder`` is
+        the built ``GraphBuilder`` and ``num_documents`` is the number of
+        documents returned by ``process_urls``.
+    """
+    llm = Config.get_llm()
+    doc_processor = DocumentProcessor(
+        chunk_size=Config.CHUNK_SIZE,
+        chunk_overlap=Config.CHUNK_OVERLAP
+    )
+    vector_store = VectorStore()
+    # Index the default URL set from the project configuration.
+    documents = doc_processor.process_urls(Config.DEFAULT_URLS)
+    vector_store.create_vectorstore(documents)
+    graph_builder = GraphBuilder(
+        retriever=vector_store.get_retriever(),
+        llm=llm
+    )
+    graph_builder.build()
+    return graph_builder, len(documents)
+def initialize_rag():
+    """Return the shared RAG system, building it on first use.
+
+    The expensive build lives in the cached ``_build_rag_system``. This
+    wrapper is not cached, so a failed build is reported to the user and
+    attempted again on the next call.
+
+    Returns:
+        tuple: ``(graph_builder, num_documents)`` on success, or
+        ``(None, 0)`` if any step of the build raised.
+    """
+    try:
+        return _build_rag_system()
+    except Exception as e:
+        # UI boundary: show the problem in the page instead of crashing.
+        st.error(f"Failed to initialize: {str(e)}")
+        return None, 0
 def main():
+    """Render the search page.
+
+    Loads the RAG system, answers the question submitted through the search
+    form, and lists the three most recent searches.
+    """
     init_session_state()
+    # Title
+    st.title("🔍 RAG Document Search")
+    st.markdown("Ask questions about the loaded documents")
+    # `initialized` stays False after a failed load, so initialize_rag() is
+    # called again on the next rerun.
+    if not st.session_state.initialized:
+        with st.spinner("Loading system..."):
+            rag_system, num_chunks = initialize_rag()
+            if rag_system:
+                st.session_state.rag_system = rag_system
+                st.session_state.initialized = True
+                st.success(f"✅ System ready! ({num_chunks} document chunks loaded)")
+    st.markdown("---")
+    # Search interface
+    with st.form("search_form"):
+        question = st.text_input(
+            "Enter your question:",
+            placeholder="What would you like to know?"
         )
+        submit = st.form_submit_button("🔍 Search")
+    # Ignore whitespace-only input instead of sending it to the RAG system.
+    question = question.strip()
+    if submit and question:
+        if st.session_state.rag_system:
+            with st.spinner("Searching..."):
+                # perf_counter() is the clock intended for timing short
+                # durations.
+                start_time = time.perf_counter()
+                result = st.session_state.rag_system.run(question)
+                elapsed_time = time.perf_counter() - start_time
+                # .get() keeps a result without these keys from raising
+                # KeyError. NOTE(review): assumes run() returns a dict-like
+                # object - confirm against GraphBuilder.run.
+                answer = result.get('answer', "No clear answer found.")
+                retrieved_docs = result.get('retrieved_docs') or []
+                # Add to history
+                st.session_state.history.append({
+                    'question': question,
+                    'answer': answer,
+                    'time': elapsed_time
+                })
+                # Display answer
+                st.markdown("### 💡 Answer")
+                st.success(answer)
+                # Only offer the sources expander when there is something in it.
+                if retrieved_docs:
+                    with st.expander("📄 Source Documents"):
+                        for i, doc in enumerate(retrieved_docs, 1):
+                            st.text_area(
+                                f"Document {i}",
+                                _preview(doc.page_content, 300),
+                                height=100,
+                                disabled=True
+                            )
+                st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")
+        else:
+            # The RAG system could not be loaded; say so instead of silently
+            # dropping the question.
+            st.warning("The system is not ready, so the question could not be answered.")
+    # Show history
+    if st.session_state.history:
+        st.markdown("---")
+        st.markdown("### 📜 Recent Searches")
+        for item in reversed(st.session_state.history[-3:]):  # last 3, newest first
+            with st.container():
+                st.markdown(f"**Q:** {item['question']}")
+                st.markdown(f"**A:** {_preview(item['answer'], 200)}")
+                st.caption(f"Time: {item['time']:.2f}s")
+                st.markdown("")
+def _preview(text, limit):
+    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
+    return text if len(text) <= limit else text[:limit] + "..."
 if __name__ == "__main__":
+    main()