Spaces:

Dinesh310
/

demo2

Sleeping

App Files Files Community

Dinesh310 committed on Jan 24

Commit

6557eac

verified ·

1 Parent(s): 1ba8003

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +89 -58

streamlit_app.py CHANGED Viewed

@@ -2,8 +2,9 @@ import streamlit as st
 from pathlib import Path
 import sys
 import time
-# Add src to path
 sys.path.append(str(Path(__file__).parent))
 from src.config.config import Config
@@ -11,27 +12,39 @@ from src.document_ingestion.document_processor import DocumentProcessor
 from src.vectorstore.vectorstore import VectorStore
 from src.graph_builder.graph_builder import GraphBuilder
-# Page configuration
 st.set_page_config(
-    page_title="🤖 PDF Agentic Chat",
-    page_icon="💬",
     layout="wide"
 )
 def init_session_state():
-    """Initialize session state variables for chat history and system state"""
     if 'rag_system' not in st.session_state:
         st.session_state.rag_system = None
     if 'messages' not in st.session_state:
-        # Initialize with a greeting
         st.session_state.messages = [
-            {"role": "assistant", "content": "Hi! Upload some PDFs in the sidebar, and I'll help you analyze them."}
         ]
     if 'processed_files' not in st.session_state:
         st.session_state.processed_files = []
-def process_new_documents(uploaded_files):
-    """Processes uploaded PDFs and initializes/updates the RAG system"""
     try:
         doc_processor = DocumentProcessor(
             chunk_size=Config.CHUNK_SIZE,
@@ -39,23 +52,30 @@ def process_new_documents(uploaded_files):
         )
         all_docs = []
         for uploaded_file in uploaded_files:
-            # Save temp file for processing
             temp_path = Path(f"temp_{uploaded_file.name}")
             with open(temp_path, "wb") as f:
                 f.write(uploaded_file.getvalue())
-            # Use the processor to extract text and chunk
-            docs = doc_processor.process_pdf(str(temp_path))
             all_docs.extend(docs)
-            # Cleanup temp file
-            temp_path.unlink()
-        # Build the vector store and graph
         vector_store = VectorStore()
         vector_store.create_vectorstore(all_docs)
         graph_builder = GraphBuilder(
             retriever=vector_store.get_retriever(),
             llm=Config.get_llm()
@@ -63,86 +83,97 @@ def process_new_documents(uploaded_files):
         graph_builder.build()
         return graph_builder, len(all_docs)
     except Exception as e:
-        st.error(f"Error processing documents: {str(e)}")
         return None, 0
 def main():
     init_session_state()
-    # --- Sidebar: Document Upload ---
     with st.sidebar:
-        st.title("📁 Document Portal")
         uploaded_files = st.file_uploader(
-            "Upload PDFs",
             type="pdf",
-            accept_multiple_files=True
         )
-        if st.button("🚀 Index Documents") and uploaded_files:
-            with st.spinner("Processing documents..."):
-                rag_system, num_chunks = process_new_documents(uploaded_files)
-                if rag_system:
-                    st.session_state.rag_system = rag_system
-                    st.session_state.processed_files = [f.name for f in uploaded_files]
-                    st.success(f"Successfully indexed {num_chunks} chunks.")
-                    # Add a status message to chat
-                    st.session_state.messages.append({
-                        "role": "assistant",
-                        "content": f"I've finished reading: {', '.join(st.session_state.processed_files)}. What would you like to know?"
-                    })
         if st.session_state.processed_files:
             st.markdown("---")
-            st.markdown("**Active Documents:**")
             for f in st.session_state.processed_files:
-                st.caption(f"📄 {f}")
-            if st.button("Clear Chat History"):
-                st.session_state.messages = [{"role": "assistant", "content": "Chat history cleared. How can I help?"}]
                 st.rerun()
-    # --- Main Chat Interface ---
-    st.title("💬 PDF AI Assistant")
-    # Display chat messages from history on app rerun
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
-    # React to user input
     if prompt := st.chat_input("Ask a question about your documents..."):
-        # Display user message in chat message container
         st.chat_message("user").markdown(prompt)
-        # Add user message to chat history
         st.session_state.messages.append({"role": "user", "content": prompt})
-        # Generate response
         if st.session_state.rag_system:
             with st.chat_message("assistant"):
-                with st.spinner("Thinking..."):
                     try:
-                        # Run the Agentic RAG pipeline
                         result = st.session_state.rag_system.run(prompt)
-                        response = result['answer']
-                        # Display response
-                        st.markdown(response)
-                        # Optional: Show sources in an expader inside the bubble
                         if result.get('retrieved_docs'):
-                            with st.expander("View Sources"):
                                 for i, doc in enumerate(result['retrieved_docs'], 1):
-                                    st.markdown(f"**Source {i}:**\n{doc.page_content[:500]}...")
-                        # Add assistant response to chat history
-                        st.session_state.messages.append({"role": "assistant", "content": response})
                     except Exception as e:
-                        error_msg = f"I encountered an error: {str(e)}"
-                        st.error(error_msg)
-                        st.session_state.messages.append({"role": "assistant", "content": error_msg})
         else:
             with st.chat_message("assistant"):
-                st.warning("Please upload and index some PDFs in the sidebar first!")
 if __name__ == "__main__":
     main()

 from pathlib import Path
 import sys
 import time
+import os
+# Add src to path to ensure imports work correctly
 sys.path.append(str(Path(__file__).parent))
 from src.config.config import Config
 from src.vectorstore.vectorstore import VectorStore
 from src.graph_builder.graph_builder import GraphBuilder
+# --- Page Configuration ---
 st.set_page_config(
+    page_title="Agentic PDF RAG",
+    page_icon="🧠",
     layout="wide"
 )
+# Custom CSS for chat styling
+st.markdown("""
+    <style>
+    .stChatMessage { border-radius: 10px; margin-bottom: 10px; }
+    .stSidebar { background-color: #f8f9fa; }
+    </style>
+""", unsafe_allow_html=True)
 def init_session_state():
+    """Initializes all required session state variables"""
     if 'rag_system' not in st.session_state:
         st.session_state.rag_system = None
     if 'messages' not in st.session_state:
         st.session_state.messages = [
+            {"role": "assistant", "content": "Hello! Please upload PDF documents in the sidebar to begin our technical deep-dive."}
         ]
     if 'processed_files' not in st.session_state:
         st.session_state.processed_files = []
+def process_documents(uploaded_files):
+    """
+    Handles the heavy lifting:
+    1. Saves uploaded bytes to temp files
+    2. Uses DocumentProcessor to chunk text
+    3. Builds VectorStore and Graph
+    """
     try:
         doc_processor = DocumentProcessor(
             chunk_size=Config.CHUNK_SIZE,
         )
         all_docs = []
         for uploaded_file in uploaded_files:
+            # Create a temporary local file for the loader to read
             temp_path = Path(f"temp_{uploaded_file.name}")
             with open(temp_path, "wb") as f:
                 f.write(uploaded_file.getvalue())
+            # Process the PDF using the new method we added to DocumentProcessor
+            docs = doc_processor.process_pdf(str(temp_path))
             all_docs.extend(docs)
+            # Clean up the temporary file immediately
+            if temp_path.exists():
+                os.remove(temp_path)
+        if not all_docs:
+            st.error("No text could be extracted from the uploaded files.")
+            return None, 0
+        # Create Vector Store
         vector_store = VectorStore()
         vector_store.create_vectorstore(all_docs)
+        # Build the Agentic Graph
         graph_builder = GraphBuilder(
             retriever=vector_store.get_retriever(),
             llm=Config.get_llm()
         graph_builder.build()
         return graph_builder, len(all_docs)
     except Exception as e:
+        st.error(f"Critical Error during ingestion: {str(e)}")
         return None, 0
 def main():
     init_session_state()
+    # --- Sidebar UI ---
     with st.sidebar:
+        st.header("📂 Document Manager")
+        st.info("Upload your PDFs here to provide context to the AI.")
         uploaded_files = st.file_uploader(
+            "Select PDF files",
             type="pdf",
+            accept_multiple_files=True,
+            help="You can select multiple files at once."
         )
+        if st.button("🛠️ Build Knowledge Base", type="primary"):
+            if uploaded_files:
+                with st.spinner("Analyzing PDF structure and generating embeddings..."):
+                    rag_system, num_chunks = process_documents(uploaded_files)
+                    if rag_system:
+                        st.session_state.rag_system = rag_system
+                        st.session_state.processed_files = [f.name for f in uploaded_files]
+                        st.success(f"Indexed {num_chunks} chunks from {len(uploaded_files)} files.")
+                        # Notify the user in the chat
+                        st.session_state.messages.append({
+                            "role": "assistant",
+                            "content": f"I have successfully indexed: {', '.join(st.session_state.processed_files)}. I'm ready for your questions!"
+                        })
+            else:
+                st.warning("Please upload at least one PDF first.")
         if st.session_state.processed_files:
             st.markdown("---")
+            st.subheader("Current Context")
             for f in st.session_state.processed_files:
+                st.caption(f"✅ {f}")
+            if st.button("Clear Chat"):
+                st.session_state.messages = [{"role": "assistant", "content": "Chat cleared. Ask me anything about the loaded documents!"}]
                 st.rerun()
+    # --- Main Chat UI ---
+    st.title("🔍 Agentic RAG Explorer")
+    st.caption("Powered by LangGraph & Vector Embeddings")
+    # Display existing chat history
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
+    # Chat Input logic
     if prompt := st.chat_input("Ask a question about your documents..."):
+        # Show user message
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
+        # Process via RAG
         if st.session_state.rag_system:
             with st.chat_message("assistant"):
+                with st.spinner("Agent searching knowledge base..."):
                     try:
+                        # Call the Agentic Graph
                         result = st.session_state.rag_system.run(prompt)
+                        answer = result.get('answer', "I couldn't find a definitive answer.")
+                        st.markdown(answer)
+                        # Show Source Citations
                         if result.get('retrieved_docs'):
+                            with st.expander("🔍 View Referenced Context"):
                                 for i, doc in enumerate(result['retrieved_docs'], 1):
+                                    source_name = doc.metadata.get('source', 'Unknown')
+                                    page_num = doc.metadata.get('page', 'N/A')
+                                    st.markdown(f"**Source {i}:** {Path(source_name).name} (Page {page_num})")
+                                    st.info(doc.page_content[:400] + "...")
+                        st.session_state.messages.append({"role": "assistant", "content": answer})
                     except Exception as e:
+                        error_text = f"An error occurred while searching: {str(e)}"
+                        st.error(error_text)
+                        st.session_state.messages.append({"role": "assistant", "content": error_text})
         else:
             with st.chat_message("assistant"):
+                st.warning("I don't have any documents in my memory yet. Please upload PDFs in the sidebar and click 'Build Knowledge Base'.")
 if __name__ == "__main__":
     main()