Spaces:

Dinesh310
/

demo2

Sleeping

App Files Community

Dinesh310 committed on Jan 24

Commit

6ff38d9

verified ·

1 Parent(s): 6557eac

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +31 -36

streamlit_app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import streamlit as st
 from pathlib import Path
 import sys
-import time
 import os
 # Add src to path to ensure imports work correctly
@@ -40,10 +39,11 @@ def init_session_state():
 def process_documents(uploaded_files):
     """
-    Handles the heavy lifting:
-    1. Saves uploaded bytes to temp files
-    2. Uses DocumentProcessor to chunk text
-    3. Builds VectorStore and Graph
     """
     try:
         doc_processor = DocumentProcessor(
@@ -53,17 +53,22 @@ def process_documents(uploaded_files):
         all_docs = []
         for uploaded_file in uploaded_files:
-            # Create a temporary local file for the loader to read
-            temp_path = Path(f"temp_{uploaded_file.name}")
             with open(temp_path, "wb") as f:
                 f.write(uploaded_file.getvalue())
-            # Process the PDF using the new method we added to DocumentProcessor
             docs = doc_processor.process_pdf(str(temp_path))
             all_docs.extend(docs)
-            # Clean up the temporary file immediately
             if temp_path.exists():
                 os.remove(temp_path)
@@ -71,11 +76,11 @@ def process_documents(uploaded_files):
             st.error("No text could be extracted from the uploaded files.")
             return None, 0
-        # Create Vector Store
         vector_store = VectorStore()
         vector_store.create_vectorstore(all_docs)
-        # Build the Agentic Graph
         graph_builder = GraphBuilder(
             retriever=vector_store.get_retriever(),
             llm=Config.get_llm()
@@ -93,11 +98,9 @@ def main():
     # --- Sidebar UI ---
     with st.sidebar:
-        st.header("📂 Document Manager")
-        st.info("Upload your PDFs here to provide context to the AI.")
         uploaded_files = st.file_uploader(
-            "Select PDF files",
             type="pdf",
             accept_multiple_files=True,
             help="You can select multiple files at once."
@@ -110,24 +113,22 @@ def main():
                     if rag_system:
                         st.session_state.rag_system = rag_system
                         st.session_state.processed_files = [f.name for f in uploaded_files]
-                        st.success(f"Indexed {num_chunks} chunks from {len(uploaded_files)} files.")
-                        # Notify the user in the chat
-                        st.session_state.messages.append({
-                            "role": "assistant",
-                            "content": f"I have successfully indexed: {', '.join(st.session_state.processed_files)}. I'm ready for your questions!"
-                        })
             else:
                 st.warning("Please upload at least one PDF first.")
         if st.session_state.processed_files:
             st.markdown("---")
-            st.subheader("Current Context")
             for f in st.session_state.processed_files:
                 st.caption(f"✅ {f}")
-            if st.button("Clear Chat"):
-                st.session_state.messages = [{"role": "assistant", "content": "Chat cleared. Ask me anything about the loaded documents!"}]
                 st.rerun()
     # --- Main Chat UI ---
@@ -141,39 +142,33 @@ def main():
     # Chat Input logic
     if prompt := st.chat_input("Ask a question about your documents..."):
-        # Show user message
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
-        # Process via RAG
         if st.session_state.rag_system:
             with st.chat_message("assistant"):
                 with st.spinner("Agent searching knowledge base..."):
                     try:
-                        # Call the Agentic Graph
                         result = st.session_state.rag_system.run(prompt)
                         answer = result.get('answer', "I couldn't find a definitive answer.")
                         st.markdown(answer)
-                        # Show Source Citations
                         if result.get('retrieved_docs'):
                             with st.expander("🔍 View Referenced Context"):
                                 for i, doc in enumerate(result['retrieved_docs'], 1):
-                                    source_name = doc.metadata.get('source', 'Unknown')
                                     page_num = doc.metadata.get('page', 'N/A')
-                                    st.markdown(f"**Source {i}:** {Path(source_name).name} (Page {page_num})")
                                     st.info(doc.page_content[:400] + "...")
                         st.session_state.messages.append({"role": "assistant", "content": answer})
                     except Exception as e:
-                        error_text = f"An error occurred while searching: {str(e)}"
-                        st.error(error_text)
-                        st.session_state.messages.append({"role": "assistant", "content": error_text})
         else:
-            with st.chat_message("assistant"):
-                st.warning("I don't have any documents in my memory yet. Please upload PDFs in the sidebar and click 'Build Knowledge Base'.")
 if __name__ == "__main__":
     main()

 import streamlit as st
 from pathlib import Path
 import sys
 import os
 # Add src to path to ensure imports work correctly
 def process_documents(uploaded_files):
     """
+    Handles multi-file ingestion:
+    1. Loops through all uploaded files
+    2. Saves each to a temp path
+    3. Aggregates all document chunks
+    4. Initializes VectorStore and Graph once
     """
     try:
         doc_processor = DocumentProcessor(
         all_docs = []
+        # Ensure a temporary directory exists
+        temp_dir = Path("temp_uploads")
+        temp_dir.mkdir(exist_ok=True)
         for uploaded_file in uploaded_files:
+            # 1. Save uploaded bytes to a local string path
+            temp_path = temp_dir / uploaded_file.name
             with open(temp_path, "wb") as f:
                 f.write(uploaded_file.getvalue())
+            # 2. Process this specific PDF into chunks
+            # Assuming your DocumentProcessor.process_pdf takes a string path
             docs = doc_processor.process_pdf(str(temp_path))
             all_docs.extend(docs)
+            # 3. Clean up the temporary file immediately after processing
             if temp_path.exists():
                 os.remove(temp_path)
             st.error("No text could be extracted from the uploaded files.")
             return None, 0
+        # 4. Create Vector Store with the combined list of all chunks
         vector_store = VectorStore()
         vector_store.create_vectorstore(all_docs)
+        # 5. Build the Agentic Graph using the compiled retriever
         graph_builder = GraphBuilder(
             retriever=vector_store.get_retriever(),
             llm=Config.get_llm()
     # --- Sidebar UI ---
     with st.sidebar:
+        st.header("Document Ingestion")
         uploaded_files = st.file_uploader(
+            "Upload PDF files",
             type="pdf",
             accept_multiple_files=True,
             help="You can select multiple files at once."
                     if rag_system:
                         st.session_state.rag_system = rag_system
                         st.session_state.processed_files = [f.name for f in uploaded_files]
+                        # Add success notification to chat
+                        confirm_msg = f"I have successfully indexed {num_chunks} chunks from: {', '.join(st.session_state.processed_files)}."
+                        st.session_state.messages.append({"role": "assistant", "content": confirm_msg})
+                        st.rerun() # Refresh to show the message immediately
             else:
                 st.warning("Please upload at least one PDF first.")
         if st.session_state.processed_files:
             st.markdown("---")
+            st.subheader("Loaded Documents")
             for f in st.session_state.processed_files:
                 st.caption(f"✅ {f}")
+            if st.button("Clear Chat History"):
+                st.session_state.messages = [{"role": "assistant", "content": "Chat cleared. How can I help with the current documents?"}]
                 st.rerun()
     # --- Main Chat UI ---
     # Chat Input logic
     if prompt := st.chat_input("Ask a question about your documents..."):
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
         if st.session_state.rag_system:
             with st.chat_message("assistant"):
                 with st.spinner("Agent searching knowledge base..."):
                     try:
+                        # Call your GraphBuilder's run method
                         result = st.session_state.rag_system.run(prompt)
                         answer = result.get('answer', "I couldn't find a definitive answer.")
                         st.markdown(answer)
+                        # Show Source Citations in an Expander
                         if result.get('retrieved_docs'):
                             with st.expander("🔍 View Referenced Context"):
                                 for i, doc in enumerate(result['retrieved_docs'], 1):
+                                    source_name = Path(doc.metadata.get('source', 'Unknown')).name
                                     page_num = doc.metadata.get('page', 'N/A')
+                                    st.markdown(f"**Source {i}:** {source_name} (Page {page_num})")
                                     st.info(doc.page_content[:400] + "...")
                         st.session_state.messages.append({"role": "assistant", "content": answer})
                     except Exception as e:
+                        st.error(f"Search Error: {str(e)}")
         else:
+            st.warning("Please upload and build the knowledge base first!")
 if __name__ == "__main__":
     main()