Update streamlit_app.py
Browse files- streamlit_app.py +31 -36
streamlit_app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from pathlib import Path
|
| 3 |
import sys
|
| 4 |
-
import time
|
| 5 |
import os
|
| 6 |
|
| 7 |
# Add src to path to ensure imports work correctly
|
|
@@ -40,10 +39,11 @@ def init_session_state():
|
|
| 40 |
|
| 41 |
def process_documents(uploaded_files):
|
| 42 |
"""
|
| 43 |
-
Handles
|
| 44 |
-
1.
|
| 45 |
-
2.
|
| 46 |
-
3.
|
|
|
|
| 47 |
"""
|
| 48 |
try:
|
| 49 |
doc_processor = DocumentProcessor(
|
|
@@ -53,17 +53,22 @@ def process_documents(uploaded_files):
|
|
| 53 |
|
| 54 |
all_docs = []
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
for uploaded_file in uploaded_files:
|
| 57 |
-
#
|
| 58 |
-
temp_path =
|
| 59 |
with open(temp_path, "wb") as f:
|
| 60 |
f.write(uploaded_file.getvalue())
|
| 61 |
|
| 62 |
-
# Process
|
|
|
|
| 63 |
docs = doc_processor.process_pdf(str(temp_path))
|
| 64 |
all_docs.extend(docs)
|
| 65 |
|
| 66 |
-
# Clean up the temporary file immediately
|
| 67 |
if temp_path.exists():
|
| 68 |
os.remove(temp_path)
|
| 69 |
|
|
@@ -71,11 +76,11 @@ def process_documents(uploaded_files):
|
|
| 71 |
st.error("No text could be extracted from the uploaded files.")
|
| 72 |
return None, 0
|
| 73 |
|
| 74 |
-
# Create Vector Store
|
| 75 |
vector_store = VectorStore()
|
| 76 |
vector_store.create_vectorstore(all_docs)
|
| 77 |
|
| 78 |
-
# Build the Agentic Graph
|
| 79 |
graph_builder = GraphBuilder(
|
| 80 |
retriever=vector_store.get_retriever(),
|
| 81 |
llm=Config.get_llm()
|
|
@@ -93,11 +98,9 @@ def main():
|
|
| 93 |
|
| 94 |
# --- Sidebar UI ---
|
| 95 |
with st.sidebar:
|
| 96 |
-
st.header("
|
| 97 |
-
st.info("Upload your PDFs here to provide context to the AI.")
|
| 98 |
-
|
| 99 |
uploaded_files = st.file_uploader(
|
| 100 |
-
"
|
| 101 |
type="pdf",
|
| 102 |
accept_multiple_files=True,
|
| 103 |
help="You can select multiple files at once."
|
|
@@ -110,24 +113,22 @@ def main():
|
|
| 110 |
if rag_system:
|
| 111 |
st.session_state.rag_system = rag_system
|
| 112 |
st.session_state.processed_files = [f.name for f in uploaded_files]
|
| 113 |
-
st.success(f"Indexed {num_chunks} chunks from {len(uploaded_files)} files.")
|
| 114 |
|
| 115 |
-
#
|
| 116 |
-
st.session_state.
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
})
|
| 120 |
else:
|
| 121 |
st.warning("Please upload at least one PDF first.")
|
| 122 |
|
| 123 |
if st.session_state.processed_files:
|
| 124 |
st.markdown("---")
|
| 125 |
-
st.subheader("
|
| 126 |
for f in st.session_state.processed_files:
|
| 127 |
st.caption(f"✅ {f}")
|
| 128 |
|
| 129 |
-
if st.button("Clear Chat"):
|
| 130 |
-
st.session_state.messages = [{"role": "assistant", "content": "Chat cleared.
|
| 131 |
st.rerun()
|
| 132 |
|
| 133 |
# --- Main Chat UI ---
|
|
@@ -141,39 +142,33 @@ def main():
|
|
| 141 |
|
| 142 |
# Chat Input logic
|
| 143 |
if prompt := st.chat_input("Ask a question about your documents..."):
|
| 144 |
-
# Show user message
|
| 145 |
st.chat_message("user").markdown(prompt)
|
| 146 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 147 |
|
| 148 |
-
# Process via RAG
|
| 149 |
if st.session_state.rag_system:
|
| 150 |
with st.chat_message("assistant"):
|
| 151 |
with st.spinner("Agent searching knowledge base..."):
|
| 152 |
try:
|
| 153 |
-
# Call
|
| 154 |
result = st.session_state.rag_system.run(prompt)
|
| 155 |
answer = result.get('answer', "I couldn't find a definitive answer.")
|
| 156 |
-
|
| 157 |
st.markdown(answer)
|
| 158 |
|
| 159 |
-
# Show Source Citations
|
| 160 |
if result.get('retrieved_docs'):
|
| 161 |
with st.expander("🔍 View Referenced Context"):
|
| 162 |
for i, doc in enumerate(result['retrieved_docs'], 1):
|
| 163 |
-
source_name = doc.metadata.get('source', 'Unknown')
|
| 164 |
page_num = doc.metadata.get('page', 'N/A')
|
| 165 |
-
st.markdown(f"**Source {i}:** {
|
| 166 |
st.info(doc.page_content[:400] + "...")
|
| 167 |
|
| 168 |
st.session_state.messages.append({"role": "assistant", "content": answer})
|
| 169 |
|
| 170 |
except Exception as e:
|
| 171 |
-
|
| 172 |
-
st.error(error_text)
|
| 173 |
-
st.session_state.messages.append({"role": "assistant", "content": error_text})
|
| 174 |
else:
|
| 175 |
-
|
| 176 |
-
st.warning("I don't have any documents in my memory yet. Please upload PDFs in the sidebar and click 'Build Knowledge Base'.")
|
| 177 |
|
| 178 |
if __name__ == "__main__":
|
| 179 |
main()
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from pathlib import Path
|
| 3 |
import sys
|
|
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
# Add src to path to ensure imports work correctly
|
|
|
|
| 39 |
|
| 40 |
def process_documents(uploaded_files):
|
| 41 |
"""
|
| 42 |
+
Handles multi-file ingestion:
|
| 43 |
+
1. Loops through all uploaded files
|
| 44 |
+
2. Saves each to a temp path
|
| 45 |
+
3. Aggregates all document chunks
|
| 46 |
+
4. Initializes VectorStore and Graph once
|
| 47 |
"""
|
| 48 |
try:
|
| 49 |
doc_processor = DocumentProcessor(
|
|
|
|
| 53 |
|
| 54 |
all_docs = []
|
| 55 |
|
| 56 |
+
# Ensure a temporary directory exists
|
| 57 |
+
temp_dir = Path("temp_uploads")
|
| 58 |
+
temp_dir.mkdir(exist_ok=True)
|
| 59 |
+
|
| 60 |
for uploaded_file in uploaded_files:
|
| 61 |
+
# 1. Save uploaded bytes to a local string path
|
| 62 |
+
temp_path = temp_dir / uploaded_file.name
|
| 63 |
with open(temp_path, "wb") as f:
|
| 64 |
f.write(uploaded_file.getvalue())
|
| 65 |
|
| 66 |
+
# 2. Process this specific PDF into chunks
|
| 67 |
+
# Assuming your DocumentProcessor.process_pdf takes a string path
|
| 68 |
docs = doc_processor.process_pdf(str(temp_path))
|
| 69 |
all_docs.extend(docs)
|
| 70 |
|
| 71 |
+
# 3. Clean up the temporary file immediately after processing
|
| 72 |
if temp_path.exists():
|
| 73 |
os.remove(temp_path)
|
| 74 |
|
|
|
|
| 76 |
st.error("No text could be extracted from the uploaded files.")
|
| 77 |
return None, 0
|
| 78 |
|
| 79 |
+
# 4. Create Vector Store with the combined list of all chunks
|
| 80 |
vector_store = VectorStore()
|
| 81 |
vector_store.create_vectorstore(all_docs)
|
| 82 |
|
| 83 |
+
# 5. Build the Agentic Graph using the compiled retriever
|
| 84 |
graph_builder = GraphBuilder(
|
| 85 |
retriever=vector_store.get_retriever(),
|
| 86 |
llm=Config.get_llm()
|
|
|
|
| 98 |
|
| 99 |
# --- Sidebar UI ---
|
| 100 |
with st.sidebar:
|
| 101 |
+
st.header("Document Ingestion")
|
|
|
|
|
|
|
| 102 |
uploaded_files = st.file_uploader(
|
| 103 |
+
"Upload PDF files",
|
| 104 |
type="pdf",
|
| 105 |
accept_multiple_files=True,
|
| 106 |
help="You can select multiple files at once."
|
|
|
|
| 113 |
if rag_system:
|
| 114 |
st.session_state.rag_system = rag_system
|
| 115 |
st.session_state.processed_files = [f.name for f in uploaded_files]
|
|
|
|
| 116 |
|
| 117 |
+
# Add success notification to chat
|
| 118 |
+
confirm_msg = f"I have successfully indexed {num_chunks} chunks from: {', '.join(st.session_state.processed_files)}."
|
| 119 |
+
st.session_state.messages.append({"role": "assistant", "content": confirm_msg})
|
| 120 |
+
st.rerun() # Refresh to show the message immediately
|
|
|
|
| 121 |
else:
|
| 122 |
st.warning("Please upload at least one PDF first.")
|
| 123 |
|
| 124 |
if st.session_state.processed_files:
|
| 125 |
st.markdown("---")
|
| 126 |
+
st.subheader("Loaded Documents")
|
| 127 |
for f in st.session_state.processed_files:
|
| 128 |
st.caption(f"✅ {f}")
|
| 129 |
|
| 130 |
+
if st.button("Clear Chat History"):
|
| 131 |
+
st.session_state.messages = [{"role": "assistant", "content": "Chat cleared. How can I help with the current documents?"}]
|
| 132 |
st.rerun()
|
| 133 |
|
| 134 |
# --- Main Chat UI ---
|
|
|
|
| 142 |
|
| 143 |
# Chat Input logic
|
| 144 |
if prompt := st.chat_input("Ask a question about your documents..."):
|
|
|
|
| 145 |
st.chat_message("user").markdown(prompt)
|
| 146 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 147 |
|
|
|
|
| 148 |
if st.session_state.rag_system:
|
| 149 |
with st.chat_message("assistant"):
|
| 150 |
with st.spinner("Agent searching knowledge base..."):
|
| 151 |
try:
|
| 152 |
+
# Call your GraphBuilder's run method
|
| 153 |
result = st.session_state.rag_system.run(prompt)
|
| 154 |
answer = result.get('answer', "I couldn't find a definitive answer.")
|
|
|
|
| 155 |
st.markdown(answer)
|
| 156 |
|
| 157 |
+
# Show Source Citations in an Expander
|
| 158 |
if result.get('retrieved_docs'):
|
| 159 |
with st.expander("🔍 View Referenced Context"):
|
| 160 |
for i, doc in enumerate(result['retrieved_docs'], 1):
|
| 161 |
+
source_name = Path(doc.metadata.get('source', 'Unknown')).name
|
| 162 |
page_num = doc.metadata.get('page', 'N/A')
|
| 163 |
+
st.markdown(f"**Source {i}:** {source_name} (Page {page_num})")
|
| 164 |
st.info(doc.page_content[:400] + "...")
|
| 165 |
|
| 166 |
st.session_state.messages.append({"role": "assistant", "content": answer})
|
| 167 |
|
| 168 |
except Exception as e:
|
| 169 |
+
st.error(f"Search Error: {str(e)}")
|
|
|
|
|
|
|
| 170 |
else:
|
| 171 |
+
st.warning("Please upload and build the knowledge base first!")
|
|
|
|
| 172 |
|
| 173 |
if __name__ == "__main__":
|
| 174 |
main()
|