Dinesh310 committed on
Commit
6ff38d9
·
verified ·
1 Parent(s): 6557eac

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +31 -36
streamlit_app.py CHANGED
@@ -1,7 +1,6 @@
1
  import streamlit as st
2
  from pathlib import Path
3
  import sys
4
- import time
5
  import os
6
 
7
  # Add src to path to ensure imports work correctly
@@ -40,10 +39,11 @@ def init_session_state():
40
 
41
  def process_documents(uploaded_files):
42
  """
43
- Handles the heavy lifting:
44
- 1. Saves uploaded bytes to temp files
45
- 2. Uses DocumentProcessor to chunk text
46
- 3. Builds VectorStore and Graph
 
47
  """
48
  try:
49
  doc_processor = DocumentProcessor(
@@ -53,17 +53,22 @@ def process_documents(uploaded_files):
53
 
54
  all_docs = []
55
 
 
 
 
 
56
  for uploaded_file in uploaded_files:
57
- # Create a temporary local file for the loader to read
58
- temp_path = Path(f"temp_{uploaded_file.name}")
59
  with open(temp_path, "wb") as f:
60
  f.write(uploaded_file.getvalue())
61
 
62
- # Process the PDF using the new method we added to DocumentProcessor
 
63
  docs = doc_processor.process_pdf(str(temp_path))
64
  all_docs.extend(docs)
65
 
66
- # Clean up the temporary file immediately
67
  if temp_path.exists():
68
  os.remove(temp_path)
69
 
@@ -71,11 +76,11 @@ def process_documents(uploaded_files):
71
  st.error("No text could be extracted from the uploaded files.")
72
  return None, 0
73
 
74
- # Create Vector Store
75
  vector_store = VectorStore()
76
  vector_store.create_vectorstore(all_docs)
77
 
78
- # Build the Agentic Graph
79
  graph_builder = GraphBuilder(
80
  retriever=vector_store.get_retriever(),
81
  llm=Config.get_llm()
@@ -93,11 +98,9 @@ def main():
93
 
94
  # --- Sidebar UI ---
95
  with st.sidebar:
96
- st.header("📂 Document Manager")
97
- st.info("Upload your PDFs here to provide context to the AI.")
98
-
99
  uploaded_files = st.file_uploader(
100
- "Select PDF files",
101
  type="pdf",
102
  accept_multiple_files=True,
103
  help="You can select multiple files at once."
@@ -110,24 +113,22 @@ def main():
110
  if rag_system:
111
  st.session_state.rag_system = rag_system
112
  st.session_state.processed_files = [f.name for f in uploaded_files]
113
- st.success(f"Indexed {num_chunks} chunks from {len(uploaded_files)} files.")
114
 
115
- # Notify the user in the chat
116
- st.session_state.messages.append({
117
- "role": "assistant",
118
- "content": f"I have successfully indexed: {', '.join(st.session_state.processed_files)}. I'm ready for your questions!"
119
- })
120
  else:
121
  st.warning("Please upload at least one PDF first.")
122
 
123
  if st.session_state.processed_files:
124
  st.markdown("---")
125
- st.subheader("Current Context")
126
  for f in st.session_state.processed_files:
127
  st.caption(f"✅ {f}")
128
 
129
- if st.button("Clear Chat"):
130
- st.session_state.messages = [{"role": "assistant", "content": "Chat cleared. Ask me anything about the loaded documents!"}]
131
  st.rerun()
132
 
133
  # --- Main Chat UI ---
@@ -141,39 +142,33 @@ def main():
141
 
142
  # Chat Input logic
143
  if prompt := st.chat_input("Ask a question about your documents..."):
144
- # Show user message
145
  st.chat_message("user").markdown(prompt)
146
  st.session_state.messages.append({"role": "user", "content": prompt})
147
 
148
- # Process via RAG
149
  if st.session_state.rag_system:
150
  with st.chat_message("assistant"):
151
  with st.spinner("Agent searching knowledge base..."):
152
  try:
153
- # Call the Agentic Graph
154
  result = st.session_state.rag_system.run(prompt)
155
  answer = result.get('answer', "I couldn't find a definitive answer.")
156
-
157
  st.markdown(answer)
158
 
159
- # Show Source Citations
160
  if result.get('retrieved_docs'):
161
  with st.expander("🔍 View Referenced Context"):
162
  for i, doc in enumerate(result['retrieved_docs'], 1):
163
- source_name = doc.metadata.get('source', 'Unknown')
164
  page_num = doc.metadata.get('page', 'N/A')
165
- st.markdown(f"**Source {i}:** {Path(source_name).name} (Page {page_num})")
166
  st.info(doc.page_content[:400] + "...")
167
 
168
  st.session_state.messages.append({"role": "assistant", "content": answer})
169
 
170
  except Exception as e:
171
- error_text = f"An error occurred while searching: {str(e)}"
172
- st.error(error_text)
173
- st.session_state.messages.append({"role": "assistant", "content": error_text})
174
  else:
175
- with st.chat_message("assistant"):
176
- st.warning("I don't have any documents in my memory yet. Please upload PDFs in the sidebar and click 'Build Knowledge Base'.")
177
 
178
  if __name__ == "__main__":
179
  main()
 
1
  import streamlit as st
2
  from pathlib import Path
3
  import sys
 
4
  import os
5
 
6
  # Add src to path to ensure imports work correctly
 
39
 
40
  def process_documents(uploaded_files):
41
  """
42
+ Handles multi-file ingestion:
43
+ 1. Loops through all uploaded files
44
+ 2. Saves each to a temp path
45
+ 3. Aggregates all document chunks
46
+ 4. Initializes VectorStore and Graph once
47
  """
48
  try:
49
  doc_processor = DocumentProcessor(
 
53
 
54
  all_docs = []
55
 
56
+ # Ensure a temporary directory exists
57
+ temp_dir = Path("temp_uploads")
58
+ temp_dir.mkdir(exist_ok=True)
59
+
60
  for uploaded_file in uploaded_files:
61
+ # 1. Save uploaded bytes to a local string path
62
+ temp_path = temp_dir / uploaded_file.name
63
  with open(temp_path, "wb") as f:
64
  f.write(uploaded_file.getvalue())
65
 
66
+ # 2. Process this specific PDF into chunks
67
+ # Assuming your DocumentProcessor.process_pdf takes a string path
68
  docs = doc_processor.process_pdf(str(temp_path))
69
  all_docs.extend(docs)
70
 
71
+ # 3. Clean up the temporary file immediately after processing
72
  if temp_path.exists():
73
  os.remove(temp_path)
74
 
 
76
  st.error("No text could be extracted from the uploaded files.")
77
  return None, 0
78
 
79
+ # 4. Create Vector Store with the combined list of all chunks
80
  vector_store = VectorStore()
81
  vector_store.create_vectorstore(all_docs)
82
 
83
+ # 5. Build the Agentic Graph using the compiled retriever
84
  graph_builder = GraphBuilder(
85
  retriever=vector_store.get_retriever(),
86
  llm=Config.get_llm()
 
98
 
99
  # --- Sidebar UI ---
100
  with st.sidebar:
101
+ st.header("Document Ingestion")
 
 
102
  uploaded_files = st.file_uploader(
103
+ "Upload PDF files",
104
  type="pdf",
105
  accept_multiple_files=True,
106
  help="You can select multiple files at once."
 
113
  if rag_system:
114
  st.session_state.rag_system = rag_system
115
  st.session_state.processed_files = [f.name for f in uploaded_files]
 
116
 
117
+ # Add success notification to chat
118
+ confirm_msg = f"I have successfully indexed {num_chunks} chunks from: {', '.join(st.session_state.processed_files)}."
119
+ st.session_state.messages.append({"role": "assistant", "content": confirm_msg})
120
+ st.rerun() # Refresh to show the message immediately
 
121
  else:
122
  st.warning("Please upload at least one PDF first.")
123
 
124
  if st.session_state.processed_files:
125
  st.markdown("---")
126
+ st.subheader("Loaded Documents")
127
  for f in st.session_state.processed_files:
128
  st.caption(f"✅ {f}")
129
 
130
+ if st.button("Clear Chat History"):
131
+ st.session_state.messages = [{"role": "assistant", "content": "Chat cleared. How can I help with the current documents?"}]
132
  st.rerun()
133
 
134
  # --- Main Chat UI ---
 
142
 
143
  # Chat Input logic
144
  if prompt := st.chat_input("Ask a question about your documents..."):
 
145
  st.chat_message("user").markdown(prompt)
146
  st.session_state.messages.append({"role": "user", "content": prompt})
147
 
 
148
  if st.session_state.rag_system:
149
  with st.chat_message("assistant"):
150
  with st.spinner("Agent searching knowledge base..."):
151
  try:
152
+ # Call your GraphBuilder's run method
153
  result = st.session_state.rag_system.run(prompt)
154
  answer = result.get('answer', "I couldn't find a definitive answer.")
 
155
  st.markdown(answer)
156
 
157
+ # Show Source Citations in an Expander
158
  if result.get('retrieved_docs'):
159
  with st.expander("🔍 View Referenced Context"):
160
  for i, doc in enumerate(result['retrieved_docs'], 1):
161
+ source_name = Path(doc.metadata.get('source', 'Unknown')).name
162
  page_num = doc.metadata.get('page', 'N/A')
163
+ st.markdown(f"**Source {i}:** {source_name} (Page {page_num})")
164
  st.info(doc.page_content[:400] + "...")
165
 
166
  st.session_state.messages.append({"role": "assistant", "content": answer})
167
 
168
  except Exception as e:
169
+ st.error(f"Search Error: {str(e)}")
 
 
170
  else:
171
+ st.warning("Please upload and build the knowledge base first!")
 
172
 
173
  if __name__ == "__main__":
174
  main()