Dinesh310 committed on
Commit
6557eac
Β·
verified Β·
1 Parent(s): 1ba8003

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +89 -58
streamlit_app.py CHANGED
@@ -2,8 +2,9 @@ import streamlit as st
2
  from pathlib import Path
3
  import sys
4
  import time
 
5
 
6
- # Add src to path
7
  sys.path.append(str(Path(__file__).parent))
8
 
9
  from src.config.config import Config
@@ -11,27 +12,39 @@ from src.document_ingestion.document_processor import DocumentProcessor
11
  from src.vectorstore.vectorstore import VectorStore
12
  from src.graph_builder.graph_builder import GraphBuilder
13
 
14
- # Page configuration
15
  st.set_page_config(
16
- page_title="πŸ€– PDF Agentic Chat",
17
- page_icon="πŸ’¬",
18
  layout="wide"
19
  )
20
 
 
 
 
 
 
 
 
 
21
  def init_session_state():
22
- """Initialize session state variables for chat history and system state"""
23
  if 'rag_system' not in st.session_state:
24
  st.session_state.rag_system = None
25
  if 'messages' not in st.session_state:
26
- # Initialize with a greeting
27
  st.session_state.messages = [
28
- {"role": "assistant", "content": "Hi! Upload some PDFs in the sidebar, and I'll help you analyze them."}
29
  ]
30
  if 'processed_files' not in st.session_state:
31
  st.session_state.processed_files = []
32
 
33
- def process_new_documents(uploaded_files):
34
- """Processes uploaded PDFs and initializes/updates the RAG system"""
 
 
 
 
 
35
  try:
36
  doc_processor = DocumentProcessor(
37
  chunk_size=Config.CHUNK_SIZE,
@@ -39,23 +52,30 @@ def process_new_documents(uploaded_files):
39
  )
40
 
41
  all_docs = []
 
42
  for uploaded_file in uploaded_files:
43
- # Save temp file for processing
44
  temp_path = Path(f"temp_{uploaded_file.name}")
45
  with open(temp_path, "wb") as f:
46
  f.write(uploaded_file.getvalue())
47
 
48
- # Use the processor to extract text and chunk
49
- docs = doc_processor.process_pdf(str(temp_path))
50
  all_docs.extend(docs)
51
 
52
- # Cleanup temp file
53
- temp_path.unlink()
 
 
 
 
 
54
 
55
- # Build the vector store and graph
56
  vector_store = VectorStore()
57
  vector_store.create_vectorstore(all_docs)
58
 
 
59
  graph_builder = GraphBuilder(
60
  retriever=vector_store.get_retriever(),
61
  llm=Config.get_llm()
@@ -63,86 +83,97 @@ def process_new_documents(uploaded_files):
63
  graph_builder.build()
64
 
65
  return graph_builder, len(all_docs)
 
66
  except Exception as e:
67
- st.error(f"Error processing documents: {str(e)}")
68
  return None, 0
69
 
70
  def main():
71
  init_session_state()
72
 
73
- # --- Sidebar: Document Upload ---
74
  with st.sidebar:
75
- st.title("πŸ“ Document Portal")
 
 
76
  uploaded_files = st.file_uploader(
77
- "Upload PDFs",
78
  type="pdf",
79
- accept_multiple_files=True
 
80
  )
81
 
82
- if st.button("πŸš€ Index Documents") and uploaded_files:
83
- with st.spinner("Processing documents..."):
84
- rag_system, num_chunks = process_new_documents(uploaded_files)
85
- if rag_system:
86
- st.session_state.rag_system = rag_system
87
- st.session_state.processed_files = [f.name for f in uploaded_files]
88
- st.success(f"Successfully indexed {num_chunks} chunks.")
89
- # Add a status message to chat
90
- st.session_state.messages.append({
91
- "role": "assistant",
92
- "content": f"I've finished reading: {', '.join(st.session_state.processed_files)}. What would you like to know?"
93
- })
94
-
 
 
 
 
95
  if st.session_state.processed_files:
96
  st.markdown("---")
97
- st.markdown("**Active Documents:**")
98
  for f in st.session_state.processed_files:
99
- st.caption(f"πŸ“„ {f}")
100
- if st.button("Clear Chat History"):
101
- st.session_state.messages = [{"role": "assistant", "content": "Chat history cleared. How can I help?"}]
 
102
  st.rerun()
103
 
104
- # --- Main Chat Interface ---
105
- st.title("πŸ’¬ PDF AI Assistant")
 
106
 
107
- # Display chat messages from history on app rerun
108
  for message in st.session_state.messages:
109
  with st.chat_message(message["role"]):
110
  st.markdown(message["content"])
111
 
112
- # React to user input
113
  if prompt := st.chat_input("Ask a question about your documents..."):
114
- # Display user message in chat message container
115
  st.chat_message("user").markdown(prompt)
116
- # Add user message to chat history
117
  st.session_state.messages.append({"role": "user", "content": prompt})
118
 
119
- # Generate response
120
  if st.session_state.rag_system:
121
  with st.chat_message("assistant"):
122
- with st.spinner("Thinking..."):
123
  try:
124
- # Run the Agentic RAG pipeline
125
  result = st.session_state.rag_system.run(prompt)
126
- response = result['answer']
127
 
128
- # Display response
129
- st.markdown(response)
130
 
131
- # Optional: Show sources in an expander inside the bubble
132
  if result.get('retrieved_docs'):
133
- with st.expander("View Sources"):
134
  for i, doc in enumerate(result['retrieved_docs'], 1):
135
- st.markdown(f"**Source {i}:**\n{doc.page_content[:500]}...")
 
 
 
136
 
137
- # Add assistant response to chat history
138
- st.session_state.messages.append({"role": "assistant", "content": response})
139
  except Exception as e:
140
- error_msg = f"I encountered an error: {str(e)}"
141
- st.error(error_msg)
142
- st.session_state.messages.append({"role": "assistant", "content": error_msg})
143
  else:
144
  with st.chat_message("assistant"):
145
- st.warning("Please upload and index some PDFs in the sidebar first!")
146
 
147
  if __name__ == "__main__":
148
  main()
 
2
  from pathlib import Path
3
  import sys
4
  import time
5
+ import os
6
 
7
+ # Add src to path to ensure imports work correctly
8
  sys.path.append(str(Path(__file__).parent))
9
 
10
  from src.config.config import Config
 
12
  from src.vectorstore.vectorstore import VectorStore
13
  from src.graph_builder.graph_builder import GraphBuilder
14
 
15
# --- Page Configuration ---
# Must run before any other Streamlit call on the page.
st.set_page_config(
    page_title="Agentic PDF RAG",
    page_icon="🧠",
    layout="wide",
)

# Custom CSS for chat styling: rounded chat bubbles and a light sidebar.
_CHAT_CSS = """
<style>
.stChatMessage { border-radius: 10px; margin-bottom: 10px; }
.stSidebar { background-color: #f8f9fa; }
</style>
"""
st.markdown(_CHAT_CSS, unsafe_allow_html=True)
29
+
30
def init_session_state():
    """Seed st.session_state with every key the app relies on.

    Idempotent: existing values survive Streamlit reruns; only missing
    keys receive their defaults.
    """
    defaults = {
        'rag_system': None,          # built lazily once documents are indexed
        'messages': [
            {"role": "assistant", "content": "Hello! Please upload PDF documents in the sidebar to begin our technical deep-dive."}
        ],
        'processed_files': [],       # names of PDFs currently indexed
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
40
 
41
def process_documents(uploaded_files):
    """
    Ingest uploaded PDFs and build the agentic RAG pipeline.

    Steps:
      1. Spill each upload's bytes to a local temp file (the PDF loader
         needs a filesystem path).
      2. Chunk the text via DocumentProcessor.
      3. Build the VectorStore and the agentic GraphBuilder on top of it.

    Args:
        uploaded_files: iterable of Streamlit UploadedFile objects.

    Returns:
        (graph_builder, chunk_count) on success, (None, 0) on failure.
        Failures are surfaced to the UI via st.error rather than raised.
    """
    try:
        doc_processor = DocumentProcessor(
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=Config.CHUNK_OVERLAP,  # NOTE(review): this line was elided diff context — confirm arg name against DocumentProcessor
        )

        all_docs = []
        for uploaded_file in uploaded_files:
            # Create a temporary local file for the loader to read.
            temp_path = Path(f"temp_{uploaded_file.name}")
            with open(temp_path, "wb") as f:
                f.write(uploaded_file.getvalue())
            try:
                all_docs.extend(doc_processor.process_pdf(str(temp_path)))
            finally:
                # Fix: previously the temp file leaked when process_pdf raised;
                # always remove it, even on error.
                temp_path.unlink(missing_ok=True)

        if not all_docs:
            st.error("No text could be extracted from the uploaded files.")
            return None, 0

        # Create the vector store over all extracted chunks.
        vector_store = VectorStore()
        vector_store.create_vectorstore(all_docs)

        # Wire the retriever and LLM into the agentic graph.
        graph_builder = GraphBuilder(
            retriever=vector_store.get_retriever(),
            llm=Config.get_llm(),
        )
        graph_builder.build()

        return graph_builder, len(all_docs)

    except Exception as e:
        # Boundary handler: report the failure in the UI instead of crashing the app.
        st.error(f"Critical Error during ingestion: {str(e)}")
        return None, 0
90
 
91
def main():
    """Top-level Streamlit page: sidebar for ingestion, main pane for chat."""
    init_session_state()

    # --- Sidebar: document upload / indexing controls ---
    with st.sidebar:
        st.header("📂 Document Manager")
        st.info("Upload your PDFs here to provide context to the AI.")

        pdf_uploads = st.file_uploader(
            "Select PDF files",
            type="pdf",
            accept_multiple_files=True,
            help="You can select multiple files at once.",
        )

        if st.button("🛠️ Build Knowledge Base", type="primary"):
            if not pdf_uploads:
                st.warning("Please upload at least one PDF first.")
            else:
                with st.spinner("Analyzing PDF structure and generating embeddings..."):
                    graph, chunk_total = process_documents(pdf_uploads)
                    if graph:
                        st.session_state.rag_system = graph
                        st.session_state.processed_files = [f.name for f in pdf_uploads]
                        st.success(f"Indexed {chunk_total} chunks from {len(pdf_uploads)} files.")

                        # Surface the ingestion result inside the conversation itself.
                        st.session_state.messages.append({
                            "role": "assistant",
                            "content": f"I have successfully indexed: {', '.join(st.session_state.processed_files)}. I'm ready for your questions!",
                        })

        if st.session_state.processed_files:
            st.markdown("---")
            st.subheader("Current Context")
            for name in st.session_state.processed_files:
                st.caption(f"✅ {name}")

            if st.button("Clear Chat"):
                st.session_state.messages = [
                    {"role": "assistant", "content": "Chat cleared. Ask me anything about the loaded documents!"}
                ]
                st.rerun()

    # --- Main pane: chat transcript + input ---
    st.title("🔍 Agentic RAG Explorer")
    st.caption("Powered by LangGraph & Vector Embeddings")

    # Replay the stored transcript on every rerun.
    for entry in st.session_state.messages:
        with st.chat_message(entry["role"]):
            st.markdown(entry["content"])

    user_query = st.chat_input("Ask a question about your documents...")
    if user_query:
        # Echo the user's turn and record it.
        st.chat_message("user").markdown(user_query)
        st.session_state.messages.append({"role": "user", "content": user_query})

        if st.session_state.rag_system:
            with st.chat_message("assistant"):
                with st.spinner("Agent searching knowledge base..."):
                    try:
                        # Run the agentic graph on the query.
                        result = st.session_state.rag_system.run(user_query)
                        answer = result.get('answer', "I couldn't find a definitive answer.")
                        st.markdown(answer)

                        # Citation panel: show the chunks the agent retrieved.
                        if result.get('retrieved_docs'):
                            with st.expander("🔍 View Referenced Context"):
                                for idx, doc in enumerate(result['retrieved_docs'], 1):
                                    src = doc.metadata.get('source', 'Unknown')
                                    page = doc.metadata.get('page', 'N/A')
                                    st.markdown(f"**Source {idx}:** {Path(src).name} (Page {page})")
                                    st.info(doc.page_content[:400] + "...")

                        st.session_state.messages.append({"role": "assistant", "content": answer})

                    except Exception as e:
                        failure = f"An error occurred while searching: {str(e)}"
                        st.error(failure)
                        st.session_state.messages.append({"role": "assistant", "content": failure})
        else:
            with st.chat_message("assistant"):
                st.warning("I don't have any documents in my memory yet. Please upload PDFs in the sidebar and click 'Build Knowledge Base'.")
177
 
178
# Script entry point: render the Streamlit app when executed directly.
if __name__ == "__main__":
    main()