Dinesh310 committed on
Commit
d143793
·
verified ·
1 Parent(s): 028a330

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +113 -196
streamlit_app.py CHANGED
@@ -1,238 +1,155 @@
1
- """
2
- Streamlit UI for Agentic RAG System
3
- - Default URL ingestion
4
- - Sidebar PDF upload
5
- - Incremental indexing
6
- - Question answering with sources
7
- """
8
-
9
  import streamlit as st
10
  from pathlib import Path
11
  import sys
12
  import time
13
- import os
14
 
15
- # -------------------------------------------------
16
- # Path setup
17
- # -------------------------------------------------
18
  sys.path.append(str(Path(__file__).parent))
19
 
20
- # -------------------------------------------------
21
- # Project imports
22
- # -------------------------------------------------
23
  from src.config.config import Config
24
  from src.document_ingestion.document_processor import DocumentProcessor
25
  from src.vectorstore.vectorstore import VectorStore
26
  from src.graph_builder.graph_builder import GraphBuilder
27
 
28
- # -------------------------------------------------
29
  # Page configuration
30
- # -------------------------------------------------
31
  st.set_page_config(
32
- page_title="πŸ€– Agentic RAG Search",
33
- page_icon="πŸ”",
34
- layout="centered"
35
  )
36
 
37
- # -------------------------------------------------
38
- # Simple CSS
39
- # -------------------------------------------------
40
- st.markdown(
41
- """
42
  <style>
43
- .stButton > button {
44
- width: 100%;
45
- background-color: #4CAF50;
46
- color: white;
47
- font-weight: bold;
48
- }
49
  </style>
50
- """,
51
- unsafe_allow_html=True
52
- )
53
 
54
- # -------------------------------------------------
55
- # Session state initialization
56
- # -------------------------------------------------
57
  def init_session_state():
58
- if "rag_system" not in st.session_state:
 
59
  st.session_state.rag_system = None
60
- if "initialized" not in st.session_state:
61
- st.session_state.initialized = False
62
- if "history" not in st.session_state:
63
- st.session_state.history = []
64
- if "processed_files" not in st.session_state:
65
  st.session_state.processed_files = []
 
 
66
 
67
- # -------------------------------------------------
68
- # RAG system initialization (cached)
69
- # -------------------------------------------------
70
- @st.cache_resource
71
- def initialize_rag():
72
- """
73
- Initializes RAG using default URLs.
74
- This runs ONLY once due to caching.
75
- """
76
  try:
77
- llm = Config.get_llm()
78
-
79
  doc_processor = DocumentProcessor(
80
  chunk_size=Config.CHUNK_SIZE,
81
  chunk_overlap=Config.CHUNK_OVERLAP
82
  )
83
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  vector_store = VectorStore()
85
-
86
- # Load default URLs
87
- urls = Config.DEFAULT_URLS
88
- documents = doc_processor.process_urls(urls)
89
-
90
- # Create vector store
91
- vector_store.create_vectorstore(documents)
92
-
93
- # Build agentic graph
94
  graph_builder = GraphBuilder(
95
  retriever=vector_store.get_retriever(),
96
- llm=llm
97
  )
98
  graph_builder.build()
99
-
100
- return graph_builder, vector_store, doc_processor, len(documents)
101
-
102
  except Exception as e:
103
- st.error(f"Initialization failed: {str(e)}")
104
- return None, None, None, 0
105
 
106
- # -------------------------------------------------
107
- # Main app
108
- # -------------------------------------------------
109
  def main():
110
  init_session_state()
111
-
112
- # -------------------------------
113
- # Title
114
- # -------------------------------
115
- st.title("πŸ” Agentic RAG Document Search")
116
- # st.markdown("Ask questions over default docs or uploaded PDFs")
117
-
118
- # -------------------------------
119
- # Initialize RAG system
120
- # -------------------------------
121
- if not st.session_state.initialized:
122
- with st.spinner("Loading RAG system..."):
123
- rag_system, vector_store, doc_processor, num_chunks = initialize_rag()
124
-
125
- if rag_system:
126
- st.session_state.rag_system = rag_system
127
- st.session_state.vector_store = vector_store
128
- st.session_state.doc_processor = doc_processor
129
- st.session_state.initialized = True
130
-
131
- st.success(f"βœ… System ready! ({num_chunks} chunks indexed)")
132
-
133
- # -------------------------------------------------
134
- # Sidebar: PDF Upload
135
- # -------------------------------------------------
136
- st.sidebar.header("πŸ“„ Upload Project PDFs")
137
-
138
- uploaded_files = st.sidebar.file_uploader(
139
- "Upload PDF documents",
140
- type="pdf",
141
- accept_multiple_files=True
142
- )
143
-
144
- if uploaded_files:
145
- uploaded_names = {f.name for f in uploaded_files}
146
-
147
- if (
148
- not st.session_state.processed_files
149
- or set(st.session_state.processed_files) != uploaded_names
150
- ):
151
- with st.spinner("Analyzing uploaded PDFs..."):
152
- temp_dir = "temp"
153
- os.makedirs(temp_dir, exist_ok=True)
154
-
155
- paths = []
156
- for f in uploaded_files:
157
- path = os.path.join(temp_dir, f.name)
158
- with open(path, "wb") as out:
159
- out.write(f.getbuffer())
160
- paths.append(path)
161
-
162
- # Process PDFs
163
- documents = st.session_state.doc_processor.process_pdfs(paths)
164
-
165
- # Add to existing vector store
166
- st.session_state.vector_store.add_documents(documents)
167
-
168
- # Update processed file list
169
- st.session_state.processed_files = list(uploaded_names)
170
-
171
- st.sidebar.success("πŸ“š PDFs indexed successfully!")
172
-
173
- st.markdown("---")
174
-
175
- # -------------------------------------------------
176
- # Query input
177
- # -------------------------------------------------
178
- with st.form("search_form"):
179
- question = st.text_input(
180
- "Enter your question:",
181
- placeholder="Ask something about the documents..."
182
  )
183
- submit = st.form_submit_button("πŸ” Search")
184
-
185
- # -------------------------------------------------
186
- # Query processing
187
- # -------------------------------------------------
188
- if submit and question:
189
- if st.session_state.rag_system:
190
- with st.spinner("Searching..."):
191
- start_time = time.time()
192
-
193
- result = st.session_state.rag_system.run(question)
194
-
195
- elapsed_time = time.time() - start_time
196
-
197
- # Save history
198
- st.session_state.history.append(
199
- {
200
- "question": question,
201
- "answer": result["answer"],
202
- "time": elapsed_time,
203
- }
204
- )
205
-
206
- # Display answer
207
- st.markdown("### πŸ’‘ Answer")
208
- st.success(result["answer"])
209
-
210
- # Show retrieved documents
211
- with st.expander("πŸ“„ Source Documents"):
212
- for i, doc in enumerate(result["retrieved_docs"], 1):
213
- st.text_area(
214
- f"Document {i}",
215
- doc.page_content[:300] + "...",
216
- height=100,
217
- disabled=True,
218
- )
219
-
220
- st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")
221
-
222
- # -------------------------------------------------
223
- # Search history
224
- # -------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  if st.session_state.history:
226
  st.markdown("---")
227
- st.markdown("### πŸ“œ Recent Searches")
228
-
229
- for item in reversed(st.session_state.history[-3:]):
230
- st.markdown(f"**Q:** {item['question']}")
231
- st.markdown(f"**A:** {item['answer'][:200]}...")
232
- st.caption(f"Time: {item['time']:.2f}s")
233
 
234
- # -------------------------------------------------
235
- # Entry point
236
- # -------------------------------------------------
237
  if __name__ == "__main__":
238
- main()
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from pathlib import Path
3
  import sys
4
  import time
 
5
 
6
+ # Add this file's directory (the project root) to sys.path so the `src.*` imports resolve
 
 
7
  sys.path.append(str(Path(__file__).parent))
8
 
 
 
 
9
  from src.config.config import Config
10
  from src.document_ingestion.document_processor import DocumentProcessor
11
  from src.vectorstore.vectorstore import VectorStore
12
  from src.graph_builder.graph_builder import GraphBuilder
13
 
 
14
  # Page configuration
 
15
  st.set_page_config(
16
+ page_title="πŸ€– PDF Agentic RAG",
17
+ page_icon="πŸ“„",
18
+ layout="wide"
19
  )
20
 
21
+ # Custom CSS for a cleaner look
22
+ st.markdown("""
 
 
 
23
  <style>
24
+ .stAlert { margin-top: 1rem; }
25
+ .stButton > button { width: 100%; border-radius: 5px; height: 3em; }
 
 
 
 
26
  </style>
27
+ """, unsafe_allow_html=True)
 
 
28
 
 
 
 
29
  def init_session_state():
30
+ """Initialize session state variables"""
31
+ if 'rag_system' not in st.session_state:
32
  st.session_state.rag_system = None
33
+ if 'processed_files' not in st.session_state:
 
 
 
 
34
  st.session_state.processed_files = []
35
+ if 'history' not in st.session_state:
36
+ st.session_state.history = []
37
 
38
def process_new_documents(uploaded_files):
    """Index the uploaded PDFs and build a RAG graph over them.

    Each upload is written to a scratch file and chunked by
    ``DocumentProcessor``. The combined chunks are loaded into a fresh
    ``VectorStore``, whose retriever backs a newly built ``GraphBuilder``.

    Args:
        uploaded_files: Iterable of Streamlit uploaded-file objects. Only
            their ``.name`` and ``.getvalue()`` are used.

    Returns:
        ``(graph_builder, chunk_count)`` on success, or ``(None, 0)`` after
        the failure has been reported with ``st.error``.
    """
    import tempfile  # local import: only this function needs it

    try:
        doc_processor = DocumentProcessor(
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=Config.CHUNK_OVERLAP,
        )

        # 1. Process PDFs into chunks.
        all_docs = []
        # A private scratch directory is deleted on exit even when a PDF
        # fails to parse (the previous unlink() was skipped on error), and it
        # keeps sessions that upload same-named files from clobbering each
        # other's temp files in the working directory.
        with tempfile.TemporaryDirectory() as scratch_dir:
            for uploaded_file in uploaded_files:
                # The name is client-supplied: keep only its last path component.
                safe_name = Path(uploaded_file.name).name or "upload.pdf"
                temp_path = Path(scratch_dir) / f"temp_{safe_name}"
                temp_path.write_bytes(uploaded_file.getvalue())

                # NOTE(review): confirm DocumentProcessor really exposes
                # process_pdf(path); the original author flagged this call
                # as a guess.
                all_docs.extend(doc_processor.process_pdf(str(temp_path)))

        # An index over zero chunks cannot answer anything; fail with a
        # clear message instead of whatever the vector store would raise.
        if not all_docs:
            st.error(
                "Error processing documents: no content could be extracted "
                "from the uploaded PDFs"
            )
            return None, 0

        # 2. Initialize components.
        vector_store = VectorStore()
        vector_store.create_vectorstore(all_docs)

        # 3. Build graph.
        graph_builder = GraphBuilder(
            retriever=vector_store.get_retriever(),
            llm=Config.get_llm(),
        )
        graph_builder.build()

        return graph_builder, len(all_docs)

    except Exception as e:
        # UI boundary: surface any ingestion/indexing failure to the user
        # instead of crashing the script run.
        st.error(f"Error processing documents: {str(e)}")
        return None, 0
78
 
 
 
 
79
  def main():
80
  init_session_state()
81
+
82
+ # --- Sidebar: Document Upload ---
83
+ with st.sidebar:
84
+ st.title("πŸ“ Document Management")
85
+ uploaded_files = st.file_uploader(
86
+ "Upload PDF documents",
87
+ type="pdf",
88
+ accept_multiple_files=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  )
90
+
91
+ process_btn = st.button("πŸš€ Process Documents")
92
+
93
+ if process_btn and uploaded_files:
94
+ with st.spinner("Analyzing PDFs and building index..."):
95
+ rag_system, num_chunks = process_new_documents(uploaded_files)
96
+ if rag_system:
97
+ st.session_state.rag_system = rag_system
98
+ st.session_state.processed_files = [f.name for f in uploaded_files]
99
+ st.success(f"Indexed {len(uploaded_files)} files ({num_chunks} chunks)")
100
+
101
+ if st.session_state.processed_files:
102
+ st.markdown("---")
103
+ st.markdown("**Currently Loaded:**")
104
+ for f in st.session_state.processed_files:
105
+ st.caption(f"βœ… {f}")
106
+
107
+ # --- Main UI: Search ---
108
+ st.title("πŸ” Agentic RAG Search")
109
+
110
+ if not st.session_state.rag_system:
111
+ st.info("πŸ‘ˆ Please upload and process PDF documents in the sidebar to start searching.")
112
+ return
113
+
114
+ # Search interface
115
+ with st.container():
116
+ question = st.text_input("Ask a question about your documents:")
117
+ search_cols = st.columns([1, 4])
118
+ submit = search_cols[0].button("Search")
119
+
120
+ if (submit or question) and question:
121
+ with st.spinner("Agent is thinking..."):
122
+ start_time = time.time()
123
+
124
+ # Execute RAG pipeline
125
+ result = st.session_state.rag_system.run(question)
126
+
127
+ elapsed_time = time.time() - start_time
128
+
129
+ # Update History
130
+ st.session_state.history.append({
131
+ 'question': question,
132
+ 'answer': result['answer'],
133
+ 'time': elapsed_time
134
+ })
135
+
136
+ # Display results
137
+ st.markdown("### πŸ’‘ Answer")
138
+ st.write(result['answer'])
139
+
140
+ with st.expander("πŸ“„ View Source Context"):
141
+ for i, doc in enumerate(result.get('retrieved_docs', []), 1):
142
+ st.markdown(f"**Source {i}:**")
143
+ st.info(doc.page_content)
144
+
145
+ # --- History Section ---
146
  if st.session_state.history:
147
  st.markdown("---")
148
+ st.subheader("πŸ“œ Search History")
149
+ for item in reversed(st.session_state.history):
150
+ with st.expander(f"Q: {item['question']}"):
151
+ st.write(item['answer'])
152
+ st.caption(f"Response time: {item['time']:.2f}s")
 
153
 
 
 
 
154
  if __name__ == "__main__":
155
+ main()