Dinesh310 committed on
Commit
c00effc
·
verified ·
1 Parent(s): a9cc27c

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +238 -151
streamlit_app.py CHANGED
@@ -1,151 +1,238 @@
1
- """Streamlit UI for Agentic RAG System - Simplified Version"""
2
-
3
- import streamlit as st
4
- from pathlib import Path
5
- import sys
6
- import time
7
-
8
- # Add src to path
9
- sys.path.append(str(Path(__file__).parent))
10
-
11
- from src.config.config import Config
12
- from src.document_ingestion.document_processor import DocumentProcessor
13
- from src.vectorstore.vectorstore import VectorStore
14
- from src.graph_builder.graph_builder import GraphBuilder
15
-
16
- # Page configuration
17
- st.set_page_config(
18
- page_title="🤖 RAG Search",
19
- page_icon="🔍",
20
- layout="centered"
21
- )
22
-
23
- # Simple CSS
24
- st.markdown("""
25
- <style>
26
- .stButton > button {
27
- width: 100%;
28
- background-color: #4CAF50;
29
- color: white;
30
- font-weight: bold;
31
- }
32
- </style>
33
- """, unsafe_allow_html=True)
34
-
35
- def init_session_state():
36
- """Initialize session state variables"""
37
- if 'rag_system' not in st.session_state:
38
- st.session_state.rag_system = None
39
- if 'initialized' not in st.session_state:
40
- st.session_state.initialized = False
41
- if 'history' not in st.session_state:
42
- st.session_state.history = []
43
-
44
- @st.cache_resource
45
- def initialize_rag():
46
- """Initialize the RAG system (cached)"""
47
- try:
48
- # Initialize components
49
- llm = Config.get_llm()
50
- doc_processor = DocumentProcessor(
51
- chunk_size=Config.CHUNK_SIZE,
52
- chunk_overlap=Config.CHUNK_OVERLAP
53
- )
54
- vector_store = VectorStore()
55
-
56
- # Use default URLs
57
- urls = Config.DEFAULT_URLS
58
-
59
- # Process documents
60
- documents = doc_processor.process_urls(urls)
61
-
62
- # Create vector store
63
- vector_store.create_vectorstore(documents)
64
-
65
- # Build graph
66
- graph_builder = GraphBuilder(
67
- retriever=vector_store.get_retriever(),
68
- llm=llm
69
- )
70
- graph_builder.build()
71
-
72
- return graph_builder, len(documents)
73
- except Exception as e:
74
- st.error(f"Failed to initialize: {str(e)}")
75
- return None, 0
76
-
77
- def main():
78
- """Main application"""
79
- init_session_state()
80
-
81
- # Title
82
- st.title("🔍 RAG Document Search")
83
- st.markdown("Ask questions about the loaded documents")
84
-
85
- # Initialize system
86
- if not st.session_state.initialized:
87
- with st.spinner("Loading system..."):
88
- rag_system, num_chunks = initialize_rag()
89
- if rag_system:
90
- st.session_state.rag_system = rag_system
91
- st.session_state.initialized = True
92
- st.success(f"✅ System ready! ({num_chunks} document chunks loaded)")
93
-
94
- st.markdown("---")
95
-
96
- # Search interface
97
- with st.form("search_form"):
98
- question = st.text_input(
99
- "Enter your question:",
100
- placeholder="What would you like to know?"
101
- )
102
- submit = st.form_submit_button("🔍 Search")
103
-
104
- # Process search
105
- if submit and question:
106
- if st.session_state.rag_system:
107
- with st.spinner("Searching..."):
108
- start_time = time.time()
109
-
110
- # Get answer
111
- result = st.session_state.rag_system.run(question)
112
-
113
- elapsed_time = time.time() - start_time
114
-
115
- # Add to history
116
- st.session_state.history.append({
117
- 'question': question,
118
- 'answer': result['answer'],
119
- 'time': elapsed_time
120
- })
121
-
122
- # Display answer
123
- st.markdown("### 💡 Answer")
124
- st.success(result['answer'])
125
-
126
- # Show retrieved docs in expander
127
- with st.expander("📄 Source Documents"):
128
- for i, doc in enumerate(result['retrieved_docs'], 1):
129
- st.text_area(
130
- f"Document {i}",
131
- doc.page_content[:300] + "...",
132
- height=100,
133
- disabled=True
134
- )
135
-
136
- st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")
137
-
138
- # Show history
139
- if st.session_state.history:
140
- st.markdown("---")
141
- st.markdown("### 📜 Recent Searches")
142
-
143
- for item in reversed(st.session_state.history[-3:]): # Show last 3
144
- with st.container():
145
- st.markdown(f"**Q:** {item['question']}")
146
- st.markdown(f"**A:** {item['answer'][:200]}...")
147
- st.caption(f"Time: {item['time']:.2f}s")
148
- st.markdown("")
149
-
150
- if __name__ == "__main__":
151
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Streamlit UI for Agentic RAG System
3
+ - Default URL ingestion
4
+ - Sidebar PDF upload
5
+ - Incremental indexing
6
+ - Question answering with sources
7
+ """
8
+
9
+ import streamlit as st
10
+ from pathlib import Path
11
+ import sys
12
+ import time
13
+ import os
14
+
15
+ # -------------------------------------------------
16
+ # Path setup
17
+ # -------------------------------------------------
18
+ sys.path.append(str(Path(__file__).parent))
19
+
20
+ # -------------------------------------------------
21
+ # Project imports
22
+ # -------------------------------------------------
23
+ from src.config.config import Config
24
+ from src.document_ingestion.document_processor import DocumentProcessor
25
+ from src.vectorstore.vectorstore import VectorStore
26
+ from src.graph_builder.graph_builder import GraphBuilder
27
+
28
+ # -------------------------------------------------
29
+ # Page configuration
30
+ # -------------------------------------------------
31
+ st.set_page_config(
32
+ page_title="🤖 Agentic RAG Search",
33
+ page_icon="🔍",
34
+ layout="centered"
35
+ )
36
+
37
+ # -------------------------------------------------
38
+ # Simple CSS
39
+ # -------------------------------------------------
40
+ st.markdown(
41
+ """
42
+ <style>
43
+ .stButton > button {
44
+ width: 100%;
45
+ background-color: #4CAF50;
46
+ color: white;
47
+ font-weight: bold;
48
+ }
49
+ </style>
50
+ """,
51
+ unsafe_allow_html=True
52
+ )
53
+
54
+ # -------------------------------------------------
55
+ # Session state initialization
56
+ # -------------------------------------------------
57
+ def init_session_state():
58
+ if "rag_system" not in st.session_state:
59
+ st.session_state.rag_system = None
60
+ if "initialized" not in st.session_state:
61
+ st.session_state.initialized = False
62
+ if "history" not in st.session_state:
63
+ st.session_state.history = []
64
+ if "processed_files" not in st.session_state:
65
+ st.session_state.processed_files = []
66
+
67
+ # -------------------------------------------------
68
+ # RAG system initialization (cached)
69
+ # -------------------------------------------------
70
+ @st.cache_resource
71
+ def initialize_rag():
72
+ """
73
+ Initializes RAG using default URLs.
74
+ This runs ONLY once due to caching.
75
+ """
76
+ try:
77
+ llm = Config.get_llm()
78
+
79
+ doc_processor = DocumentProcessor(
80
+ chunk_size=Config.CHUNK_SIZE,
81
+ chunk_overlap=Config.CHUNK_OVERLAP
82
+ )
83
+
84
+ vector_store = VectorStore()
85
+
86
+ # Load default URLs
87
+ urls = Config.DEFAULT_URLS
88
+ documents = doc_processor.process_urls(urls)
89
+
90
+ # Create vector store
91
+ vector_store.create_vectorstore(documents)
92
+
93
+ # Build agentic graph
94
+ graph_builder = GraphBuilder(
95
+ retriever=vector_store.get_retriever(),
96
+ llm=llm
97
+ )
98
+ graph_builder.build()
99
+
100
+ return graph_builder, vector_store, doc_processor, len(documents)
101
+
102
+ except Exception as e:
103
+ st.error(f"Initialization failed: {str(e)}")
104
+ return None, None, None, 0
105
+
106
+ # -------------------------------------------------
107
+ # Main app
108
+ # -------------------------------------------------
109
+ def main():
110
+ init_session_state()
111
+
112
+ # -------------------------------
113
+ # Title
114
+ # -------------------------------
115
+ st.title("🔍 Agentic RAG Document Search")
116
+ st.markdown("Ask questions over default docs or uploaded PDFs")
117
+
118
+ # -------------------------------
119
+ # Initialize RAG system
120
+ # -------------------------------
121
+ if not st.session_state.initialized:
122
+ with st.spinner("Loading RAG system..."):
123
+ rag_system, vector_store, doc_processor, num_chunks = initialize_rag()
124
+
125
+ if rag_system:
126
+ st.session_state.rag_system = rag_system
127
+ st.session_state.vector_store = vector_store
128
+ st.session_state.doc_processor = doc_processor
129
+ st.session_state.initialized = True
130
+
131
+ st.success(f"✅ System ready! ({num_chunks} chunks indexed)")
132
+
133
+ # -------------------------------------------------
134
+ # Sidebar: PDF Upload
135
+ # -------------------------------------------------
136
+ st.sidebar.header("📄 Upload Project PDFs")
137
+
138
+ uploaded_files = st.sidebar.file_uploader(
139
+ "Upload PDF documents",
140
+ type="pdf",
141
+ accept_multiple_files=True
142
+ )
143
+
144
+ if uploaded_files:
145
+ uploaded_names = {f.name for f in uploaded_files}
146
+
147
+ if (
148
+ not st.session_state.processed_files
149
+ or set(st.session_state.processed_files) != uploaded_names
150
+ ):
151
+ with st.spinner("Analyzing uploaded PDFs..."):
152
+ temp_dir = "temp"
153
+ os.makedirs(temp_dir, exist_ok=True)
154
+
155
+ paths = []
156
+ for f in uploaded_files:
157
+ path = os.path.join(temp_dir, f.name)
158
+ with open(path, "wb") as out:
159
+ out.write(f.getbuffer())
160
+ paths.append(path)
161
+
162
+ # Process PDFs
163
+ documents = st.session_state.doc_processor.process_pdfs(paths)
164
+
165
+ # Add to existing vector store
166
+ st.session_state.vector_store.add_documents(documents)
167
+
168
+ # Update processed file list
169
+ st.session_state.processed_files = list(uploaded_names)
170
+
171
+ st.sidebar.success("📚 PDFs indexed successfully!")
172
+
173
+ st.markdown("---")
174
+
175
+ # -------------------------------------------------
176
+ # Query input
177
+ # -------------------------------------------------
178
+ with st.form("search_form"):
179
+ question = st.text_input(
180
+ "Enter your question:",
181
+ placeholder="Ask something about the documents..."
182
+ )
183
+ submit = st.form_submit_button("🔍 Search")
184
+
185
+ # -------------------------------------------------
186
+ # Query processing
187
+ # -------------------------------------------------
188
+ if submit and question:
189
+ if st.session_state.rag_system:
190
+ with st.spinner("Searching..."):
191
+ start_time = time.time()
192
+
193
+ result = st.session_state.rag_system.run(question)
194
+
195
+ elapsed_time = time.time() - start_time
196
+
197
+ # Save history
198
+ st.session_state.history.append(
199
+ {
200
+ "question": question,
201
+ "answer": result["answer"],
202
+ "time": elapsed_time,
203
+ }
204
+ )
205
+
206
+ # Display answer
207
+ st.markdown("### 💡 Answer")
208
+ st.success(result["answer"])
209
+
210
+ # Show retrieved documents
211
+ with st.expander("📄 Source Documents"):
212
+ for i, doc in enumerate(result["retrieved_docs"], 1):
213
+ st.text_area(
214
+ f"Document {i}",
215
+ doc.page_content[:300] + "...",
216
+ height=100,
217
+ disabled=True,
218
+ )
219
+
220
+ st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")
221
+
222
+ # -------------------------------------------------
223
+ # Search history
224
+ # -------------------------------------------------
225
+ if st.session_state.history:
226
+ st.markdown("---")
227
+ st.markdown("### 📜 Recent Searches")
228
+
229
+ for item in reversed(st.session_state.history[-3:]):
230
+ st.markdown(f"**Q:** {item['question']}")
231
+ st.markdown(f"**A:** {item['answer'][:200]}...")
232
+ st.caption(f"Time: {item['time']:.2f}s")
233
+
234
+ # -------------------------------------------------
235
+ # Entry point
236
+ # -------------------------------------------------
237
+ if __name__ == "__main__":
238
+ main()