Dinesh310 committed on
Commit
383ee63
Β·
verified Β·
1 Parent(s): 8586611

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +118 -166
streamlit_app.py CHANGED
@@ -1,12 +1,11 @@
 
 
1
  import streamlit as st
2
  from pathlib import Path
3
  import sys
4
- import os
5
- import hashlib
6
 
7
- # -------------------------------------------------
8
- # Path setup
9
- # -------------------------------------------------
10
  sys.path.append(str(Path(__file__).parent))
11
 
12
  from src.config.config import Config
@@ -14,186 +13,139 @@ from src.document_ingestion.document_processor import DocumentProcessor
14
  from src.vectorstore.vectorstore import VectorStore
15
  from src.graph_builder.graph_builder import GraphBuilder
16
 
17
- # -------------------------------------------------
18
- # Page config
19
- # -------------------------------------------------
20
  st.set_page_config(
21
- page_title="Agentic PDF RAG",
22
- page_icon="🧠",
23
- layout="wide"
24
  )
25
 
26
- # -------------------------------------------------
27
- # Styles
28
- # -------------------------------------------------
29
  st.markdown("""
30
- <style>
31
- .stChatMessage { border-radius: 10px; margin-bottom: 10px; }
32
- .stSidebar { background-color: #f8f9fa; }
33
- </style>
 
 
 
 
34
  """, unsafe_allow_html=True)
35
 
36
- # -------------------------------------------------
37
- # Session state
38
- # -------------------------------------------------
39
  def init_session_state():
40
- defaults = {
41
- "rag_system": None,
42
- "messages": [
43
- {"role": "assistant", "content": "πŸ‘‹ Upload one or more PDFs from the sidebar and start chatting across them."}
44
- ],
45
- "processed_files": [],
46
- "kb_hash": None
47
- }
48
- for k, v in defaults.items():
49
- if k not in st.session_state:
50
- st.session_state[k] = v
51
-
52
- # -------------------------------------------------
53
- # Helpers
54
- # -------------------------------------------------
55
def compute_files_hash(files):
    """Return a stable MD5 fingerprint for a set of uploaded files.

    Used to skip rebuilding the knowledge base when the exact same PDFs
    are uploaded again.

    Args:
        files: iterable of uploaded-file objects exposing ``.name`` and
            ``.getvalue()`` (Streamlit's UploadedFile interface).

    Returns:
        Hex digest string uniquely identifying this set of files.
    """
    hasher = hashlib.md5()
    for f in files:
        name = f.name.encode()
        data = f.getvalue()
        # BUGFIX: length-prefix each field so (name, content) pairs cannot
        # collide by shifting bytes between fields (e.g. name "ab" +
        # content b"c" previously hashed the same as "a" + b"bc").
        hasher.update(len(name).to_bytes(8, "big"))
        hasher.update(name)
        hasher.update(len(data).to_bytes(8, "big"))
        hasher.update(data)
    return hasher.hexdigest()
62
-
63
def process_documents(uploaded_files):
    """Ingest multiple uploaded PDFs into ONE shared knowledge base.

    Each file is written to a temporary path, chunked, tagged with its
    source filename, indexed into a vector store, and wired into the
    agent graph.

    Args:
        uploaded_files: Streamlit UploadedFile objects to ingest.

    Returns:
        ``(graph, chunk_count)`` on success, or ``(None, 0)`` when nothing
        was ingested or an error occurred (reported via ``st.error``).
    """
    try:
        processor = DocumentProcessor(
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=Config.CHUNK_OVERLAP
        )

        temp_dir = Path("temp_uploads")
        temp_dir.mkdir(exist_ok=True)

        all_docs = []

        for file in uploaded_files:
            temp_path = temp_dir / file.name
            try:
                temp_path.write_bytes(file.getvalue())
                docs = processor.process_pdf(str(temp_path))

                # Tag every chunk with its originating file so answers can
                # cite the right PDF.
                for d in docs:
                    d.metadata["source"] = file.name

                all_docs.extend(docs)
            finally:
                # BUGFIX: previously the temp file leaked when
                # process_pdf raised; always clean it up.
                temp_path.unlink(missing_ok=True)

        if not all_docs:
            return None, 0

        vector_store = VectorStore()
        vector_store.create_vectorstore(all_docs)

        graph = GraphBuilder(
            retriever=vector_store.get_retriever(),
            llm=Config.get_llm()
        )
        graph.build()

        return graph, len(all_docs)

    except Exception as e:
        st.error(f"Ingestion failed: {e}")
        return None, 0
109
 
110
- # -------------------------------------------------
111
- # Main app
112
- # -------------------------------------------------
113
def main():
    """Render the sidebar ingestion flow and the multi-PDF chat UI."""
    init_session_state()

    # ---------------- Sidebar ----------------
    with st.sidebar:
        st.header("πŸ“„ Document Ingestion")

        uploaded_files = st.file_uploader(
            "Upload PDF files",
            type="pdf",
            accept_multiple_files=True
        )

        if st.button("πŸ› οΈ Build Knowledge Base", type="primary"):
            if not uploaded_files:
                st.warning("Upload at least one PDF.")
            else:
                new_hash = compute_files_hash(uploaded_files)

                # Skip re-indexing when the same PDFs were already built.
                if new_hash == st.session_state.kb_hash:
                    st.info("Knowledge base already built for these PDFs.")
                else:
                    with st.spinner("Indexing PDFs and building agent graph..."):
                        rag, chunks = process_documents(uploaded_files)

                    if rag:
                        st.session_state.rag_system = rag
                        st.session_state.processed_files = [f.name for f in uploaded_files]
                        st.session_state.kb_hash = new_hash

                        msg = (
                            f"βœ… Knowledge base ready!\n\n"
                            f"Indexed **{chunks} chunks** from:\n"
                            + "\n".join(f"- {f}" for f in st.session_state.processed_files)
                        )
                        st.session_state.messages.append({"role": "assistant", "content": msg})
                        st.rerun()

        if st.session_state.processed_files:
            st.markdown("---")
            st.subheader("πŸ“š Loaded PDFs")
            for f in st.session_state.processed_files:
                st.caption(f"βœ” {f}")

        if st.button("🧹 Clear Chat"):
            st.session_state.messages = [
                {"role": "assistant", "content": "Chat cleared. Ask anything about the loaded PDFs!"}
            ]
            st.rerun()

    # ---------------- Main Chat ----------------
    st.title("πŸ” Agentic Multi-PDF Chat")
    st.caption("Ask questions across all uploaded documents")

    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ask a question across all PDFs..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").markdown(prompt)

        if not st.session_state.rag_system:
            st.warning("Build the knowledge base first.")
            return

        with st.chat_message("assistant"):
            with st.spinner("Thinking across documents..."):
                result = st.session_state.rag_system.run(prompt)

            answer = result.get("answer", "No clear answer found.")
            st.markdown(answer)

            if result.get("retrieved_docs"):
                with st.expander("πŸ“Œ Sources"):
                    for i, doc in enumerate(result["retrieved_docs"], 1):
                        st.markdown(
                            f"**{i}. {doc.metadata.get('source', 'Unknown')} "
                            f"(Page {doc.metadata.get('page', 'N/A')})**"
                        )
                        # BUGFIX: only append an ellipsis when the snippet
                        # was actually truncated.
                        snippet = doc.page_content[:400]
                        if len(doc.page_content) > 400:
                            snippet += "..."
                        st.info(snippet)

        st.session_state.messages.append({"role": "assistant", "content": answer})


if __name__ == "__main__":
    main()
 
1
+ """Streamlit UI for Agentic RAG System - Simplified Version"""
2
+
3
  import streamlit as st
4
  from pathlib import Path
5
  import sys
6
+ import time
 
7
 
8
+ # Add src to path
 
 
9
  sys.path.append(str(Path(__file__).parent))
10
 
11
  from src.config.config import Config
 
13
  from src.vectorstore.vectorstore import VectorStore
14
  from src.graph_builder.graph_builder import GraphBuilder
15
 
16
+ # Page configuration
 
 
17
  st.set_page_config(
18
+ page_title="πŸ€– RAG Search",
19
+ page_icon="πŸ”",
20
+ layout="centered"
21
  )
22
 
23
+ # Simple CSS
 
 
24
  st.markdown("""
25
+ <style>
26
+ .stButton > button {
27
+ width: 100%;
28
+ background-color: #4CAF50;
29
+ color: white;
30
+ font-weight: bold;
31
+ }
32
+ </style>
33
  """, unsafe_allow_html=True)
34
 
 
 
 
35
def init_session_state():
    """Create the session-state keys this app depends on, if absent.

    Safe to call on every rerun: existing keys are never overwritten.
    """
    defaults = {
        "rag_system": None,
        "initialized": False,
        "history": [],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
+
44
@st.cache_resource
def initialize_rag():
    """Build the RAG pipeline once per process (cached by Streamlit).

    Chunks the content of the configured default URLs, indexes the
    chunks in a vector store, and compiles the agent graph.

    Returns:
        ``(graph_builder, num_documents)`` on success, or ``(None, 0)``
        on failure (the error is reported through ``st.error``).
    """
    try:
        llm = Config.get_llm()
        doc_processor = DocumentProcessor(
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=Config.CHUNK_OVERLAP
        )
        vector_store = VectorStore()

        # Ingest the configured default URLs into the vector store.
        documents = doc_processor.process_urls(Config.DEFAULT_URLS)
        vector_store.create_vectorstore(documents)

        # Wire retriever + LLM into the agent graph.
        graph_builder = GraphBuilder(
            retriever=vector_store.get_retriever(),
            llm=llm
        )
        graph_builder.build()

        return graph_builder, len(documents)
    except Exception as e:
        st.error(f"Failed to initialize: {str(e)}")
        return None, 0
76
 
 
 
 
77
def main():
    """Main application: one-shot Q&A UI over the default document set."""
    init_session_state()

    # Title
    st.title("πŸ” RAG Document Search")
    st.markdown("Ask questions about the loaded documents")

    # Build the (cached) RAG system on first run of this session.
    if not st.session_state.initialized:
        with st.spinner("Loading system..."):
            rag_system, num_chunks = initialize_rag()
        if rag_system:
            st.session_state.rag_system = rag_system
            st.session_state.initialized = True
            st.success(f"βœ… System ready! ({num_chunks} document chunks loaded)")

    st.markdown("---")

    # Search interface
    with st.form("search_form"):
        question = st.text_input(
            "Enter your question:",
            placeholder="What would you like to know?"
        )
        submit = st.form_submit_button("πŸ” Search")

    # Process search
    if submit and question:
        if st.session_state.rag_system:
            with st.spinner("Searching..."):
                start_time = time.time()
                result = st.session_state.rag_system.run(question)
                elapsed_time = time.time() - start_time

            # BUGFIX: don't assume the result dict always carries these
            # keys; a missing 'answer' previously raised KeyError and
            # crashed the rerun.
            answer = result.get("answer", "No answer produced.")
            retrieved = result.get("retrieved_docs", [])

            # Add to history
            st.session_state.history.append({
                'question': question,
                'answer': answer,
                'time': elapsed_time
            })

            # Display answer
            st.markdown("### πŸ’‘ Answer")
            st.success(answer)

            # Show retrieved docs in expander
            with st.expander("πŸ“„ Source Documents"):
                for i, doc in enumerate(retrieved, 1):
                    # BUGFIX: only mark truncation when it happened.
                    snippet = doc.page_content[:300]
                    if len(doc.page_content) > 300:
                        snippet += "..."
                    st.text_area(
                        f"Document {i}",
                        snippet,
                        height=100,
                        disabled=True
                    )

            st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")

    # Show history
    if st.session_state.history:
        st.markdown("---")
        st.markdown("### πŸ“œ Recent Searches")

        for item in reversed(st.session_state.history[-3:]):  # Show last 3
            with st.container():
                st.markdown(f"**Q:** {item['question']}")
                # Only append "..." when the stored answer was truncated.
                preview = item['answer'][:200]
                if len(item['answer']) > 200:
                    preview += "..."
                st.markdown(f"**A:** {preview}")
                st.caption(f"Time: {item['time']:.2f}s")
                st.markdown("")


if __name__ == "__main__":
    main()