Dinesh310 committed on
Commit
6b98cd9
·
verified ·
1 Parent(s): d143793

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +65 -72
streamlit_app.py CHANGED
@@ -13,27 +13,22 @@ from src.graph_builder.graph_builder import GraphBuilder
13
 
14
  # Page configuration
15
  st.set_page_config(
16
- page_title="πŸ€– PDF Agentic RAG",
17
- page_icon="πŸ“„",
18
  layout="wide"
19
  )
20
 
21
- # Custom CSS for a cleaner look
22
- st.markdown("""
23
- <style>
24
- .stAlert { margin-top: 1rem; }
25
- .stButton > button { width: 100%; border-radius: 5px; height: 3em; }
26
- </style>
27
- """, unsafe_allow_html=True)
28
-
29
  def init_session_state():
30
- """Initialize session state variables"""
31
  if 'rag_system' not in st.session_state:
32
  st.session_state.rag_system = None
 
 
 
 
 
33
  if 'processed_files' not in st.session_state:
34
  st.session_state.processed_files = []
35
- if 'history' not in st.session_state:
36
- st.session_state.history = []
37
 
38
  def process_new_documents(uploaded_files):
39
  """Processes uploaded PDFs and initializes/updates the RAG system"""
@@ -43,28 +38,24 @@ def process_new_documents(uploaded_files):
43
  chunk_overlap=Config.CHUNK_OVERLAP
44
  )
45
 
46
- # 1. Process PDFs into chunks
47
- # Assuming your DocumentProcessor has a method for uploaded files or local paths
48
- # If not, you may need to save them to a temp directory first
49
  all_docs = []
50
  for uploaded_file in uploaded_files:
51
- # Save temp file
52
  temp_path = Path(f"temp_{uploaded_file.name}")
53
  with open(temp_path, "wb") as f:
54
  f.write(uploaded_file.getvalue())
55
 
56
- # Process (Update this call based on your DocumentProcessor's actual method)
57
  docs = doc_processor.process_pdf(str(temp_path))
58
  all_docs.extend(docs)
59
 
60
  # Cleanup temp file
61
  temp_path.unlink()
62
 
63
- # 2. Initialize components
64
  vector_store = VectorStore()
65
  vector_store.create_vectorstore(all_docs)
66
 
67
- # 3. Build Graph
68
  graph_builder = GraphBuilder(
69
  retriever=vector_store.get_retriever(),
70
  llm=Config.get_llm()
@@ -81,75 +72,77 @@ def main():
81
 
82
  # --- Sidebar: Document Upload ---
83
  with st.sidebar:
84
- st.title("πŸ“ Document Management")
85
  uploaded_files = st.file_uploader(
86
- "Upload PDF documents",
87
  type="pdf",
88
  accept_multiple_files=True
89
  )
90
 
91
- process_btn = st.button("πŸš€ Process Documents")
92
-
93
- if process_btn and uploaded_files:
94
- with st.spinner("Analyzing PDFs and building index..."):
95
  rag_system, num_chunks = process_new_documents(uploaded_files)
96
  if rag_system:
97
  st.session_state.rag_system = rag_system
98
  st.session_state.processed_files = [f.name for f in uploaded_files]
99
- st.success(f"Indexed {len(uploaded_files)} files ({num_chunks} chunks)")
 
 
 
 
 
100
 
101
  if st.session_state.processed_files:
102
  st.markdown("---")
103
- st.markdown("**Currently Loaded:**")
104
  for f in st.session_state.processed_files:
105
- st.caption(f"βœ… {f}")
 
 
 
106
 
107
- # --- Main UI: Search ---
108
- st.title("πŸ” Agentic RAG Search")
109
-
110
- if not st.session_state.rag_system:
111
- st.info("πŸ‘ˆ Please upload and process PDF documents in the sidebar to start searching.")
112
- return
113
 
114
- # Search interface
115
- with st.container():
116
- question = st.text_input("Ask a question about your documents:")
117
- search_cols = st.columns([1, 4])
118
- submit = search_cols[0].button("Search")
119
 
120
- if (submit or question) and question:
121
- with st.spinner("Agent is thinking..."):
122
- start_time = time.time()
123
-
124
- # Execute RAG pipeline
125
- result = st.session_state.rag_system.run(question)
126
-
127
- elapsed_time = time.time() - start_time
128
-
129
- # Update History
130
- st.session_state.history.append({
131
- 'question': question,
132
- 'answer': result['answer'],
133
- 'time': elapsed_time
134
- })
135
-
136
- # Display results
137
- st.markdown("### πŸ’‘ Answer")
138
- st.write(result['answer'])
139
-
140
- with st.expander("πŸ“„ View Source Context"):
141
- for i, doc in enumerate(result.get('retrieved_docs', []), 1):
142
- st.markdown(f"**Source {i}:**")
143
- st.info(doc.page_content)
144
 
145
- # --- History Section ---
146
- if st.session_state.history:
147
- st.markdown("---")
148
- st.subheader("πŸ“œ Search History")
149
- for item in reversed(st.session_state.history):
150
- with st.expander(f"Q: {item['question']}"):
151
- st.write(item['answer'])
152
- st.caption(f"Response time: {item['time']:.2f}s")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  if __name__ == "__main__":
155
  main()
 
13
 
14
  # Page configuration
15
  st.set_page_config(
16
+ page_title="πŸ€– PDF Agentic Chat",
17
+ page_icon="πŸ’¬",
18
  layout="wide"
19
  )
20
 
 
 
 
 
 
 
 
 
21
  def init_session_state():
22
+ """Initialize session state variables for chat history and system state"""
23
  if 'rag_system' not in st.session_state:
24
  st.session_state.rag_system = None
25
+ if 'messages' not in st.session_state:
26
+ # Initialize with a greeting
27
+ st.session_state.messages = [
28
+ {"role": "assistant", "content": "Hi! Upload some PDFs in the sidebar, and I'll help you analyze them."}
29
+ ]
30
  if 'processed_files' not in st.session_state:
31
  st.session_state.processed_files = []
 
 
32
 
33
  def process_new_documents(uploaded_files):
34
  """Processes uploaded PDFs and initializes/updates the RAG system"""
 
38
  chunk_overlap=Config.CHUNK_OVERLAP
39
  )
40
 
 
 
 
41
  all_docs = []
42
  for uploaded_file in uploaded_files:
43
+ # Save temp file for processing
44
  temp_path = Path(f"temp_{uploaded_file.name}")
45
  with open(temp_path, "wb") as f:
46
  f.write(uploaded_file.getvalue())
47
 
48
+ # Use the processor to extract text and chunk
49
  docs = doc_processor.process_pdf(str(temp_path))
50
  all_docs.extend(docs)
51
 
52
  # Cleanup temp file
53
  temp_path.unlink()
54
 
55
+ # Build the vector store and graph
56
  vector_store = VectorStore()
57
  vector_store.create_vectorstore(all_docs)
58
 
 
59
  graph_builder = GraphBuilder(
60
  retriever=vector_store.get_retriever(),
61
  llm=Config.get_llm()
 
72
 
73
  # --- Sidebar: Document Upload ---
74
  with st.sidebar:
75
+ st.title("πŸ“ Document Portal")
76
  uploaded_files = st.file_uploader(
77
+ "Upload PDFs",
78
  type="pdf",
79
  accept_multiple_files=True
80
  )
81
 
82
+ if st.button("πŸš€ Index Documents") and uploaded_files:
83
+ with st.spinner("Processing documents..."):
 
 
84
  rag_system, num_chunks = process_new_documents(uploaded_files)
85
  if rag_system:
86
  st.session_state.rag_system = rag_system
87
  st.session_state.processed_files = [f.name for f in uploaded_files]
88
+ st.success(f"Successfully indexed {num_chunks} chunks.")
89
+ # Add a status message to chat
90
+ st.session_state.messages.append({
91
+ "role": "assistant",
92
+ "content": f"I've finished reading: {', '.join(st.session_state.processed_files)}. What would you like to know?"
93
+ })
94
 
95
  if st.session_state.processed_files:
96
  st.markdown("---")
97
+ st.markdown("**Active Documents:**")
98
  for f in st.session_state.processed_files:
99
+ st.caption(f"πŸ“„ {f}")
100
+ if st.button("Clear Chat History"):
101
+ st.session_state.messages = [{"role": "assistant", "content": "Chat history cleared. How can I help?"}]
102
+ st.rerun()
103
 
104
+ # --- Main Chat Interface ---
105
+ st.title("πŸ’¬ PDF AI Assistant")
 
 
 
 
106
 
107
+ # Display chat messages from history on app rerun
108
+ for message in st.session_state.messages:
109
+ with st.chat_message(message["role"]):
110
+ st.markdown(message["content"])
 
111
 
112
+ # React to user input
113
+ if prompt := st.chat_input("Ask a question about your documents..."):
114
+ # Display user message in chat message container
115
+ st.chat_message("user").markdown(prompt)
116
+ # Add user message to chat history
117
+ st.session_state.messages.append({"role": "user", "content": prompt})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # Generate response
120
+ if st.session_state.rag_system:
121
+ with st.chat_message("assistant"):
122
+ with st.spinner("Thinking..."):
123
+ try:
124
+ # Run the Agentic RAG pipeline
125
+ result = st.session_state.rag_system.run(prompt)
126
+ response = result['answer']
127
+
128
+ # Display response
129
+ st.markdown(response)
130
+
131
+ # Optional: Show sources in an expander inside the bubble
132
+ if result.get('retrieved_docs'):
133
+ with st.expander("View Sources"):
134
+ for i, doc in enumerate(result['retrieved_docs'], 1):
135
+ st.markdown(f"**Source {i}:**\n{doc.page_content[:500]}...")
136
+
137
+ # Add assistant response to chat history
138
+ st.session_state.messages.append({"role": "assistant", "content": response})
139
+ except Exception as e:
140
+ error_msg = f"I encountered an error: {str(e)}"
141
+ st.error(error_msg)
142
+ st.session_state.messages.append({"role": "assistant", "content": error_msg})
143
+ else:
144
+ with st.chat_message("assistant"):
145
+ st.warning("Please upload and index some PDFs in the sidebar first!")
146
 
147
  if __name__ == "__main__":
148
  main()