Dinesh310 committed on
Commit
23a41be
·
verified ·
1 Parent(s): 35bacbf

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +166 -145
streamlit_app.py CHANGED
@@ -1,151 +1,172 @@
1
- """Streamlit UI for Agentic RAG System - Simplified Version"""
2
 
3
  import streamlit as st
4
- from pathlib import Path
5
- import sys
6
- import time
7
-
8
- # Add src to path
9
- sys.path.append(str(Path(__file__).parent))
10
-
11
- from src.config.config import Config
12
- from src.document_ingestion.document_processor import DocumentProcessor
13
- from src.vectorstore.vectorstore import VectorStore
14
- from src.graph_builder.graph_builder import GraphBuilder
15
-
16
# Page configuration. The emoji literals were encoding-garbled in this copy
# of the file and are restored here to the characters they decode to.
st.set_page_config(
    page_title="🤖 RAG Search",
    page_icon="🔍",
    layout="centered",
)

# Simple CSS: make every Streamlit button a full-width green button with a
# bold white label. The <style> block starts at column 0 so that Markdown
# treats it as raw HTML rather than as an indented code block.
st.markdown(
    """
<style>
.stButton > button {
    width: 100%;
    background-color: #4CAF50;
    color: white;
    font-weight: bold;
}
</style>
""",
    unsafe_allow_html=True,
)
34
-
35
def init_session_state():
    """Seed ``st.session_state`` with the keys the app reads.

    Keys that already exist are left alone, so values survive Streamlit
    reruns. The keys are ``rag_system`` (``None``), ``initialized``
    (``False``) and ``history`` (an empty list).
    """
    # The tuple is rebuilt on every call, so each session receives its own
    # fresh list object for ``history``.
    defaults = (
        ("rag_system", None),
        ("initialized", False),
        ("history", []),
    )
    for key, value in defaults:
        if key not in st.session_state:
            st.session_state[key] = value
43
-
44
@st.cache_resource
def _build_rag_system():
    """Build the RAG pipeline and return ``(graph_builder, num_documents)``.

    This is the cached part of the initialisation. Exceptions are allowed
    to propagate on purpose: a call that raises is not stored by
    ``st.cache_resource``, so a failed build is attempted again on the next
    rerun instead of a failure result being cached.
    """
    # Initialize components
    llm = Config.get_llm()
    doc_processor = DocumentProcessor(
        chunk_size=Config.CHUNK_SIZE,
        chunk_overlap=Config.CHUNK_OVERLAP,
    )
    vector_store = VectorStore()

    # Process the default URLs into documents and index them
    documents = doc_processor.process_urls(Config.DEFAULT_URLS)
    vector_store.create_vectorstore(documents)

    # Build graph
    graph_builder = GraphBuilder(
        retriever=vector_store.get_retriever(),
        llm=llm,
    )
    graph_builder.build()

    return graph_builder, len(documents)


def initialize_rag():
    """Initialize the RAG system (cached on success).

    Returns:
        ``(graph_builder, num_documents)`` on success, or ``(None, 0)`` after
        reporting the error with ``st.error`` when the build fails.

    The error handling lives outside the cached builder. Previously the
    ``try/except`` sat inside the ``@st.cache_resource`` function, so the
    ``(None, 0)`` failure value itself was cached and every later rerun
    received it without retrying.
    """
    try:
        return _build_rag_system()
    except Exception as e:  # UI boundary: report the error instead of crashing
        st.error(f"Failed to initialize: {str(e)}")
        return None, 0


# Keep the cache-control handle that callers had when ``initialize_rag`` was
# itself the decorated function.
initialize_rag.clear = _build_rag_system.clear
76
-
77
def _preview(text, limit):
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main():
    """Render the search page: initialise the system, answer a question, show history.

    Reads and writes ``st.session_state`` (``rag_system``, ``initialized``,
    ``history``). Each answered question is appended to ``history`` as a
    dict with ``question``, ``answer`` and ``time`` (seconds) keys.
    """
    init_session_state()

    # Title
    st.title("🔍 RAG Document Search")
    st.markdown("Ask questions about the loaded documents")

    # Initialize system (only until it has succeeded once in this session)
    if not st.session_state.initialized:
        with st.spinner("Loading system..."):
            rag_system, num_chunks = initialize_rag()
            if rag_system:
                st.session_state.rag_system = rag_system
                st.session_state.initialized = True
                st.success(f"✅ System ready! ({num_chunks} document chunks loaded)")

    st.markdown("---")

    # Search interface
    with st.form("search_form"):
        question = st.text_input(
            "Enter your question:",
            placeholder="What would you like to know?",
        )
        submit = st.form_submit_button("🔍 Search")

    # Process search
    if submit and question:
        if not st.session_state.rag_system:
            # Previously a submitted question was silently dropped here.
            st.warning("The RAG system is not initialized, so the question cannot be answered.")
        else:
            with st.spinner("Searching..."):
                # perf_counter is monotonic, so the measured duration cannot
                # be skewed by wall-clock adjustments.
                start_time = time.perf_counter()

                # Get answer
                result = st.session_state.rag_system.run(question)

                elapsed_time = time.perf_counter() - start_time

                # Add to history
                st.session_state.history.append({
                    'question': question,
                    'answer': result['answer'],
                    'time': elapsed_time,
                })

                # Display answer
                st.markdown("### 💡 Answer")
                st.success(result['answer'])

                # Show retrieved docs in expander
                with st.expander("📄 Source Documents"):
                    for i, doc in enumerate(result['retrieved_docs'], 1):
                        st.text_area(
                            f"Document {i}",
                            _preview(doc.page_content, 300),
                            height=100,
                            disabled=True,
                        )

                st.caption(f"⏱️ Response time: {elapsed_time:.2f} seconds")

    # Show history
    if st.session_state.history:
        st.markdown("---")
        st.markdown("### 📜 Recent Searches")

        for item in reversed(st.session_state.history[-3:]):  # Show last 3
            with st.container():
                st.markdown(f"**Q:** {item['question']}")
                st.markdown(f"**A:** {_preview(item['answer'], 200)}")
                st.caption(f"Time: {item['time']:.2f}s")
                st.markdown("")


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
 
3
  import streamlit as st
4
+ from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
5
+
6
+ from src.langraph_rag_backend import (
7
+ chatbot,
8
+ ingest_pdf,
9
+ retrieve_all_threads,
10
+ thread_document_metadata,
 
 
 
 
 
 
 
 
 
 
11
  )
12
 
13
+
14
+ # =========================== Utilities ===========================
15
def generate_thread_id():
    """Return a new random (version 4) ``uuid.UUID`` to identify a chat thread."""
    new_id = uuid.uuid4()
    return new_id
17
+
18
+
19
def reset_chat():
    """Switch the session to a brand-new, empty conversation.

    Clears the displayed message history, stores a freshly generated thread
    id as the active ``thread_id`` and registers it in ``chat_threads``.
    """
    st.session_state["message_history"] = []
    fresh_id = generate_thread_id()
    st.session_state["thread_id"] = fresh_id
    add_thread(fresh_id)
24
+
25
+
26
def add_thread(thread_id):
    """Append *thread_id* to the session's ``chat_threads`` unless it is already listed."""
    known_threads = st.session_state["chat_threads"]
    if thread_id in known_threads:
        return
    known_threads.append(thread_id)
29
+
30
+
31
def load_conversation(thread_id):
    """Return the messages stored for *thread_id*, or ``[]`` when there are none.

    Asks ``chatbot.get_state`` for the state saved under this thread id and
    reads the ``"messages"`` entry of its ``values`` mapping.
    """
    run_config = {"configurable": {"thread_id": thread_id}}
    snapshot = chatbot.get_state(config=run_config)
    return snapshot.values.get("messages", [])
34
+
35
+
36
# ======================= Session Initialization ===================
# Each default is produced lazily by its factory, and only when the key is
# missing, so existing values survive reruns and ``retrieve_all_threads`` is
# not called again once ``chat_threads`` is populated.
_session_defaults = {
    "message_history": list,
    "thread_id": generate_thread_id,
    "chat_threads": retrieve_all_threads,
    "ingested_docs": dict,
}
for _key, _factory in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _factory()

# Make sure the active thread is listed among the known threads.
add_thread(st.session_state["thread_id"])

# String form of the active thread id; used below as the key for per-thread
# document bookkeeping and in the chatbot run config.
thread_key = str(st.session_state["thread_id"])
# Mapping of uploaded filename -> ingestion summary for the active thread.
thread_docs = st.session_state["ingested_docs"].setdefault(thread_key, {})
# Reversed copy of the thread list, so the last-added thread is shown first.
threads = st.session_state["chat_threads"][::-1]
# Set by the sidebar when the user clicks one of the listed conversations.
selected_thread = None
55
+
56
# ============================ Sidebar ============================
st.sidebar.title("LangGraph PDF Chatbot")
st.sidebar.markdown(f"**Thread ID:** `{thread_key}`")

if st.sidebar.button("New Chat", use_container_width=True):
    reset_chat()
    st.rerun()

# Reserve the banner's position now but fill it in after the uploads have
# been processed. Rendering it up front (as before) showed stale information:
# a PDF indexed in this run was not reflected until the next rerun.
doc_banner = st.sidebar.empty()

uploaded_pdfs = st.sidebar.file_uploader(
    "Upload a PDF for this chat", type=["pdf"], accept_multiple_files=True
)
for uploaded_pdf in uploaded_pdfs or []:
    if uploaded_pdf.name in thread_docs:
        st.sidebar.info(f"`{uploaded_pdf.name}` already processed for this chat.")
        continue

    with st.sidebar.status("Indexing PDF…", expanded=True) as status_box:
        try:
            summary = ingest_pdf(
                uploaded_pdf.getvalue(),
                thread_id=thread_key,
                filename=uploaded_pdf.name,
            )
        except Exception as exc:  # UI boundary: report and keep the page usable
            # The file is not recorded in thread_docs, so indexing is tried
            # again on the next rerun while it stays in the uploader.
            status_box.update(
                label=f"Failed to index `{uploaded_pdf.name}`: {exc}",
                state="error",
                expanded=True,
            )
            continue
        thread_docs[uploaded_pdf.name] = summary
        status_box.update(label="✅ PDF indexed", state="complete", expanded=False)

# Fill the reserved banner slot using the now up-to-date document map.
if thread_docs:
    latest_doc = list(thread_docs.values())[-1]
    doc_banner.success(
        f"Using `{latest_doc.get('filename')}` "
        f"({latest_doc.get('chunks')} chunks from {latest_doc.get('documents')} pages)"
    )
else:
    doc_banner.info("No PDF indexed yet.")

st.sidebar.subheader("Past conversations")
if not threads:
    st.sidebar.write("No past conversations yet.")
else:
    for thread_id in threads:
        # One button per known thread; the click is acted on at the end of
        # the script, where ``selected_thread`` is checked.
        if st.sidebar.button(str(thread_id), key=f"side-thread-{thread_id}"):
            selected_thread = thread_id
95
+
96
# ============================ Main Layout ========================
def _chunk_text(content):
    """Return the plain text carried by a message ``content`` value.

    ``content`` may be a string or a list whose items are strings or
    content-block dicts. Only strings and blocks with ``type == "text"``
    contribute; anything else (e.g. tool-call blocks) yields no text.
    """
    if isinstance(content, str):
        return content
    parts = []
    for block in content or []:
        if isinstance(block, str):
            parts.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            parts.append(block.get("text", ""))
    return "".join(parts)


st.title("Multi Utility Chatbot")

# Chat area: replay the messages already recorded for this session.
for message in st.session_state["message_history"]:
    with st.chat_message(message["role"]):
        st.text(message["content"])

user_input = st.chat_input("Ask about your document or use tools")

if user_input:
    st.session_state["message_history"].append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.text(user_input)

    CONFIG = {
        "configurable": {"thread_id": thread_key},
        "metadata": {"thread_id": thread_key},
        "run_name": "chat_turn",
    }

    with st.chat_message("assistant"):
        # Mutable holder so the generator below can create the status box
        # lazily and the code after the stream can still reach it.
        status_holder = {"box": None}

        def ai_only_stream():
            """Yield the assistant's text chunks; surface tool activity in a status box."""
            for message_chunk, _ in chatbot.stream(
                {"messages": [HumanMessage(content=user_input)]},
                config=CONFIG,
                stream_mode="messages",
            ):
                if isinstance(message_chunk, ToolMessage):
                    tool_name = getattr(message_chunk, "name", "tool")
                    if status_holder["box"] is None:
                        status_holder["box"] = st.status(
                            f"🔧 Using `{tool_name}` …", expanded=True
                        )
                    else:
                        status_holder["box"].update(
                            label=f"🔧 Using `{tool_name}` …",
                            state="running",
                            expanded=True,
                        )
                elif isinstance(message_chunk, AIMessage):
                    # Stream text only. Previously the raw ``content`` was
                    # yielded, which can be empty or a list of content blocks
                    # rather than a string.
                    text = _chunk_text(message_chunk.content)
                    if text:
                        yield text

        ai_message = st.write_stream(ai_only_stream())

        if status_holder["box"] is not None:
            status_holder["box"].update(
                label="✅ Tool finished", state="complete", expanded=False
            )

    st.session_state["message_history"].append(
        {"role": "assistant", "content": ai_message}
    )

    doc_meta = thread_document_metadata(thread_key)
    if doc_meta:
        st.caption(
            f"Document indexed: {doc_meta.get('filename')} "
            f"(chunks: {doc_meta.get('chunks')}, pages: {doc_meta.get('documents')})"
        )

st.divider()
161
+
162
# A sidebar thread button was clicked during this run: make that thread the
# active one, rebuild the displayed history from its stored messages, and
# rerun so the page is redrawn for the new thread.
if selected_thread:
    st.session_state["thread_id"] = selected_thread
    messages = load_conversation(selected_thread)

    temp_messages = []
    for msg in messages:
        # Skip tool results and messages without content. Previously every
        # non-human message was replayed as an assistant bubble, so restored
        # threads showed tool payloads and blank entries that the live chat
        # never displays.
        if isinstance(msg, ToolMessage) or not msg.content:
            continue
        role = "user" if isinstance(msg, HumanMessage) else "assistant"
        temp_messages.append({"role": role, "content": msg.content})
    st.session_state["message_history"] = temp_messages
    # Ensure the thread has a (possibly empty) document map for the sidebar.
    st.session_state["ingested_docs"].setdefault(str(selected_thread), {})
    st.rerun()