Shubham170793 committed on
Commit
f12246b
·
verified ·
1 Parent(s): c216a27

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +74 -44
src/streamlit_app.py CHANGED
@@ -158,6 +158,30 @@ with st.sidebar:
158
  st.markdown("---")
159
  st.caption("✨ Built by Shubham Sharma")
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  # ==========================================================
162
  # 🧠 SESSION STATE
163
  # ==========================================================
@@ -185,50 +209,78 @@ st.caption("Query SAP documentation and enterprise PDFs — powered by reasoning
185
  doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
186
 
187
  # ==========================================================
188
- # 📂 DOCUMENT HANDLING
189
  # ==========================================================
190
  if doc_choice == "-- Select --":
191
  st.info("⬅️ Select or upload a document to begin.")
192
  else:
 
 
 
193
  if doc_choice == "Sample PDF":
194
  temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
195
- st.success("📘 Sample document loaded successfully — you can start asking your questions below.")
196
  else:
197
- uploaded_file = st.file_uploader("", type="pdf", label_visibility="collapsed")
 
 
 
 
198
  if uploaded_file:
199
  temp_path = os.path.join("/tmp", uploaded_file.name)
200
  with open(temp_path, "wb") as f:
201
  f.write(uploaded_file.getbuffer())
202
- st.success("✅ Document processed successfully — you can start asking your questions below.")
 
 
 
 
 
 
 
 
203
  else:
204
- temp_path = None
205
 
 
206
  if temp_path:
207
- with st.spinner("🔍 Processing document..."):
208
- text, toc, toc_source = extract_text_from_pdf(temp_path)
209
- chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
 
 
 
 
 
 
 
 
 
210
 
211
- with st.spinner("⚙️ Building search index..."):
212
- embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
213
- index = build_faiss_index(embeddings)
 
 
 
214
 
 
215
  doc_name = os.path.basename(temp_path)
216
  if st.session_state["last_doc"] != doc_name:
217
  query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
218
- st.session_state["query_suggestions_fixed"] = query_suggestions
219
- st.session_state["last_doc"] = doc_name
220
- st.session_state["user_query_input"] = ""
221
- st.session_state["selected_suggestion"] = None
222
- st.session_state["show_more"] = False
 
 
223
  st.rerun()
224
  else:
225
  query_suggestions = st.session_state["query_suggestions_fixed"]
226
 
227
- # ----------------------------------------------------------
228
- # 💬 ASK SECTION
229
- # ----------------------------------------------------------
230
  st.markdown("### 💬 Ask the Assistant")
231
-
232
  if query_suggestions:
233
  visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
234
  cols = st.columns(min(3, len(visible)))
@@ -243,14 +295,12 @@ else:
243
 
244
  user_query = st.text_input("Type your question or click one above:", key="user_query_input")
245
 
246
- # ----------------------------------------------------------
247
- # 💡 RESPONSE SECTION
248
- # ----------------------------------------------------------
249
  if user_query.strip():
250
  reasoning_mode = mode == "Extended (Document + General)"
251
  with st.spinner("💭 Generating your answer..."):
252
  retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
253
  answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
 
254
 
255
  st.markdown("### 🤖 Assistant’s Answer")
256
 
@@ -259,27 +309,7 @@ else:
259
  answer = re.sub(r"(^|\n)-\s*", r"\1<br>• ", answer)
260
  st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
261
 
262
- # ----------------------------------------------------------
263
- # 🧩 SIDEBAR — COLLAPSIBLE DEVELOPER INSIGHTS
264
- # ----------------------------------------------------------
265
- if show_dev:
266
- with st.sidebar:
267
- st.markdown("---")
268
- with st.expander("🧩 Developer Insights", expanded=False):
269
- st.markdown("**Context Chunks (Retrieved):**")
270
- for i, r in enumerate(retrieved, start=1):
271
- st.markdown(f"- **Chunk {i}:** {r}")
272
-
273
- if toc:
274
- st.markdown("---")
275
- st.markdown("**Document Sections (TOC):**")
276
- toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
277
- st.text_area("", toc_text, height=120)
278
-
279
- st.markdown("---")
280
- st.markdown("**Document Preview:**")
281
- st.text_area("", text[:1000], height=120)
282
- st.caption(f"{len(chunks)} chunks processed.")
283
 
284
  # ==========================================================
285
  # 🎨 Optional Sidebar Scroll Styling (keeps it clean)
 
158
  st.markdown("---")
159
  st.caption("✨ Built by Shubham Sharma")
160
 
161
+ # 🧩 Developer Insights (Moved up here from main block)
162
+ if show_dev:
163
+ st.markdown("---")
164
+ with st.expander("🧩 Developer Insights", expanded=False):
165
+ st.markdown("**Retrieved Chunks (Context):**")
166
+ retrieved_chunks = st.session_state.get("retrieved", [])
167
+ for i, r in enumerate(retrieved_chunks, start=1):
168
+ st.markdown(f"- **Chunk {i}:** {r}")
169
+
170
+ toc_data = st.session_state.get("toc", [])
171
+ if toc_data:
172
+ st.markdown("---")
173
+ st.markdown("**Document Sections (TOC):**")
174
+ toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc_data])
175
+ st.text_area("", toc_text, height=120)
176
+
177
+ doc_text = st.session_state.get("text", "")
178
+ if doc_text:
179
+ st.markdown("---")
180
+ st.markdown("**Document Preview:**")
181
+ st.text_area("", doc_text[:1000], height=120)
182
+ st.caption(f"{len(st.session_state.get('chunks', []))} chunks processed.")
183
+
184
+
185
  # ==========================================================
186
  # 🧠 SESSION STATE
187
  # ==========================================================
 
209
  doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
210
 
211
  # ==========================================================
212
+ # 📂 DOCUMENT HANDLING — CLEAN + ACCURATE UI FLOW
213
  # ==========================================================
214
  if doc_choice == "-- Select --":
215
  st.info("⬅️ Select or upload a document to begin.")
216
  else:
217
+ temp_path = None
218
+
219
+ # --- File selection ---
220
  if doc_choice == "Sample PDF":
221
  temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
222
+ st.markdown("✅ **Sample PDF selected.** Preparing document...")
223
  else:
224
+ uploaded_file = st.file_uploader(
225
+ "Upload a PDF document (max 200MB):",
226
+ type="pdf",
227
+ label_visibility="collapsed"
228
+ )
229
  if uploaded_file:
230
  temp_path = os.path.join("/tmp", uploaded_file.name)
231
  with open(temp_path, "wb") as f:
232
  f.write(uploaded_file.getbuffer())
233
+
234
+ # File badge (nice compact indicator)
235
+ st.markdown(
236
+ f"<div style='background:#0f172a;border-radius:8px;"
237
+ f"padding:6px 10px;margin-top:4px;display:inline-block;'>📄 "
238
+ f"<b>{uploaded_file.name}</b> — {uploaded_file.size/1024:.1f} KB</div>",
239
+ unsafe_allow_html=True
240
+ )
241
+
242
  else:
243
+ st.stop() # wait until file uploaded before proceeding
244
 
245
+ # --- Real processing begins here ---
246
  if temp_path:
247
+ status = st.empty()
248
+ status.info("📤 Upload complete — reading document...")
249
+
250
+ text, toc, toc_source = extract_text_from_pdf(temp_path)
251
+ status.info("📑 Parsing and chunking document...")
252
+ chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
253
+
254
+ status.info("🧠 Building embeddings and search index...")
255
+ embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
256
+ index = build_faiss_index(embeddings)
257
+
258
+ status.success("✅ Document ready — you can start asking your questions below.")
259
 
260
+ # Save for dev sidebar
261
+ st.session_state.update({
262
+ "text": text,
263
+ "toc": toc,
264
+ "chunks": chunks
265
+ })
266
 
267
+ # --- Suggestion setup ---
268
  doc_name = os.path.basename(temp_path)
269
  if st.session_state["last_doc"] != doc_name:
270
  query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
271
+ st.session_state.update({
272
+ "query_suggestions_fixed": query_suggestions,
273
+ "last_doc": doc_name,
274
+ "user_query_input": "",
275
+ "selected_suggestion": None,
276
+ "show_more": False
277
+ })
278
  st.rerun()
279
  else:
280
  query_suggestions = st.session_state["query_suggestions_fixed"]
281
 
282
+ # --- Ask section ---
 
 
283
  st.markdown("### 💬 Ask the Assistant")
 
284
  if query_suggestions:
285
  visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
286
  cols = st.columns(min(3, len(visible)))
 
295
 
296
  user_query = st.text_input("Type your question or click one above:", key="user_query_input")
297
 
 
 
 
298
  if user_query.strip():
299
  reasoning_mode = mode == "Extended (Document + General)"
300
  with st.spinner("💭 Generating your answer..."):
301
  retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
302
  answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
303
+ st.session_state["retrieved"] = retrieved
304
 
305
  st.markdown("### 🤖 Assistant’s Answer")
306
 
 
309
  answer = re.sub(r"(^|\n)-\s*", r"\1<br>• ", answer)
310
  st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
311
 
312
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
 
314
  # ==========================================================
315
  # 🎨 Optional Sidebar Scroll Styling (keeps it clean)