Shubham170793 committed on
Commit
dc571c1
·
verified ·
1 Parent(s): 59b2329

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +32 -21
src/streamlit_app.py CHANGED
@@ -212,8 +212,19 @@ st.caption("Query SAP documentation and enterprise PDFs — powered by reasoning
212
  doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
213
 
214
  # ==========================================================
215
- # 📂 DOCUMENT HANDLING — POLISHED UI FLOW
216
  # ==========================================================
 
 
 
 
 
 
 
 
 
 
 
217
  if doc_choice == "-- Select --":
218
  st.info("⬅️ Select or upload a document to begin.")
219
  else:
@@ -234,31 +245,35 @@ else:
234
  with open(temp_path, "wb") as f:
235
  f.write(uploaded_file.getbuffer())
236
  else:
237
- st.stop() # Wait until file is uploaded
238
 
239
- # --- Real processing begins here ---
240
  if temp_path:
241
  doc_name = os.path.basename(temp_path)
 
 
242
 
243
- # Process only once per document
244
- if "doc_ready" not in st.session_state or st.session_state.get("last_doc") != doc_name:
245
  status = st.empty()
246
- status.info("📤 Upload complete — hang tight while we process your document...")
247
 
248
- # Step 1: Extract text
249
  text, toc, toc_source = extract_text_from_pdf(temp_path)
 
 
250
  status.info("📑 Parsing and chunking document...")
251
  chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
252
 
253
- # Step 2: Build embeddings + FAISS index
254
  status.info("🧠 Building embeddings and search index...")
255
  embeddings = cache_embeddings(doc_name, chunks, embed_chunks)
256
  index = build_faiss_index(embeddings)
257
 
258
- # Step 3: Final ready state
259
- status.success("✅ All set — your AI assistant is ready to help.")
260
 
261
- # Persist session data for reruns
262
  st.session_state.update({
263
  "text": text,
264
  "toc": toc,
@@ -266,32 +281,29 @@ else:
266
  "embeddings": embeddings,
267
  "index": index,
268
  "doc_ready": True,
269
- "last_doc": doc_name,
270
- "status_text": "📄 Document is ready for queries."
271
  })
272
 
273
- # Build question suggestions (once per doc)
274
  query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
275
  st.session_state["query_suggestions_fixed"] = query_suggestions
276
  st.session_state["user_query_input"] = ""
277
  st.session_state["selected_suggestion"] = None
278
  st.session_state["show_more"] = False
279
-
280
- # Refresh to cleanly show "ready" state
281
  st.rerun()
282
 
283
  else:
284
- # --- Reuse existing state (rerun-safe) ---
285
  text = st.session_state["text"]
286
  toc = st.session_state["toc"]
287
  chunks = st.session_state["chunks"]
288
  embeddings = st.session_state["embeddings"]
289
  index = st.session_state["index"]
290
  query_suggestions = st.session_state.get("query_suggestions_fixed", [])
291
-
292
  st.info(st.session_state.get("status_text", f"📄 {doc_name} is ready for queries."))
293
 
294
- # --- Ask the Assistant section ---
295
  st.markdown("### 💬 Ask the Assistant")
296
  if query_suggestions:
297
  visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
@@ -305,7 +317,6 @@ else:
305
  st.session_state["show_more"] = not st.session_state["show_more"]
306
  st.rerun()
307
 
308
- # --- Query input box ---
309
  user_query = st.text_input("Type your question or click one above:", key="user_query_input")
310
 
311
  if user_query.strip():
@@ -317,13 +328,13 @@ else:
317
 
318
  st.markdown("### 🤖 Assistant’s Answer")
319
 
320
- # Clean up answer format (bullets, bold)
321
  if not reasoning_mode and not answer.startswith("⚠️"):
322
  answer = re.sub(r"\*\*(.*?)\*\*", r"\1", answer)
323
  answer = re.sub(r"(^|\n)-\s*", r"\1<br>• ", answer)
324
  st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
325
 
326
 
 
327
  # ==========================================================
328
  # 🎨 Optional Sidebar Scroll Styling (keeps it clean)
329
  # ==========================================================
 
212
  doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
213
 
214
  # ==========================================================
215
+ # 📂 DOCUMENT HANDLING — CLEAN, ACCURATE, AND BYTE-AWARE
216
  # ==========================================================
217
+ import hashlib
218
+
219
+ def _hash_content(file_path):
220
+ """Generate a short SHA256 hash of the file's actual binary content."""
221
+ hasher = hashlib.sha256()
222
+ with open(file_path, "rb") as f:
223
+ while chunk := f.read(8192):
224
+ hasher.update(chunk)
225
+ return hasher.hexdigest()[:12] # keep short hash for filenames
226
+
227
+ # --- Document selection ---
228
  if doc_choice == "-- Select --":
229
  st.info("⬅️ Select or upload a document to begin.")
230
  else:
 
245
  with open(temp_path, "wb") as f:
246
  f.write(uploaded_file.getbuffer())
247
  else:
248
+ st.stop()
249
 
250
+ # --- Start processing if file exists ---
251
  if temp_path:
252
  doc_name = os.path.basename(temp_path)
253
+ file_hash = _hash_content(temp_path)
254
+ doc_identifier = f"{doc_name}_{file_hash}" # unique per content
255
 
256
+ # πŸ” Reprocess only if new or changed document
257
+ if "doc_ready" not in st.session_state or st.session_state.get("last_doc") != doc_identifier:
258
  status = st.empty()
259
+ status.info("📤 Upload complete — reading document...")
260
 
261
+ # 🧩 Step 1: Extract text and TOC
262
  text, toc, toc_source = extract_text_from_pdf(temp_path)
263
+
264
+ # 🧩 Step 2: Chunk the text
265
  status.info("📑 Parsing and chunking document...")
266
  chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
267
 
268
+ # 🧩 Step 3: Embed and index
269
  status.info("🧠 Building embeddings and search index...")
270
  embeddings = cache_embeddings(doc_name, chunks, embed_chunks)
271
  index = build_faiss_index(embeddings)
272
 
273
+ # 🧩 Step 4: Final success message
274
+ status.success("✅ Document processed successfully — all set to query your assistant!")
275
 
276
+ # 🧠 Store everything in session state
277
  st.session_state.update({
278
  "text": text,
279
  "toc": toc,
 
281
  "embeddings": embeddings,
282
  "index": index,
283
  "doc_ready": True,
284
+ "last_doc": doc_identifier,
285
+ "status_text": "✅ Document processed successfully — all set to query your assistant!"
286
  })
287
 
288
+ # Build fresh suggestions and rerun
289
  query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
290
  st.session_state["query_suggestions_fixed"] = query_suggestions
291
  st.session_state["user_query_input"] = ""
292
  st.session_state["selected_suggestion"] = None
293
  st.session_state["show_more"] = False
 
 
294
  st.rerun()
295
 
296
  else:
297
+ # ♻️ Reuse cached session state (same file)
298
  text = st.session_state["text"]
299
  toc = st.session_state["toc"]
300
  chunks = st.session_state["chunks"]
301
  embeddings = st.session_state["embeddings"]
302
  index = st.session_state["index"]
303
  query_suggestions = st.session_state.get("query_suggestions_fixed", [])
 
304
  st.info(st.session_state.get("status_text", f"📄 {doc_name} is ready for queries."))
305
 
306
+ # --- Ask section ---
307
  st.markdown("### 💬 Ask the Assistant")
308
  if query_suggestions:
309
  visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
 
317
  st.session_state["show_more"] = not st.session_state["show_more"]
318
  st.rerun()
319
 
 
320
  user_query = st.text_input("Type your question or click one above:", key="user_query_input")
321
 
322
  if user_query.strip():
 
328
 
329
  st.markdown("### 🤖 Assistant’s Answer")
330
 
 
331
  if not reasoning_mode and not answer.startswith("⚠️"):
332
  answer = re.sub(r"\*\*(.*?)\*\*", r"\1", answer)
333
  answer = re.sub(r"(^|\n)-\s*", r"\1<br>• ", answer)
334
  st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
335
 
336
 
337
+
338
  # ==========================================================
339
  # 🎨 Optional Sidebar Scroll Styling (keeps it clean)
340
  # ==========================================================