Shubham170793 committed on
Commit
bb5b13b
·
verified ·
1 Parent(s): f12246b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +64 -76
src/streamlit_app.py CHANGED
@@ -181,24 +181,31 @@ with st.sidebar:
181
  st.text_area("", doc_text[:1000], height=120)
182
  st.caption(f"{len(st.session_state.get('chunks', []))} chunks processed.")
183
 
184
-
185
  # ==========================================================
186
- # 🧠 SESSION STATE
187
  # ==========================================================
188
- for key, val in {
189
- "user_query_input": "",
190
- "show_more": False,
191
- "selected_suggestion": None,
192
- "query_suggestions_fixed": None,
193
- "last_doc": None,
194
- }.items():
195
- if key not in st.session_state:
196
- st.session_state[key] = val
197
-
198
- def set_user_query(q, idx):
199
- st.session_state["user_query_input"] = q
200
- st.session_state["selected_suggestion"] = idx
201
- st.rerun()
 
 
 
 
 
 
 
 
202
 
203
  # ==========================================================
204
  # 📄 MAIN SECTION
@@ -209,78 +216,50 @@ st.caption("Query SAP documentation and enterprise PDFs β€” powered by reasoning
209
  doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
210
 
211
  # ==========================================================
212
- # 📂 DOCUMENT HANDLING — CLEAN + ACCURATE UI FLOW
213
  # ==========================================================
214
  if doc_choice == "-- Select --":
215
  st.info("⬅️ Select or upload a document to begin.")
216
  else:
217
- temp_path = None
218
-
219
- # --- File selection ---
220
  if doc_choice == "Sample PDF":
221
  temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
222
- st.markdown("βœ… **Sample PDF selected.** Preparing document...")
223
  else:
224
- uploaded_file = st.file_uploader(
225
- "Upload a PDF document (max 200MB):",
226
- type="pdf",
227
- label_visibility="collapsed"
228
- )
229
  if uploaded_file:
230
  temp_path = os.path.join("/tmp", uploaded_file.name)
231
  with open(temp_path, "wb") as f:
232
  f.write(uploaded_file.getbuffer())
233
-
234
- # File badge (nice compact indicator)
235
- st.markdown(
236
- f"<div style='background:#0f172a;border-radius:8px;"
237
- f"padding:6px 10px;margin-top:4px;display:inline-block;'>πŸ“„ "
238
- f"<b>{uploaded_file.name}</b> β€” {uploaded_file.size/1024:.1f} KB</div>",
239
- unsafe_allow_html=True
240
- )
241
-
242
  else:
243
- st.stop() # wait until file uploaded before proceeding
244
 
245
- # --- Real processing begins here ---
246
  if temp_path:
247
- status = st.empty()
248
- status.info("πŸ“€ Upload complete β€” reading document...")
249
-
250
- text, toc, toc_source = extract_text_from_pdf(temp_path)
251
- status.info("πŸ“‘ Parsing and chunking document...")
252
- chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
253
-
254
- status.info("🧠 Building embeddings and search index...")
255
- embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
256
- index = build_faiss_index(embeddings)
257
 
258
- status.success("βœ… Document ready β€” you can start asking your questions below.")
 
 
259
 
260
- # Save for dev sidebar
261
- st.session_state.update({
262
- "text": text,
263
- "toc": toc,
264
- "chunks": chunks
265
- })
266
-
267
- # --- Suggestion setup ---
268
  doc_name = os.path.basename(temp_path)
269
  if st.session_state["last_doc"] != doc_name:
270
  query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
271
- st.session_state.update({
272
- "query_suggestions_fixed": query_suggestions,
273
- "last_doc": doc_name,
274
- "user_query_input": "",
275
- "selected_suggestion": None,
276
- "show_more": False
277
- })
278
  st.rerun()
279
  else:
280
  query_suggestions = st.session_state["query_suggestions_fixed"]
281
 
282
- # --- Ask section ---
 
 
283
  st.markdown("### πŸ’¬ Ask the Assistant")
 
284
  if query_suggestions:
285
  visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
286
  cols = st.columns(min(3, len(visible)))
@@ -295,12 +274,14 @@ else:
295
 
296
  user_query = st.text_input("Type your question or click one above:", key="user_query_input")
297
 
 
 
 
298
  if user_query.strip():
299
  reasoning_mode = mode == "Extended (Document + General)"
300
  with st.spinner("πŸ’­ Generating your answer..."):
301
  retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
302
  answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
303
- st.session_state["retrieved"] = retrieved
304
 
305
  st.markdown("### πŸ€– Assistant’s Answer")
306
 
@@ -309,16 +290,23 @@ else:
309
  answer = re.sub(r"(^|\n)-\s*", r"\1<br>β€’ ", answer)
310
  st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
 
314
- # ==========================================================
315
- # 🎨 Optional Sidebar Scroll Styling (keeps it clean)
316
- # ==========================================================
317
- st.markdown("""
318
- <style>
319
- section[data-testid="stSidebar"] div.stExpander {
320
- max-height: 480px;
321
- overflow-y: auto;
322
- }
323
- </style>
324
- """, unsafe_allow_html=True)
 
181
  st.text_area("", doc_text[:1000], height=120)
182
  st.caption(f"{len(st.session_state.get('chunks', []))} chunks processed.")
183
 
 
184
  # ==========================================================
185
+ # 🧭 SIDEBAR (Old Clean Version)
186
  # ==========================================================
187
+ with st.sidebar:
188
+ st.markdown("### 🧭 Response Style")
189
+ mode = st.radio(
190
+ "",
191
+ ("Strict (Document-only)", "Extended (Document + General)"),
192
+ index=0,
193
+ help="Strict = answers only from the uploaded document. Extended = may include related general info.",
194
+ )
195
+
196
+ st.markdown("---")
197
+ show_dev = st.checkbox("Show advanced settings (for developers)", value=False)
198
+ if show_dev:
199
+ st.markdown("### βš™οΈ Developer Options")
200
+ chunk_size = st.slider("Chunk Size", 200, 1500, 1000, step=50)
201
+ overlap = st.slider("Chunk Overlap", 50, 200, 120, step=10)
202
+ top_k = st.slider("Top K Results", 1, 10, 7)
203
+ else:
204
+ chunk_size, overlap, top_k = 1000, 120, 5
205
+
206
+ st.markdown("---")
207
+ st.caption("✨ Built by Shubham Sharma")
208
+
209
 
210
  # ==========================================================
211
  # 📄 MAIN SECTION
 
216
  doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
217
 
218
  # ==========================================================
219
+ # 📂 DOCUMENT HANDLING (Old Layout — Dev Logs Below Answer)
220
  # ==========================================================
221
  if doc_choice == "-- Select --":
222
  st.info("⬅️ Select or upload a document to begin.")
223
  else:
 
 
 
224
  if doc_choice == "Sample PDF":
225
  temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
226
+ st.success("πŸ“˜ Sample document loaded successfully β€” you can start asking your questions below.")
227
  else:
228
+ uploaded_file = st.file_uploader("", type="pdf", label_visibility="collapsed")
 
 
 
 
229
  if uploaded_file:
230
  temp_path = os.path.join("/tmp", uploaded_file.name)
231
  with open(temp_path, "wb") as f:
232
  f.write(uploaded_file.getbuffer())
233
+ st.success("βœ… Document processed successfully β€” you can start asking your questions below.")
 
 
 
 
 
 
 
 
234
  else:
235
+ temp_path = None
236
 
 
237
  if temp_path:
238
+ with st.spinner("πŸ” Processing document..."):
239
+ text, toc, toc_source = extract_text_from_pdf(temp_path)
240
+ chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
 
 
 
 
 
 
 
241
 
242
+ with st.spinner("βš™οΈ Building search index..."):
243
+ embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
244
+ index = build_faiss_index(embeddings)
245
 
 
 
 
 
 
 
 
 
246
  doc_name = os.path.basename(temp_path)
247
  if st.session_state["last_doc"] != doc_name:
248
  query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
249
+ st.session_state["query_suggestions_fixed"] = query_suggestions
250
+ st.session_state["last_doc"] = doc_name
251
+ st.session_state["user_query_input"] = ""
252
+ st.session_state["selected_suggestion"] = None
253
+ st.session_state["show_more"] = False
 
 
254
  st.rerun()
255
  else:
256
  query_suggestions = st.session_state["query_suggestions_fixed"]
257
 
258
+ # ----------------------------------------------------------
259
+ # 💬 ASK SECTION
260
+ # ----------------------------------------------------------
261
  st.markdown("### πŸ’¬ Ask the Assistant")
262
+
263
  if query_suggestions:
264
  visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
265
  cols = st.columns(min(3, len(visible)))
 
274
 
275
  user_query = st.text_input("Type your question or click one above:", key="user_query_input")
276
 
277
+ # ----------------------------------------------------------
278
+ # 💡 RESPONSE SECTION
279
+ # ----------------------------------------------------------
280
  if user_query.strip():
281
  reasoning_mode = mode == "Extended (Document + General)"
282
  with st.spinner("πŸ’­ Generating your answer..."):
283
  retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
284
  answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
 
285
 
286
  st.markdown("### πŸ€– Assistant’s Answer")
287
 
 
290
  answer = re.sub(r"(^|\n)-\s*", r"\1<br>β€’ ", answer)
291
  st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
292
 
293
+ # ----------------------------------------------------------
294
+ # 🧩 DEV LOGS BELOW ANSWER (Old placement)
295
+ # ----------------------------------------------------------
296
+ if show_dev:
297
+ st.markdown("---")
298
+ st.markdown("### 🧩 Developer Insights")
299
+ with st.expander("πŸ“˜ Supporting Context", expanded=False):
300
+ for i, r in enumerate(retrieved, start=1):
301
+ st.markdown(f"**Chunk {i}:** {r}")
302
+
303
+ if toc:
304
+ with st.expander("πŸ“š Explore Document Sections", expanded=False):
305
+ toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
306
+ st.text_area("", toc_text, height=140)
307
+
308
+ with st.expander("πŸ“„ Document Preview", expanded=False):
309
+ st.text_area("", text[:1000], height=140)
310
+ st.caption(f"{len(chunks)} chunks processed.")
311
 
312