Shubham170793 committed on
Commit
f571fb6
·
verified ·
1 Parent(s): a1fa58d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +238 -184
src/streamlit_app.py CHANGED
@@ -1,16 +1,49 @@
 
1
  import os
2
  import re
 
3
  import streamlit as st
4
  import torch
5
 
6
  # ==========================================================
7
- # βœ… Environment Setup
8
  # ==========================================================
9
  st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
10
  print("CUDA available:", torch.cuda.is_available())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # ==========================================================
13
- # βš™οΈ Hugging Face Cache Setup
14
  # ==========================================================
15
  CACHE_DIR = "/tmp/hf_cache"
16
  os.makedirs(CACHE_DIR, exist_ok=True)
@@ -22,221 +55,242 @@ os.environ.update({
22
  })
23
 
24
  # ==========================================================
25
- # πŸ“¦ Imports
 
 
 
26
  # ==========================================================
27
  from ingestion import extract_text_from_pdf, chunk_text
28
  from vectorstore import build_faiss_index
29
  from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
30
 
31
  # ==========================================================
32
- # 🧠 Smart Suggestion Generator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # ==========================================================
34
- def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
35
- """Generate contextual, short, and relevant questions dynamically from the document."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  if not toc or not chunks:
37
  return []
38
-
39
  titles = []
40
  for sec, raw_title in toc:
41
- title = re.sub(r"^\s*[\dA-Za-z.\-]+\s*", "", raw_title)
42
- title = re.sub(r"\.{2,}\s*\d+$", "", title).strip()
43
- if 4 < len(title) < 120:
44
- titles.append(title)
45
-
46
- context_sample = " ".join(chunks[:3])[:4000]
47
  prompt = f"""
48
- You are generating user-friendly, context-aware questions based on the document "{doc_name}".
49
- Use the Table of Contents and text sample below.
50
-
51
- TABLE OF CONTENTS:
52
- {chr(10).join(['- ' + t for t in titles[:8]])}
53
 
54
- TEXT SAMPLE:
55
- {context_sample}
56
-
57
- Generate 5–7 concise, practical questions (max 18 words each) that help a user understand or use this document.
58
- """
59
 
 
 
60
  try:
61
- ai_response = genai_generate(prompt)
62
- questions = re.findall(r"[-β€’]?\s*(.+?)\?", ai_response)
63
- clean_qs = [q.strip("β€’-β€” ").strip() + "?" for q in questions if 8 < len(q) < 120]
64
- seen, final = set(), []
65
- for q in clean_qs:
66
- if q.lower() not in seen:
67
- seen.add(q.lower())
 
 
 
 
 
 
 
 
 
 
68
  final.append(q)
69
- return final[:7]
 
70
  except Exception:
71
- return ["What is this document about?", "How can I start using this guide?"]
72
-
73
- # ==========================================================
74
- # 🎨 Styling β€” Customer-Ready Theme
75
- # ==========================================================
76
- st.markdown("""
77
- <style>
78
- div.block-container {padding-top: 1.5rem; max-width: 1000px;}
79
- h1, h2, h3, h4 {font-weight: 600; color: #f3f4f6;}
80
- hr {border: none; border-top: 1px solid #2c2c2c; margin: 1rem 0;}
81
- .suggest-chip {
82
- background-color: #1f2937;
83
- border: 1px solid #374151;
84
- border-radius: 16px;
85
- color: #e5e7eb;
86
- padding: 6px 12px;
87
- cursor: pointer;
88
- font-size: 13px;
89
- transition: all 0.2s ease-in-out;
90
- }
91
- .suggest-chip:hover {
92
- background-color: #2563eb;
93
- border-color: #3b82f6;
94
- color: white;
95
- box-shadow: 0 0 8px rgba(59,130,246,0.4);
96
- }
97
- .answer-box {
98
- background: linear-gradient(135deg, #0f172a, #1e293b);
99
- border-left: 4px solid #3b82f6;
100
- border-radius: 8px;
101
- padding: 14px 16px;
102
- color: #f1f5f9;
103
- margin-top: 1rem;
104
- box-shadow: 0 0 10px rgba(59,130,246,0.1);
105
- }
106
- .stTextInput > div > div > input {
107
- background-color: #0f172a;
108
- color: #f1f5f9;
109
- border-radius: 6px;
110
- border: 1px solid #334155;
111
- padding: 6px 10px;
112
- }
113
- .stTextArea > div > div > textarea {
114
- background-color: #0f172a;
115
- color: #f1f5f9;
116
- border-radius: 6px;
117
- border: 1px solid #334155;
118
- }
119
- </style>
120
- """, unsafe_allow_html=True)
121
 
122
  # ==========================================================
123
- # 🧭 Sidebar
124
  # ==========================================================
125
  with st.sidebar:
126
- st.markdown("### πŸ’¬ Answer Style")
127
- if "reasoning_mode" not in st.session_state:
128
- st.session_state.reasoning_mode = False
129
- style = st.radio(
130
- "Choose how detailed answers should be:",
131
- ["Concise", "Detailed"],
132
- index=0 if not st.session_state.reasoning_mode else 1,
133
- )
134
- st.session_state.reasoning_mode = style == "Detailed"
135
 
136
  st.markdown("---")
137
- st.markdown("### βš™οΈ Advanced Settings")
138
- with st.expander("Show Advanced Options"):
139
- chunk_size = st.slider("Chunk Size", 200, 1500, 1000, step=50)
140
- overlap = st.slider("Chunk Overlap", 50, 200, 120, step=10)
141
- top_k = st.slider("Top K Results", 1, 10, 5)
 
 
 
 
142
  st.markdown("---")
143
- st.caption("✨ Built by Shubham Sharma")
144
 
145
  # ==========================================================
146
- # πŸ“„ Main Flow
147
  # ==========================================================
148
  st.title("Enterprise Knowledge Assistant")
149
- st.caption("Upload or select a document to ask intelligent, document-based questions.")
 
 
 
 
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  text, chunks, index, embeddings, toc = None, None, None, None, None
 
 
 
 
152
 
153
- # Initialize session variables safely
154
- if "user_query_input" not in st.session_state:
155
- st.session_state["user_query_input"] = ""
156
- if "show_more" not in st.session_state:
157
- st.session_state["show_more"] = False
 
 
 
 
158
 
159
- def set_user_query(q):
160
- st.session_state["user_query_input"] = q
161
 
162
- # ----------------------------------------------------------
163
- # πŸ“‚ Document Selection
164
- # ----------------------------------------------------------
165
- doc_choice = st.radio("Select a document source:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
- if doc_choice == "-- Select --":
168
- st.info("πŸ“„ Please choose or upload a document to get started.")
169
- else:
170
- if doc_choice == "Sample PDF":
171
- temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
172
- st.success("πŸ“˜ Using built-in Sample PDF.")
173
- else:
174
- uploaded_file = st.file_uploader("πŸ“‚ Upload your PDF", type="pdf")
175
- if uploaded_file:
176
- temp_path = os.path.join("/tmp", uploaded_file.name)
177
- with open(temp_path, "wb") as f:
178
- f.write(uploaded_file.getbuffer())
179
- st.success(f"βœ… '{uploaded_file.name}' uploaded successfully.")
180
- else:
181
- temp_path = None
182
-
183
- # ----------------------------------------------------------
184
- # 🧠 Process the Document
185
- # ----------------------------------------------------------
186
- if temp_path:
187
- with st.spinner("πŸ” Analyzing your document..."):
188
- text, toc = extract_text_from_pdf(temp_path)
189
- chunks = chunk_text(text, chunk_size=1000)
190
- query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, os.path.basename(temp_path))
191
-
192
- with st.spinner("βš™οΈ Preparing intelligent search..."):
193
- embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
194
- index = build_faiss_index(embeddings)
195
- st.success("βœ… Your document is ready! Ask the assistant below.")
196
-
197
- # ----------------------------------------------------------
198
- # πŸ’¬ Ask the Assistant
199
- # ----------------------------------------------------------
200
- st.markdown("### πŸ’¬ Ask the Assistant")
201
-
202
- if query_suggestions:
203
- visible = query_suggestions if st.session_state.show_more else query_suggestions[:3]
204
- cols = st.columns(min(3, len(visible)))
205
-
206
- for i, q in enumerate(visible):
207
- cols[i % 3].button(f"πŸ” {q}", key=f"suggest_{i}", on_click=set_user_query, args=(q,))
208
-
209
- toggle_text = "More Suggestions β–Ό" if not st.session_state.show_more else "Fewer Suggestions β–²"
210
- if st.button(toggle_text):
211
- st.session_state.show_more = not st.session_state.show_more
212
- st.experimental_rerun()
213
-
214
- user_query = st.text_input("Type your question or click a suggestion:", key="user_query_input")
215
-
216
- if user_query.strip():
217
- with st.spinner("πŸ’­ Thinking..."):
218
- retrieved = retrieve_chunks(user_query, index, chunks, top_k=5, embeddings=embeddings)
219
- answer = generate_answer(user_query, retrieved, reasoning_mode=st.session_state.reasoning_mode)
220
-
221
- st.markdown("### βœ… Assistant’s Answer")
222
- st.markdown(f"<div class='answer-box'>πŸ’‘ {answer}</div>", unsafe_allow_html=True)
223
-
224
- with st.expander("See how this was answered (Advanced)"):
225
- for i, r in enumerate(retrieved, start=1):
226
- st.markdown(f"**Chunk {i}:** {r}")
227
-
228
- # ----------------------------------------------------------
229
- # πŸ“š Table of Contents
230
- # ----------------------------------------------------------
231
- if toc:
232
- with st.expander("πŸ“š View Table of Contents", expanded=True):
233
- toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
234
- st.text_area("", toc_text, height=150)
235
 
236
- # ----------------------------------------------------------
237
- # πŸ“„ Document Preview
238
- # ----------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
239
  if chunks:
240
- with st.expander("πŸ“„ View Extracted Text"):
241
- st.text_area("", text[:1000], height=150)
242
- st.caption(f"{len(chunks)} sections processed.")
 
 
 
 
 
1
+ # streamlit_app.py
2
  import os
3
  import re
4
+ import shutil
5
  import streamlit as st
6
  import torch
7
 
8
  # ==========================================================
9
+ # ✅ Environment & Page
10
  # ==========================================================
11
# Page configuration and a one-off hardware report printed to the server log.
st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    try:
        print("GPU:", torch.cuda.get_device_name(0))
    except Exception as exc:
        # Purely informational: never let a device-name lookup stop the app,
        # but do say why the name is missing instead of hiding the failure.
        print("GPU name unavailable:", exc)
18
+
19
+ # minimal cache cleanup (safe)
20
_DEFAULT_CACHE_FOLDERS = (
    "/root/.cache/huggingface",
    "/root/.cache/transformers",
    "/root/.cache/torch",
)


def _folder_size_gb(folder):
    """Return the total size of the regular files under *folder*, in GiB.

    Files that cannot be stat'ed (broken symlinks, files removed while
    walking) are skipped so one bad entry does not zero the whole estimate.
    """
    total_bytes = 0
    for dirpath, _dirnames, filenames in os.walk(folder):
        for filename in filenames:
            try:
                total_bytes += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:
                continue
    return total_bytes / (1024 ** 3)


def clean_cache(max_size_gb: float = 2.0, folders=None):
    """Delete oversized model-cache folders and ensure /tmp/hf_cache exists.

    Args:
        max_size_gb: A folder is removed when its size exceeds this many GiB.
        folders: Optional iterable of folder paths to inspect. When omitted,
            the three default cache folders under /root/.cache are used.

    Returns:
        The combined size, in GiB, of the folders that were actually removed.

    A folder whose final path component is "torch" is removed regardless of
    its size, matching the behaviour of the default torch cache entry.
    """
    targets = _DEFAULT_CACHE_FOLDERS if folders is None else folders
    total_deleted = 0.0
    for folder in targets:
        if not os.path.isdir(folder):
            continue
        size_gb = _folder_size_gb(folder)
        is_torch_cache = os.path.basename(os.path.normpath(folder)) == "torch"
        if size_gb > max_size_gb or is_torch_cache:
            shutil.rmtree(folder, ignore_errors=True)
            # rmtree swallows errors, so only count space that really went away.
            if not os.path.exists(folder):
                total_deleted += size_gb
    os.makedirs("/tmp/hf_cache", exist_ok=True)
    return total_deleted
42
+
43
+ clean_cache()
44
 
45
  # ==========================================================
46
+ # βš™οΈ HF cache env (keeps HuggingFace caches local)
47
  # ==========================================================
48
  CACHE_DIR = "/tmp/hf_cache"
49
  os.makedirs(CACHE_DIR, exist_ok=True)
 
55
  })
56
 
57
  # ==========================================================
58
+ # 📦 Project imports (assumed to exist in your repo)
59
+ # - ingestion.extract_text_from_pdf, chunk_text
60
+ # - vectorstore.build_faiss_index
61
+ # - qa.retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
62
  # ==========================================================
63
  from ingestion import extract_text_from_pdf, chunk_text
64
  from vectorstore import build_faiss_index
65
  from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
66
 
67
  # ==========================================================
68
+ # 🎨 UI styles (concise, production friendly)
69
+ # ==========================================================
70
# Global CSS for the app. The rules are kept flush-left inside the string so
# Markdown cannot mistake indented lines for a code block.
_APP_CSS = """
<style>
div.block-container { padding-top: 1.2rem; max-width: 1050px; }
.status-line { background:#0f172a; border-left:4px solid #10b981; padding:10px 14px; border-radius:8px; color:#d1fae5; margin-bottom:10px; }
.suggest-chip { background:#111827; border:1px solid #2b3440; border-radius:16px; padding:8px 14px; color:#e5e7eb; margin:6px 6px 6px 0; cursor:pointer; display:inline-block; font-size:13px; }
.suggest-chip:hover { background:#2563eb; border-color:#3b82f6; color:#fff; box-shadow:0 0 8px rgba(59,130,246,0.25); }
.answer-box { background: linear-gradient(135deg,#0b1220,#0f1b2b); border-left:4px solid #3b82f6; padding:14px; border-radius:8px; color:#f1f5f9; box-shadow:0 6px 18px rgba(2,6,23,0.5); }
.small-muted { color:#9ca3af; font-size:13px; margin-top:6px; }
.sidebar-small { font-size:14px; color:#d1d5db; }
.section-title { font-weight:700; font-size:20px; margin-top:8px; margin-bottom:10px; color:#f3f4f6; }
.compact-expander > div[role="button"] { padding:10px 12px; border-radius:8px; background:#0f172a; border:1px solid #1f2937; color:#e5e7eb;}
</style>
"""
st.markdown(_APP_CSS, unsafe_allow_html=True)
86
+
87
+ # ==========================================================
88
+ # 🔧 Helper: safe session-state initialization
89
  # ==========================================================
90
# Session keys the rest of the script reads, with the value each one starts at.
default_state = {
    "user_query_input": "",
    "show_more": False,
    "selected_suggestion": None,
    "response_mode": "strict",  # 'strict' or 'extended'
    "last_doc_path": None,
}
# Seed only the keys that are missing so values set on earlier reruns survive.
for state_key, initial_value in default_state.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
100
+
101
+ # ==========================================================
102
+ # 🧠 Suggestion generator (uses TOC + text sample; robust fallback)
103
+ # ==========================================================
104
# A leading section label: a token containing a digit ("1.", "2.1", "A-3"),
# or a short lettered label ending in "." or ")" ("A.", "IV.", "b)").
# Plain first words such as "Introduction" are deliberately NOT matched.
_SECTION_LABEL_RE = re.compile(
    r"^\s*(?:(?=[A-Za-z.\-]*\d)[\dA-Za-z.\-]+|[A-Za-z]{1,4}[.)])(?:\s+|$)"
)
# Trailing dot leaders and page number, e.g. "Overview ..... 12".
_DOT_LEADER_RE = re.compile(r"\.{2,}\s*\d+$")
# Any run of text on one line that ends in a question mark.
_QUESTION_RE = re.compile(r"[-•]?\s*(.+?)\?")
# List numbering left in front of a question, e.g. "1. " or "2) ".
_LIST_NUMBER_RE = re.compile(r"^\(?\d{1,2}[.)]\s*")
_HOWTO_RE = re.compile(r"\b(setup|configure|installation|enable|install)\b")
_OVERVIEW_RE = re.compile(r"\b(overview|purpose|objective|introduction|summary)\b")


def _clean_toc_titles(toc):
    """Return TOC titles without section labels or dot leaders (5-119 chars)."""
    titles = []
    for _sec, raw_title in toc:
        title = _SECTION_LABEL_RE.sub("", str(raw_title))
        title = _DOT_LEADER_RE.sub("", title).strip()
        if 4 < len(title) < 120:
            titles.append(title)
    return titles


def _questions_from_ai_text(ai_text):
    """Extract question sentences from free-form generator output."""
    questions = []
    for fragment in _QUESTION_RE.findall(ai_text):
        fragment = fragment.strip("•-—* ").strip()
        fragment = _LIST_NUMBER_RE.sub("", fragment).strip()
        if 8 < len(fragment) < 120:
            # The capture stops just before the "?", so put it back.
            questions.append(fragment + "?")
    return questions


def _dedupe_case_insensitive(questions):
    """Drop case-insensitive duplicates while keeping first-seen order."""
    seen = set()
    unique = []
    for question in questions:
        key = question.lower()
        if key not in seen:
            seen.add(key)
            unique.append(question)
    return unique


def generate_suggestions_from_toc(toc, chunks, doc_name="Document"):
    """Suggest up to seven questions a user could ask about a document.

    The generator (``genai_generate``) is tried first with a prompt built
    from the cleaned TOC titles and the first three chunks. If it fails or
    yields no usable question, questions are derived deterministically from
    the TOC titles instead.

    Args:
        toc: Iterable of ``(section, title)`` pairs.
        chunks: List of text chunks from the document.
        doc_name: Name of the document, quoted in the prompt.

    Returns:
        A list of at most seven question strings; empty when ``toc`` or
        ``chunks`` is empty.
    """
    if not toc or not chunks:
        return []

    titles = _clean_toc_titles(toc)
    sample = " ".join(chunks[:3])[:3000]
    toc_lines = "\n".join("- " + title for title in titles[:8])
    prompt = f"""
You are generating concise, document-based suggestion questions for a user exploring the document named "{doc_name}".
TABLE OF CONTENTS:
{toc_lines}

SAMPLE:
{sample}

Generate 5 short, professional questions (each under 18 words) that a user could ask about this document. Focus strictly on the document content.
"""
    try:
        ai_questions = _dedupe_case_insensitive(
            _questions_from_ai_text(genai_generate(prompt))
        )
        if ai_questions:
            return ai_questions[:7]
    except Exception as exc:
        # Best effort by design: any generator failure falls through to the
        # deterministic TOC-based questions, but the reason is reported.
        print("Suggestion generation failed; using TOC fallback:", exc)

    # Deterministic fallback: phrase each TOC title as a question.
    fallback = []
    for title in titles[:10]:
        lowered = title.lower()
        if _HOWTO_RE.search(lowered):
            fallback.append(f"How do I {lowered.strip()}?")
        elif _OVERVIEW_RE.search(lowered):
            fallback.append(f"What is the {lowered.strip()}?")
        else:
            fallback.append(f"What does '{title}' cover?")

    sized = [q.strip() for q in fallback if 10 < len(q.strip()) < 120]
    return _dedupe_case_insensitive(sized)[:7]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  # ==========================================================
172
+ # 🎛 Sidebar (controls)
173
  # ==========================================================
174
# Sidebar: response-mode switch plus indexing/retrieval tuning.
# NOTE(review): nesting below is reconstructed from a flattened diff view —
# confirm against the repository file.
with st.sidebar:
    st.markdown("### Response Mode")
    # User-facing names are "Strict" / "Extended"; session state stores the
    # lowercase internal key.
    mode = st.radio(
        "",
        ["Strict (Document-only)", "Extended (Document + general)"],
        index=0,
        key="ui_response_mode",
    )
    st.session_state.response_mode = "strict" if "Strict" in mode else "extended"

    st.markdown("---")
    with st.expander("Advanced Settings (for power users)", expanded=False):
        st.markdown("**Indexing & retrieval**")
        chunk_size = st.slider("Chunk size (chars)", 200, 1500, 1000, step=50)
        overlap = st.slider("Chunk overlap (chars)", 50, 300, 120, step=10)
        top_k = st.slider("Top K results", 1, 10, 5)
        # Mirror the slider values into session state for later reads.
        st.session_state["adv_chunk_size"] = chunk_size
        st.session_state["adv_overlap"] = overlap
        st.session_state["adv_top_k"] = top_k

    st.markdown("---")
    st.caption("✨ Built by Shubham Sharma", unsafe_allow_html=True)
193
 
194
  # ==========================================================
195
+ # 📄 Main content flow
196
  # ==========================================================
197
# Main page: choose a document, index it, then answer questions about it.
# NOTE(review): nesting below is reconstructed from a flattened diff view —
# confirm against the repository file.
import html  # used only here, to escape model output placed inside raw HTML

st.title("Enterprise Knowledge Assistant")
st.caption("Query SAP documentation and enterprise PDFs — powered by retrieval and reasoning.", unsafe_allow_html=True)

# Document selection (index=0 keeps "-- Select --" as the default choice).
st.markdown("#### Select a document")
doc_choice = st.radio("", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0, key="doc_choice_radio")

temp_path = None
if doc_choice == "Sample PDF":
    sample_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
    temp_path = sample_path
elif doc_choice == "Upload Custom PDF":
    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
    if uploaded_file:
        # basename() stops a crafted upload name from writing outside /tmp.
        safe_name = os.path.basename(uploaded_file.name) or "upload.pdf"
        temp_path = os.path.join("/tmp", safe_name)
        with open(temp_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

text, chunks, index, embeddings, toc = None, None, None, None, None
if temp_path:
    # Remember which document this session is currently working on.
    if st.session_state.get("last_doc_path") != temp_path:
        st.session_state.last_doc_path = temp_path

    with st.spinner("Processing document..."):
        text, toc = extract_text_from_pdf(temp_path)
        # Chunk size comes from the advanced settings when present.
        # NOTE(review): the sidebar's overlap value (adv_overlap) is not
        # forwarded to chunk_text — confirm whether chunk_text accepts it.
        chunk_size = st.session_state.get("adv_chunk_size", 1000)
        chunks = chunk_text(text, chunk_size=chunk_size)
        query_suggestions = generate_suggestions_from_toc(toc, chunks, os.path.basename(temp_path))
    with st.spinner("Preparing embeddings and index..."):
        embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
        index = build_faiss_index(embeddings)

    # Single, subtle status line.
    st.markdown("<div class='status-line'>✅ Your document is ready. Ask the assistant below.</div>", unsafe_allow_html=True)

    # ------------------------
    # Suggested questions
    # ------------------------
    st.markdown("<div class='section-title'>Ask the Assistant</div>", unsafe_allow_html=True)
    if query_suggestions:
        visible = query_suggestions if st.session_state.show_more else query_suggestions[:3]
        for i, q in enumerate(visible):
            # Clicking a suggestion pre-fills the question box created below.
            if st.button(q, key=f"sugg_btn_{i}"):
                st.session_state.user_query_input = q
                st.session_state.selected_suggestion = i
        toggle_text = "Show less ▲" if st.session_state.show_more else "More suggestions ▼"
        if st.button(toggle_text, key="toggle_more"):
            st.session_state.show_more = not st.session_state.show_more
            # st.experimental_rerun is gone in newer Streamlit releases;
            # prefer st.rerun and fall back only when it does not exist.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()

    # The widget takes its value from session state through `key`; passing
    # `value=` as well makes Streamlit warn about two sources for one value.
    user_query = st.text_input("Type your question or pick one above:", key="user_query_input")

    # Answer generation
    if user_query and user_query.strip():
        mode_label = "Strict (document-only)" if st.session_state.response_mode == "strict" else "Extended (document + general)"
        st.markdown(f"<div class='small-muted'>Mode: {mode_label}</div>", unsafe_allow_html=True)

        with st.spinner("Retrieving context and generating answer..."):
            top_k = st.session_state.get("adv_top_k", 5)
            retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
            # "extended" response mode is passed to generate_answer as reasoning_mode=True.
            reasoning_mode_flag = st.session_state.response_mode == "extended"
            answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode_flag)

        # Present the answer in a card. It is derived from an uploaded PDF and
        # a model, so escape it before embedding it in unsafe HTML.
        # NOTE(review): if generate_answer intentionally returns HTML, this
        # escaping will display the tags literally — confirm.
        st.markdown("<div class='section-title'>Assistant</div>", unsafe_allow_html=True)
        st.markdown(f"<div class='answer-box'>💡 {html.escape(str(answer))}</div>", unsafe_allow_html=True)
        st.caption("Answer is based on the uploaded document; Extended mode may include general insights.", unsafe_allow_html=True)

        # Supporting context (collapsed)
        with st.expander("Supporting context (document chunks)"):
            for i, c in enumerate(retrieved, start=1):
                st.markdown(f"**Chunk {i}:** {c}")

    # ------------------------
    # Optional: document explorer (TOC + extracted-text preview)
    # ------------------------
    with st.expander("Explore document (TOC & preview)", expanded=False):
        if toc:
            st.markdown("**Table of Contents**")
            toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
            st.text_area("", toc_text, height=140)
        if chunks:
            st.markdown("**Extracted text preview**")
            st.text_area("", text[:1600], height=180)
            st.caption(f"{len(chunks)} chunks processed.", unsafe_allow_html=True)

# No document selected yet: show a gentle onboarding hint.
else:
    st.info("Select 'Sample PDF' or upload a PDF to begin. Suggested questions are extracted from the document itself (no pre-seeded suggestions).")