Update src/streamlit_app.py
Browse files- src/streamlit_app.py +74 -44
src/streamlit_app.py
CHANGED
|
@@ -158,6 +158,30 @@ with st.sidebar:
|
|
| 158 |
st.markdown("---")
|
| 159 |
st.caption("✨ Built by Shubham Sharma")
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
# ==========================================================
|
| 162 |
# 🧠 SESSION STATE
|
| 163 |
# ==========================================================
|
|
@@ -185,50 +209,78 @@ st.caption("Query SAP documentation and enterprise PDFs β powered by reasoning
|
|
| 185 |
doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
|
| 186 |
|
| 187 |
# ==========================================================
|
| 188 |
-
# π DOCUMENT HANDLING
|
| 189 |
# ==========================================================
|
| 190 |
if doc_choice == "-- Select --":
|
| 191 |
st.info("⬅️ Select or upload a document to begin.")
|
| 192 |
else:
|
|
|
|
|
|
|
|
|
|
| 193 |
if doc_choice == "Sample PDF":
|
| 194 |
temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
|
| 195 |
-
st.
|
| 196 |
else:
|
| 197 |
-
uploaded_file = st.file_uploader(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
if uploaded_file:
|
| 199 |
temp_path = os.path.join("/tmp", uploaded_file.name)
|
| 200 |
with open(temp_path, "wb") as f:
|
| 201 |
f.write(uploaded_file.getbuffer())
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
else:
|
| 204 |
-
|
| 205 |
|
|
|
|
| 206 |
if temp_path:
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
| 214 |
|
|
|
|
| 215 |
doc_name = os.path.basename(temp_path)
|
| 216 |
if st.session_state["last_doc"] != doc_name:
|
| 217 |
query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
|
| 218 |
-
st.session_state
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
| 223 |
st.rerun()
|
| 224 |
else:
|
| 225 |
query_suggestions = st.session_state["query_suggestions_fixed"]
|
| 226 |
|
| 227 |
-
#
|
| 228 |
-
# 💬 ASK SECTION
|
| 229 |
-
# ----------------------------------------------------------
|
| 230 |
st.markdown("### 💬 Ask the Assistant")
|
| 231 |
-
|
| 232 |
if query_suggestions:
|
| 233 |
visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
|
| 234 |
cols = st.columns(min(3, len(visible)))
|
|
@@ -243,14 +295,12 @@ else:
|
|
| 243 |
|
| 244 |
user_query = st.text_input("Type your question or click one above:", key="user_query_input")
|
| 245 |
|
| 246 |
-
# ----------------------------------------------------------
|
| 247 |
-
# 💡 RESPONSE SECTION
|
| 248 |
-
# ----------------------------------------------------------
|
| 249 |
if user_query.strip():
|
| 250 |
reasoning_mode = mode == "Extended (Document + General)"
|
| 251 |
with st.spinner("π Generating your answer..."):
|
| 252 |
retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
|
| 253 |
answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
|
|
|
|
| 254 |
|
| 255 |
st.markdown("### 🤖 Assistant’s Answer")
|
| 256 |
|
|
@@ -259,27 +309,7 @@ else:
|
|
| 259 |
answer = re.sub(r"(^|\n)-\s*", r"\1<br>• ", answer)
|
| 260 |
st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
|
| 261 |
|
| 262 |
-
|
| 263 |
-
# 🧩 SIDEBAR — COLLAPSIBLE DEVELOPER INSIGHTS
|
| 264 |
-
# ----------------------------------------------------------
|
| 265 |
-
if show_dev:
|
| 266 |
-
with st.sidebar:
|
| 267 |
-
st.markdown("---")
|
| 268 |
-
with st.expander("🧩 Developer Insights", expanded=False):
|
| 269 |
-
st.markdown("**Context Chunks (Retrieved):**")
|
| 270 |
-
for i, r in enumerate(retrieved, start=1):
|
| 271 |
-
st.markdown(f"- **Chunk {i}:** {r}")
|
| 272 |
-
|
| 273 |
-
if toc:
|
| 274 |
-
st.markdown("---")
|
| 275 |
-
st.markdown("**Document Sections (TOC):**")
|
| 276 |
-
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 277 |
-
st.text_area("", toc_text, height=120)
|
| 278 |
-
|
| 279 |
-
st.markdown("---")
|
| 280 |
-
st.markdown("**Document Preview:**")
|
| 281 |
-
st.text_area("", text[:1000], height=120)
|
| 282 |
-
st.caption(f"{len(chunks)} chunks processed.")
|
| 283 |
|
| 284 |
# ==========================================================
|
| 285 |
# 🎨 Optional Sidebar Scroll Styling (keeps it clean)
|
|
|
|
| 158 |
st.markdown("---")
|
| 159 |
st.caption("✨ Built by Shubham Sharma")
|
| 160 |
|
| 161 |
+
# 🧩 Developer Insights (Moved up here from main block)
|
| 162 |
+
if show_dev:
|
| 163 |
+
st.markdown("---")
|
| 164 |
+
with st.expander("🧩 Developer Insights", expanded=False):
|
| 165 |
+
st.markdown("**Retrieved Chunks (Context):**")
|
| 166 |
+
retrieved_chunks = st.session_state.get("retrieved", [])
|
| 167 |
+
for i, r in enumerate(retrieved_chunks, start=1):
|
| 168 |
+
st.markdown(f"- **Chunk {i}:** {r}")
|
| 169 |
+
|
| 170 |
+
toc_data = st.session_state.get("toc", [])
|
| 171 |
+
if toc_data:
|
| 172 |
+
st.markdown("---")
|
| 173 |
+
st.markdown("**Document Sections (TOC):**")
|
| 174 |
+
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc_data])
|
| 175 |
+
st.text_area("", toc_text, height=120)
|
| 176 |
+
|
| 177 |
+
doc_text = st.session_state.get("text", "")
|
| 178 |
+
if doc_text:
|
| 179 |
+
st.markdown("---")
|
| 180 |
+
st.markdown("**Document Preview:**")
|
| 181 |
+
st.text_area("", doc_text[:1000], height=120)
|
| 182 |
+
st.caption(f"{len(st.session_state.get('chunks', []))} chunks processed.")
|
| 183 |
+
|
| 184 |
+
|
| 185 |
# ==========================================================
|
| 186 |
# 🧠 SESSION STATE
|
| 187 |
# ==========================================================
|
|
|
|
| 209 |
doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
|
| 210 |
|
| 211 |
# ==========================================================
|
| 212 |
+
# π DOCUMENT HANDLING β CLEAN + ACCURATE UI FLOW
|
| 213 |
# ==========================================================
|
| 214 |
if doc_choice == "-- Select --":
|
| 215 |
st.info("⬅️ Select or upload a document to begin.")
|
| 216 |
else:
|
| 217 |
+
temp_path = None
|
| 218 |
+
|
| 219 |
+
# --- File selection ---
|
| 220 |
if doc_choice == "Sample PDF":
|
| 221 |
temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
|
| 222 |
+
st.markdown("✅ **Sample PDF selected.** Preparing document...")
|
| 223 |
else:
|
| 224 |
+
uploaded_file = st.file_uploader(
|
| 225 |
+
"Upload a PDF document (max 200MB):",
|
| 226 |
+
type="pdf",
|
| 227 |
+
label_visibility="collapsed"
|
| 228 |
+
)
|
| 229 |
if uploaded_file:
|
| 230 |
temp_path = os.path.join("/tmp", uploaded_file.name)
|
| 231 |
with open(temp_path, "wb") as f:
|
| 232 |
f.write(uploaded_file.getbuffer())
|
| 233 |
+
|
| 234 |
+
# File badge (nice compact indicator)
|
| 235 |
+
st.markdown(
|
| 236 |
+
f"<div style='background:#0f172a;border-radius:8px;"
|
| 237 |
+
f"padding:6px 10px;margin-top:4px;display:inline-block;'>π "
|
| 238 |
+
f"<b>{uploaded_file.name}</b> — {uploaded_file.size/1024:.1f} KB</div>",
|
| 239 |
+
unsafe_allow_html=True
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
else:
|
| 243 |
+
st.stop() # wait until file uploaded before proceeding
|
| 244 |
|
| 245 |
+
# --- Real processing begins here ---
|
| 246 |
if temp_path:
|
| 247 |
+
status = st.empty()
|
| 248 |
+
status.info("📤 Upload complete — reading document...")
|
| 249 |
+
|
| 250 |
+
text, toc, toc_source = extract_text_from_pdf(temp_path)
|
| 251 |
+
status.info("π Parsing and chunking document...")
|
| 252 |
+
chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
|
| 253 |
+
|
| 254 |
+
status.info("🧠 Building embeddings and search index...")
|
| 255 |
+
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
| 256 |
+
index = build_faiss_index(embeddings)
|
| 257 |
+
|
| 258 |
+
status.success("✅ Document ready — you can start asking your questions below.")
|
| 259 |
|
| 260 |
+
# Save for dev sidebar
|
| 261 |
+
st.session_state.update({
|
| 262 |
+
"text": text,
|
| 263 |
+
"toc": toc,
|
| 264 |
+
"chunks": chunks
|
| 265 |
+
})
|
| 266 |
|
| 267 |
+
# --- Suggestion setup ---
|
| 268 |
doc_name = os.path.basename(temp_path)
|
| 269 |
if st.session_state["last_doc"] != doc_name:
|
| 270 |
query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
|
| 271 |
+
st.session_state.update({
|
| 272 |
+
"query_suggestions_fixed": query_suggestions,
|
| 273 |
+
"last_doc": doc_name,
|
| 274 |
+
"user_query_input": "",
|
| 275 |
+
"selected_suggestion": None,
|
| 276 |
+
"show_more": False
|
| 277 |
+
})
|
| 278 |
st.rerun()
|
| 279 |
else:
|
| 280 |
query_suggestions = st.session_state["query_suggestions_fixed"]
|
| 281 |
|
| 282 |
+
# --- Ask section ---
|
|
|
|
|
|
|
| 283 |
st.markdown("### 💬 Ask the Assistant")
|
|
|
|
| 284 |
if query_suggestions:
|
| 285 |
visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
|
| 286 |
cols = st.columns(min(3, len(visible)))
|
|
|
|
| 295 |
|
| 296 |
user_query = st.text_input("Type your question or click one above:", key="user_query_input")
|
| 297 |
|
|
|
|
|
|
|
|
|
|
| 298 |
if user_query.strip():
|
| 299 |
reasoning_mode = mode == "Extended (Document + General)"
|
| 300 |
with st.spinner("π Generating your answer..."):
|
| 301 |
retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
|
| 302 |
answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
|
| 303 |
+
st.session_state["retrieved"] = retrieved
|
| 304 |
|
| 305 |
st.markdown("### 🤖 Assistant’s Answer")
|
| 306 |
|
|
|
|
| 309 |
answer = re.sub(r"(^|\n)-\s*", r"\1<br>• ", answer)
|
| 310 |
st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
|
| 311 |
|
| 312 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
# ==========================================================
|
| 315 |
# 🎨 Optional Sidebar Scroll Styling (keeps it clean)
|