Update src/streamlit_app.py
Browse files- src/streamlit_app.py +37 -19
src/streamlit_app.py
CHANGED
|
@@ -63,10 +63,10 @@ from vectorstore import build_faiss_index
|
|
| 63 |
from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
|
| 64 |
|
| 65 |
# ==========================================================
|
| 66 |
-
# 🧠 Smart Suggestion Generator
|
| 67 |
# ==========================================================
|
| 68 |
def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
|
| 69 |
-
"""Generate
|
| 70 |
if not toc or not chunks:
|
| 71 |
return []
|
| 72 |
|
|
@@ -79,16 +79,15 @@ def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
|
|
| 79 |
|
| 80 |
context_sample = " ".join(chunks[:3])[:4000]
|
| 81 |
prompt = f"""
|
| 82 |
-
You are
|
| 83 |
-
|
|
|
|
|
|
|
| 84 |
TABLE OF CONTENTS:
|
| 85 |
{chr(10).join(['- ' + t for t in titles[:10]])}
|
| 86 |
|
| 87 |
CONTENT SAMPLE:
|
| 88 |
{context_sample}
|
| 89 |
-
|
| 90 |
-
Generate 5–7 smart, helpful, and professional questions that a user might ask about this document.
|
| 91 |
-
Keep them short (<20 words), clear, and well-formed.
|
| 92 |
"""
|
| 93 |
|
| 94 |
try:
|
|
@@ -105,7 +104,7 @@ def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
|
|
| 105 |
return [
|
| 106 |
"What is this document about?",
|
| 107 |
"How do I start using this process?",
|
| 108 |
-
"What
|
| 109 |
]
|
| 110 |
|
| 111 |
# ==========================================================
|
|
@@ -149,20 +148,23 @@ with st.sidebar:
|
|
| 149 |
st.caption("✨ Built by Shubham Sharma")
|
| 150 |
|
| 151 |
# ==========================================================
|
| 152 |
-
# 🧾 Document Handling
|
| 153 |
# ==========================================================
|
| 154 |
text, chunks, index, embeddings, toc = None, None, None, None, None
|
| 155 |
|
| 156 |
-
# --- Initialize
|
| 157 |
for key, default in {
|
| 158 |
"show_more": False,
|
| 159 |
"user_query_input": "",
|
| 160 |
"selected_suggestion": None,
|
|
|
|
| 161 |
}.items():
|
| 162 |
if key not in st.session_state:
|
| 163 |
st.session_state[key] = default
|
| 164 |
|
| 165 |
-
#
|
|
|
|
|
|
|
| 166 |
if doc_choice == "-- Select --":
|
| 167 |
st.info("⬅️ Please choose a document from the sidebar to begin.")
|
| 168 |
else:
|
|
@@ -184,7 +186,17 @@ else:
|
|
| 184 |
text, toc = extract_text_from_pdf(temp_path)
|
| 185 |
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 186 |
st.markdown("✅ Document loaded successfully.")
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
with st.spinner("⚙️ Preparing search index..."):
|
| 190 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
@@ -192,7 +204,7 @@ else:
|
|
| 192 |
st.markdown("π Document ready β you can now ask questions below.")
|
| 193 |
|
| 194 |
# ----------------------------------------------------------
|
| 195 |
-
# 💬 Ask a Question
|
| 196 |
# ----------------------------------------------------------
|
| 197 |
st.markdown("## π€ Ask a Question")
|
| 198 |
|
|
@@ -204,25 +216,29 @@ else:
|
|
| 204 |
|
| 205 |
for i, q in enumerate(visible):
|
| 206 |
col = cols[i % 3]
|
| 207 |
-
if col.button(f"π {q}", key=f"
|
| 208 |
-
st.session_state.
|
| 209 |
-
st.
|
| 210 |
|
| 211 |
toggle_text = "Show less ▲" if st.session_state.show_more else "Show more ▼"
|
| 212 |
if st.button(toggle_text, key="toggle_show_more"):
|
| 213 |
st.session_state.show_more = not st.session_state.show_more
|
| 214 |
st.experimental_rerun()
|
| 215 |
|
| 216 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
user_query = st.text_input(
|
| 218 |
"Type your question or pick one above:",
|
| 219 |
value=st.session_state.user_query_input,
|
| 220 |
key="user_query_input",
|
| 221 |
)
|
| 222 |
|
| 223 |
-
# --- Answer Generation ---
|
| 224 |
if user_query.strip():
|
| 225 |
st.caption("Mode: π§ Reasoning" if st.session_state.reasoning_mode else "Mode: π Strict Document")
|
|
|
|
| 226 |
with st.spinner("π Analyzing your document..."):
|
| 227 |
retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
|
| 228 |
answer = generate_answer(user_query, retrieved, reasoning_mode=st.session_state.reasoning_mode)
|
|
@@ -234,7 +250,9 @@ else:
|
|
| 234 |
for i, r in enumerate(retrieved, start=1):
|
| 235 |
st.markdown(f"**Chunk {i}:** {r}")
|
| 236 |
|
| 237 |
-
#
|
|
|
|
|
|
|
| 238 |
if chunks:
|
| 239 |
st.markdown("---")
|
| 240 |
st.subheader("π Document Preview")
|
|
|
|
| 63 |
from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
|
| 64 |
|
| 65 |
# ==========================================================
|
| 66 |
+
# 🧠 Smart Suggestion Generator
|
| 67 |
# ==========================================================
|
| 68 |
def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
|
| 69 |
+
"""Generate natural-sounding, concise professional questions from TOC + context."""
|
| 70 |
if not toc or not chunks:
|
| 71 |
return []
|
| 72 |
|
|
|
|
| 79 |
|
| 80 |
context_sample = " ".join(chunks[:3])[:4000]
|
| 81 |
prompt = f"""
|
| 82 |
+
You are an enterprise documentation assistant.
|
| 83 |
+
Generate 5–7 professional, relevant questions a user might ask about this document: "{doc_name}".
|
| 84 |
+
Use the table of contents and content sample below.
|
| 85 |
+
|
| 86 |
TABLE OF CONTENTS:
|
| 87 |
{chr(10).join(['- ' + t for t in titles[:10]])}
|
| 88 |
|
| 89 |
CONTENT SAMPLE:
|
| 90 |
{context_sample}
|
|
|
|
|
|
|
|
|
|
| 91 |
"""
|
| 92 |
|
| 93 |
try:
|
|
|
|
| 104 |
return [
|
| 105 |
"What is this document about?",
|
| 106 |
"How do I start using this process?",
|
| 107 |
+
"What configurations are needed?",
|
| 108 |
]
|
| 109 |
|
| 110 |
# ==========================================================
|
|
|
|
| 148 |
st.caption("✨ Built by Shubham Sharma")
|
| 149 |
|
| 150 |
# ==========================================================
|
| 151 |
+
# 🧾 Document Handling
|
| 152 |
# ==========================================================
|
| 153 |
text, chunks, index, embeddings, toc = None, None, None, None, None
|
| 154 |
|
| 155 |
+
# --- Initialize session state ---
|
| 156 |
for key, default in {
|
| 157 |
"show_more": False,
|
| 158 |
"user_query_input": "",
|
| 159 |
"selected_suggestion": None,
|
| 160 |
+
"pending_query": None,
|
| 161 |
}.items():
|
| 162 |
if key not in st.session_state:
|
| 163 |
st.session_state[key] = default
|
| 164 |
|
| 165 |
+
# ----------------------------------------------------------
|
| 166 |
+
# π Document Handling + Ask a Question
|
| 167 |
+
# ----------------------------------------------------------
|
| 168 |
if doc_choice == "-- Select --":
|
| 169 |
st.info("⬅️ Please choose a document from the sidebar to begin.")
|
| 170 |
else:
|
|
|
|
| 186 |
text, toc = extract_text_from_pdf(temp_path)
|
| 187 |
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 188 |
st.markdown("✅ Document loaded successfully.")
|
| 189 |
+
|
| 190 |
+
if toc:
|
| 191 |
+
st.markdown("### π§ Table of Contents Detected")
|
| 192 |
+
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 193 |
+
st.text_area("TOC Preview", toc_text, height=180)
|
| 194 |
+
query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, os.path.basename(temp_path))
|
| 195 |
+
else:
|
| 196 |
+
query_suggestions = [
|
| 197 |
+
"What is this document about?",
|
| 198 |
+
"How do I start using this process?",
|
| 199 |
+
]
|
| 200 |
|
| 201 |
with st.spinner("⚙️ Preparing search index..."):
|
| 202 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
|
|
| 204 |
st.markdown("π Document ready β you can now ask questions below.")
|
| 205 |
|
| 206 |
# ----------------------------------------------------------
|
| 207 |
+
# 💬 Ask a Question (Perfect Suggestion Sync)
|
| 208 |
# ----------------------------------------------------------
|
| 209 |
st.markdown("## π€ Ask a Question")
|
| 210 |
|
|
|
|
| 216 |
|
| 217 |
for i, q in enumerate(visible):
|
| 218 |
col = cols[i % 3]
|
| 219 |
+
if col.button(f"π {q}", key=f"suggest_{i}"):
|
| 220 |
+
st.session_state.pending_query = q
|
| 221 |
+
st.experimental_rerun()
|
| 222 |
|
| 223 |
toggle_text = "Show less ▲" if st.session_state.show_more else "Show more ▼"
|
| 224 |
if st.button(toggle_text, key="toggle_show_more"):
|
| 225 |
st.session_state.show_more = not st.session_state.show_more
|
| 226 |
st.experimental_rerun()
|
| 227 |
|
| 228 |
+
# --- Handle query sync ---
|
| 229 |
+
if st.session_state.pending_query:
|
| 230 |
+
st.session_state.user_query_input = st.session_state.pending_query
|
| 231 |
+
st.session_state.pending_query = None
|
| 232 |
+
|
| 233 |
user_query = st.text_input(
|
| 234 |
"Type your question or pick one above:",
|
| 235 |
value=st.session_state.user_query_input,
|
| 236 |
key="user_query_input",
|
| 237 |
)
|
| 238 |
|
|
|
|
| 239 |
if user_query.strip():
|
| 240 |
st.caption("Mode: π§ Reasoning" if st.session_state.reasoning_mode else "Mode: π Strict Document")
|
| 241 |
+
|
| 242 |
with st.spinner("π Analyzing your document..."):
|
| 243 |
retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
|
| 244 |
answer = generate_answer(user_query, retrieved, reasoning_mode=st.session_state.reasoning_mode)
|
|
|
|
| 250 |
for i, r in enumerate(retrieved, start=1):
|
| 251 |
st.markdown(f"**Chunk {i}:** {r}")
|
| 252 |
|
| 253 |
+
# ----------------------------------------------------------
|
| 254 |
+
# π Document Preview
|
| 255 |
+
# ----------------------------------------------------------
|
| 256 |
if chunks:
|
| 257 |
st.markdown("---")
|
| 258 |
st.subheader("π Document Preview")
|