Update src/streamlit_app.py
Browse files- src/streamlit_app.py +72 -1
src/streamlit_app.py
CHANGED
|
@@ -90,6 +90,54 @@ from ingestion import extract_text_from_pdf, chunk_text
|
|
| 90 |
from vectorstore import build_faiss_index
|
| 91 |
from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
# ==========================================================
|
| 94 |
# π Paths
|
| 95 |
# ==========================================================
|
|
@@ -164,6 +212,16 @@ elif doc_choice == "Sample PDF":
|
|
| 164 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 165 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
# ✅ Cached Embeddings
|
| 168 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 169 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
@@ -194,6 +252,16 @@ elif doc_choice == "Upload Custom PDF":
|
|
| 194 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 195 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 198 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
| 199 |
hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
|
|
@@ -222,7 +290,10 @@ if index and chunks:
|
|
| 222 |
st.markdown("---")
|
| 223 |
st.subheader("π€ Ask a Question")
|
| 224 |
|
| 225 |
-
user_query = st.text_input(
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
if user_query:
|
| 228 |
mode_label = (
|
|
|
|
| 90 |
from vectorstore import build_faiss_index
|
| 91 |
from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks
|
| 92 |
|
# ==========================================================
# TOC-Based Smart Question Generator
# ==========================================================
def clean_toc_titles(toc):
    """Strip leading section numbering from TOC titles.

    Args:
        toc: Iterable of ``(section, title)`` pairs; only the title is used.

    Returns:
        list[str]: Cleaned titles longer than 3 characters, in input order.

    Removes prefixes like ``3``, ``3.1``, and ``3.`` — the original regex
    ``^\\d+(\\.\\d+)*\\s*`` left a dangling ``". "`` for titles such as
    ``"1. Introduction"``; the optional trailing ``\\.?`` fixes that.
    """
    clean_titles = []
    for _, title in toc:
        # Drop numbering like "3.1" or "2." at the start of the title.
        title = re.sub(r"^\d+(\.\d+)*\.?\s*", "", title).strip()
        # Very short leftovers (<= 3 chars) are unlikely to be meaningful.
        if len(title) > 3:
            clean_titles.append(title)
    return clean_titles
| 105 |
+
|
| 106 |
+
|
def generate_query_suggestions(toc_titles):
    """Turn cleaned TOC section titles into conversational question suggestions.

    Args:
        toc_titles: Iterable of section-title strings (already de-numbered).

    Returns:
        list[str]: At most 6 unique suggestions, in first-seen order.
    """
    suggestions = []
    for title in toc_titles:
        low = title.lower()
        # Keyword buckets are checked in priority order; first match wins.
        if "prerequisite" in low:
            question = "What are the prerequisites for setting this up?"
        elif "restriction" in low or "limitation" in low:
            question = "What are the key restrictions or limitations?"
        elif "configuration" in low or "setup" in low:
            question = f"How do I {low}?"
        elif "overview" in low or "introduction" in low:
            question = "Can you give me an overview of this document?"
        elif "purpose" in low:
            question = "What is the purpose of this guide?"
        elif "example" in low:
            question = "Can you show an example from this document?"
        elif "process" in low:
            question = f"Can you explain the {low} process?"
        elif "use" in low:
            question = f"How do I {low}?"
        else:
            question = f"Explain the section about {low}."
        suggestions.append(question)

    # dict.fromkeys deduplicates while preserving insertion order; cap at 6.
    return list(dict.fromkeys(suggestions))[:6]
| 139 |
+
|
| 140 |
+
|
| 141 |
# ==========================================================
|
| 142 |
# π Paths
|
| 143 |
# ==========================================================
|
|
|
|
| 212 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 213 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 214 |
|
| 215 |
+
# 💡 Generate and display smart suggestions
|
| 216 |
+
clean_titles = clean_toc_titles(toc)
|
| 217 |
+
query_suggestions = generate_query_suggestions(clean_titles)
|
| 218 |
+
if query_suggestions:
|
| 219 |
+
st.markdown("#### π‘ Suggested Questions")
|
| 220 |
+
cols = st.columns(2)
|
| 221 |
+
for i, q in enumerate(query_suggestions):
|
| 222 |
+
if cols[i % 2].button(f"π {q}"):
|
| 223 |
+
st.session_state["user_query"] = q
|
| 224 |
+
|
| 225 |
# ✅ Cached Embeddings
|
| 226 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 227 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
|
|
| 252 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 253 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 254 |
|
| 255 |
+
# 💡 Generate and display smart suggestions
|
| 256 |
+
clean_titles = clean_toc_titles(toc)
|
| 257 |
+
query_suggestions = generate_query_suggestions(clean_titles)
|
| 258 |
+
if query_suggestions:
|
| 259 |
+
st.markdown("#### π‘ Suggested Questions")
|
| 260 |
+
cols = st.columns(2)
|
| 261 |
+
for i, q in enumerate(query_suggestions):
|
| 262 |
+
if cols[i % 2].button(f"π {q}"):
|
| 263 |
+
st.session_state["user_query"] = q
|
| 264 |
+
|
| 265 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 266 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
| 267 |
hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
|
|
|
|
| 290 |
st.markdown("---")
|
| 291 |
st.subheader("π€ Ask a Question")
|
| 292 |
|
| 293 |
+
user_query = st.text_input(
|
| 294 |
+
"π Your question about the document:",
|
| 295 |
+
value=st.session_state.get("user_query", "")
|
| 296 |
+
)
|
| 297 |
|
| 298 |
if user_query:
|
| 299 |
mode_label = (
|