Update src/streamlit_app.py
Browse files- src/streamlit_app.py +72 -1
src/streamlit_app.py
CHANGED
|
@@ -90,6 +90,54 @@ from ingestion import extract_text_from_pdf, chunk_text
|
|
| 90 |
from vectorstore import build_faiss_index
|
| 91 |
from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
# ==========================================================
|
| 94 |
# π Paths
|
| 95 |
# ==========================================================
|
|
@@ -164,6 +212,16 @@ elif doc_choice == "Sample PDF":
|
|
| 164 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 165 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
# ✅ Cached Embeddings
|
| 168 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 169 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
@@ -194,6 +252,16 @@ elif doc_choice == "Upload Custom PDF":
|
|
| 194 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 195 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 198 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
| 199 |
hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
|
|
@@ -222,7 +290,10 @@ if index and chunks:
|
|
| 222 |
st.markdown("---")
|
| 223 |
st.subheader("π€ Ask a Question")
|
| 224 |
|
| 225 |
-
user_query = st.text_input(
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
if user_query:
|
| 228 |
mode_label = (
|
|
|
|
| 90 |
from vectorstore import build_faiss_index
|
| 91 |
from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks
|
| 92 |
|
# ==========================================================
# TOC-Based Smart Question Generator
# ==========================================================
def clean_toc_titles(toc):
    """Strip leading section numbering from TOC titles.

    Args:
        toc: Iterable of ``(section, title)`` pairs; only the title is used.

    Returns:
        list[str]: Cleaned titles longer than 3 characters, in input order.

    Removes prefixes like ``3``, ``3.1``, and ``3.`` — the original regex
    ``^\\d+(\\.\\d+)*\\s*`` left a dangling ``". "`` for titles such as
    ``"1. Introduction"``; the optional trailing ``\\.?`` fixes that.
    """
    clean_titles = []
    for _, title in toc:
        # Drop numbering like "3.1" or "2." at the start of the title.
        title = re.sub(r"^\d+(\.\d+)*\.?\s*", "", title).strip()
        # Very short leftovers (<= 3 chars) are unlikely to be meaningful.
        if len(title) > 3:
            clean_titles.append(title)
    return clean_titles
| 105 |
+
|
| 106 |
+
|
def generate_query_suggestions(toc_titles):
    """Turn cleaned TOC section titles into conversational question suggestions.

    Args:
        toc_titles: Iterable of section-title strings (already de-numbered).

    Returns:
        list[str]: At most 6 unique suggestions, in first-seen order.
    """
    suggestions = []
    for title in toc_titles:
        low = title.lower()
        # Keyword buckets are checked in priority order; first match wins.
        if "prerequisite" in low:
            question = "What are the prerequisites for setting this up?"
        elif "restriction" in low or "limitation" in low:
            question = "What are the key restrictions or limitations?"
        elif "configuration" in low or "setup" in low:
            question = f"How do I {low}?"
        elif "overview" in low or "introduction" in low:
            question = "Can you give me an overview of this document?"
        elif "purpose" in low:
            question = "What is the purpose of this guide?"
        elif "example" in low:
            question = "Can you show an example from this document?"
        elif "process" in low:
            question = f"Can you explain the {low} process?"
        elif "use" in low:
            question = f"How do I {low}?"
        else:
            question = f"Explain the section about {low}."
        suggestions.append(question)

    # dict.fromkeys deduplicates while preserving insertion order; cap at 6.
    return list(dict.fromkeys(suggestions))[:6]
| 139 |
+
|
| 140 |
+
|
| 141 |
# ==========================================================
|
| 142 |
# π Paths
|
| 143 |
# ==========================================================
|
|
|
|
| 212 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 213 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 214 |
|
| 215 |
+
# 💡 Generate and display smart suggestions
|
| 216 |
+
clean_titles = clean_toc_titles(toc)
|
| 217 |
+
query_suggestions = generate_query_suggestions(clean_titles)
|
| 218 |
+
if query_suggestions:
|
| 219 |
+
st.markdown("#### π‘ Suggested Questions")
|
| 220 |
+
cols = st.columns(2)
|
| 221 |
+
for i, q in enumerate(query_suggestions):
|
| 222 |
+
if cols[i % 2].button(f"π {q}"):
|
| 223 |
+
st.session_state["user_query"] = q
|
| 224 |
+
|
| 225 |
# ✅ Cached Embeddings
|
| 226 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 227 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
|
|
| 252 |
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 253 |
st.text_area("TOC Preview", toc_text, height=200)
|
| 254 |
|
| 255 |
+
# 💡 Generate and display smart suggestions
|
| 256 |
+
clean_titles = clean_toc_titles(toc)
|
| 257 |
+
query_suggestions = generate_query_suggestions(clean_titles)
|
| 258 |
+
if query_suggestions:
|
| 259 |
+
st.markdown("#### π‘ Suggested Questions")
|
| 260 |
+
cols = st.columns(2)
|
| 261 |
+
for i, q in enumerate(query_suggestions):
|
| 262 |
+
if cols[i % 2].button(f"π {q}"):
|
| 263 |
+
st.session_state["user_query"] = q
|
| 264 |
+
|
| 265 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 266 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
| 267 |
hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
|
|
|
|
| 290 |
st.markdown("---")
|
| 291 |
st.subheader("π€ Ask a Question")
|
| 292 |
|
| 293 |
+
user_query = st.text_input(
|
| 294 |
+
"π Your question about the document:",
|
| 295 |
+
value=st.session_state.get("user_query", "")
|
| 296 |
+
)
|
| 297 |
|
| 298 |
if user_query:
|
| 299 |
mode_label = (
|