Spaces:

Shubham170793
/

enterprise-knowledge-assistant

Sleeping

App Files Files Community

Shubham170793 committed on Oct 18

Commit

b8ced0e

verified ·

1 Parent(s): c8ee8ff

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +262 -174

src/streamlit_app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import re
 import shutil
@@ -5,34 +6,49 @@ import streamlit as st
 import torch
 # ==========================================================
-# ✅ Environment Setup
 # ==========================================================
-st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
 print("CUDA available:", torch.cuda.is_available())
 # ==========================================================
-# ⚙️ Hugging Face Cache Setup
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
-os.environ.update({
-    "HF_HOME": CACHE_DIR,
-    "TRANSFORMERS_CACHE": CACHE_DIR,
-    "HF_DATASETS_CACHE": CACHE_DIR,
-    "HF_MODULES_CACHE": CACHE_DIR
-})
 # ==========================================================
-# 📦 Imports
 # ==========================================================
 from ingestion import extract_text_from_pdf, chunk_text
 from vectorstore import build_faiss_index
 from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
 # ==========================================================
-# 🧠 Suggestion Generator
 # ==========================================================
 def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
     if not toc or not chunks:
         return []
     titles = []
@@ -44,16 +60,17 @@ def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
     context_sample = " ".join(chunks[:3])[:4000]
     prompt = f"""
-    You are generating concise, document-specific questions for "{doc_name}".
-    Use this TOC and sample text as context.
-    TOC:
     {chr(10).join(['- ' + t for t in titles[:8]])}
     TEXT SAMPLE:
     {context_sample}
-    Generate 5–7 short, document-based questions under 18 words.
     """
     try:
@@ -67,187 +84,258 @@ def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
                 final.append(q)
         return final[:7]
     except Exception:
-        return ["What is this document about?", "How do I use this guide?"]
-# ==========================================================
-# 🎨 Styling
-# ==========================================================
-st.markdown("""
-<style>
-div.block-container { padding-top: 1rem; max-width: 1100px; }
-h2, h3, h4 { color: #f3f4f6; font-weight: 600; }
-h3 { font-size: 1.1rem; margin-bottom: 0.4rem; }
-.status-line {
-    background: #0f172a;
-    border-left: 4px solid #10b981;
-    border-radius: 6px;
-    padding: 8px 14px;
-    color: #d1fae5;
-    margin-bottom: 0.6rem;
-}
-.suggest-chip {
-    background-color: #111827;
-    border: 1px solid #272b33;
-    border-radius: 14px;
-    color: #cbd5e1;
-    padding: 5px 10px;
-    cursor: pointer;
-    font-size: 12.5px;
-    transition: all 0.2s ease-in-out;
-    margin: 3px 3px 3px 0;
-    display: inline-block;
-}
-.suggest-chip:hover {
-    background-color: #2563eb;
-    border-color: #3b82f6;
-    color: white;
-    box-shadow: 0 0 6px rgba(59,130,246,0.4);
-}
-.stTextInput > div > div > input {
-    background-color: #0f172a;
-    color: #f1f5f9;
-    border-radius: 6px;
-    border: 1px solid #334155;
-    padding: 8px 12px;
-    font-size: 14px;
-}
-.answer-box {
-    background: linear-gradient(135deg, #0f172a, #1e293b);
-    border-left: 4px solid #3b82f6;
-    border-radius: 8px;
-    padding: 16px 18px;
-    color: #f1f5f9;
-    margin-top: 0.5rem;
-}
-section.ask-block { margin-top: 0.5rem; margin-bottom: 0.2rem; }
-section.answer-block { margin-top: 0.2rem; }
-</style>
-""", unsafe_allow_html=True)
-# ==========================================================
-# 🧭 Sidebar (simplified)
 # ==========================================================
-with st.sidebar:
-    st.markdown("### 🧠 Response Mode")
-    mode = st.radio("", ["Strict (Document-only)", "Extended (Document + general)"], index=0)
-    reasoning_mode = mode.startswith("Extended")
-    st.caption("Strict = answers only from the document. Extended = may include helpful general info.")
-    with st.expander("⚙️ Advanced Settings (for developers)", expanded=False):
-        chunk_size = st.slider("Chunk Size", 200, 1500, 1000, step=50)
-        overlap = st.slider("Chunk Overlap", 50, 200, 120, step=10)
-        top_k = st.slider("Top K Results", 1, 10, 5)
     st.markdown("---")
     st.caption("✨ Built by Shubham Sharma")
 # ==========================================================
-# 📄 State Initialization
 # ==========================================================
-for key, default in {
-    "user_query_input": "",
-    "selected_suggestion": None,
-    "show_more": False,
-    "last_doc": None,
-    "query_suggestions": [],
-}.items():
-    if key not in st.session_state:
-        st.session_state[key] = default
-def select_suggestion(q):
-    st.session_state.user_query_input = q
-    st.session_state.selected_suggestion = q
 # ==========================================================
-# 📘 Main Layout
 # ==========================================================
-st.title("Enterprise Knowledge Assistant")
-st.caption("Query SAP documentation and enterprise PDFs — powered by reasoning and retrieval.")
-doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
-text, chunks, index, embeddings, toc = None, None, None, None, None
 if doc_choice == "-- Select --":
-    st.info("⬅️ Select a document to begin.")
 else:
     if doc_choice == "Sample PDF":
         temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
-        st.markdown("<div class='status-line'>📘 Using built-in Sample PDF — ready to query below.</div>", unsafe_allow_html=True)
     else:
         uploaded_file = st.file_uploader("📂 Upload your PDF", type="pdf")
         if uploaded_file:
             temp_path = os.path.join("/tmp", uploaded_file.name)
             with open(temp_path, "wb") as f:
                 f.write(uploaded_file.getbuffer())
-            st.markdown(f"<div class='status-line'>✅ '{uploaded_file.name}' uploaded successfully — ready to query below.</div>", unsafe_allow_html=True)
-        else:
-            temp_path = None
-    if temp_path:
-        with st.spinner("🔍 Processing your document..."):
-            text, toc = extract_text_from_pdf(temp_path)
-            chunks = chunk_text(text, chunk_size=chunk_size)
-            # ✅ Only generate suggestions once per document
-            if st.session_state.get("last_doc") != os.path.basename(temp_path):
-                st.session_state["query_suggestions"] = generate_dynamic_suggestions_from_toc(
-                    toc, chunks, os.path.basename(temp_path)
-                )
-                st.session_state["last_doc"] = os.path.basename(temp_path)
-            query_suggestions = st.session_state["query_suggestions"]
-        with st.spinner("⚙️ Building FAISS index..."):
-            embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
-            index = build_faiss_index(embeddings)
-        # ----------------------------------------------------------
-        # 💬 Ask a Question
-        # ----------------------------------------------------------
-        st.markdown("<section class='ask-block'>", unsafe_allow_html=True)
-        st.subheader("Ask the Assistant")
-        if query_suggestions:
-            visible = query_suggestions if st.session_state.show_more else query_suggestions[:3]
-            cols = st.columns(min(3, len(visible)))
-            for i, q in enumerate(visible):
-                if cols[i % 3].button(f"🔍 {q}", key=f"sugg_{i}", on_click=select_suggestion, args=(q,)):
-                    pass
-            toggle_text = "Show all sample questions ▲" if st.session_state.show_more else "Show all sample questions ▼"
-            if st.button(toggle_text, key="toggle_sugg"):
-                st.session_state.show_more = not st.session_state.show_more
-                st.experimental_rerun()
-        user_query = st.text_input("Type your question or click one above:", key="user_query_input")
-        st.markdown("</section>", unsafe_allow_html=True)
-        # ----------------------------------------------------------
-        # 🤖 Assistant
-        # ----------------------------------------------------------
-        if user_query.strip():
-            with st.spinner("💭 Generating response..."):
-                retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
-                answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
-            st.markdown("<section class='answer-block'>", unsafe_allow_html=True)
-            st.subheader("Assistant")
-            st.markdown(f"<div class='answer-box'>💡 {answer}</div>", unsafe_allow_html=True)
-            st.markdown("</section>", unsafe_allow_html=True)
-            with st.expander("🧩 See Source Passages"):
-                for i, r in enumerate(retrieved, start=1):
-                    st.markdown(f"**Chunk {i}:** {r}")
-        # ----------------------------------------------------------
-        # 📚 Explore Document
-        # ----------------------------------------------------------
-        with st.expander("📖 View Original Document Content"):
-            if toc:
-                st.markdown("**Table of Contents**")
                 toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
-                st.text_area("", toc_text, height=150)
-            if chunks:
-                st.markdown("**Extracted Text Preview**")
-                st.text_area("", text[:1000], height=150)
-                st.caption(f"{len(chunks)} chunks processed.")

+# streamlit_app.py
+import html
 import os
 import re
 import shutil
 import torch
 # ==========================================================
+# ✅ Environment Diagnostics
 # ==========================================================
 print("CUDA available:", torch.cuda.is_available())
+if torch.cuda.is_available():
+    try:
+        print("GPU:", torch.cuda.get_device_name(0))
+    except Exception:
+        pass
+else:
+    print("Running on CPU")
+# ==========================================================
+# ✅ Page Configuration
+# ==========================================================
+st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
 # ==========================================================
+# ⚙️ Hugging Face Cache Configuration (non-destructive)
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
+os.environ.update(
+    {
+        "HF_HOME": CACHE_DIR,
+        "TRANSFORMERS_CACHE": CACHE_DIR,
+        "HF_DATASETS_CACHE": CACHE_DIR,
+        "HF_MODULES_CACHE": CACHE_DIR,
+    }
+)
 # ==========================================================
+# 📦 Imports AFTER environment setup (your modules)
 # ==========================================================
+# These should be your existing modules — unchanged
 from ingestion import extract_text_from_pdf, chunk_text
 from vectorstore import build_faiss_index
 from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
 # ==========================================================
+# 🧠 Smart suggestion generator (TOC-based; unchanged semantics)
 # ==========================================================
 def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
+    """Generate short, doc-focused suggestion questions from a TOC"""
     if not toc or not chunks:
         return []
     titles = []
     context_sample = " ".join(chunks[:3])[:4000]
     prompt = f"""
+    You are generating concise, context-aware questions based on the document "{doc_name}".
+    Use this Table of Contents and sample content for inspiration.
+    TABLE OF CONTENTS:
     {chr(10).join(['- ' + t for t in titles[:8]])}
     TEXT SAMPLE:
     {context_sample}
+    Generate 5–7 questions that are short, relevant, and strictly document-based.
+    Each question should be under 18 words.
     """
     try:
                 final.append(q)
         return final[:7]
     except Exception:
+        # safe fallback (minimal, doc-driven)
+        return ["What is this document about?", "How do I start using this process?"]
 # ==========================================================
+# 🎨 Global CSS / UI polish (no functional changes)
+# ==========================================================
+st.markdown(
+    """
+    <style>
+    /* container width and heading style */
+    div.block-container { padding-top:1.2rem; max-width:1100px; }
+    h1 { font-weight:700; color: #f8fafc; }
+    h2, h3 { color:#f1f5f9 }
+    /* Upload / success card */
+    .upload-card {
+        background: linear-gradient(90deg,#0f1724,#0b1220);
+        border-radius:10px;
+        padding:12px 16px;
+        color:#e6eef8;
+        border:1px solid rgba(59,130,246,0.06);
+        margin-top:10px;
+    }
+    /* Suggestion chips */
+    .suggest-chip {
+        background-color: #0f1724;
+        border: 1px solid #374151;
+        border-radius: 14px;
+        color: #e6eef8;
+        padding: 8px 12px;
+        cursor: pointer;
+        font-size: 13px;
+        margin:6px 6px 10px 0;
+        display:inline-block;
+        transition: all 0.15s ease-in-out;
+        max-width: 360px;
+        text-align:left;
+    }
+    .suggest-chip:hover { transform: translateY(-2px); box-shadow: 0 6px 18px rgba(15,23,42,0.35); }
+    .suggest-chip.selected {
+        border-color: #3b82f6;
+        background: linear-gradient(90deg,#13325a,#0f2a4d);
+        color: #eaf2ff;
+        box-shadow: 0 8px 20px rgba(59,130,246,0.12);
+    }
+    /* Input styling */
+    .stTextInput > div > div > input {
+        background-color:#0b1220 !important;
+        color:#e6eef8 !important;
+        border-radius:6px !important;
+        border:1px solid #273244 !important;
+        padding:10px !important;
+    }
+    /* Answer box */
+    .answer-box {
+        background: linear-gradient(180deg,#0b1220,#071027);
+        border-left: 4px solid #3b82f6;
+        border-radius: 8px;
+        padding: 14px 16px;
+        color: #e6eef8;
+        margin-top: 12px;
+        box-shadow: 0 8px 30px rgba(2,6,23,0.6);
+    }
+    .answer-header {
+        font-weight:600; font-size:0.95rem; margin-bottom:8px; color:#dbeafe;
+    }
+    /* mini loader dots used in place of plain text spinner */
+    .dot-loader span { display:inline-block; width:6px; height:6px; margin:0 3px; background:#94a3b8; border-radius:50%; animation: dot 1s infinite linear; }
+    .dot-loader span:nth-child(2){ animation-delay:0.12s }
+    .dot-loader span:nth-child(3){ animation-delay:0.24s }
+    @keyframes dot {
+        0% { transform: translateY(0); opacity:0.3 }
+        50% { transform: translateY(-6px); opacity:1 }
+        100% { transform: translateY(0); opacity:0.3 }
+    }
+    /* subtle expander header styling */
+    .streamlit-expanderHeader { color:#e6eef8 !important; }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+# ==========================================================
+# 🧭 Sidebar — keep concise settings for consumers
+# ==========================================================
+with st.sidebar:
+    st.markdown("### 🧭 Response Mode")
+    # default to Strict (document-only) for first-time users
+    mode = st.radio(
+        "",
+        ("Strict (Document-only)", "Extended (Document + general)"),
+        index=0,
+        help="Strict = answers only from the document. Extended = may include helpful general info.",
+    )
+    st.markdown("---")
+    if st.checkbox("Show advanced settings (for devs)", value=False):
+        st.markdown("### Developer Settings")
+        # keep internal knobs but hide by default
+        _chunk_size = st.slider("Chunk Size (chars)", 200, 1500, 1000, step=50)
+        _overlap = st.slider("Chunk overlap (chars)", 50, 300, 120, step=10)
+        _topk = st.slider("Top K Results", 1, 10, 5)
+    else:
+        # expose simple slider for users (but not all dev knobs)
+        _chunk_size = st.slider("Chunk Size", 200, 1500, 1000, step=50)
+        _overlap = 120
+        _topk = st.slider("Top K Results", 1, 10, 5)
     st.markdown("---")
     st.caption("✨ Built by Shubham Sharma")
 # ==========================================================
+# 🗂 Initialize session state keys (prevent widget warnings)
 # ==========================================================
+if "user_query_input" not in st.session_state:
+    st.session_state["user_query_input"] = ""
+if "show_more" not in st.session_state:
+    st.session_state["show_more"] = False
+if "selected_suggestion" not in st.session_state:
+    st.session_state["selected_suggestion"] = None
+if "last_doc_basename" not in st.session_state:
+    st.session_state["last_doc_basename"] = None
+# Helper: set query when a suggestion is clicked (keeps things simple)
+def handle_suggestion_click(q: str, idx: int):
+    st.session_state["user_query_input"] = q
+    st.session_state["selected_suggestion"] = idx
+    # Immediately rerun so user sees query filled and answer generated
+    st.experimental_rerun()
 # ==========================================================
+# 📄 Main application flow
 # ==========================================================
+st.title("📄 Enterprise Knowledge Assistant")
+st.caption("Query SAP documentation and enterprise PDFs — powered by reasoning + retrieval.")
+# Document select/upload
+doc_choice = st.radio(
+    "Select a document:",
+    ("-- Select --", "Sample PDF", "Upload Custom PDF"),
+    index=0,
+)
+temp_path = None
 if doc_choice == "-- Select --":
+    st.info("⬅️ Please select a document from above to begin.")
 else:
     if doc_choice == "Sample PDF":
         temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
     else:
         uploaded_file = st.file_uploader("📂 Upload your PDF", type="pdf")
         if uploaded_file:
             temp_path = os.path.join("/tmp", uploaded_file.name)
             with open(temp_path, "wb") as f:
                 f.write(uploaded_file.getbuffer())
+            # success card
+            st.markdown(
+                f"<div class='upload-card'>✅ <b>{uploaded_file.name}</b> uploaded successfully — ready to query below.</div>",
+                unsafe_allow_html=True,
+            )
+# If sample chosen, show small card to confirm (non-intrusive)
+if doc_choice == "Sample PDF" and temp_path:
+    st.markdown(
+        "<div class='upload-card'>📘 Using built-in Sample PDF.</div>", unsafe_allow_html=True
+    )
+# Only proceed when we have a path
+if temp_path:
+    # Process & index (unchanged)
+    with st.spinner("🔍 Processing document..."):
+        text, toc = extract_text_from_pdf(temp_path)
+        chunks = chunk_text(text, chunk_size=_chunk_size if " _chunk_size" in locals() else 1000)
+    # Prepare embeddings & index (use caching as before)
+    with st.spinner("⚙️ Preparing search index..."):
+        embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
+        index = build_faiss_index(embeddings)
+    # Only generate suggestions once per uploaded document during a session
+    basename = os.path.basename(temp_path)
+    if st.session_state.get("last_doc_basename") != basename:
+        # generate suggestions
+        query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, basename)
+        st.session_state["query_suggestions_fixed"] = query_suggestions
+        st.session_state["last_doc_basename"] = basename
+        st.session_state["selected_suggestion"] = None
+    else:
+        query_suggestions = st.session_state.get("query_suggestions_fixed", [])
+    # ----------------------------------------------------------
+    # 💬 Ask a Question UI
+    # ----------------------------------------------------------
+    st.markdown("### Ask the Assistant")
+    # Show suggestions as styled chips — fixed for the doc during session
+    if query_suggestions:
+        visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
+        # render chips inline in a single column
+        chip_container = st.container()
+        for i, q in enumerate(visible):
+            # create a simple html chip and a real button for the click behaviour
+            selected = st.session_state.get("selected_suggestion") == i
+            chip_class = "suggest-chip selected" if selected else "suggest-chip"
+            # create horizontal layout: use columns to space them evenly
+            cols = chip_container.columns(3)
+            col = cols[i % 3]
+            # button triggers state change (use the same label so users can also press the button)
+            if col.button(f"🔍 {q}", key=f"sugg_btn_{i}"):
+                handle_suggestion_click(q, i)
+            # render chip visually (non-interactive decoration)
+            col.markdown(f"<div class='{chip_class}'>{q}</div>", unsafe_allow_html=True)
+        # Show more / less toggle
+        toggle_text = "Show less ▲" if st.session_state["show_more"] else "Show more ▼"
+        if st.button(toggle_text, key="toggle_more_suggestions"):
+            st.session_state["show_more"] = not st.session_state["show_more"]
+            st.experimental_rerun()
+    # Type input — rely exclusively on session_state key (pre-initialized above) to avoid warnings
+    user_query = st.text_input("Type your question or click one above:", key="user_query_input")
+    # When a query is filled (either typed or from click), generate answer
+    if user_query and user_query.strip():
+        # show a friendly loader animation while generating (non-invasive)
+        st.markdown("<div class='dot-loader'><span></span><span></span><span></span></div>", unsafe_allow_html=True)
+        # retrieval + answer generation (same semantics as before)
+        retrieved = retrieve_chunks(user_query, index, chunks, top_k=_topk if "_topk" in locals() else _topk)
+        answer = generate_answer(user_query, retrieved, reasoning_mode=(mode.startswith("Extended")))
+        # answer card with header (UI only)
+        st.markdown("### Assistant")
+        st.markdown(
+            f"<div class='answer-box'><div class='answer-header'>Assistant’s Response</div>{answer}</div>",
+            unsafe_allow_html=True,
+        )
+        # supporting context expander (keeps collapsed by default)
+        with st.expander("📄 Supporting Context (source passages)"):
+            for i, r in enumerate(retrieved, start=1):
+                st.markdown(f"**Chunk {i}:** {r}")
+        # Optional helpful blocks (collapsed by default)
+        if toc:
+            with st.expander("📚 Table of Contents"):
                 toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
+                st.text_area("", toc_text, height=140)
+        with st.expander("📄 Document Preview"):
+            st.text_area("", text[:1000], height=140)
+            st.caption(f"{len(chunks)} chunks processed.")