Spaces:

Shubham170793
/

enterprise-knowledge-assistant

Sleeping

App Files Files Community

Shubham170793 committed on Oct 18

Commit

abee1e5

verified ·

1 Parent(s): 6d87461

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +13 -3

src/streamlit_app.py CHANGED Viewed

@@ -145,7 +145,7 @@ with st.sidebar:
 # ==========================================================
 # 🧾 Document Handling
 # ==========================================================
-text, chunks, index, embeddings = None, None, None, None
 if doc_choice == "-- Select --":
     st.info("⬅️ Please choose a document from the sidebar.")
@@ -155,10 +155,15 @@ elif doc_choice == "Sample PDF":
     st.success("📘 Using built-in Sample PDF")
     with st.spinner("🔍 Extracting and processing document..."):
-        text = extract_text_from_pdf(temp_path)
         chunks = chunk_text(text, chunk_size=chunk_size)
         st.write(f"📑 Extracted {len(chunks)} chunks.")
     # ✅ Cached Embeddings
     with st.spinner("⚙️ Loading cached embeddings or generating new ones..."):
         embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
@@ -180,10 +185,15 @@ elif doc_choice == "Upload Custom PDF":
         st.success(f"✅ File '{uploaded_file.name}' uploaded successfully")
         with st.spinner("⚙️ Extracting and processing your document..."):
-            text = extract_text_from_pdf(temp_path)
             chunks = chunk_text(text, chunk_size=chunk_size)
             st.write(f"📄 Extracted {len(chunks)} chunks.")
         with st.spinner("⚙️ Loading cached embeddings or generating new ones..."):
             embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
             hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()

 # ==========================================================
 # 🧾 Document Handling
 # ==========================================================
+text, chunks, index, embeddings, toc = None, None, None, None, None
 if doc_choice == "-- Select --":
     st.info("⬅️ Please choose a document from the sidebar.")
     st.success("📘 Using built-in Sample PDF")
     with st.spinner("🔍 Extracting and processing document..."):
+        text, toc = extract_text_from_pdf(temp_path)
         chunks = chunk_text(text, chunk_size=chunk_size)
         st.write(f"📑 Extracted {len(chunks)} chunks.")
+        if toc:
+            st.markdown("### 🧭 Detected Table of Contents")
+            toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
+            st.text_area("TOC Preview", toc_text, height=200)
     # ✅ Cached Embeddings
     with st.spinner("⚙️ Loading cached embeddings or generating new ones..."):
         embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
         st.success(f"✅ File '{uploaded_file.name}' uploaded successfully")
         with st.spinner("⚙️ Extracting and processing your document..."):
+            text, toc = extract_text_from_pdf(temp_path)
             chunks = chunk_text(text, chunk_size=chunk_size)
             st.write(f"📄 Extracted {len(chunks)} chunks.")
+            if toc:
+                st.markdown("### 🧭 Detected Table of Contents")
+                toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
+                st.text_area("TOC Preview", toc_text, height=200)
         with st.spinner("⚙️ Loading cached embeddings or generating new ones..."):
             embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
             hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()