Shubham170793 committed on
Commit
abee1e5
·
verified ·
1 Parent(s): 6d87461

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +13 -3
src/streamlit_app.py CHANGED
@@ -145,7 +145,7 @@ with st.sidebar:
145
  # ==========================================================
146
  # 🧾 Document Handling
147
  # ==========================================================
148
- text, chunks, index, embeddings = None, None, None, None
149
 
150
  if doc_choice == "-- Select --":
151
  st.info("⬅️ Please choose a document from the sidebar.")
@@ -155,10 +155,15 @@ elif doc_choice == "Sample PDF":
155
  st.success("πŸ“˜ Using built-in Sample PDF")
156
 
157
  with st.spinner("πŸ” Extracting and processing document..."):
158
- text = extract_text_from_pdf(temp_path)
159
  chunks = chunk_text(text, chunk_size=chunk_size)
160
  st.write(f"πŸ“‘ Extracted {len(chunks)} chunks.")
161
 
 
 
 
 
 
162
  # ✅ Cached Embeddings
163
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
164
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
@@ -180,10 +185,15 @@ elif doc_choice == "Upload Custom PDF":
180
  st.success(f"βœ… File '{uploaded_file.name}' uploaded successfully")
181
 
182
  with st.spinner("βš™οΈ Extracting and processing your document..."):
183
- text = extract_text_from_pdf(temp_path)
184
  chunks = chunk_text(text, chunk_size=chunk_size)
185
  st.write(f"πŸ“„ Extracted {len(chunks)} chunks.")
186
 
 
 
 
 
 
187
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
188
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
189
  hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
 
145
  # ==========================================================
146
  # 🧾 Document Handling
147
  # ==========================================================
148
+ text, chunks, index, embeddings, toc = None, None, None, None, None
149
 
150
  if doc_choice == "-- Select --":
151
  st.info("⬅️ Please choose a document from the sidebar.")
 
155
  st.success("πŸ“˜ Using built-in Sample PDF")
156
 
157
  with st.spinner("πŸ” Extracting and processing document..."):
158
+ text, toc = extract_text_from_pdf(temp_path)
159
  chunks = chunk_text(text, chunk_size=chunk_size)
160
  st.write(f"πŸ“‘ Extracted {len(chunks)} chunks.")
161
 
162
+ if toc:
163
+ st.markdown("### 🧭 Detected Table of Contents")
164
+ toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
165
+ st.text_area("TOC Preview", toc_text, height=200)
166
+
167
  # ✅ Cached Embeddings
168
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
169
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
 
185
  st.success(f"βœ… File '{uploaded_file.name}' uploaded successfully")
186
 
187
  with st.spinner("βš™οΈ Extracting and processing your document..."):
188
+ text, toc = extract_text_from_pdf(temp_path)
189
  chunks = chunk_text(text, chunk_size=chunk_size)
190
  st.write(f"πŸ“„ Extracted {len(chunks)} chunks.")
191
 
192
+ if toc:
193
+ st.markdown("### 🧭 Detected Table of Contents")
194
+ toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
195
+ st.text_area("TOC Preview", toc_text, height=200)
196
+
197
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
198
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
199
  hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()