Update src/streamlit_app.py
Browse files- src/streamlit_app.py +13 -3
src/streamlit_app.py
CHANGED
|
@@ -145,7 +145,7 @@ with st.sidebar:
|
|
| 145 |
# ==========================================================
|
| 146 |
# 🧾 Document Handling
|
| 147 |
# ==========================================================
|
| 148 |
-
text, chunks, index, embeddings = None, None, None, None
|
| 149 |
|
| 150 |
if doc_choice == "-- Select --":
|
| 151 |
st.info("⬅️ Please choose a document from the sidebar.")
|
|
@@ -155,10 +155,15 @@ elif doc_choice == "Sample PDF":
|
|
| 155 |
st.success("π Using built-in Sample PDF")
|
| 156 |
|
| 157 |
with st.spinner("π Extracting and processing document..."):
|
| 158 |
-
text = extract_text_from_pdf(temp_path)
|
| 159 |
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 160 |
st.write(f"π Extracted {len(chunks)} chunks.")
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
# ✅ Cached Embeddings
|
| 163 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 164 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
@@ -180,10 +185,15 @@ elif doc_choice == "Upload Custom PDF":
|
|
| 180 |
st.success(f"✅ File '{uploaded_file.name}' uploaded successfully")
|
| 181 |
|
| 182 |
with st.spinner("βοΈ Extracting and processing your document..."):
|
| 183 |
-
text = extract_text_from_pdf(temp_path)
|
| 184 |
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 185 |
st.write(f"π Extracted {len(chunks)} chunks.")
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 188 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
| 189 |
hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
|
|
|
|
| 145 |
# ==========================================================
|
| 146 |
# 🧾 Document Handling
|
| 147 |
# ==========================================================
|
| 148 |
+
text, chunks, index, embeddings, toc = None, None, None, None, None
|
| 149 |
|
| 150 |
if doc_choice == "-- Select --":
|
| 151 |
st.info("⬅️ Please choose a document from the sidebar.")
|
|
|
|
| 155 |
st.success("π Using built-in Sample PDF")
|
| 156 |
|
| 157 |
with st.spinner("π Extracting and processing document..."):
|
| 158 |
+
text, toc = extract_text_from_pdf(temp_path)
|
| 159 |
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 160 |
st.write(f"π Extracted {len(chunks)} chunks.")
|
| 161 |
|
| 162 |
+
if toc:
|
| 163 |
+
st.markdown("### π§ Detected Table of Contents")
|
| 164 |
+
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 165 |
+
st.text_area("TOC Preview", toc_text, height=200)
|
| 166 |
+
|
| 167 |
# ✅ Cached Embeddings
|
| 168 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 169 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
|
|
|
| 185 |
st.success(f"✅ File '{uploaded_file.name}' uploaded successfully")
|
| 186 |
|
| 187 |
with st.spinner("βοΈ Extracting and processing your document..."):
|
| 188 |
+
text, toc = extract_text_from_pdf(temp_path)
|
| 189 |
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 190 |
st.write(f"π Extracted {len(chunks)} chunks.")
|
| 191 |
|
| 192 |
+
if toc:
|
| 193 |
+
st.markdown("### π§ Detected Table of Contents")
|
| 194 |
+
toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
|
| 195 |
+
st.text_area("TOC Preview", toc_text, height=200)
|
| 196 |
+
|
| 197 |
with st.spinner("βοΈ Loading cached embeddings or generating new ones..."):
|
| 198 |
embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
|
| 199 |
hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
|