Spaces:

NavyDevilDoc
/

Semantic_Search

Sleeping

App Files Community

NavyDevilDoc committed on Dec 16, 2025

Commit

e546bbb

verified ·

1 Parent(s): e1daca2

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -34

app.py CHANGED Viewed

@@ -194,45 +194,52 @@ if 'engine' not in st.session_state:
     st.session_state.engine = DocSearchEngine()
 with st.sidebar:
-    st.header("🗄️ Upload Documents")
-    uploaded_files = st.file_uploader("Upload Files", accept_multiple_files=True)
-    if uploaded_files and st.button("Index"):
-        progress_bar = st.progress(0)
-        status_text = st.empty()
-        new_chunks = []
-        failed_files = []
-        total = len(uploaded_files)
-        for i, f in enumerate(uploaded_files):
-            status_text.text(f"Processing {i+1}/{total}: {f.name}...")
-            progress_bar.progress((i)/total)
-            # PARSE (With OCR Auto-Switch)
-            txt, fname, method = parse_file(f)
-            if method == "OCR (Slow)":
-                st.toast(f"OCR Used for {fname}", icon="⚠️")
-            if not txt.strip():
-                failed_files.append(f"{fname} (Empty/Unreadable)")
-                continue
-            file_chunks = recursive_chunking(txt, fname)
-            new_chunks.extend(file_chunks)
-        progress_bar.progress(1.0)
-        if new_chunks:
-            with st.spinner("Saving database..."):
-                st.session_state.engine.add_documents(new_chunks)
-                IndexManager.save_to_hub()
-            st.success(f"Indexed {len(new_chunks)} chunks!")
-        if failed_files:
-            with st.expander("Failed Files"):
-                for ff in failed_files: st.write(ff)
     st.divider()
     st.header("⚙️ Manage Index")

     st.session_state.engine = DocSearchEngine()
 with st.sidebar:
+    with st.sidebar:
+        st.header("🗄️ Upload Documents")
+        uploaded_files = st.file_uploader("Upload Files", accept_multiple_files=True)
+        if uploaded_files and st.button("Index"):
+            progress_bar = st.progress(0)
+            status_text = st.empty()
+            new_chunks = []
+            failed_files = []
+            total = len(uploaded_files)
+            for i, f in enumerate(uploaded_files):
+                status_text.text(f"Processing {i+1}/{total}: {f.name}...")
+                progress_bar.progress((i)/total)
+                # PARSE (With OCR Auto-Switch)
+                txt, fname, method = parse_file(f)
+                # --- DEBUGGING: CATCH ACTUAL ERRORS ---
+                if method.startswith("Error"):
+                    st.error(f"System Error on {fname}: {method}")
+                    failed_files.append(f"{fname}: {method}")
+                    continue
+                if method == "OCR (Slow)":
+                    st.toast(f"OCR Used for {fname}", icon="⚠️")
+                if not txt.strip():
+                    failed_files.append(f"{fname} (No text found)")
+                    continue
+                file_chunks = recursive_chunking(txt, fname)
+                new_chunks.extend(file_chunks)
+            progress_bar.progress(1.0)
+            if new_chunks:
+                with st.spinner("Saving database..."):
+                    st.session_state.engine.add_documents(new_chunks)
+                    IndexManager.save_to_hub()
+                st.success(f"Indexed {len(new_chunks)} chunks!")
+            if failed_files:
+                with st.expander("⚠️ Issues Detected", expanded=True):
+                    for ff in failed_files: st.write(ff)
     st.divider()
     st.header("⚙️ Manage Index")