NavyDevilDoc committed on
Commit
e546bbb
·
verified ·
1 Parent(s): e1daca2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -34
app.py CHANGED
@@ -194,45 +194,52 @@ if 'engine' not in st.session_state:
194
  st.session_state.engine = DocSearchEngine()
195
 
196
  with st.sidebar:
197
- st.header("🗄️ Upload Documents")
198
- uploaded_files = st.file_uploader("Upload Files", accept_multiple_files=True)
199
- if uploaded_files and st.button("Index"):
200
- progress_bar = st.progress(0)
201
- status_text = st.empty()
202
-
203
- new_chunks = []
204
- failed_files = []
205
-
206
- total = len(uploaded_files)
207
-
208
- for i, f in enumerate(uploaded_files):
209
- status_text.text(f"Processing {i+1}/{total}: {f.name}...")
210
- progress_bar.progress((i)/total)
211
 
212
- # PARSE (With OCR Auto-Switch)
213
- txt, fname, method = parse_file(f)
214
 
215
- if method == "OCR (Slow)":
216
- st.toast(f"OCR Used for {fname}", icon="⚠️")
 
 
 
 
 
 
217
 
218
- if not txt.strip():
219
- failed_files.append(f"{fname} (Empty/Unreadable)")
220
- continue
 
 
221
 
222
- file_chunks = recursive_chunking(txt, fname)
223
- new_chunks.extend(file_chunks)
 
 
 
 
 
 
 
 
 
224
 
225
- progress_bar.progress(1.0)
226
-
227
- if new_chunks:
228
- with st.spinner("Saving database..."):
229
- st.session_state.engine.add_documents(new_chunks)
230
- IndexManager.save_to_hub()
231
- st.success(f"Indexed {len(new_chunks)} chunks!")
232
-
233
- if failed_files:
234
- with st.expander("Failed Files"):
235
- for ff in failed_files: st.write(ff)
236
 
237
  st.divider()
238
  st.header("⚙️ Manage Index")
 
194
  st.session_state.engine = DocSearchEngine()
195
 
196
  with st.sidebar:
197
+ with st.sidebar:
198
+ st.header("🗄️ Upload Documents")
199
+ uploaded_files = st.file_uploader("Upload Files", accept_multiple_files=True)
200
+ if uploaded_files and st.button("Index"):
201
+ progress_bar = st.progress(0)
202
+ status_text = st.empty()
 
 
 
 
 
 
 
 
203
 
204
+ new_chunks = []
205
+ failed_files = []
206
 
207
+ total = len(uploaded_files)
208
+
209
+ for i, f in enumerate(uploaded_files):
210
+ status_text.text(f"Processing {i+1}/{total}: {f.name}...")
211
+ progress_bar.progress((i)/total)
212
+
213
+ # PARSE (With OCR Auto-Switch)
214
+ txt, fname, method = parse_file(f)
215
 
216
+ # --- DEBUGGING: CATCH ACTUAL ERRORS ---
217
+ if method.startswith("Error"):
218
+ st.error(f"System Error on {fname}: {method}")
219
+ failed_files.append(f"{fname}: {method}")
220
+ continue
221
 
222
+ if method == "OCR (Slow)":
223
+ st.toast(f"OCR Used for {fname}", icon="⚠️")
224
+
225
+ if not txt.strip():
226
+ failed_files.append(f"{fname} (No text found)")
227
+ continue
228
+
229
+ file_chunks = recursive_chunking(txt, fname)
230
+ new_chunks.extend(file_chunks)
231
+
232
+ progress_bar.progress(1.0)
233
 
234
+ if new_chunks:
235
+ with st.spinner("Saving database..."):
236
+ st.session_state.engine.add_documents(new_chunks)
237
+ IndexManager.save_to_hub()
238
+ st.success(f"Indexed {len(new_chunks)} chunks!")
239
+
240
+ if failed_files:
241
+ with st.expander("⚠️ Issues Detected", expanded=True):
242
+ for ff in failed_files: st.write(ff)
 
 
243
 
244
  st.divider()
245
  st.header("⚙️ Manage Index")