Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -194,45 +194,52 @@ if 'engine' not in st.session_state:
|
|
| 194 |
st.session_state.engine = DocSearchEngine()
|
| 195 |
|
| 196 |
with st.sidebar:
|
| 197 |
-
st.
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
new_chunks = []
|
| 204 |
-
failed_files = []
|
| 205 |
-
|
| 206 |
-
total = len(uploaded_files)
|
| 207 |
-
|
| 208 |
-
for i, f in enumerate(uploaded_files):
|
| 209 |
-
status_text.text(f"Processing {i+1}/{total}: {f.name}...")
|
| 210 |
-
progress_bar.progress((i)/total)
|
| 211 |
|
| 212 |
-
|
| 213 |
-
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
|
|
|
|
|
|
| 221 |
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
st.
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
with st.expander("Failed Files"):
|
| 235 |
-
for ff in failed_files: st.write(ff)
|
| 236 |
|
| 237 |
st.divider()
|
| 238 |
st.header("⚙️ Manage Index")
|
|
|
|
| 194 |
st.session_state.engine = DocSearchEngine()
|
| 195 |
|
| 196 |
with st.sidebar:
|
| 197 |
+
with st.sidebar:
|
| 198 |
+
st.header("🗄️ Upload Documents")
|
| 199 |
+
uploaded_files = st.file_uploader("Upload Files", accept_multiple_files=True)
|
| 200 |
+
if uploaded_files and st.button("Index"):
|
| 201 |
+
progress_bar = st.progress(0)
|
| 202 |
+
status_text = st.empty()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
new_chunks = []
|
| 205 |
+
failed_files = []
|
| 206 |
|
| 207 |
+
total = len(uploaded_files)
|
| 208 |
+
|
| 209 |
+
for i, f in enumerate(uploaded_files):
|
| 210 |
+
status_text.text(f"Processing {i+1}/{total}: {f.name}...")
|
| 211 |
+
progress_bar.progress((i)/total)
|
| 212 |
+
|
| 213 |
+
# PARSE (With OCR Auto-Switch)
|
| 214 |
+
txt, fname, method = parse_file(f)
|
| 215 |
|
| 216 |
+
# --- DEBUGGING: CATCH ACTUAL ERRORS ---
|
| 217 |
+
if method.startswith("Error"):
|
| 218 |
+
st.error(f"System Error on {fname}: {method}")
|
| 219 |
+
failed_files.append(f"{fname}: {method}")
|
| 220 |
+
continue
|
| 221 |
|
| 222 |
+
if method == "OCR (Slow)":
|
| 223 |
+
st.toast(f"OCR Used for {fname}", icon="⚠️")
|
| 224 |
+
|
| 225 |
+
if not txt.strip():
|
| 226 |
+
failed_files.append(f"{fname} (No text found)")
|
| 227 |
+
continue
|
| 228 |
+
|
| 229 |
+
file_chunks = recursive_chunking(txt, fname)
|
| 230 |
+
new_chunks.extend(file_chunks)
|
| 231 |
+
|
| 232 |
+
progress_bar.progress(1.0)
|
| 233 |
|
| 234 |
+
if new_chunks:
|
| 235 |
+
with st.spinner("Saving database..."):
|
| 236 |
+
st.session_state.engine.add_documents(new_chunks)
|
| 237 |
+
IndexManager.save_to_hub()
|
| 238 |
+
st.success(f"Indexed {len(new_chunks)} chunks!")
|
| 239 |
+
|
| 240 |
+
if failed_files:
|
| 241 |
+
with st.expander("⚠️ Issues Detected", expanded=True):
|
| 242 |
+
for ff in failed_files: st.write(ff)
|
|
|
|
|
|
|
| 243 |
|
| 244 |
st.divider()
|
| 245 |
st.header("⚙️ Manage Index")
|