Spaces:
Sleeping
Sleeping
Commit ·
addfa4f
1
Parent(s): b014e33
Fix document deletion: use point IDs instead of filter-based delete, add dropdown selector
Browse files
- app/core/vectorstore.py +29 -5
- app/ui/gradio_app.py +74 -33
app/core/vectorstore.py
CHANGED
|
@@ -8,6 +8,7 @@ from qdrant_client.http.models import (
|
|
| 8 |
MatchAny,
|
| 9 |
MatchValue,
|
| 10 |
PayloadSchemaType,
|
|
|
|
| 11 |
PointStruct,
|
| 12 |
Range,
|
| 13 |
VectorParams,
|
|
@@ -127,14 +128,37 @@ class VectorStoreService:
|
|
| 127 |
]
|
| 128 |
|
| 129 |
def delete_document(self, document_id: str) -> int:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
self.client.delete(
|
| 131 |
collection_name=self.collection_name,
|
| 132 |
-
points_selector=
|
| 133 |
-
must=[FieldCondition(key="document_id", match=MatchValue(value=document_id))]
|
| 134 |
-
),
|
| 135 |
)
|
| 136 |
-
logger.info(f"Deleted
|
| 137 |
-
return
|
| 138 |
|
| 139 |
def scroll_all(self, batch_size: int = 100) -> list[dict]:
|
| 140 |
all_points = []
|
|
|
|
| 8 |
MatchAny,
|
| 9 |
MatchValue,
|
| 10 |
PayloadSchemaType,
|
| 11 |
+
PointIdsList,
|
| 12 |
PointStruct,
|
| 13 |
Range,
|
| 14 |
VectorParams,
|
|
|
|
| 128 |
]
|
| 129 |
|
| 130 |
def delete_document(self, document_id: str, batch_size: int = 100) -> int:
    """Delete every point whose ``document_id`` payload matches and return the count.

    The matching point IDs are collected first by scrolling the collection
    with a payload filter, then removed in a single delete-by-ID call.

    Args:
        document_id: Value matched against the ``document_id`` payload field.
        batch_size: Number of points requested per scroll page. Defaults to
            100, the page size this method previously hard-coded.

    Returns:
        The number of point IDs that were found and submitted for deletion;
        0 when no point matches.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # First, find all point IDs belonging to this document
    doc_filter = Filter(
        must=[FieldCondition(key="document_id", match=MatchValue(value=document_id))]
    )
    point_ids = []
    offset = None
    while True:
        # IDs are all we need, so skip payloads and vectors to keep pages small.
        results, offset = self.client.scroll(
            collection_name=self.collection_name,
            scroll_filter=doc_filter,
            limit=batch_size,
            offset=offset,
            with_payload=False,
            with_vectors=False,
        )
        point_ids.extend(r.id for r in results)
        # A None offset is the signal that the last page has been read.
        if offset is None:
            break

    if not point_ids:
        logger.warning(f"No points found for document '{document_id}'")
        return 0

    # Delete by point IDs (requires only write permission, not manage)
    self.client.delete(
        collection_name=self.collection_name,
        points_selector=PointIdsList(points=point_ids),
    )
    logger.info(f"Deleted {len(point_ids)} points for document '{document_id}'")
    return len(point_ids)
|
| 162 |
|
| 163 |
def scroll_all(self, batch_size: int = 100) -> list[dict]:
|
| 164 |
all_points = []
|
app/ui/gradio_app.py
CHANGED
|
@@ -123,26 +123,42 @@ def upload_document(file):
|
|
| 123 |
return f"**Upload failed:** {e}"
|
| 124 |
|
| 125 |
|
| 126 |
-
def
|
|
|
|
| 127 |
try:
|
| 128 |
response = httpx.get(f"{API_BASE}/api/documents", timeout=30)
|
| 129 |
if response.status_code == 200:
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
|
| 148 |
def get_doc_count():
|
|
@@ -159,14 +175,21 @@ def get_doc_count():
|
|
| 159 |
return "Connecting..."
|
| 160 |
|
| 161 |
|
| 162 |
-
def delete_document(
|
| 163 |
-
if not
|
| 164 |
-
return "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
try:
|
| 166 |
-
response = httpx.delete(f"{API_BASE}/api/documents/{doc_id
|
| 167 |
if response.status_code == 200:
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
| 170 |
except Exception as e:
|
| 171 |
return f"**Delete failed:** {e}"
|
| 172 |
|
|
@@ -375,11 +398,6 @@ def create_gradio_app() -> gr.Blocks:
|
|
| 375 |
)
|
| 376 |
|
| 377 |
upload_status = gr.Markdown()
|
| 378 |
-
upload_btn.click(
|
| 379 |
-
fn=upload_document,
|
| 380 |
-
inputs=file_upload,
|
| 381 |
-
outputs=upload_status,
|
| 382 |
-
)
|
| 383 |
|
| 384 |
gr.Markdown("---")
|
| 385 |
gr.Markdown("#### Indexed Documents")
|
|
@@ -392,14 +410,13 @@ def create_gradio_app() -> gr.Blocks:
|
|
| 392 |
elem_classes=["doc-table"],
|
| 393 |
)
|
| 394 |
refresh_btn = gr.Button("Refresh", size="sm")
|
| 395 |
-
refresh_btn.click(fn=list_documents, outputs=doc_table)
|
| 396 |
|
| 397 |
gr.Markdown("---")
|
| 398 |
gr.Markdown("#### Delete a Document")
|
| 399 |
with gr.Row():
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
scale=3,
|
| 404 |
)
|
| 405 |
delete_btn = gr.Button(
|
|
@@ -409,10 +426,34 @@ def create_gradio_app() -> gr.Blocks:
|
|
| 409 |
elem_classes=["danger-btn"],
|
| 410 |
)
|
| 411 |
delete_status = gr.Markdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
delete_btn.click(
|
| 413 |
fn=delete_document,
|
| 414 |
-
inputs=
|
| 415 |
outputs=delete_status,
|
|
|
|
|
|
|
|
|
|
| 416 |
)
|
| 417 |
|
| 418 |
# Footer
|
|
|
|
| 123 |
return f"**Upload failed:** {e}"
|
| 124 |
|
| 125 |
|
| 126 |
+
def _fetch_documents():
    """Fetch documents from API. Returns list of doc dicts or empty list.

    Issues ``GET {API_BASE}/api/documents`` with a 30 second timeout and
    returns the ``"documents"`` entry of the JSON body. Any failure (request
    error, non-200 status, undecodable body, missing or null ``"documents"``)
    yields an empty list so the UI can keep rendering; failures are logged
    rather than silently discarded.
    """
    import logging

    log = logging.getLogger(__name__)
    try:
        response = httpx.get(f"{API_BASE}/api/documents", timeout=30)
        if response.status_code == 200:
            # ``or []`` also covers a body carrying ``"documents": null``.
            return response.json().get("documents") or []
        log.warning("Document listing returned HTTP %s", response.status_code)
    except Exception:
        # Deliberately best-effort: callers treat an empty list as "nothing
        # to show", but the cause is recorded for debugging.
        log.warning("Could not fetch documents from the API", exc_info=True)
    return []
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def list_documents():
    """Build the rows shown in the indexed-documents table.

    Each row is ``[source, DOC_TYPE, num_chunks, document_id]``, all strings.
    When no documents are available a single placeholder row of em dashes is
    returned instead.
    """
    docs = _fetch_documents()
    if docs:
        rows = []
        for doc in docs:
            source = doc.get("source", "")
            doc_type = doc.get("doc_type", "").upper()
            num_chunks = str(doc.get("num_chunks", 0))
            document_id = doc.get("document_id", "")
            rows.append([source, doc_type, num_chunks, document_id])
        return rows
    # Placeholder row used when there is nothing to list.
    return [["—", "—", "—", "—"]]
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def get_delete_choices():
    """Return a dropdown update listing one 'filename [doc_id]' label per document.

    The selected value is always cleared; with no documents the choice list
    is empty.
    """
    labels = []
    # ``or ()`` lets a falsy fetch result fall through to an empty choice list.
    for doc in _fetch_documents() or ():
        source = doc.get("source", "unknown")
        document_id = doc.get("document_id", "")
        labels.append(f"{source} [{document_id}]")
    return gr.update(choices=labels, value=None)
|
| 162 |
|
| 163 |
|
| 164 |
def get_doc_count():
|
|
|
|
| 175 |
return "Connecting..."
|
| 176 |
|
| 177 |
|
| 178 |
+
def delete_document(selection):
    """Delete the document chosen in the dropdown and return a Markdown status.

    Args:
        selection: Dropdown value in ``"filename [doc_id]"`` form. A bare
            document ID is accepted as well. Falsy values are rejected with a
            prompt to select a document.

    Returns:
        A Markdown string: a success message naming the file, the API's error
        detail for a non-200 response, or a failure message when the request
        itself could not be completed.
    """
    from urllib.parse import quote

    if not selection:
        return "Select a document to delete."

    # Extract document ID from "filename [doc_id]" format. Split on the LAST
    # "[" so a filename that itself contains brackets is kept intact.
    if "[" in selection and selection.endswith("]"):
        head, _, tail = selection.rpartition("[")
        doc_id = tail[:-1]
        filename = head.rstrip() or doc_id[:12]
    else:
        doc_id = selection.strip()
        filename = doc_id[:12]

    try:
        # Percent-encode the ID so characters such as "/" cannot alter the
        # request path.
        response = httpx.delete(
            f"{API_BASE}/api/documents/{quote(doc_id, safe='')}", timeout=30
        )
        if response.status_code == 200:
            return f"**'{filename}'** deleted successfully."
        try:
            detail = response.json().get("detail", response.text)
        except (ValueError, AttributeError):
            # Error body was not a JSON object; show it verbatim instead.
            detail = response.text
        return f"**Error:** {detail}"
    except Exception as e:
        return f"**Delete failed:** {e}"
|
| 195 |
|
|
|
|
| 398 |
)
|
| 399 |
|
| 400 |
upload_status = gr.Markdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
gr.Markdown("---")
|
| 403 |
gr.Markdown("#### Indexed Documents")
|
|
|
|
| 410 |
elem_classes=["doc-table"],
|
| 411 |
)
|
| 412 |
refresh_btn = gr.Button("Refresh", size="sm")
|
|
|
|
| 413 |
|
| 414 |
gr.Markdown("---")
|
| 415 |
gr.Markdown("#### Delete a Document")
|
| 416 |
with gr.Row():
|
| 417 |
+
delete_dropdown = gr.Dropdown(
|
| 418 |
+
choices=[],
|
| 419 |
+
label="Select document to delete",
|
| 420 |
scale=3,
|
| 421 |
)
|
| 422 |
delete_btn = gr.Button(
|
|
|
|
| 426 |
elem_classes=["danger-btn"],
|
| 427 |
)
|
| 428 |
delete_status = gr.Markdown()
|
| 429 |
+
|
| 430 |
+
# Refresh: updates table, dropdown, and stats
|
| 431 |
+
def refresh_all():
    """Return fresh values for the document table, delete dropdown, and stats, in that order."""
    table_rows = list_documents()
    dropdown_update = get_delete_choices()
    stats_text = get_doc_count()
    return table_rows, dropdown_update, stats_text
|
| 433 |
+
|
| 434 |
+
refresh_btn.click(
|
| 435 |
+
fn=refresh_all,
|
| 436 |
+
outputs=[doc_table, delete_dropdown, stats_display],
|
| 437 |
+
)
|
| 438 |
+
|
| 439 |
+
# After upload: refresh table, dropdown, and stats
|
| 440 |
+
upload_btn.click(
|
| 441 |
+
fn=upload_document,
|
| 442 |
+
inputs=file_upload,
|
| 443 |
+
outputs=upload_status,
|
| 444 |
+
).then(
|
| 445 |
+
fn=refresh_all,
|
| 446 |
+
outputs=[doc_table, delete_dropdown, stats_display],
|
| 447 |
+
)
|
| 448 |
+
|
| 449 |
+
# Delete: delete then refresh everything
|
| 450 |
delete_btn.click(
|
| 451 |
fn=delete_document,
|
| 452 |
+
inputs=delete_dropdown,
|
| 453 |
outputs=delete_status,
|
| 454 |
+
).then(
|
| 455 |
+
fn=refresh_all,
|
| 456 |
+
outputs=[doc_table, delete_dropdown, stats_display],
|
| 457 |
)
|
| 458 |
|
| 459 |
# Footer
|