NinjainPJs Claude Opus 4.6 (1M context) committed on
Commit
addfa4f
·
1 Parent(s): b014e33

Fix document deletion: use point IDs instead of filter-based delete, add dropdown selector

Browse files
Files changed (2) hide show
  1. app/core/vectorstore.py +29 -5
  2. app/ui/gradio_app.py +74 -33
app/core/vectorstore.py CHANGED
@@ -8,6 +8,7 @@ from qdrant_client.http.models import (
8
  MatchAny,
9
  MatchValue,
10
  PayloadSchemaType,
 
11
  PointStruct,
12
  Range,
13
  VectorParams,
@@ -127,14 +128,37 @@ class VectorStoreService:
127
  ]
128
 
129
  def delete_document(self, document_id: str) -> int:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  self.client.delete(
131
  collection_name=self.collection_name,
132
- points_selector=Filter(
133
- must=[FieldCondition(key="document_id", match=MatchValue(value=document_id))]
134
- ),
135
  )
136
- logger.info(f"Deleted document '{document_id}' from '{self.collection_name}'")
137
- return 0
138
 
139
  def scroll_all(self, batch_size: int = 100) -> list[dict]:
140
  all_points = []
 
8
  MatchAny,
9
  MatchValue,
10
  PayloadSchemaType,
11
+ PointIdsList,
12
  PointStruct,
13
  Range,
14
  VectorParams,
 
128
  ]
129
 
130
  def delete_document(self, document_id: str) -> int:
131
+ # First, find all point IDs belonging to this document
132
+ doc_filter = Filter(
133
+ must=[FieldCondition(key="document_id", match=MatchValue(value=document_id))]
134
+ )
135
+ point_ids = []
136
+ offset = None
137
+ while True:
138
+ results, next_offset = self.client.scroll(
139
+ collection_name=self.collection_name,
140
+ scroll_filter=doc_filter,
141
+ limit=100,
142
+ offset=offset,
143
+ with_payload=False,
144
+ with_vectors=False,
145
+ )
146
+ point_ids.extend([r.id for r in results])
147
+ if next_offset is None:
148
+ break
149
+ offset = next_offset
150
+
151
+ if not point_ids:
152
+ logger.warning(f"No points found for document '{document_id}'")
153
+ return 0
154
+
155
+ # Delete by point IDs (requires only write permission, not manage)
156
  self.client.delete(
157
  collection_name=self.collection_name,
158
+ points_selector=PointIdsList(points=point_ids),
 
 
159
  )
160
+ logger.info(f"Deleted {len(point_ids)} points for document '{document_id}'")
161
+ return len(point_ids)
162
 
163
  def scroll_all(self, batch_size: int = 100) -> list[dict]:
164
  all_points = []
app/ui/gradio_app.py CHANGED
@@ -123,26 +123,42 @@ def upload_document(file):
123
  return f"**Upload failed:** {e}"
124
 
125
 
126
- def list_documents():
 
127
  try:
128
  response = httpx.get(f"{API_BASE}/api/documents", timeout=30)
129
  if response.status_code == 200:
130
- data = response.json()
131
- docs = data.get("documents", [])
132
- if not docs:
133
- return [["—", "—", "—", "—"]]
134
- return [
135
- [
136
- d.get("source", ""),
137
- d.get("doc_type", "").upper(),
138
- str(d.get("num_chunks", 0)),
139
- d.get("document_id", "")[:12] + "...",
140
- ]
141
- for d in docs
142
- ]
143
- return [["Error loading", "", "", ""]]
144
- except Exception as e:
145
- return [[f"Error: {e}", "", "", ""]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
 
148
  def get_doc_count():
@@ -159,14 +175,21 @@ def get_doc_count():
159
  return "Connecting..."
160
 
161
 
162
- def delete_document(doc_id):
163
- if not doc_id or not doc_id.strip():
164
- return "Enter a document ID to delete."
 
 
 
 
 
165
  try:
166
- response = httpx.delete(f"{API_BASE}/api/documents/{doc_id.strip()}", timeout=30)
167
  if response.status_code == 200:
168
- return f"Document `{doc_id.strip()[:12]}...` deleted successfully."
169
- return f"**Error:** {response.text}"
 
 
170
  except Exception as e:
171
  return f"**Delete failed:** {e}"
172
 
@@ -375,11 +398,6 @@ def create_gradio_app() -> gr.Blocks:
375
  )
376
 
377
  upload_status = gr.Markdown()
378
- upload_btn.click(
379
- fn=upload_document,
380
- inputs=file_upload,
381
- outputs=upload_status,
382
- )
383
 
384
  gr.Markdown("---")
385
  gr.Markdown("#### Indexed Documents")
@@ -392,14 +410,13 @@ def create_gradio_app() -> gr.Blocks:
392
  elem_classes=["doc-table"],
393
  )
394
  refresh_btn = gr.Button("Refresh", size="sm")
395
- refresh_btn.click(fn=list_documents, outputs=doc_table)
396
 
397
  gr.Markdown("---")
398
  gr.Markdown("#### Delete a Document")
399
  with gr.Row():
400
- delete_id_input = gr.Textbox(
401
- placeholder="Paste full document ID here...",
402
- show_label=False,
403
  scale=3,
404
  )
405
  delete_btn = gr.Button(
@@ -409,10 +426,34 @@ def create_gradio_app() -> gr.Blocks:
409
  elem_classes=["danger-btn"],
410
  )
411
  delete_status = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  delete_btn.click(
413
  fn=delete_document,
414
- inputs=delete_id_input,
415
  outputs=delete_status,
 
 
 
416
  )
417
 
418
  # Footer
 
123
  return f"**Upload failed:** {e}"
124
 
125
 
126
+ def _fetch_documents():
127
+ """Fetch documents from API. Returns list of doc dicts or empty list."""
128
  try:
129
  response = httpx.get(f"{API_BASE}/api/documents", timeout=30)
130
  if response.status_code == 200:
131
+ return response.json().get("documents", [])
132
+ except Exception:
133
+ pass
134
+ return []
135
+
136
+
137
+ def list_documents():
138
+ docs = _fetch_documents()
139
+ if not docs:
140
+ return [["", "", "", "—"]]
141
+ return [
142
+ [
143
+ d.get("source", ""),
144
+ d.get("doc_type", "").upper(),
145
+ str(d.get("num_chunks", 0)),
146
+ d.get("document_id", ""),
147
+ ]
148
+ for d in docs
149
+ ]
150
+
151
+
152
+ def get_delete_choices():
153
+ """Build dropdown choices: 'filename (doc_id)' for each indexed document."""
154
+ docs = _fetch_documents()
155
+ if not docs:
156
+ return gr.update(choices=[], value=None)
157
+ choices = [
158
+ f"{d.get('source', 'unknown')} [{d.get('document_id', '')}]"
159
+ for d in docs
160
+ ]
161
+ return gr.update(choices=choices, value=None)
162
 
163
 
164
  def get_doc_count():
 
175
  return "Connecting..."
176
 
177
 
178
+ def delete_document(selection):
179
+ if not selection:
180
+ return "Select a document to delete."
181
+ # Extract document ID from "filename [doc_id]" format
182
+ if "[" in selection and selection.endswith("]"):
183
+ doc_id = selection.rsplit("[", 1)[1][:-1]
184
+ else:
185
+ doc_id = selection.strip()
186
  try:
187
+ response = httpx.delete(f"{API_BASE}/api/documents/{doc_id}", timeout=30)
188
  if response.status_code == 200:
189
+ filename = selection.split(" [")[0] if " [" in selection else doc_id[:12]
190
+ return f"**'{filename}'** deleted successfully."
191
+ detail = response.json().get("detail", response.text)
192
+ return f"**Error:** {detail}"
193
  except Exception as e:
194
  return f"**Delete failed:** {e}"
195
 
 
398
  )
399
 
400
  upload_status = gr.Markdown()
 
 
 
 
 
401
 
402
  gr.Markdown("---")
403
  gr.Markdown("#### Indexed Documents")
 
410
  elem_classes=["doc-table"],
411
  )
412
  refresh_btn = gr.Button("Refresh", size="sm")
 
413
 
414
  gr.Markdown("---")
415
  gr.Markdown("#### Delete a Document")
416
  with gr.Row():
417
+ delete_dropdown = gr.Dropdown(
418
+ choices=[],
419
+ label="Select document to delete",
420
  scale=3,
421
  )
422
  delete_btn = gr.Button(
 
426
  elem_classes=["danger-btn"],
427
  )
428
  delete_status = gr.Markdown()
429
+
430
+ # Refresh: updates table, dropdown, and stats
431
+ def refresh_all():
432
+ return list_documents(), get_delete_choices(), get_doc_count()
433
+
434
+ refresh_btn.click(
435
+ fn=refresh_all,
436
+ outputs=[doc_table, delete_dropdown, stats_display],
437
+ )
438
+
439
+ # After upload: refresh table, dropdown, and stats
440
+ upload_btn.click(
441
+ fn=upload_document,
442
+ inputs=file_upload,
443
+ outputs=upload_status,
444
+ ).then(
445
+ fn=refresh_all,
446
+ outputs=[doc_table, delete_dropdown, stats_display],
447
+ )
448
+
449
+ # Delete: delete then refresh everything
450
  delete_btn.click(
451
  fn=delete_document,
452
+ inputs=delete_dropdown,
453
  outputs=delete_status,
454
+ ).then(
455
+ fn=refresh_all,
456
+ outputs=[doc_table, delete_dropdown, stats_display],
457
  )
458
 
459
  # Footer