Jash Doshi committed on
Commit
b0e5f42
·
1 Parent(s): e94affd

Fix delete all persistence - use bulk delete for ChromaDB metadata and documents

Browse files
Files changed (2) hide show
  1. app.py +25 -29
  2. rag_core.py +79 -0
app.py CHANGED
@@ -856,50 +856,46 @@ def delete_all_endpoint(mode):
856
  deleted_count = 0
857
 
858
  try:
859
- # Step 1: Get all items to delete from JSON
860
  user_data = _load_user_data(user_api_key, mode)
861
-
862
- # Step 2: Also get items from ChromaDB (in case they're not in JSON)
863
  chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
864
 
865
- # Combine all item IDs
866
- all_item_ids = set()
867
- for item in user_data:
868
- if item.get('id'):
869
- all_item_ids.add(item.get('id'))
870
- if chroma_data:
871
- for item in chroma_data:
872
- if item.get('id'):
873
- all_item_ids.add(item.get('id'))
874
 
875
- deleted_count = len(all_item_ids)
876
 
877
- # Step 3: Clear JSON file
878
  _save_user_data(user_api_key, mode, [])
 
879
 
880
- # Step 4: Delete from ChromaDB (RAG knowledge base)
881
- for item_id in all_item_ids:
882
- try:
883
- if mode == 'cards':
884
- rag_core.remove_document_from_knowledge_base(user_api_key, item_id, mode)
885
- rag_core.delete_metadata_from_chroma(user_api_key, mode, item_id)
886
- elif mode == 'brochures':
887
- rag_core.remove_document_from_knowledge_base(user_api_key, f"{item_id}_contacts", mode)
888
- rag_core.remove_document_from_knowledge_base(user_api_key, f"{item_id}_info", mode)
889
- rag_core.delete_metadata_from_chroma(user_api_key, mode, item_id)
890
- except Exception as e:
891
- print(f"ChromaDB deletion warning for {item_id}: {e}")
892
 
893
  # Step 5: Delete from SQL Database
894
  user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
895
  if mode == 'cards':
896
- BusinessCard.query.filter_by(user_hash=user_hash).delete()
 
897
  elif mode == 'brochures':
898
  # Delete all brochures and their contacts (cascade)
899
- Brochure.query.filter_by(user_hash=user_hash).delete()
 
900
 
901
  db.session.commit()
902
- print(f"Successfully deleted all {deleted_count} {mode} from database")
903
 
904
  return jsonify({
905
  'success': True,
 
856
  deleted_count = 0
857
 
858
  try:
859
+ # Step 1: Count items before deletion (from both sources)
860
  user_data = _load_user_data(user_api_key, mode)
 
 
861
  chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
862
 
863
+ # Get count from whichever source has more
864
+ deleted_count = max(len(user_data), len(chroma_data) if chroma_data else 0)
865
+
866
+ if deleted_count == 0:
867
+ return jsonify({
868
+ 'success': True,
869
+ 'deleted_count': 0,
870
+ 'message': f'No {mode} to delete'
871
+ })
872
 
873
+ print(f"Starting deletion of {deleted_count} {mode}...")
874
 
875
+ # Step 2: Clear JSON file
876
  _save_user_data(user_api_key, mode, [])
877
+ print(f"Cleared JSON file for {mode}")
878
 
879
+ # Step 3: Delete ALL metadata from ChromaDB (bulk delete)
880
+ metadata_deleted = rag_core.delete_all_metadata_from_chroma(user_api_key, mode)
881
+ print(f"Deleted {metadata_deleted} metadata records from ChromaDB")
882
+
883
+ # Step 4: Delete ALL document chunks from ChromaDB (bulk delete)
884
+ docs_deleted = rag_core.delete_all_documents_from_chroma(user_api_key, mode)
885
+ print(f"Deleted {docs_deleted} document chunks from ChromaDB")
 
 
 
 
 
886
 
887
  # Step 5: Delete from SQL Database
888
  user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
889
  if mode == 'cards':
890
+ db_deleted = BusinessCard.query.filter_by(user_hash=user_hash).delete()
891
+ print(f"Deleted {db_deleted} business cards from SQL database")
892
  elif mode == 'brochures':
893
  # Delete all brochures and their contacts (cascade)
894
+ db_deleted = Brochure.query.filter_by(user_hash=user_hash).delete()
895
+ print(f"Deleted {db_deleted} brochures from SQL database")
896
 
897
  db.session.commit()
898
+ print(f"Successfully deleted all {deleted_count} {mode} from all storage layers")
899
 
900
  return jsonify({
901
  'success': True,
rag_core.py CHANGED
@@ -813,6 +813,85 @@ def delete_metadata_from_chroma(user_api_key, mode, document_id):
813
  return False
814
 
815
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  def update_metadata_in_chroma(user_api_key, mode, document_id, field, value, contact_id=None):
817
  """
818
  Update a specific field in the metadata stored in ChromaDB.
 
813
  return False
814
 
815
 
816
+ def delete_all_metadata_from_chroma(user_api_key, mode):
817
+ """
818
+ Delete ALL metadata documents from ChromaDB for a user/mode.
819
+ This is used for the 'delete all' feature.
820
+ Returns the count of deleted items.
821
+ """
822
+ if not _rag_system_available:
823
+ print("RAG: System not available, cannot delete metadata from ChromaDB")
824
+ return 0
825
+
826
+ try:
827
+ collection = _get_or_create_collection(user_api_key, mode)
828
+
829
+ # Get all metadata document IDs
830
+ results = collection.get(
831
+ where={"type": "metadata"},
832
+ include=["metadatas"]
833
+ )
834
+
835
+ if not results or not results['ids']:
836
+ print(f"RAG: No metadata to delete for {mode}")
837
+ return 0
838
+
839
+ deleted_count = len(results['ids'])
840
+
841
+ # Delete all metadata documents
842
+ collection.delete(ids=results['ids'])
843
+ print(f"RAG: Deleted {deleted_count} metadata records from ChromaDB for {mode}")
844
+
845
+ return deleted_count
846
+ except Exception as e:
847
+ print(f"RAG: Error deleting all metadata from ChromaDB: {e}")
848
+ import traceback
849
+ traceback.print_exc()
850
+ return 0
851
+
852
+
853
+ def delete_all_documents_from_chroma(user_api_key, mode):
854
+ """
855
+ Delete ALL document chunks from ChromaDB for a user/mode.
856
+ This removes RAG knowledge base entries.
857
+ Returns the count of deleted chunks.
858
+ """
859
+ if not _rag_system_available:
860
+ print("RAG: System not available, cannot delete documents from ChromaDB")
861
+ return 0
862
+
863
+ try:
864
+ collection = _get_or_create_collection(user_api_key, mode)
865
+
866
+ # Get all document chunk IDs
867
+ results = collection.get(
868
+ where={"type": "document_chunk"},
869
+ include=["metadatas"]
870
+ )
871
+
872
+ if not results or not results['ids']:
873
+ print(f"RAG: No document chunks to delete for {mode}")
874
+ return 0
875
+
876
+ deleted_count = len(results['ids'])
877
+
878
+ # Delete all document chunks
879
+ collection.delete(ids=results['ids'])
880
+ print(f"RAG: Deleted {deleted_count} document chunks from ChromaDB for {mode}")
881
+
882
+ # Clear keyword index
883
+ if mode in keyword_indexes and user_api_key in keyword_indexes[mode]:
884
+ keyword_indexes[mode][user_api_key] = {"documents": {}, "vocabulary": {}, "entities": {}}
885
+ _save_keyword_index(user_api_key, mode)
886
+
887
+ return deleted_count
888
+ except Exception as e:
889
+ print(f"RAG: Error deleting all documents from ChromaDB: {e}")
890
+ import traceback
891
+ traceback.print_exc()
892
+ return 0
893
+
894
+
895
  def update_metadata_in_chroma(user_api_key, mode, document_id, field, value, contact_id=None):
896
  """
897
  Update a specific field in the metadata stored in ChromaDB.