Spaces:
Running
Running
Jash Doshi committed on
Commit ·
b0e5f42
1
Parent(s): e94affd
Fix delete all persistence - use bulk delete for ChromaDB metadata and documents
Browse files
- app.py +25 -29
- rag_core.py +79 -0
app.py
CHANGED
|
@@ -856,50 +856,46 @@ def delete_all_endpoint(mode):
|
|
| 856 |
deleted_count = 0
|
| 857 |
|
| 858 |
try:
|
| 859 |
-
# Step 1:
|
| 860 |
user_data = _load_user_data(user_api_key, mode)
|
| 861 |
-
|
| 862 |
-
# Step 2: Also get items from ChromaDB (in case they're not in JSON)
|
| 863 |
chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
|
| 864 |
|
| 865 |
-
#
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
|
| 875 |
-
|
| 876 |
|
| 877 |
-
# Step
|
| 878 |
_save_user_data(user_api_key, mode, [])
|
|
|
|
| 879 |
|
| 880 |
-
# Step
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
rag_core.remove_document_from_knowledge_base(user_api_key, f"{item_id}_contacts", mode)
|
| 888 |
-
rag_core.remove_document_from_knowledge_base(user_api_key, f"{item_id}_info", mode)
|
| 889 |
-
rag_core.delete_metadata_from_chroma(user_api_key, mode, item_id)
|
| 890 |
-
except Exception as e:
|
| 891 |
-
print(f"ChromaDB deletion warning for {item_id}: {e}")
|
| 892 |
|
| 893 |
# Step 5: Delete from SQL Database
|
| 894 |
user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
|
| 895 |
if mode == 'cards':
|
| 896 |
-
BusinessCard.query.filter_by(user_hash=user_hash).delete()
|
|
|
|
| 897 |
elif mode == 'brochures':
|
| 898 |
# Delete all brochures and their contacts (cascade)
|
| 899 |
-
Brochure.query.filter_by(user_hash=user_hash).delete()
|
|
|
|
| 900 |
|
| 901 |
db.session.commit()
|
| 902 |
-
print(f"Successfully deleted all {deleted_count} {mode} from
|
| 903 |
|
| 904 |
return jsonify({
|
| 905 |
'success': True,
|
|
|
|
| 856 |
deleted_count = 0
|
| 857 |
|
| 858 |
try:
|
| 859 |
+
# Step 1: Count items before deletion (from both sources)
|
| 860 |
user_data = _load_user_data(user_api_key, mode)
|
|
|
|
|
|
|
| 861 |
chroma_data = rag_core.load_all_metadata_from_chroma(user_api_key, mode)
|
| 862 |
|
| 863 |
+
# Get count from whichever source has more
|
| 864 |
+
deleted_count = max(len(user_data), len(chroma_data) if chroma_data else 0)
|
| 865 |
+
|
| 866 |
+
if deleted_count == 0:
|
| 867 |
+
return jsonify({
|
| 868 |
+
'success': True,
|
| 869 |
+
'deleted_count': 0,
|
| 870 |
+
'message': f'No {mode} to delete'
|
| 871 |
+
})
|
| 872 |
|
| 873 |
+
print(f"Starting deletion of {deleted_count} {mode}...")
|
| 874 |
|
| 875 |
+
# Step 2: Clear JSON file
|
| 876 |
_save_user_data(user_api_key, mode, [])
|
| 877 |
+
print(f"Cleared JSON file for {mode}")
|
| 878 |
|
| 879 |
+
# Step 3: Delete ALL metadata from ChromaDB (bulk delete)
|
| 880 |
+
metadata_deleted = rag_core.delete_all_metadata_from_chroma(user_api_key, mode)
|
| 881 |
+
print(f"Deleted {metadata_deleted} metadata records from ChromaDB")
|
| 882 |
+
|
| 883 |
+
# Step 4: Delete ALL document chunks from ChromaDB (bulk delete)
|
| 884 |
+
docs_deleted = rag_core.delete_all_documents_from_chroma(user_api_key, mode)
|
| 885 |
+
print(f"Deleted {docs_deleted} document chunks from ChromaDB")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 886 |
|
| 887 |
# Step 5: Delete from SQL Database
|
| 888 |
user_hash = hashlib.sha256(user_api_key.encode()).hexdigest()
|
| 889 |
if mode == 'cards':
|
| 890 |
+
db_deleted = BusinessCard.query.filter_by(user_hash=user_hash).delete()
|
| 891 |
+
print(f"Deleted {db_deleted} business cards from SQL database")
|
| 892 |
elif mode == 'brochures':
|
| 893 |
# Delete all brochures and their contacts (cascade)
|
| 894 |
+
db_deleted = Brochure.query.filter_by(user_hash=user_hash).delete()
|
| 895 |
+
print(f"Deleted {db_deleted} brochures from SQL database")
|
| 896 |
|
| 897 |
db.session.commit()
|
| 898 |
+
print(f"Successfully deleted all {deleted_count} {mode} from all storage layers")
|
| 899 |
|
| 900 |
return jsonify({
|
| 901 |
'success': True,
|
rag_core.py
CHANGED
|
@@ -813,6 +813,85 @@ def delete_metadata_from_chroma(user_api_key, mode, document_id):
|
|
| 813 |
return False
|
| 814 |
|
| 815 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
def update_metadata_in_chroma(user_api_key, mode, document_id, field, value, contact_id=None):
|
| 817 |
"""
|
| 818 |
Update a specific field in the metadata stored in ChromaDB.
|
|
|
|
| 813 |
return False
|
| 814 |
|
| 815 |
|
| 816 |
+
def delete_all_metadata_from_chroma(user_api_key, mode):
    """
    Remove every metadata record stored in ChromaDB for one user/mode.

    Backs the 'delete all' feature: every entry whose ``type`` is
    ``"metadata"`` is looked up in the collection obtained for
    (user_api_key, mode) and deleted by ID in a single call.

    Returns:
        The number of metadata records deleted. 0 is returned when the RAG
        system is unavailable, when nothing matched, or when any error
        occurred (errors are printed with a traceback, never raised).
    """
    if not _rag_system_available:
        print("RAG: System not available, cannot delete metadata from ChromaDB")
        return 0

    try:
        chroma_collection = _get_or_create_collection(user_api_key, mode)

        # Look up everything tagged as a metadata record.
        matches = chroma_collection.get(
            where={"type": "metadata"},
            include=["metadatas"],
        )
        metadata_ids = matches['ids'] if matches else None

        # Guard clause: nothing stored, nothing to do.
        if not metadata_ids:
            print(f"RAG: No metadata to delete for {mode}")
            return 0

        removed = len(metadata_ids)

        # Bulk delete by ID.
        chroma_collection.delete(ids=metadata_ids)
        print(f"RAG: Deleted {removed} metadata records from ChromaDB for {mode}")
        return removed
    except Exception as e:
        # Best-effort: report the failure and signal "nothing deleted".
        print(f"RAG: Error deleting all metadata from ChromaDB: {e}")
        import traceback
        traceback.print_exc()
        return 0
|
| 851 |
+
|
| 852 |
+
|
| 853 |
+
def delete_all_documents_from_chroma(user_api_key, mode):
    """
    Delete ALL document chunks from ChromaDB for a user/mode.

    This removes the RAG knowledge base entries (records whose ``type`` is
    ``"document_chunk"``) and resets the in-memory keyword index for the
    same user/mode.

    The keyword index is reset even when ChromaDB holds no chunks, so a
    stale index cannot survive a 'delete all' request.

    Returns:
        The number of document chunks deleted. 0 is returned when the RAG
        system is unavailable, when no chunks matched, or when any error
        occurred (errors are printed with a traceback, never raised).
    """
    if not _rag_system_available:
        print("RAG: System not available, cannot delete documents from ChromaDB")
        return 0

    try:
        collection = _get_or_create_collection(user_api_key, mode)

        # Get all document chunk IDs
        results = collection.get(
            where={"type": "document_chunk"},
            include=["metadatas"]
        )
        # Normalise a missing/None id list to an empty list.
        chunk_ids = (results['ids'] if results else None) or []
        deleted_count = len(chunk_ids)

        if chunk_ids:
            # Delete all document chunks
            collection.delete(ids=chunk_ids)
            print(f"RAG: Deleted {deleted_count} document chunks from ChromaDB for {mode}")
        else:
            print(f"RAG: No document chunks to delete for {mode}")

        # Clear keyword index. This is done regardless of whether any chunks
        # were found, so the index cannot keep entries for deleted data.
        if mode in keyword_indexes and user_api_key in keyword_indexes[mode]:
            keyword_indexes[mode][user_api_key] = {"documents": {}, "vocabulary": {}, "entities": {}}
            # NOTE(review): presumably persists the now-empty index; confirm
            # against _save_keyword_index.
            _save_keyword_index(user_api_key, mode)

        return deleted_count
    except Exception as e:
        # Best-effort: report the failure and signal "nothing deleted".
        print(f"RAG: Error deleting all documents from ChromaDB: {e}")
        import traceback
        traceback.print_exc()
        return 0
|
| 893 |
+
|
| 894 |
+
|
| 895 |
def update_metadata_in_chroma(user_api_key, mode, document_id, field, value, contact_id=None):
|
| 896 |
"""
|
| 897 |
Update a specific field in the metadata stored in ChromaDB.
|