GraphResearcher / app /storage /document_delete_service.py
yugbirla's picture
Sync GraphRAG fusion quality cleanup and evaluation files
b7d0804
Raw
History Blame Contribute Delete
2.17 kB
import shutil
from pathlib import Path
from typing import Dict, Any, Optional
from app.core.config import settings
from app.storage.status_storage import read_document_status
from app.storage.processed_storage import read_processed_metadata
from app.storage.document_index import remove_document_from_index
def _is_safe_document_id(document_id: Any) -> bool:
    """Return True when *document_id* can be used as one plain directory name."""
    if not isinstance(document_id, str) or not document_id:
        return False
    if document_id in (".", ".."):
        return False
    # Separators (or NUL) would let the id address something outside the
    # processed directory once it is joined onto it.
    return not any(ch in document_id for ch in ("/", "\\", "\x00"))


def delete_document_by_id(document_id: str) -> Optional[Dict[str, Any]]:
    """Delete the stored artifacts of one document and report what was removed.

    The function removes the document's index entry, asks the vector store to
    drop the document's vectors (best-effort), deletes the raw upload recorded
    under ``uploaded_file_path`` in the status metadata, and removes the
    document's folder under ``settings.PROCESSED_DIR``.

    Args:
        document_id: Identifier of the document. It is used as a directory
            name below ``settings.PROCESSED_DIR``, so it must be a single,
            non-empty path component.

    Returns:
        ``None`` when the id is not a usable path component, or when no
        status, no processed metadata and no processed folder exist for it.
        Otherwise a dict with the keys ``document_id``, ``source_file_name``,
        ``file_type``, ``deleted_processed_folder``, ``deleted_raw_upload``,
        ``deleted_vectors``, ``removed_duplicate_index_entry`` and
        ``removed_index_entry``.

    Raises:
        OSError: If removing the raw upload or the processed folder fails for
            a reason other than the raw file having already disappeared.
    """
    import logging

    # An empty id would resolve to PROCESSED_DIR itself, ".." to its parent and
    # an absolute id to an arbitrary location; any of these would be handed to
    # shutil.rmtree below. Treat such ids as "no such document".
    if not _is_safe_document_id(document_id):
        return None

    document_dir = settings.PROCESSED_DIR / document_id
    status = read_document_status(document_id)
    metadata = read_processed_metadata(document_id)

    if status is None and metadata is None and not document_dir.exists():
        return None

    raw_upload_path = None
    if status is not None:
        # Tolerate a status whose metadata mapping is missing (None).
        raw_upload_path = (status.metadata or {}).get("uploaded_file_path")

    removed_index_entry = remove_document_from_index(document_id)

    # Vector cleanup is best-effort: the import is local and any failure is
    # reported through the "deleted_vectors" flag instead of aborting.
    try:
        from app.retrieval.vector_store import delete_vectors_for_document

        delete_vectors_for_document(document_id)
    except Exception:
        deleted_vectors = False
        logging.getLogger(__name__).warning(
            "Vector deletion failed for document %s",
            document_id,
            exc_info=True,
        )
    else:
        deleted_vectors = True

    deleted_raw_upload = False
    if raw_upload_path:
        raw_path = Path(raw_upload_path)
        if raw_path.is_file():
            try:
                raw_path.unlink()
            except FileNotFoundError:
                # The file vanished between the check and the unlink.
                pass
            else:
                deleted_raw_upload = True

    deleted_processed_folder = False
    if document_dir.is_dir():
        shutil.rmtree(document_dir)
        deleted_processed_folder = True

    # Prefer the status record for descriptive fields; fall back to the
    # processed metadata, and to None when neither is available.
    if status is not None:
        source_file_name = status.source_file_name
        file_type = status.file_type
    elif metadata is not None:
        source_file_name = metadata.get("source_file_name")
        file_type = metadata.get("file_type")
    else:
        source_file_name = None
        file_type = None

    return {
        "document_id": document_id,
        "source_file_name": source_file_name,
        "file_type": file_type,
        "deleted_processed_folder": deleted_processed_folder,
        "deleted_raw_upload": deleted_raw_upload,
        "deleted_vectors": deleted_vectors,
        "removed_duplicate_index_entry": removed_index_entry is not None,
        "removed_index_entry": removed_index_entry,
    }