Spaces:
Sleeping
Sleeping
Upload backend/core/management/commands/cleanup_for_hf_legal_only.py with huggingface_hub
Browse files
backend/core/management/commands/cleanup_for_hf_legal_only.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Management command to clean structured data for HF Space demo.
|
| 5 |
+
|
| 6 |
+
This command:
|
| 7 |
+
- Deletes all records from structured models: Fine, Procedure, Office, Advisory.
|
| 8 |
+
- Keeps only the four specified LegalDocument and related LegalSection/LegalDocumentImage.
|
| 9 |
+
|
| 10 |
+
Intended to be idempotent and safe to re-run.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from typing import List
|
| 14 |
+
|
| 15 |
+
from django.core.management.base import BaseCommand
|
| 16 |
+
|
| 17 |
+
from hue_portal.core.models import (
|
| 18 |
+
Advisory,
|
| 19 |
+
Fine,
|
| 20 |
+
LegalDocument,
|
| 21 |
+
LegalDocumentImage,
|
| 22 |
+
LegalSection,
|
| 23 |
+
Office,
|
| 24 |
+
Procedure,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Whitelist of LegalDocument.code values that survive the cleanup.
# The command below removes every legal document whose code is not listed
# here, together with the sections and images attached to it.
LEGAL_CODES_TO_KEEP: List[str] = [
    "TT-02-BIEN-SOAN",
    "264-QD-TW",
    "QD-69-TW",
    "TT-02-CAND",
]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class Command(BaseCommand):
    """Clean the database so that only the whitelisted legal documents remain.

    The command runs in two phases and then prints a summary:

    1. Delete every row of the structured models Fine, Procedure, Office
       and Advisory.
    2. Delete every LegalDocument whose ``code`` is not listed in
       ``LEGAL_CODES_TO_KEEP``, plus the LegalSection and LegalDocumentImage
       rows attached to those documents.

    With ``--dry-run`` nothing is deleted; only the row counts that would be
    removed are printed.
    """

    help = (
        "Xóa dữ liệu không liên quan cho demo HF Space:\n"
        "- Xóa toàn bộ Fine/Procedure/Office/Advisory.\n"
        "- Giữ lại duy nhất 4 LegalDocument được chỉ định và các LegalSection/LegalDocumentImage liên quan."
    )

    def add_arguments(self, parser) -> None:
        """Register the ``--dry-run`` flag (report counts, delete nothing)."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Chỉ in ra số lượng sẽ xóa, không thực hiện xóa.",
        )

    def handle(self, *args, **options) -> None:
        """Run the cleanup, or only report what it would delete.

        Args:
            *args: Unused positional arguments passed by Django.
            **options: Parsed command-line options; only ``dry_run`` is read.

        Raises:
            Any database error raised by the deletes propagates to the
            caller after the surrounding transaction has been rolled back.
        """
        # Imported locally so this block does not depend on a change to the
        # module-level import section.
        from django.db import transaction

        dry_run: bool = bool(options.get("dry_run"))

        # Both phases are destructive; run them in one transaction so that a
        # failure halfway through does not leave a partially cleaned database.
        with transaction.atomic():
            self._wipe_structured_models(dry_run)
            self._prune_legal_documents(dry_run)

        self._print_summary(dry_run)

    @staticmethod
    def _delete_and_count(queryset) -> int:
        """Delete ``queryset`` and return how many rows of its own model went.

        ``QuerySet.delete()`` returns ``(total, per_model)`` where ``total``
        also counts cascaded rows of other models. Only the entry for the
        queryset's own model is reported so that the logged figure is
        comparable with the ``count()`` shown by ``--dry-run``.
        """
        _, per_model = queryset.delete()
        return per_model.get(queryset.model._meta.label, 0)

    def _wipe_structured_models(self, dry_run: bool) -> None:
        """Phase 1: delete (or count) all Fine/Procedure/Office/Advisory rows."""
        self.stdout.write(
            self.style.MIGRATE_HEADING("🧹 Xóa dữ liệu structured (Fine/Procedure/Office/Advisory)...")
        )

        for model in (Fine, Procedure, Office, Advisory):
            queryset = model.objects.all()
            if dry_run:
                count = queryset.count()
                self.stdout.write(f"[DRY-RUN] Sẽ xóa {count} bản ghi từ {model.__name__}")
            else:
                deleted = self._delete_and_count(queryset)
                self.stdout.write(f"Đã xóa {deleted} bản ghi từ {model.__name__}")

    def _prune_legal_documents(self, dry_run: bool) -> None:
        """Phase 2: delete (or count) legal data outside ``LEGAL_CODES_TO_KEEP``."""
        self.stdout.write(
            self.style.MIGRATE_HEADING(
                "🧹 Xóa LegalDocument/LegalSection/LegalDocumentImage không thuộc 4 mã chỉ định..."
            )
        )

        keep_codes_display = ", ".join(LEGAL_CODES_TO_KEEP)
        self.stdout.write(f"Giữ lại các mã: {keep_codes_display}")

        sections_to_delete = LegalSection.objects.exclude(document__code__in=LEGAL_CODES_TO_KEEP)
        images_to_delete = LegalDocumentImage.objects.exclude(document__code__in=LEGAL_CODES_TO_KEEP)
        docs_to_delete = LegalDocument.objects.exclude(code__in=LEGAL_CODES_TO_KEEP)

        if dry_run:
            # Count queries are only needed for the dry-run report.
            doc_count = docs_to_delete.count()
            sec_count = sections_to_delete.count()
            img_count = images_to_delete.count()
            self.stdout.write(
                f"[DRY-RUN] Sẽ xóa {doc_count} LegalDocument, "
                f"{sec_count} LegalSection, {img_count} LegalDocumentImage (nếu tồn tại)."
            )
            return

        # Sections and images are removed explicitly before their documents
        # so that each model's count can be logged separately. The original
        # author notes they would otherwise be cascaded by the document delete.
        deleted_sections = self._delete_and_count(sections_to_delete)
        deleted_images = self._delete_and_count(images_to_delete)
        deleted_docs = self._delete_and_count(docs_to_delete)
        self.stdout.write(
            f"Đã xóa {deleted_docs} LegalDocument, "
            f"{deleted_sections} LegalSection, {deleted_images} LegalDocumentImage."
        )

    def _print_summary(self, dry_run: bool) -> None:
        """Phase 3: list the whitelisted LegalDocument rows present in the DB."""
        remaining_docs = list(
            LegalDocument.objects.filter(code__in=LEGAL_CODES_TO_KEEP).values_list("code", "title")
        )

        if dry_run:
            # Do not claim the cleanup finished when nothing was deleted.
            self.stdout.write(self.style.WARNING("[DRY-RUN] Không có dữ liệu nào bị xóa."))
        else:
            self.stdout.write(self.style.SUCCESS("✅ Hoàn tất dọn dữ liệu cho HF Space."))

        self.stdout.write(f"Còn lại {len(remaining_docs)} LegalDocument:")
        for code, title in remaining_docs:
            self.stdout.write(f"- {code}: {title}")
|
| 106 |
+
|
| 107 |
+
|