davidtran999 committed on
Commit
1f34575
·
verified ·
1 Parent(s): 9caeeaf

Upload backend/core/management/commands/cleanup_for_hf_legal_only.py with huggingface_hub

Browse files
backend/core/management/commands/cleanup_for_hf_legal_only.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ """
4
+ Management command to clean structured data for HF Space demo.
5
+
6
+ This command:
7
+ - Deletes all records from structured models: Fine, Procedure, Office, Advisory.
8
+ - Keeps only the four specified LegalDocument and related LegalSection/LegalDocumentImage.
9
+
10
+ Intended to be idempotent and safe to re-run.
11
+ """
12
+
13
+ from typing import List
14
+
15
+ from django.core.management.base import BaseCommand
16
+
17
+ from hue_portal.core.models import (
18
+ Advisory,
19
+ Fine,
20
+ LegalDocument,
21
+ LegalDocumentImage,
22
+ LegalSection,
23
+ Office,
24
+ Procedure,
25
+ )
26
+
27
+
28
# Codes of the LegalDocument rows that the cleanup command must preserve.
# Every LegalDocument whose ``code`` is not listed here (together with its
# LegalSection / LegalDocumentImage rows) is selected for deletion.
LEGAL_CODES_TO_KEEP: List[str] = [
    "TT-02-BIEN-SOAN", "264-QD-TW",
    "QD-69-TW", "TT-02-CAND",
]
34
+
35
+
36
class Command(BaseCommand):
    """Clean the database so that only the whitelisted legal documents remain.

    The command performs three steps:

    1. Delete every row of the structured models (Fine, Procedure, Office,
       Advisory).
    2. Delete every LegalDocument whose ``code`` is not in
       ``LEGAL_CODES_TO_KEEP``, plus the LegalSection and LegalDocumentImage
       rows attached to those documents.
    3. Print the code and title of each LegalDocument that is kept.

    With ``--dry-run`` only the counts are printed and nothing is deleted.
    """

    help = (
        "Xóa dữ liệu không liên quan cho demo HF Space:\n"
        "- Xóa toàn bộ Fine/Procedure/Office/Advisory.\n"
        "- Giữ lại duy nhất 4 LegalDocument được chỉ định và các LegalSection/LegalDocumentImage liên quan."
    )

    def add_arguments(self, parser) -> None:
        """Register the ``--dry-run`` flag on the command's argument parser."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Chỉ in ra số lượng sẽ xóa, không thực hiện xóa.",
        )

    def handle(self, *args, **options) -> None:
        """Run the cleanup, or only report what would be removed.

        Args:
            *args: Unused positional arguments passed by Django.
            **options: Parsed command-line options; only ``dry_run`` is read.
        """
        # Function-scope import keeps this block self-contained: the file's
        # import header does not bring ``transaction`` into scope.
        from django.db import transaction

        dry_run: bool = bool(options.get("dry_run"))

        # All deletions share one transaction so that a failure part-way
        # through rolls everything back instead of leaving a half-cleaned DB.
        # In dry-run mode the block only executes COUNT queries.
        with transaction.atomic():
            self._wipe_structured_models(dry_run)
            self._prune_legal_documents(dry_run)

        self._report_remaining(dry_run)

    @staticmethod
    def _delete_and_count(queryset) -> int:
        """Delete *queryset* and return the number of rows of its own model removed.

        ``QuerySet.delete()`` returns ``(total, per_model)`` where ``total``
        also counts rows removed through cascades. Reading the per-model entry
        keeps the logged figure comparable to the ``count()`` shown in dry-run.
        """
        _, per_model = queryset.delete()
        return per_model.get(queryset.model._meta.label, 0)

    def _wipe_structured_models(self, dry_run: bool) -> None:
        """Delete (or, in dry-run, count) all Fine/Procedure/Office/Advisory rows."""
        self.stdout.write(
            self.style.MIGRATE_HEADING("🧹 Xóa dữ liệu structured (Fine/Procedure/Office/Advisory)...")
        )

        for model in (Fine, Procedure, Office, Advisory):
            qs = model.objects.all()
            if dry_run:
                self.stdout.write(f"[DRY-RUN] Sẽ xóa {qs.count()} bản ghi từ {model.__name__}")
            else:
                deleted = self._delete_and_count(qs)
                self.stdout.write(f"Đã xóa {deleted} bản ghi từ {model.__name__}")

    def _prune_legal_documents(self, dry_run: bool) -> None:
        """Delete (or, in dry-run, count) legal data outside ``LEGAL_CODES_TO_KEEP``."""
        self.stdout.write(
            self.style.MIGRATE_HEADING(
                "🧹 Xóa LegalDocument/LegalSection/LegalDocumentImage không thuộc 4 mã chỉ định..."
            )
        )

        keep_codes_display = ", ".join(LEGAL_CODES_TO_KEEP)
        self.stdout.write(f"Giữ lại các mã: {keep_codes_display}")

        sections_to_delete = LegalSection.objects.exclude(document__code__in=LEGAL_CODES_TO_KEEP)
        images_to_delete = LegalDocumentImage.objects.exclude(document__code__in=LEGAL_CODES_TO_KEEP)
        docs_to_delete = LegalDocument.objects.exclude(code__in=LEGAL_CODES_TO_KEEP)

        if dry_run:
            self.stdout.write(
                f"[DRY-RUN] Sẽ xóa {docs_to_delete.count()} LegalDocument, "
                f"{sections_to_delete.count()} LegalSection, "
                f"{images_to_delete.count()} LegalDocumentImage (nếu tồn tại)."
            )
            return

        # Sections and images are removed explicitly, before their documents,
        # so that each model's count can be logged separately.
        deleted_sections = self._delete_and_count(sections_to_delete)
        deleted_images = self._delete_and_count(images_to_delete)
        deleted_docs = self._delete_and_count(docs_to_delete)
        self.stdout.write(
            f"Đã xóa {deleted_docs} LegalDocument, "
            f"{deleted_sections} LegalSection, {deleted_images} LegalDocumentImage."
        )

    def _report_remaining(self, dry_run: bool) -> None:
        """Print the closing status line and the kept LegalDocument rows."""
        remaining_docs = list(
            LegalDocument.objects.filter(code__in=LEGAL_CODES_TO_KEEP).values_list("code", "title")
        )

        if dry_run:
            # Nothing was removed, so do not claim the cleanup has completed.
            self.stdout.write(self.style.WARNING("[DRY-RUN] Chưa xóa dữ liệu nào."))
        else:
            self.stdout.write(self.style.SUCCESS("✅ Hoàn tất dọn dữ liệu cho HF Space."))

        self.stdout.write(f"Còn lại {len(remaining_docs)} LegalDocument:")
        for code, title in remaining_docs:
            self.stdout.write(f"- {code}: {title}")
106
+
107
+