davidtran999 committed on
Commit
f582ce9
·
verified ·
1 Parent(s): 285fdfa

Upload backend/core/management/commands/load_legal_document.py with huggingface_hub

Browse files
backend/core/management/commands/load_legal_document.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from django.core.management.base import BaseCommand, CommandError
5
+
6
+ from hue_portal.core.services import ingest_uploaded_document
7
+
8
+
9
class Command(BaseCommand):
    """Management command that ingests a single legal document file.

    The command reads the file given by ``--file``, builds a metadata
    dictionary from the remaining options, and hands both to
    ``ingest_uploaded_document``. On success it prints the document code
    together with the section and image counts reported by the result.
    """

    help = "Ingest a legal document (PDF/DOCX) into the database."

    def add_arguments(self, parser):
        """Register the command-line options accepted by this command."""
        parser.add_argument("--file", required=True, help="Path to PDF/DOCX file.")
        parser.add_argument("--code", required=True, help="Unique document code.")
        parser.add_argument("--title", help="Document title.")
        parser.add_argument("--doc-type", default="other", help="Document type tag.")
        parser.add_argument("--summary", default="", help="Short summary.")
        parser.add_argument("--issued-by", default="", help="Issuing authority.")
        parser.add_argument("--issued-at", help="Issued date (YYYY-MM-DD or DD/MM/YYYY).")
        parser.add_argument("--source-url", default="", help="Original source URL.")
        parser.add_argument("--metadata", help="JSON string with extra metadata.")

    @staticmethod
    def _parse_extra_metadata(raw):
        """Decode the ``--metadata`` option into a dict.

        Returns an empty dict when *raw* is empty or ``None``.

        Raises:
            CommandError: if *raw* is not valid JSON, or decodes to something
                other than a JSON object.
        """
        if not raw:
            return {}
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as exc:
            raise CommandError(f"Invalid metadata JSON: {exc}") from exc
        # The default value is a dict, so reject lists/scalars up front
        # instead of passing an unexpected shape on to the ingest service.
        if not isinstance(parsed, dict):
            raise CommandError(
                "Invalid metadata JSON: expected an object, "
                f"got {type(parsed).__name__}"
            )
        return parsed

    def handle(self, *args, **options):
        """Validate the options, ingest the file, and report the outcome.

        Raises:
            CommandError: if the path is missing, is not a regular file,
                cannot be opened, or ``--metadata`` is not a JSON object.
        """
        file_path = Path(options["file"])
        if not file_path.exists():
            raise CommandError(f"File not found: {file_path}")
        # A directory "exists" too, but opening it would fail with a raw
        # traceback; report it as a usage error instead.
        if not file_path.is_file():
            raise CommandError(f"Not a regular file: {file_path}")

        metadata = {
            "code": options["code"],
            # Fall back to the document code when no title is supplied.
            "title": options.get("title") or options["code"],
            "doc_type": options["doc_type"],
            "summary": options["summary"],
            "issued_by": options["issued_by"],
            "issued_at": options.get("issued_at"),
            "source_url": options["source_url"],
            "metadata": self._parse_extra_metadata(options.get("metadata")),
        }

        # Open separately from the ingest call so that only I/O errors from
        # opening the file are converted into CommandError.
        try:
            file_obj = file_path.open("rb")
        except OSError as exc:
            raise CommandError(f"Cannot open file {file_path}: {exc}") from exc

        with file_obj:
            result = ingest_uploaded_document(
                file_obj=file_obj,
                filename=file_path.name,
                metadata=metadata,
            )

        self.stdout.write(
            self.style.SUCCESS(
                f"Ingested document {result.document.code}. "
                f"Sections: {result.sections_count}, Images: {result.images_count}."
            )
        )
57
+