davidtran999 committed on
Commit
9caeeaf
·
verified ·
1 Parent(s): a9744a4

Upload backend/core/management/commands/rechunk_legal_document.py with huggingface_hub

Browse files
backend/core/management/commands/rechunk_legal_document.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.core.management.base import BaseCommand, CommandError
2
+
3
+ from hue_portal.core.models import LegalDocument
4
+ from hue_portal.core.services import ingest_uploaded_document
5
+
6
+
7
class Command(BaseCommand):
    """Management command that re-ingests one stored legal document.

    Looks up a ``LegalDocument`` by its ``code``, rebuilds the metadata
    payload from the fields already stored on that record, and passes the
    stored upload to ``ingest_uploaded_document`` again.
    """

    help = "Re-run ingestion on an existing legal document using the stored file"

    def add_arguments(self, parser):
        """Register the required ``--code`` option on the argument parser."""
        parser.add_argument("--code", required=True, help="Document code to reprocess")

    def handle(self, *args, **options):
        """Re-run ingestion for the document identified by ``--code``.

        Raises:
            CommandError: if no document has the given code, if the document
                has no uploaded file, or if the stored file cannot be opened.
        """
        code = options["code"]
        try:
            doc = LegalDocument.objects.get(code=code)
        except LegalDocument.DoesNotExist as exc:
            raise CommandError(f"Legal document {code} not found") from exc

        if not doc.uploaded_file:
            raise CommandError("Document does not have an uploaded file to reprocess")

        # Rebuild the ingestion payload from what is already stored on the
        # record; a missing issue date is sent as an empty string.
        metadata = {
            "code": doc.code,
            "title": doc.title,
            "doc_type": doc.doc_type,
            "summary": doc.summary,
            "issued_by": doc.issued_by,
            "issued_at": doc.issued_at.isoformat() if doc.issued_at else "",
            "source_url": doc.source_url,
            "metadata": doc.metadata,
            "mime_type": doc.mime_type,
        }

        # A populated file field does not guarantee the file is still present
        # in storage, so report an unreadable file as a CommandError instead of
        # letting a raw traceback escape. Only the open call is guarded, so
        # errors raised by the ingestion itself propagate unchanged.
        try:
            stored_file = doc.uploaded_file.open("rb")
        except OSError as exc:
            raise CommandError(
                f"Could not open stored file for document {code}: {exc}"
            ) from exc

        with stored_file:
            ingest_uploaded_document(
                file_obj=stored_file,
                filename=doc.original_filename or doc.uploaded_file.name,
                metadata=metadata,
            )

        self.stdout.write(self.style.SUCCESS(f"Reprocessed document {code}"))
43
+