davidtran999 committed on
Commit
77ffb68
·
verified ·
1 Parent(s): f468a7c

Upload backend/core/models.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend/core/models.py +362 -0
backend/core/models.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.db import models
2
+ from django.contrib.postgres.search import SearchVectorField
3
+ from django.contrib.postgres.indexes import GinIndex
4
+ from django.utils import timezone
5
+ import uuid
6
+
7
+
8
def legal_document_upload_path(instance, filename):
    """Build the storage path for a legal document's uploaded file.

    Files are grouped under ``legal_uploads/<code>/`` where ``<code>`` is the
    document code with path separators replaced by underscores. When the code
    is missing, blank, or would be a relative path segment (``.`` / ``..``),
    a random hex token is used instead so the upload stays inside its folder.

    Args:
        instance: Object exposing a ``code`` attribute (a string or ``None``).
        filename: Name of the file being uploaded.

    Returns:
        A relative path of the form ``legal_uploads/<code>/<filename>``.
    """
    base = "legal_uploads"
    # Neutralise both POSIX and Windows separators so the code always stays a
    # single path segment.
    code = (instance.code or "").replace("/", "_").replace("\\", "_")
    if code.strip() in {"", ".", ".."}:
        code = uuid.uuid4().hex
    return f"{base}/{code}/{filename}"
12
+
13
+
14
def legal_document_image_upload_path(instance, filename):
    """Build the storage path for an image extracted from a legal document.

    Images are grouped under ``legal_images/<code>/`` and the file name is
    prefixed with the current time (``YYYYMMDDHHMMSS``). ``<code>`` is the
    parent document's code with ``/`` replaced by ``_``; a random hex token is
    used when the document, or its code, is not available.

    Args:
        instance: Object that may expose a ``document`` attribute which in
            turn exposes a ``code`` string.
        filename: Name of the image file being stored.

    Returns:
        A relative path ``legal_images/<code>/<timestamp>_<filename>``.
    """
    base = "legal_images"
    # Reading an unset forward relation raises RelatedObjectDoesNotExist (an
    # AttributeError subclass) rather than yielding None, so a plain truthiness
    # check on ``instance.document`` cannot guard it; getattr() can.
    document = getattr(instance, "document", None)
    raw_code = getattr(document, "code", None) or uuid.uuid4().hex
    code = raw_code.replace("/", "_")
    timestamp = timezone.now().strftime("%Y%m%d%H%M%S")
    return f"{base}/{code}/{timestamp}_{filename}"
19
+
20
class Procedure(models.Model):
    """Administrative procedure with full-text (``tsv_body``) and binary
    ``embedding`` columns."""

    title = models.CharField(max_length=500)
    # Example values from the original author: ANTT / Cư trú / PCCC / GT
    # (presumably public security, residence, fire safety, traffic -- confirm
    # against the ingested data).
    domain = models.CharField(max_length=100, db_index=True)
    # Example values: Tỉnh / Huyện / Xã (province / district / commune).
    level = models.CharField(max_length=50, blank=True)
    conditions = models.TextField(blank=True)
    dossier = models.TextField(blank=True)
    fee = models.CharField(max_length=200, blank=True)
    duration = models.CharField(max_length=200, blank=True)
    authority = models.CharField(max_length=300, blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    updated_at = models.DateTimeField(auto_now=True)
    tsv_body = SearchVectorField(null=True, editable=False)
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="procedure_tsv_idx"),
        ]

    def __str__(self) -> str:
        return self.title

    def search_vector(self) -> str:
        """Return this procedure's searchable text as one plain string.

        The title, domain, level, conditions and dossier are joined with
        single spaces; empty values are skipped. This is ordinary text, not a
        PostgreSQL search vector.
        """
        fields = [self.title, self.domain, self.level, self.conditions, self.dossier]
        return " ".join(str(f) for f in fields if f)
43
+
44
class Fine(models.Model):
    """Fine record identified by a unique ``code``, with full-text
    (``tsv_body``) and binary ``embedding`` columns."""

    code = models.CharField(max_length=50, unique=True)
    name = models.CharField(max_length=500)
    article = models.CharField(max_length=100, blank=True)
    decree = models.CharField(max_length=100, blank=True)
    # Whole-number amounts (decimal_places=0); either bound may be unknown.
    min_fine = models.DecimalField(max_digits=12, decimal_places=0, null=True, blank=True)
    max_fine = models.DecimalField(max_digits=12, decimal_places=0, null=True, blank=True)
    license_points = models.CharField(max_length=50, blank=True)
    remedial = models.TextField(blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    tsv_body = SearchVectorField(null=True, editable=False)
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="fine_tsv_idx"),
        ]

    def __str__(self) -> str:
        return f"{self.code} - {self.name}"

    def search_vector(self) -> str:
        """Return this fine's searchable text as one plain string.

        The name, code, article, decree and remedial text are joined with
        single spaces; empty values are skipped. This is ordinary text, not a
        PostgreSQL search vector.
        """
        fields = [self.name, self.code, self.article, self.decree, self.remedial]
        return " ".join(str(f) for f in fields if f)
66
+
67
class Office(models.Model):
    """Office/unit contact record with optional coordinates, full-text
    (``tsv_body``) and binary ``embedding`` columns."""

    unit_name = models.CharField(max_length=300)
    address = models.CharField(max_length=500, blank=True)
    district = models.CharField(max_length=100, blank=True, db_index=True)
    working_hours = models.CharField(max_length=200, blank=True)
    phone = models.CharField(max_length=100, blank=True)
    email = models.EmailField(blank=True)
    latitude = models.FloatField(null=True, blank=True)
    longitude = models.FloatField(null=True, blank=True)
    service_scope = models.CharField(max_length=300, blank=True)
    tsv_body = SearchVectorField(null=True, editable=False)
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="office_tsv_idx"),
        ]

    def __str__(self) -> str:
        return self.unit_name

    def search_vector(self) -> str:
        """Return this office's searchable text as one plain string.

        The unit name, address, district and service scope are joined with
        single spaces; empty values are skipped. This is ordinary text, not a
        PostgreSQL search vector.
        """
        fields = [self.unit_name, self.address, self.district, self.service_scope]
        return " ".join(str(f) for f in fields if f)
89
+
90
class Advisory(models.Model):
    """Advisory notice (title + summary) with full-text (``tsv_body``) and
    binary ``embedding`` columns."""

    title = models.CharField(max_length=500)
    summary = models.TextField()
    source_url = models.URLField(max_length=1000, blank=True)
    published_at = models.DateField(null=True, blank=True)
    tsv_body = SearchVectorField(null=True, editable=False)
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="advisory_tsv_idx"),
        ]

    def __str__(self) -> str:
        return self.title

    def search_vector(self) -> str:
        """Return this advisory's searchable text as one plain string.

        The title and summary are joined with a single space; empty values
        are skipped. This is ordinary text, not a PostgreSQL search vector.
        """
        fields = [self.title, self.summary]
        return " ".join(str(f) for f in fields if f)
107
+
108
+
109
class LegalDocument(models.Model):
    """Metadata + raw text for authoritative legal documents."""

    DOCUMENT_TYPES = [
        ("decision", "Decision"),
        ("circular", "Circular"),
        ("guideline", "Guideline"),
        ("plan", "Plan"),
        ("other", "Other"),
    ]

    # Unique document identifier; also used to build the upload folder name.
    code = models.CharField(max_length=100, unique=True)
    title = models.CharField(max_length=500)
    doc_type = models.CharField(max_length=30, choices=DOCUMENT_TYPES, default="other")
    summary = models.TextField(blank=True)
    issued_by = models.CharField(max_length=200, blank=True)
    issued_at = models.DateField(null=True, blank=True)
    source_file = models.CharField(max_length=500, blank=True)
    uploaded_file = models.FileField(upload_to=legal_document_upload_path, null=True, blank=True)
    original_filename = models.CharField(max_length=255, blank=True)
    mime_type = models.CharField(max_length=120, blank=True)
    file_size = models.BigIntegerField(null=True, blank=True)
    file_checksum = models.CharField(max_length=128, blank=True)
    content_checksum = models.CharField(max_length=128, blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    page_count = models.IntegerField(null=True, blank=True)
    raw_text = models.TextField()
    raw_text_ocr = models.TextField(blank=True)
    metadata = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    tsv_body = SearchVectorField(null=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="legal_document_tsv_idx"),
            models.Index(fields=["doc_type"]),
            models.Index(fields=["issued_at"]),
        ]
        ordering = ["title"]

    def __str__(self) -> str:
        return f"{self.code} - {self.title}"

    def search_vector(self) -> str:
        """Return concatenated searchable text.

        The title, code, summary, issuer and raw text are joined with single
        spaces; empty values are skipped. ``raw_text_ocr`` is not included.
        """
        fields = [
            self.title,
            self.code,
            self.summary,
            self.issued_by,
            self.raw_text,
        ]
        return " ".join(str(f) for f in fields if f)
160
+
161
+
162
class LegalSection(models.Model):
    """Structured snippet (chapter/section/article) for each legal document."""

    LEVEL_CHOICES = [
        ("chapter", "Chapter"),
        ("section", "Section"),
        ("article", "Article"),
        ("clause", "Clause"),
        ("note", "Note"),
        ("other", "Other"),
    ]

    # Sections are deleted together with their parent document.
    document = models.ForeignKey(
        LegalDocument,
        on_delete=models.CASCADE,
        related_name="sections",
    )
    section_code = models.CharField(max_length=120)
    section_title = models.CharField(max_length=500, blank=True)
    level = models.CharField(max_length=30, choices=LEVEL_CHOICES, default="other")
    order = models.PositiveIntegerField(default=0, db_index=True)
    page_start = models.IntegerField(null=True, blank=True)
    page_end = models.IntegerField(null=True, blank=True)
    content = models.TextField()
    excerpt = models.TextField(blank=True)
    metadata = models.JSONField(default=dict, blank=True)
    is_ocr = models.BooleanField(default=False)
    tsv_body = SearchVectorField(null=True, editable=False)
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="legal_section_tsv_idx"),
            models.Index(fields=["document", "order"]),
            models.Index(fields=["level"]),
        ]
        ordering = ["document", "order"]
        unique_together = ("document", "section_code", "order")

    def __str__(self) -> str:
        # Uses only local columns so rendering a section never loads the
        # parent document.
        if self.section_title:
            return f"{self.section_code} - {self.section_title}"
        return self.section_code

    def search_vector(self) -> str:
        """Return this section's searchable text as one plain string.

        The section title, section code, content and excerpt are joined with
        single spaces; empty values are skipped.
        """
        fields = [
            self.section_title,
            self.section_code,
            self.content,
            self.excerpt,
        ]
        return " ".join(str(f) for f in fields if f)
209
+
210
+
211
class Synonym(models.Model):
    """Maps a unique ``keyword`` to a single ``alias`` string."""

    keyword = models.CharField(max_length=120, unique=True)
    alias = models.CharField(max_length=120)

    def __str__(self) -> str:
        return f"{self.keyword} -> {self.alias}"
214
+
215
+
216
class LegalDocumentImage(models.Model):
    """Image record attached to a legal document, with page, size and checksum metadata."""

    # Images are deleted together with their parent document.
    document = models.ForeignKey(
        LegalDocument,
        related_name="images",
        on_delete=models.CASCADE,
    )
    image = models.ImageField(upload_to=legal_document_image_upload_path)
    page_number = models.IntegerField(null=True, blank=True)
    description = models.CharField(max_length=255, blank=True)
    width = models.IntegerField(null=True, blank=True)
    height = models.IntegerField(null=True, blank=True)
    checksum = models.CharField(max_length=128, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        indexes = [
            models.Index(fields=["document", "page_number"]),
            models.Index(fields=["checksum"]),
        ]

    def __str__(self) -> str:
        # Reads the parent document's code, so the related row must be
        # available when this is rendered.
        image_id = self.id
        document_code = self.document.code
        return f"Image {image_id} of {document_code}"
240
+
241
+
242
class IngestionJob(models.Model):
    """Bookkeeping row for one background ingestion task."""

    # Status values, exposed as class constants for use by callers.
    STATUS_PENDING = "pending"
    STATUS_RUNNING = "running"
    STATUS_COMPLETED = "completed"
    STATUS_FAILED = "failed"

    STATUS_CHOICES = [
        (STATUS_PENDING, "Pending"),
        (STATUS_RUNNING, "Running"),
        (STATUS_COMPLETED, "Completed"),
        (STATUS_FAILED, "Failed"),
    ]

    # Random UUID primary key instead of an auto-incrementing integer.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    code = models.CharField(max_length=128)
    filename = models.CharField(max_length=255)
    # The job row survives deletion of its document (the link is set to NULL).
    document = models.ForeignKey(
        LegalDocument,
        on_delete=models.SET_NULL,
        related_name="ingestion_jobs",
        null=True,
        blank=True,
    )
    metadata = models.JSONField(default=dict, blank=True)
    stats = models.JSONField(default=dict, blank=True)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default=STATUS_PENDING)
    error_message = models.TextField(blank=True)
    storage_path = models.CharField(max_length=512, blank=True)
    progress = models.PositiveIntegerField(default=0)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        # Newest jobs first.
        ordering = ("-created_at",)

    def __str__(self) -> str:  # pragma: no cover - trivial
        job_code = self.code
        job_status = self.status
        return f"IngestionJob({job_code}, {job_status})"
283
+
284
class AuditLog(models.Model):
    """One logged request: client details, path/query, response status and
    optional intent, confidence and latency figures."""

    created_at = models.DateTimeField(auto_now_add=True)
    ip = models.GenericIPAddressField(null=True, blank=True)
    user_agent = models.CharField(max_length=300, blank=True)
    path = models.CharField(max_length=300)
    query = models.CharField(max_length=500, blank=True)
    status = models.IntegerField(default=200)
    intent = models.CharField(max_length=50, blank=True)
    confidence = models.FloatField(null=True, blank=True)
    latency_ms = models.FloatField(null=True, blank=True)

    def __str__(self) -> str:
        return f"[{self.status}] {self.path}"
294
+
295
+
296
class MLMetrics(models.Model):
    """Aggregated metrics row, one per calendar ``date`` (the date is unique)."""

    date = models.DateField(unique=True)
    total_requests = models.IntegerField(default=0)
    intent_accuracy = models.FloatField(null=True, blank=True)
    average_latency_ms = models.FloatField(null=True, blank=True)
    error_rate = models.FloatField(null=True, blank=True)
    intent_breakdown = models.JSONField(default=dict, blank=True)
    generated_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # Most recent day first.
        ordering = ["-date"]
        verbose_name = "ML Metrics"
        verbose_name_plural = "ML Metrics"

    def __str__(self) -> str:
        return f"ML Metrics {self.date}"
309
+
310
+
311
class ConversationSession(models.Model):
    """A conversation session, kept so later messages can be tied to it."""

    # Public identifier, generated automatically and not editable.
    session_id = models.UUIDField(default=uuid.uuid4, unique=True, editable=False)
    user_id = models.CharField(max_length=100, null=True, blank=True, db_index=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    metadata = models.JSONField(default=dict, blank=True)

    class Meta:
        # Most recently updated sessions first.
        ordering = ["-updated_at"]
        verbose_name = "Conversation Session"
        verbose_name_plural = "Conversation Sessions"
        # NOTE(review): session_id is already unique, so its separate index
        # looks redundant; dropping it needs a migration, so it is kept here.
        indexes = [
            models.Index(fields=["session_id"]),
            models.Index(fields=["user_id", "-updated_at"]),
        ]

    def __str__(self):
        session_id = self.session_id
        return f"Session {session_id}"
330
+
331
+
332
class ConversationMessage(models.Model):
    """Model to store individual messages in a conversation session."""

    ROLE_CHOICES = [
        ("user", "User"),
        ("bot", "Bot"),
    ]

    # Messages are deleted together with their session.
    session = models.ForeignKey(
        ConversationSession,
        on_delete=models.CASCADE,
        related_name="messages"
    )
    role = models.CharField(max_length=10, choices=ROLE_CHOICES)
    content = models.TextField()
    # NOTE(review): null=True on a CharField allows both NULL and "" as
    # "no intent"; left unchanged because fixing it needs a data migration.
    intent = models.CharField(max_length=50, blank=True, null=True)
    entities = models.JSONField(default=dict, blank=True)
    timestamp = models.DateTimeField(auto_now_add=True)
    metadata = models.JSONField(default=dict, blank=True)

    class Meta:
        # Oldest message first.
        ordering = ["timestamp"]
        verbose_name = "Conversation Message"
        verbose_name_plural = "Conversation Messages"
        indexes = [
            models.Index(fields=["session", "timestamp"]),
            models.Index(fields=["session", "role", "timestamp"]),
        ]

    def __str__(self):
        # Show at most 50 characters and add an ellipsis only when the
        # content was actually cut, so short messages are not shown as
        # truncated.
        preview = self.content[:50]
        suffix = "..." if len(self.content) > 50 else ""
        return f"{self.role}: {preview}{suffix}"
362
+