davidtran999 committed on
Commit
9e39729
·
verified ·
1 Parent(s): fadb000

Upload backend/core/views.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend/core/views.py +320 -0
backend/core/views.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from django.conf import settings
3
+ from django.db.models.functions import Lower
4
+ from django.db.models import Q
5
+ from django.http import FileResponse, Http404
6
+ from django.shortcuts import get_object_or_404
7
+ from pathlib import Path
8
+ from rest_framework.decorators import api_view, parser_classes
9
+ from rest_framework.parsers import MultiPartParser, FormParser
10
+ from rest_framework.response import Response
11
+ from .models import Procedure, Fine, Office, Advisory, LegalSection, LegalDocument, Synonym, IngestionJob
12
+ from .serializers import (
13
+ ProcedureSerializer,
14
+ FineSerializer,
15
+ OfficeSerializer,
16
+ AdvisorySerializer,
17
+ LegalSectionSerializer,
18
+ LegalDocumentSerializer,
19
+ IngestionJobSerializer,
20
+ )
21
+ from .services import enqueue_ingestion_job
22
+ from .search_ml import search_with_ml
23
+ # Chatbot moved to hue_portal.chatbot app
24
+ # Keeping import for backward compatibility
25
+ try:
26
+ from hue_portal.chatbot.chatbot import get_chatbot
27
+ except ImportError:
28
+ from .chatbot import get_chatbot
29
+
30
def normalize_query(q: str) -> str:
    """Return *q* with surrounding whitespace removed.

    Any falsy input (``None`` or an empty string) yields ``""`` so callers
    can test the result with a plain truthiness check.
    """
    if not q:
        return ""
    return q.strip()
32
+
33
@api_view(["GET"])
def search(request):
    """Unified search endpoint - searches across all models.

    Query parameters:
        q: search text (required); a missing or blank value yields a 400.
        type: optional; restricts the search to one of "procedure", "fine",
            "office", "advisory" or "legal". Any other non-empty value
            matches no source, so the result list is empty.

    Each source is passed to ``search_with_ml`` with ``top_k=10`` and
    ``min_score=0.1``. Hits from all sources are merged, ordered by
    descending relevance and the first 50 are returned.
    """
    q = normalize_query(request.GET.get("q", ""))
    wanted_type = request.GET.get("type")  # Optional: filter by type

    if not q:
        return Response({"error": "q parameter is required"}, status=400)

    # One row per searchable source:
    # (type label, queryset source, text fields, serializer, serializer kwargs)
    sources = (
        (
            "procedure",
            Procedure.objects,
            ["title", "domain", "conditions", "dossier"],
            ProcedureSerializer,
            {},
        ),
        (
            "fine",
            Fine.objects,
            ["name", "code", "article", "decree", "remedial"],
            FineSerializer,
            {},
        ),
        (
            "office",
            Office.objects,
            ["unit_name", "address", "district", "service_scope"],
            OfficeSerializer,
            {},
        ),
        (
            "advisory",
            Advisory.objects,
            ["title", "summary"],
            AdvisorySerializer,
            {},
        ),
        (
            "legal",
            LegalSection.objects.select_related("document"),
            ["section_title", "section_code", "content"],
            LegalSectionSerializer,
            # Only the legal serializer is given the request as context.
            {"context": {"request": request}},
        ),
    )

    hits = []
    for label, source, text_fields, serializer_cls, serializer_kwargs in sources:
        if wanted_type and wanted_type != label:
            continue
        matches = search_with_ml(source.all(), q, text_fields, top_k=10, min_score=0.1)
        for match in matches:
            hits.append({
                "type": label,
                "data": serializer_cls(match, **serializer_kwargs).data,
                # Objects without an attached score fall back to 0.5.
                "relevance": getattr(match, '_ml_score', 0.5),
            })

    # Sort by relevance score (stable, so ties keep their source order)
    ranked = sorted(hits, key=lambda hit: hit["relevance"], reverse=True)

    return Response({
        "query": q,
        "count": len(ranked),
        "results": ranked[:50],  # Limit total results
    })
111
+
112
@api_view(["GET"])
def procedures_list(request):
    """List procedures, optionally filtered and searched.

    Query parameters:
        domain, level: case-insensitive exact-match filters.
        q: free-text query; when non-blank the filtered queryset is handed
            to ``search_with_ml`` (``top_k=100``, ``min_score=0.1``).

    At most 100 serialized procedures are returned.
    """
    params = request.GET
    term = normalize_query(params.get("q", ""))

    procedures = Procedure.objects.all()
    for field_name in ("domain", "level"):
        wanted = params.get(field_name)
        if wanted:
            procedures = procedures.filter(**{f"{field_name}__iexact": wanted})

    if term:
        # Use ML-based search for better results
        procedures = search_with_ml(
            procedures,
            term,
            ["title", "domain", "conditions", "dossier"],
            top_k=100,
            min_score=0.1,
        )

    serializer = ProcedureSerializer(procedures[:100], many=True)
    return Response(serializer.data)
125
+
126
@api_view(["GET"])
def procedures_detail(request, pk: int):
    """Return the serialized procedure with primary key *pk*.

    Responds with an empty-bodied 404 when no such procedure exists.
    """
    try:
        procedure = Procedure.objects.get(pk=pk)
    except Procedure.DoesNotExist:
        return Response(status=404)
    else:
        payload = ProcedureSerializer(procedure).data
    return Response(payload)
133
+
134
@api_view(["GET"])
def fines_list(request):
    """List fines, optionally filtered and searched.

    Query parameters:
        code: case-insensitive exact-match filter on the fine code.
        q: free-text query; when non-blank the filtered queryset is handed
            to ``search_with_ml`` (``top_k=100``, ``min_score=0.1``).

    At most 100 serialized fines are returned.
    """
    params = request.GET
    term = normalize_query(params.get("q", ""))

    fines = Fine.objects.all()
    wanted_code = params.get("code")
    if wanted_code:
        fines = fines.filter(code__iexact=wanted_code)

    if term:
        # Use ML-based search for better results
        fines = search_with_ml(
            fines,
            term,
            ["name", "code", "article", "decree", "remedial"],
            top_k=100,
            min_score=0.1,
        )

    serializer = FineSerializer(fines[:100], many=True)
    return Response(serializer.data)
145
+
146
@api_view(["GET"])
def fines_detail(request, pk: int):
    """Return the serialized fine with primary key *pk*.

    Responds with an empty-bodied 404 when no such fine exists.
    """
    try:
        fine = Fine.objects.get(pk=pk)
    except Fine.DoesNotExist:
        return Response(status=404)
    else:
        payload = FineSerializer(fine).data
    return Response(payload)
153
+
154
@api_view(["GET"])
def offices_list(request):
    """List offices, optionally filtered and searched.

    Query parameters:
        district: case-insensitive exact-match filter.
        q: free-text query; when non-blank the filtered queryset is handed
            to ``search_with_ml`` (``top_k=100``, ``min_score=0.1``).

    At most 100 serialized offices are returned.
    """
    params = request.GET
    term = normalize_query(params.get("q", ""))

    offices = Office.objects.all()
    wanted_district = params.get("district")
    if wanted_district:
        offices = offices.filter(district__iexact=wanted_district)

    if term:
        # Use ML-based search for better results
        offices = search_with_ml(
            offices,
            term,
            ["unit_name", "address", "district", "service_scope"],
            top_k=100,
            min_score=0.1,
        )

    serializer = OfficeSerializer(offices[:100], many=True)
    return Response(serializer.data)
165
+
166
@api_view(["GET"])
def offices_detail(request, pk: int):
    """Return the serialized office with primary key *pk*.

    Responds with an empty-bodied 404 when no such office exists.
    """
    try:
        office = Office.objects.get(pk=pk)
    except Office.DoesNotExist:
        return Response(status=404)
    else:
        payload = OfficeSerializer(office).data
    return Response(payload)
173
+
174
@api_view(["GET"])
def advisories_list(request):
    """List advisories, newest ``published_at`` first, optionally searched.

    Query parameters:
        q: free-text query; when non-blank the ordered queryset is handed
            to ``search_with_ml`` (``top_k=100``, ``min_score=0.1``).

    At most 100 serialized advisories are returned.
    """
    term = normalize_query(request.GET.get("q", ""))
    advisories = Advisory.objects.all().order_by("-published_at")

    if term:
        # Use ML-based search for better results
        advisories = search_with_ml(
            advisories,
            term,
            ["title", "summary"],
            top_k=100,
            min_score=0.1,
        )

    serializer = AdvisorySerializer(advisories[:100], many=True)
    return Response(serializer.data)
183
+
184
@api_view(["GET"])
def advisories_detail(request, pk: int):
    """Return the serialized advisory with primary key *pk*.

    Responds with an empty-bodied 404 when no such advisory exists.
    """
    try:
        advisory = Advisory.objects.get(pk=pk)
    except Advisory.DoesNotExist:
        return Response(status=404)
    else:
        payload = AdvisorySerializer(advisory).data
    return Response(payload)
191
+
192
@api_view(["GET"])
def legal_sections_list(request):
    """List legal sections, optionally filtered and searched.

    Query parameters:
        document_code: case-insensitive exact match on the parent
            document's code.
        section_code: case-insensitive substring match on the section code.
        q: free-text query; when non-blank the filtered queryset is handed
            to ``search_with_ml`` (``top_k=100``, ``min_score=0.1``).

    At most 100 sections are returned; the serializer receives the request
    as context.
    """
    params = request.GET
    term = normalize_query(params.get("q", ""))

    # Join the parent document up front so serialization does not need an
    # extra query per section.
    sections = LegalSection.objects.select_related("document").all()

    wanted_document = params.get("document_code")
    if wanted_document:
        sections = sections.filter(document__code__iexact=wanted_document)

    wanted_section = params.get("section_code")
    if wanted_section:
        sections = sections.filter(section_code__icontains=wanted_section)

    if term:
        sections = search_with_ml(
            sections,
            term,
            ["section_title", "section_code", "content"],
            top_k=100,
            min_score=0.1,
        )

    serializer = LegalSectionSerializer(
        sections[:100],
        many=True,
        context={"request": request},
    )
    return Response(serializer.data)
206
+
207
@api_view(["GET"])
def legal_sections_detail(request, pk: int):
    """Return the serialized legal section with primary key *pk*.

    The parent document is fetched in the same query. Responds with an
    empty-bodied 404 when no such section exists.
    """
    sections = LegalSection.objects.select_related("document")
    try:
        section = sections.get(pk=pk)
    except LegalSection.DoesNotExist:
        return Response(status=404)
    else:
        serializer = LegalSectionSerializer(section, context={"request": request})
    return Response(serializer.data)
214
+
215
@api_view(["GET"])
def legal_document_download(request, pk: int):
    """Stream a legal document's source file as an attachment.

    Args:
        request: the incoming DRF request (unused beyond routing).
        pk: primary key of the ``LegalDocument`` to download.

    Returns:
        A ``FileResponse`` serving the file under its base name.

    Raises:
        Http404: if the document does not exist, has no ``source_file``
            value, or the file cannot be opened on this server.
    """
    try:
        doc = LegalDocument.objects.get(pk=pk)
    except LegalDocument.DoesNotExist:
        raise Http404("Document not found") from None
    if not doc.source_file:
        raise Http404("Document missing source file")

    file_path = Path(doc.source_file)
    # Open directly instead of checking exists() first: the check-then-open
    # sequence is race-prone, and a directory or unreadable path passes
    # exists() but then fails in open() and surfaces as a 500.
    try:
        handle = open(file_path, "rb")
    except OSError:
        raise Http404("Source file not found on server") from None

    # FileResponse takes ownership of the handle and closes it when the
    # response is closed.
    return FileResponse(handle, as_attachment=True, filename=file_path.name)
228
+
229
+
230
+ def _has_upload_access(request):
231
+ if getattr(request, "user", None) and request.user.is_authenticated:
232
+ return True
233
+ expected = getattr(settings, "LEGAL_UPLOAD_TOKEN", "")
234
+ header_token = request.headers.get("X-Upload-Token")
235
+ return bool(expected and header_token and header_token == expected)
236
+
237
+
238
@api_view(["POST"])
@parser_classes([MultiPartParser, FormParser])
def legal_document_upload(request):
    """Accept a legal document upload and queue it for ingestion.

    Expected multipart/form fields:
        file: the uploaded document (required).
        code: document code (required, surrounding whitespace stripped).
        title, doc_type, summary, issued_by, issued_at, source_url,
        mime_type: optional descriptive fields. ``title`` falls back to
            ``code``, ``doc_type`` to ``"other"`` and ``mime_type`` to the
            upload's content type.
        metadata: optional JSON string (or already-parsed value) stored
            under the ``"metadata"`` key.

    Returns:
        202 with the serialized ingestion job on success;
        403 when the caller lacks upload access;
        400 for a missing file/code, invalid metadata JSON, or a
        ``ValueError`` raised by ``enqueue_ingestion_job``;
        500 for any other failure while enqueueing.
    """
    import logging

    if not _has_upload_access(request):
        return Response({"error": "unauthorized"}, status=403)

    upload = request.FILES.get("file")
    if not upload:
        return Response({"error": "file is required"}, status=400)

    code = (request.data.get("code") or "").strip()
    if not code:
        return Response({"error": "code is required"}, status=400)

    metadata = {
        "code": code,
        "title": request.data.get("title") or code,
        "doc_type": request.data.get("doc_type", "other"),
        "summary": request.data.get("summary", ""),
        "issued_by": request.data.get("issued_by", ""),
        "issued_at": request.data.get("issued_at"),
        "source_url": request.data.get("source_url", ""),
        "mime_type": request.data.get("mime_type") or getattr(upload, "content_type", ""),
        "metadata": {},
    }
    extra_meta = request.data.get("metadata")
    if extra_meta:
        try:
            metadata["metadata"] = json.loads(extra_meta) if isinstance(extra_meta, str) else extra_meta
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input. Both are the client's fault.
            return Response({"error": "metadata must be valid JSON"}, status=400)

    try:
        job = enqueue_ingestion_job(
            file_obj=upload,
            filename=upload.name,
            metadata=metadata,
        )
    except ValueError as exc:
        return Response({"error": str(exc)}, status=400)
    except Exception as exc:
        # Record the traceback; previously the failure was reported to the
        # client only and left no trace on the server.
        logging.getLogger(__name__).exception(
            "Failed to enqueue ingestion job for document code %r", code
        )
        # NOTE(review): str(exc) may expose internal details to the caller;
        # kept for backward compatibility with existing clients.
        return Response({"error": str(exc)}, status=500)

    serialized = IngestionJobSerializer(job, context={"request": request}).data
    return Response(serialized, status=202)
283
+
284
+
285
@api_view(["GET"])
def legal_ingestion_job_detail(request, job_id):
    """Return the serialized ingestion job whose id is *job_id*.

    ``get_object_or_404`` turns a missing job into a 404.
    """
    ingestion_job = get_object_or_404(IngestionJob, id=job_id)
    serializer = IngestionJobSerializer(ingestion_job, context={"request": request})
    return Response(serializer.data)
289
+
290
+
291
@api_view(["GET"])
def legal_ingestion_job_list(request):
    """Return the 20 most recently created ingestion jobs.

    Query parameters:
        code: optional exact-match filter on the job's ``code``.
    """
    jobs = IngestionJob.objects.all()

    wanted_code = request.GET.get("code")
    if wanted_code:
        jobs = jobs.filter(code=wanted_code)

    latest_jobs = jobs.order_by("-created_at")[:20]
    payload = IngestionJobSerializer(
        latest_jobs,
        many=True,
        context={"request": request},
    ).data
    return Response(payload)
300
+
301
@api_view(["POST"])
def chat(request):
    """Chatbot endpoint for natural language queries.

    Request body:
        message: the user's text (required, surrounding whitespace stripped).

    Returns:
        200 with whatever ``chatbot.generate_response`` returns;
        400 when ``message`` is missing, blank, or not a string;
        500 with an error payload when the chatbot raises.
    """
    import logging

    body = request.data
    # A non-mapping body (e.g. a JSON array) or a null/non-string "message"
    # used to raise AttributeError here and surface as a 500; treat both as
    # a missing message instead.
    raw_message = body.get("message", "") if hasattr(body, "get") else ""
    message = raw_message.strip() if isinstance(raw_message, str) else ""
    if not message:
        return Response({"error": "message is required"}, status=400)

    try:
        chatbot = get_chatbot()
        response = chatbot.generate_response(message)
    except Exception as e:
        # Record the traceback; previously the failure was reported to the
        # client only and left no trace on the server.
        logging.getLogger(__name__).exception("Chatbot failed to generate a response")
        # NOTE(review): str(e) may expose internal details to the caller;
        # kept for backward compatibility with existing clients.
        return Response({
            "message": "Xin lỗi, có lỗi xảy ra. Vui lòng thử lại.",
            "intent": "error",
            "error": str(e),
            "results": [],
            "count": 0
        }, status=500)
    return Response(response)
320
+