sofhiaazzhr committed on
Commit
023b7cf
·
1 Parent(s): 9debae5

[NOTICKET]: add comments to flag that file type lists must stay in sync

Browse files
src/api/v1/document.py CHANGED
@@ -24,20 +24,25 @@ class DocumentResponse(BaseModel):
24
  created_at: str
25
 
26
 
27
- @router.get("/documents/doctypes")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  @log_execution(logger)
29
  async def get_document_types():
30
- """List supported document types with max file size and status."""
31
- return {
32
- "status": "success",
33
- "data": [
34
- {"doc_type": "pdf", "max_size": 10, "status": "active", "message": None},
35
- {"doc_type": "docx", "max_size": 10, "status": "active", "message": None},
36
- {"doc_type": "txt", "max_size": 10, "status": "active", "message": None},
37
- {"doc_type": "csv", "max_size": 10, "status": "active", "message": None},
38
- {"doc_type": "xlsx", "max_size": 10, "status": "active", "message": None},
39
- ]
40
- }
41
 
42
 
43
  @router.get("/documents/{user_id}", response_model=List[DocumentResponse])
 
24
  created_at: str
25
 
26
 
27
+ # NOTE: Keep in sync with SUPPORTED_FILE_TYPES in src/pipeline/document_pipeline/document_pipeline.py
28
+ _DOC_TYPES = [
29
+ {"doc_type": "pdf", "max_size": 10, "status": "active", "message": None},
30
+ {"doc_type": "docx", "max_size": 10, "status": "active", "message": None},
31
+ {"doc_type": "txt", "max_size": 10, "status": "active", "message": None},
32
+ {"doc_type": "csv", "max_size": 10, "status": "active", "message": None},
33
+ {"doc_type": "xlsx", "max_size": 10, "status": "active", "message": None},
34
+ ]
35
+
36
+
37
+ @router.get(
38
+ "/documents/doctypes",
39
+ summary="List supported document types",
40
+ response_description="All document types supported by DataEyond with their size limits and status.",
41
+ )
42
  @log_execution(logger)
43
  async def get_document_types():
44
+ """Return every document type DataEyond can process, with max file size and active/inactive status."""
45
+ return {"status": "success", "data": _DOC_TYPES}
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  @router.get("/documents/{user_id}", response_model=List[DocumentResponse])
src/pipeline/document_pipeline/document_pipeline.py CHANGED
@@ -10,7 +10,9 @@ from src.storage.az_blob.az_blob import blob_storage
10
 
11
  logger = get_logger("document_pipeline")
12
 
 
13
  SUPPORTED_FILE_TYPES = ["pdf", "docx", "txt", "csv", "xlsx"]
 
14
 
15
 
16
  class DocumentPipeline:
@@ -21,7 +23,6 @@ class DocumentPipeline:
21
  content = await file.read()
22
  file_type = file.filename.split(".")[-1].lower() if "." in file.filename else "txt"
23
 
24
- MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024 # 10 MB
25
  if len(content) > MAX_FILE_SIZE_BYTES:
26
  raise HTTPException(
27
  status_code=400,
 
10
 
11
  logger = get_logger("document_pipeline")
12
 
13
+ # NOTE: Keep in sync with _DOC_TYPES in src/api/v1/document.py
14
  SUPPORTED_FILE_TYPES = ["pdf", "docx", "txt", "csv", "xlsx"]
15
+ MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024 # 10 MB
16
 
17
 
18
  class DocumentPipeline:
 
23
  content = await file.read()
24
  file_type = file.filename.split(".")[-1].lower() if "." in file.filename else "txt"
25
 
 
26
  if len(content) > MAX_FILE_SIZE_BYTES:
27
  raise HTTPException(
28
  status_code=400,