File size: 1,586 Bytes
f3270e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


from typing import Any

import numpy as np
from fastapi import UploadFile

from doctr.io import DocumentFile


def resolve_geometry(
    geom: Any,
) -> tuple[float, float, float, float] | tuple[float, float, float, float, float, float, float, float]:
    if len(geom) == 4:
        return (*geom[0], *geom[1], *geom[2], *geom[3])
    return (*geom[0], *geom[1])


async def get_documents(files: list[UploadFile]) -> tuple[list[np.ndarray], list[str]]:  # pragma: no cover
    """Convert a list of UploadFile objects to lists of numpy arrays and their corresponding filenames

    Each JPEG/PNG upload contributes one filename entry. Each PDF upload contributes one filename entry
    per element returned by `DocumentFile.from_pdf` (presumably one per page), so that both returned
    lists stay aligned. A missing filename is replaced by an empty string.

    Args:
        files: list of UploadFile objects

    Returns:
        tuple[list[np.ndarray], list[str]]: list of numpy arrays and their corresponding filenames

    Raises:
        ValueError: if the content type of a file is not "image/jpeg", "image/png" or "application/pdf"

    """
    filenames: list[str] = []
    docs: list[np.ndarray] = []
    for file in files:
        mime_type = file.content_type
        if mime_type in ("image/jpeg", "image/png"):
            docs.extend(DocumentFile.from_images([await file.read()]))
            filenames.append(file.filename or "")
        elif mime_type == "application/pdf":
            pdf_content = DocumentFile.from_pdf(await file.read())
            docs.extend(pdf_content)
            # Apply the fallback to the filename itself before repeating it: `[None] * n` is a
            # truthy list, so an `or` placed after the multiplication would never replace None.
            filenames.extend([file.filename or ""] * len(pdf_content))
        else:
            raise ValueError(f"Unsupported file format: {mime_type} for file {file.filename}")

    return docs, filenames