Spaces:
Running
Running
File size: 1,586 Bytes
f3270e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# Copyright (C) 2021-2025, Mindee.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from fastapi import UploadFile
from doctr.io import DocumentFile
def resolve_geometry(
geom: Any,
) -> tuple[float, float, float, float] | tuple[float, float, float, float, float, float, float, float]:
if len(geom) == 4:
return (*geom[0], *geom[1], *geom[2], *geom[3])
return (*geom[0], *geom[1])
async def get_documents(files: list[UploadFile]) -> tuple[list[np.ndarray], list[str]]:  # pragma: no cover
    """Convert a list of UploadFile objects to lists of numpy arrays and their corresponding filenames

    JPEG and PNG uploads are decoded with `DocumentFile.from_images` and contribute one filename.
    PDF uploads are decoded with `DocumentFile.from_pdf` and contribute one filename per element
    returned by that call. An upload without a filename is recorded as an empty string.

    Args:
        files: list of UploadFile objects

    Returns:
        tuple[list[np.ndarray], list[str]]: list of numpy arrays and their corresponding filenames

    Raises:
        ValueError: if the content type of an upload is not JPEG, PNG or PDF
    """
    filenames: list[str] = []
    docs: list[np.ndarray] = []
    for file in files:
        mime_type = file.content_type
        # Resolve the missing-filename fallback once so both branches behave the same way.
        filename = file.filename or ""
        if mime_type in ("image/jpeg", "image/png"):
            docs.extend(DocumentFile.from_images([await file.read()]))
            filenames.append(filename)
        elif mime_type == "application/pdf":
            pdf_content = DocumentFile.from_pdf(await file.read())
            docs.extend(pdf_content)
            # Apply the fallback before replicating: `[None] * n or [""] * n` keeps the
            # `None` values because a non-empty list is always truthy.
            filenames.extend([filename] * len(pdf_content))
        else:
            raise ValueError(f"Unsupported file format: {mime_type} for file {file.filename}")
    return docs, filenames
|