File size: 1,054 Bytes
7248d39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""MiniCPM-V 4.6 OCR client — runs on Modal GPU."""

import io
from typing import Union

from PIL import Image

from models.modal_client import get_ocr


class MiniCPMVOCR:
    """Client-side wrapper around the remote OCR handle from ``get_ocr()``.

    Every public method serializes its input to raw bytes with
    :meth:`_to_bytes` and passes them to the same-named attribute of the
    remote handle through that attribute's ``.remote(...)`` call. Whatever
    the remote call returns is handed back unchanged (annotated as ``str``).
    """

    # Modes that Pillow's PNG writer refuses to encode but that
    # ``Image.convert("RGB")`` can translate; without the conversion
    # ``save(..., format="PNG")`` raises ``OSError`` for such images.
    _RGB_FALLBACK_MODES = frozenset({"CMYK", "YCbCr", "HSV"})

    def __init__(self):
        """Obtain the remote OCR handle via ``get_ocr()``."""
        self._remote = get_ocr()

    @staticmethod
    def _to_bytes(
        image: Union[Image.Image, bytes, bytearray, memoryview],
    ) -> bytes:
        """Return *image* as a ``bytes`` payload.

        Args:
            image: Either already-encoded image data (``bytes``,
                ``bytearray`` or ``memoryview``) or an image object
                exposing ``save(fp, format=...)`` such as a PIL image.

        Returns:
            The input itself when it is ``bytes``; an immutable copy for
            other bytes-like inputs; otherwise the PNG encoding of the
            image.
        """
        if isinstance(image, bytes):
            return image
        if isinstance(image, (bytearray, memoryview)):
            # Normalize to immutable bytes so the payload handed to the
            # remote call always has the same type.
            return bytes(image)

        # ``getattr`` keeps duck-typed objects without a ``mode`` working.
        if getattr(image, "mode", None) in MiniCPMVOCR._RGB_FALLBACK_MODES:
            image = image.convert("RGB")

        buf = io.BytesIO()
        image.save(buf, format="PNG")
        return buf.getvalue()

    def extract_text(
        self, image: Union[Image.Image, bytes, bytearray, memoryview]
    ) -> str:
        """Send *image* to the remote ``extract_text`` and return its result."""
        return self._remote.extract_text.remote(self._to_bytes(image))

    def extract_tables(
        self, image: Union[Image.Image, bytes, bytearray, memoryview]
    ) -> str:
        """Send *image* to the remote ``extract_tables`` and return its result."""
        return self._remote.extract_tables.remote(self._to_bytes(image))

    def describe_chart(
        self, image: Union[Image.Image, bytes, bytearray, memoryview]
    ) -> str:
        """Send *image* to the remote ``describe_chart`` and return its result."""
        return self._remote.describe_chart.remote(self._to_bytes(image))

    def extract_structured(
        self, image: Union[Image.Image, bytes, bytearray, memoryview]
    ) -> str:
        """Send *image* to the remote ``extract_structured`` and return its result."""
        return self._remote.extract_structured.remote(self._to_bytes(image))