retailpxdemo / base /ocr.py
leonge's picture
Updated pydantic models to support lists
e80c4c1
raw
history blame
1.55 kB
"""
Custom types for dealing with the Google Vision API JSON output.
"""
from enum import IntEnum
from typing import Any, Optional, List
from pydantic import BaseModel
class BreakType(IntEnum):
    """Integer codes for the kind of break reported in a ``DetectedBreak``.

    The numeric values follow the Cloud Vision API
    ``TextAnnotation.DetectedBreak.BreakType`` enum so that integer-encoded
    API output resolves to the correctly named member.
    """

    UNKNOWN = 0
    SPACE = 1
    SURE_SPACE = 2
    EOL_SURE_SPACE = 3
    # The API numbers HYPHEN (4) before LINE_BREAK (5); the two values were
    # previously swapped, which mislabelled every hyphen / line break.
    HYPHEN = 4
    LINE_BREAK = 5
class BlockType(IntEnum):
    """Integer codes used for the ``blockType`` field of a ``Block``."""

    UNKNOWN = 0
    TEXT = 1
    TABLE = 2
    PICTURE = 3
    RULER = 4
    BARCODE = 5
class DetectedBreak(BaseModel):
    """Break information carried by a ``TextProperty``."""

    # Kind of break, validated as a ``BreakType`` member.
    type: BreakType
    # Explicit ``None`` default: under pydantic v2 a bare ``Optional[...]``
    # annotation is a *required* field, so payloads that omit this key would
    # otherwise fail validation.
    # NOTE(review): the other models in this file use camelCase keys (e.g.
    # ``detectedBreak``); if the payload spells this key ``isPrefix`` it will
    # not populate this field -- confirm against real API output.
    is_prefix: Optional[bool] = None
class DetectedLanguage(BaseModel):
    """One detected-language entry: a language code and its confidence."""

    # Language code string as given in the payload.
    languageCode: str
    # Confidence value reported alongside the language code.
    confidence: float
class TextProperty(BaseModel):
    """Extra properties attached to a page, block, paragraph, word or symbol."""

    # Languages detected for the element this property belongs to.
    detectedLanguages: List[DetectedLanguage]
    # Break information, when present. The explicit ``None`` default keeps the
    # field optional under pydantic v2, where a bare ``Optional[...]``
    # annotation is treated as required.
    detectedBreak: Optional[DetectedBreak] = None
class Symbol(BaseModel):
    """A single symbol: its text, bounding box, confidence and properties."""

    # Optional extra properties. The explicit ``None`` default keeps the field
    # optional under pydantic v2, where a bare ``Optional[...]`` annotation is
    # treated as required.
    property: Optional[TextProperty] = None
    # Bounding box is accepted as-is; no structure is enforced (``Any``).
    boundingBox: Any
    # Text of the symbol.
    text: str
    # Confidence value reported for the symbol.
    confidence: float
class Word(BaseModel):
    """A word, made up of a list of ``Symbol`` objects."""

    # Optional extra properties. The explicit ``None`` default keeps the field
    # optional under pydantic v2, where a bare ``Optional[...]`` annotation is
    # treated as required.
    property: Optional[TextProperty] = None
    # Bounding box is accepted as-is; no structure is enforced (``Any``).
    boundingBox: Any
    # Symbols that make up the word.
    symbols: List[Symbol]
    # Confidence value reported for the word.
    confidence: float
class Paragraph(BaseModel):
    """A paragraph, made up of a list of ``Word`` objects."""

    # Optional extra properties. The explicit ``None`` default keeps the field
    # optional under pydantic v2, where a bare ``Optional[...]`` annotation is
    # treated as required.
    property: Optional[TextProperty] = None
    # Bounding box is accepted as-is; no structure is enforced (``Any``).
    boundingBox: Any
    # Words that make up the paragraph.
    words: List[Word]
    # Confidence value reported for the paragraph.
    confidence: float
class Block(BaseModel):
    """A block on a page, made up of a list of ``Paragraph`` objects."""

    # Optional extra properties. The explicit ``None`` default keeps the field
    # optional under pydantic v2, where a bare ``Optional[...]`` annotation is
    # treated as required.
    property: Optional[TextProperty] = None
    # Bounding box is accepted as-is; no structure is enforced (``Any``).
    boundingBox: Any
    # Paragraphs contained in the block.
    paragraphs: List[Paragraph]
    # Kind of block, validated as a ``BlockType`` member.
    blockType: BlockType
    # Confidence value reported for the block.
    confidence: float
class Page(BaseModel):
    """A page: its dimensions, confidence and the blocks it contains."""

    # Optional extra properties. The explicit ``None`` default keeps the field
    # optional under pydantic v2, where a bare ``Optional[...]`` annotation is
    # treated as required.
    property: Optional[TextProperty] = None
    # Page dimensions as integers.
    width: int
    height: int
    # Blocks contained in the page.
    blocks: List[Block]
    # Confidence value reported for the page.
    confidence: float
class TextAnnotation(BaseModel):
    """Full text annotation: a list of pages plus a text string."""

    # Pages covered by the annotation.
    pages: List[Page]
    # Text string supplied with the annotation.
    text: str
class Output(BaseModel):
    """Top-level model for the parsed output."""

    # Defaults to ``None`` when the payload has no ``fullTextAnnotation`` key.
    fullTextAnnotation: Optional[TextAnnotation] = None