oneocr / ocr /models.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""OCR result data models."""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class BoundingRect:
    """Four-cornered (quadrilateral) bounding box.

    Stores one ``(x, y)`` coordinate pair per corner, numbered 1 through 4.
    Every coordinate defaults to ``0.0``.

    NOTE(review): the corner ordering and the coordinate units are not
    established by this class -- confirm against the code that fills it in.
    """

    # Corner 1
    x1: float = 0.0
    y1: float = 0.0

    # Corner 2
    x2: float = 0.0
    y2: float = 0.0

    # Corner 3
    x3: float = 0.0
    y3: float = 0.0

    # Corner 4
    x4: float = 0.0
    y4: float = 0.0
@dataclass
class OcrWord:
    """One recognized word together with its location and confidence.

    Only ``text`` is required; the remaining fields have defaults.
    """

    # The recognized characters of the word.
    text: str

    # Where the word sits in the image; ``None`` when no box is attached.
    bounding_rect: BoundingRect | None = None

    # Recognition confidence for this word; defaults to 0.0.
    # NOTE(review): the valid range is not fixed here -- presumably 0..1.
    confidence: float = 0.0
@dataclass
class OcrLine:
    """One line of recognized text and the words it is made of.

    Only ``text`` is required; the remaining fields have defaults.
    """

    # The recognized text of the whole line.
    text: str

    # Where the line sits in the image; ``None`` when no box is attached.
    bounding_rect: BoundingRect | None = None

    # Words belonging to this line. A factory is used so that every
    # instance gets its own fresh list instead of sharing one default.
    words: list[OcrWord] = field(default_factory=list)
@dataclass
class OcrResult:
    """Full outcome of one OCR recognition pass.

    All fields have defaults, so ``OcrResult()`` is a valid empty result.
    """

    # Recognized text for the whole result.
    text: str = ""

    # Angle of the text, when available.
    # NOTE(review): units and sign convention are not shown here -- confirm.
    text_angle: float | None = None

    # Recognized lines; each instance gets its own fresh list.
    lines: list[OcrLine] = field(default_factory=list)

    # Error description; defaults to ``None``.
    # NOTE(review): presumably set only when recognition failed -- confirm.
    error: str | None = None

    @property
    def is_empty(self) -> bool:
        """Whether ``text`` is empty or consists solely of whitespace."""
        stripped = self.text.strip()
        return not stripped

    @property
    def average_confidence(self) -> float:
        """Mean confidence over every word of every line.

        Words whose confidence is not strictly positive are left out of
        the mean entirely. Returns ``0.0`` when no word qualifies.
        """
        scores = [
            word.confidence
            for line in self.lines
            for word in line.words
            if word.confidence > 0
        ]
        return sum(scores) / len(scores) if scores else 0.0