zeroshotGPU / zsgdp /normalize /normalize_paddleocr.py
Arjunvir Singh
Initial commit: zeroshotGPU MVP with full eval surface
db06ffa
"""PaddleOCR Markdown normalization."""
from __future__ import annotations
from zsgdp.normalize.markdown import normalize_markdown_candidate
from zsgdp.schema import DocumentProfile, ParseCandidate
def normalize_paddleocr_markdown(*, markdown: str, profile: DocumentProfile, source_path: str) -> ParseCandidate:
    """Build a parse candidate from Markdown produced by PaddleOCR.

    Thin adapter over ``normalize_markdown_candidate`` that fills in the
    PaddleOCR-specific arguments. All parameters are keyword-only.

    Args:
        markdown: Markdown text to normalize; forwarded unchanged.
        profile: Document profile; its ``doc_id`` and ``file_type``
            attributes are forwarded to the shared normalizer.
        source_path: Path of the source document; forwarded unchanged.

    Returns:
        The result of ``normalize_markdown_candidate`` for this input.
    """
    # Identity fields are read from the profile, in the same order as the
    # keyword arguments are passed below.
    document_id = profile.doc_id
    document_type = profile.file_type

    # Backend tag attached to the candidate as provenance.
    backend_provenance = {"backend": "paddleocr"}

    candidate = normalize_markdown_candidate(
        markdown=markdown,
        doc_id=document_id,
        source_path=source_path,
        file_type=document_type,
        parser_name="paddleocr",
        confidence=0.78,  # fixed confidence value used for this parser
        provenance=backend_provenance,
    )
    return candidate