File size: 594 Bytes
db06ffa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
"""PaddleOCR Markdown normalization."""

from __future__ import annotations

from zsgdp.normalize.markdown import normalize_markdown_candidate
from zsgdp.schema import DocumentProfile, ParseCandidate


def normalize_paddleocr_markdown(*, markdown: str, profile: DocumentProfile, source_path: str) -> ParseCandidate:
    """Normalize Markdown into a parse candidate tagged as PaddleOCR output.

    Delegates to ``normalize_markdown_candidate``, taking the document id and
    file type from ``profile`` and attaching the fixed PaddleOCR parser name,
    confidence score and provenance.

    Args:
        markdown: Markdown text to normalize.
        profile: Supplies ``doc_id`` and ``file_type`` for the candidate.
        source_path: Forwarded unchanged as the candidate's source path.

    Returns:
        The value returned by ``normalize_markdown_candidate``.
    """
    backend = "paddleocr"
    # Collect the forwarded arguments first so the fixed PaddleOCR metadata
    # sits visibly next to the per-document values.
    candidate_kwargs = {
        "markdown": markdown,
        "doc_id": profile.doc_id,
        "source_path": source_path,
        "file_type": profile.file_type,
        "parser_name": backend,
        "confidence": 0.78,
        "provenance": {"backend": backend},
    }
    return normalize_markdown_candidate(**candidate_kwargs)