Spaces:
Running on Zero
Running on Zero
| """olmOCR Markdown normalization.""" | |
| from __future__ import annotations | |
| from zsgdp.normalize.markdown import normalize_markdown_candidate | |
| from zsgdp.schema import DocumentProfile, ParseCandidate | |
def normalize_olmocr_markdown(*, markdown: str, profile: DocumentProfile, source_path: str) -> ParseCandidate:
    """Turn olmOCR Markdown output into a parse candidate.

    This is a thin adapter: all normalization work is delegated to
    ``normalize_markdown_candidate``; this function only supplies the
    olmOCR-specific metadata.

    Args:
        markdown: Markdown text to normalize.
        profile: Document profile; its ``doc_id`` and ``file_type`` are
            forwarded to the shared normalizer.
        source_path: Path of the source document, forwarded unchanged.

    Returns:
        Whatever ``normalize_markdown_candidate`` returns for these
        arguments (annotated as ``ParseCandidate``).
    """
    # Collect the forwarded arguments first so the olmOCR-specific
    # constants (parser name, confidence, provenance) are easy to spot.
    candidate_kwargs = {
        "markdown": markdown,
        "doc_id": profile.doc_id,
        "source_path": source_path,
        "file_type": profile.file_type,
        "parser_name": "olmocr",
        "confidence": 0.80,
        "provenance": {"backend": "olmocr"},
    }
    return normalize_markdown_candidate(**candidate_kwargs)