File size: 795 Bytes
3370983
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""Document parser utilities."""

# Re-exports: these names are imported only so they can be reached from this
# module; nothing below uses them directly. Each one is also listed in
# ``__all__`` so linters do not treat the imports as unused.
from .text import normalize_bullets, tag_contacts, EMAIL_RE, PHONE_RE, URL_RE
from .image import render_pdf_to_images, pil_to_png_data_uri, split_halves
from .sections import (
    parse_sections_from_json_text,
    normalize_sections,
    merge_duplicate_titles,
    build_contact_section_from_filename,
    process_section,
    apply_postprocessing,
)

# Public API of this module: the names bound by a star-import of it.
# Keep this list in sync with the imports above (currently a one-to-one match),
# and keep it a plain list literal so static tools can read it.
__all__ = [
    # Re-exported from .text
    "normalize_bullets",
    "tag_contacts",
    "EMAIL_RE",
    "PHONE_RE",
    "URL_RE",
    # Re-exported from .image
    "render_pdf_to_images",
    "pil_to_png_data_uri",
    "split_halves",
    # Re-exported from .sections
    "parse_sections_from_json_text",
    "normalize_sections",
    "merge_duplicate_titles",
    "build_contact_section_from_filename",
    "process_section",
    "apply_postprocessing",
]