File size: 365 Bytes
65691ad
 
 
 
 
86d5840
65691ad
 
 
86d5840
 
 
65691ad
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# pdf_parser.py
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, joined by newlines.

    Args:
        pdf_path: Location of the document, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output, in page
        order, with ``"\\n"`` between consecutive pages.

    Raises:
        Whatever ``fitz.open`` raises when the document cannot be opened.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if text extraction fails partway through.
    with fitz.open(pdf_path) as doc:
        # The join consumes the generator while the document is still open.
        return "\n".join(page.get_text() for page in doc)

def parse_data_blocks(text):
    """Collect ``key: value`` pairs found in *text* into a dictionary.

    Each line is split at its first colon; the part before it becomes the
    key and the remainder the value, both stripped of surrounding
    whitespace. Lines without a colon are skipped. When a key occurs more
    than once, the value from the last occurrence is kept.
    """
    # partition() yields an empty separator when the line has no colon,
    # which is what the filter below relies on to drop such lines.
    pieces = (row.partition(':') for row in text.splitlines())
    return {
        name.strip(): value.strip()
        for name, colon, value in pieces
        if colon
    }