import re
from pypdf import PdfReader

def extract_text_from_pdf(pdf_path: str):
    """Return the text of all pages of the PDF at *pdf_path* as a single line.

    Each page is read with ``extract_text()``; pages that yield a falsy
    result (``None`` or an empty string) are skipped. The remaining page
    texts are concatenated, every run of whitespace is collapsed to one
    space, and leading/trailing whitespace is removed.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        The whitespace-normalised text; an empty string when no page
        produced any text.
    """
    pdf = PdfReader(pdf_path)
    extracted = [page.extract_text() for page in pdf.pages]

    # A newline follows every non-empty page so that the last word of one
    # page and the first word of the next stay separated once whitespace is
    # collapsed below.
    combined = "".join(chunk + "\n" for chunk in extracted if chunk)

    return re.sub(r'\s+', ' ', combined).strip()

def structure_pdf_text(text: str) -> dict:
    """Group the sentences of *text* under known report section headings.

    The text is split on ``". "``. Each non-empty piece is stripped and
    compared, case-insensitively, against a fixed list of section names.
    A piece that starts with a section name switches the current section
    to that name; the piece itself (heading included) and every following
    piece are stored under it until another heading is seen. Pieces that
    occur before any heading go under ``"General Information"``.

    A heading that shows up more than once keeps accumulating into the
    same section instead of discarding what was collected earlier.

    Args:
        text: Flattened report text, e.g. the output of a PDF extraction.

    Returns:
        A dict mapping section name to the space-joined pieces of that
        section. ``"General Information"`` is always present (possibly as
        an empty string); other sections appear only if their heading was
        found, in order of first appearance.
    """
    section_patterns = [
        "Patient Name", "Age", "Gender", "Diagnosis", "Findings",
        "Test Results", "Impression", "Prescription", "Doctor's Notes"
    ]
    # Lower-case the headings once instead of once per sentence.
    lowered_patterns = [(name, name.lower()) for name in section_patterns]

    structured_report = {"General Information": []}
    current_section = "General Information"

    for sentence in text.split(". "):
        sentence = sentence.strip()
        if not sentence:
            continue

        lowered_sentence = sentence.lower()
        for name, prefix in lowered_patterns:
            if lowered_sentence.startswith(prefix):
                current_section = name
                break

        # setdefault creates the list only the first time a section is
        # seen; re-assigning a fresh list here would wipe earlier content
        # whenever a later sentence happens to start with the same heading.
        structured_report.setdefault(current_section, []).append(sentence)

    return {k: " ".join(v) for k, v in structured_report.items()}