Spaces:
Sleeping
Sleeping
File size: 1,067 Bytes
ec563fd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | import re
from pypdf import PdfReader
def extract_text_from_pdf(pdf_path: str):
    """Return the text of every page of a PDF as one normalized string.

    Each page of the document at ``pdf_path`` is passed through
    ``extract_text()``; pages that yield an empty or falsy result are
    skipped. The remaining page texts are joined, every run of
    whitespace (including newlines) is collapsed to a single space, and
    leading/trailing whitespace is removed.

    Args:
        pdf_path: Path handed directly to ``PdfReader``.

    Returns:
        The combined, single-line text; an empty string when no page
        produced any text.
    """
    pdf = PdfReader(pdf_path)
    # Lazily extract page by page, in document order.
    extracted = (page.extract_text() for page in pdf.pages)
    # The newline separator only keeps pages apart until the whitespace
    # normalization below turns it into a single space.
    combined = "\n".join(chunk for chunk in extracted if chunk)
    return re.sub(r'\s+', ' ', combined).strip()
def structure_pdf_text(text: str):
    """Group report text into named sections keyed by known headings.

    The text is split on the literal separator ``". "``. Each non-empty
    fragment is checked, case-insensitively, against a fixed list of
    section headings; when a fragment starts with one of them, that
    heading becomes the current section. Every fragment (including the
    one carrying the heading) is appended to the current section.
    Fragments seen before any heading go to ``"General Information"``.

    A heading that occurs more than once accumulates into the same
    section instead of discarding what was collected earlier.

    Args:
        text: Report text, expected to use ``". "`` between sentences.

    Returns:
        A dict mapping section name to its fragments joined by a single
        space. ``"General Information"`` is always present (possibly as
        an empty string); other sections appear only if their heading
        was found. Note that the ``". "`` separators consumed by the
        split are not restored in the joined output.
    """
    section_patterns = [
        "Patient Name", "Age", "Gender", "Diagnosis", "Findings",
        "Test Results", "Impression", "Prescription", "Doctor's Notes"
    ]
    # Lower-case the headings once rather than on every fragment.
    lowered_patterns = [(name.lower(), name) for name in section_patterns]

    structured_report = {"General Information": []}
    current_section = "General Information"

    for line in text.split(". "):
        line = line.strip()
        if not line:
            continue

        lowered_line = line.lower()
        # First matching heading wins, in list order.
        for prefix, section in lowered_patterns:
            if lowered_line.startswith(prefix):
                current_section = section
                break

        # setdefault keeps earlier content when a heading repeats;
        # re-assigning a fresh list here would silently drop it.
        structured_report.setdefault(current_section, []).append(line)

    return {k: " ".join(v) for k, v in structured_report.items()}
|