Spaces:
Sleeping
Sleeping
| import re | |
| from pypdf import PdfReader | |
def extract_text_from_pdf(pdf_path: str):
    """Return the text of every page in a PDF as one whitespace-normalised string.

    Each page of the document at ``pdf_path`` is run through
    ``page.extract_text()``; pages that yield nothing (``None`` or an empty
    string) are skipped. The remaining page texts are concatenated, every
    run of whitespace (including newlines) is collapsed to a single space,
    and leading/trailing whitespace is removed.
    """
    document = PdfReader(pdf_path)

    # Lazily extract page by page, in document order.
    extracted_pages = (page.extract_text() for page in document.pages)

    # Terminate each non-empty page with a newline so adjacent pages never
    # fuse into a single word before whitespace is collapsed below.
    combined = "".join(chunk + "\n" for chunk in extracted_pages if chunk)

    return re.sub(r'\s+', ' ', combined).strip()
def structure_pdf_text(text: str) -> dict:
    """Group the sentences of a report under recognised section headings.

    ``text`` is split on ``". "``. Each non-empty piece is tested
    (case-insensitively) against a fixed list of section labels; when a
    piece starts with a label, that label becomes the current section.
    Every piece, including the one carrying the label, is appended to the
    current section. Pieces seen before any label go under
    ``"General Information"``.

    Args:
        text: Report text, typically the output of a PDF text extractor.

    Returns:
        A dict mapping section name to the section's pieces joined by a
        single space, in order of first appearance. ``"General Information"``
        is always present (possibly as an empty string). The ``". "``
        separators removed by the split are not restored.
    """
    section_patterns = [
        "Patient Name", "Age", "Gender", "Diagnosis", "Findings",
        "Test Results", "Impression", "Prescription", "Doctor's Notes"
    ]
    # Lower-case each label once instead of on every comparison.
    lowered_patterns = [(label, label.lower()) for label in section_patterns]

    structured_report = {"General Information": []}
    current_section = "General Information"

    for line in text.split(". "):
        line = line.strip()
        if not line:
            continue

        line_lower = line.lower()
        for label, prefix in lowered_patterns:
            if line_lower.startswith(prefix):
                current_section = label
                break

        # setdefault (rather than assigning a fresh list) keeps content that
        # was already collected when the same label shows up again later in
        # the text; previously a repeated label discarded the earlier lines.
        structured_report.setdefault(current_section, []).append(line)

    return {k: " ".join(v) for k, v in structured_report.items()}