Spaces:
Sleeping
Sleeping
| import pdfplumber | |
| import re | |
def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF as a single string.

    Args:
        file_obj: Whatever ``pdfplumber.open`` accepts; it is passed
            through unchanged.

    Returns:
        The text of all pages joined with a newline. Pages for which
        ``extract_text()`` gives a falsy result are skipped, so the
        result is ``""`` when no page yields any text.
    """
    with pdfplumber.open(file_obj) as pdf:
        # Normalize a falsy extract_text() result to "" so the filter
        # below only ever sees strings.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Put a newline between pages: without a separator the last word of
    # one page is fused to the first word of the next, and a sentence that
    # ends a page has no whitespace after its punctuation to split on.
    return "\n".join(chunk for chunk in page_texts if chunk)
def simple_clause_split(text):
    """Split *text* into sentence-like clauses.

    A boundary is any run of whitespace that directly follows a ``.``,
    ``?`` or ``!``; the punctuation stays attached to the clause before
    it. Each piece is stripped of surrounding whitespace and pieces that
    end up empty are dropped.

    Args:
        text: The string to split.

    Returns:
        A list of the non-empty, stripped clauses in their original order.
    """
    clauses = []
    for piece in re.split(r'(?<=[.?!])\s+', text):
        trimmed = piece.strip()
        if trimmed:
            clauses.append(trimmed)
    return clauses