import re

import pdfplumber

# A clause boundary is a run of whitespace immediately preceded by '.', '?'
# or '!'. The lookbehind keeps the punctuation attached to the clause that
# it terminates.
_CLAUSE_BOUNDARY = re.compile(r'(?<=[.?!])\s+')


def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF as one string.

    Args:
        file_obj: The PDF to read; handed unchanged to ``pdfplumber.open``.

    Returns:
        The text of all pages in page order, with consecutive pages separated
        by a newline. Pages that yield no text are skipped, so a document
        without any extractable text produces ``""``.
    """
    with pdfplumber.open(file_obj) as pdf:
        # Extract while the document is still open. ``or ""`` guards against
        # ``extract_text()`` returning a falsy value for a page without text.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline instead of concatenating directly: otherwise the
    # last word of one page is fused with the first word of the next, which
    # also hides the sentence boundary from ``simple_clause_split``.
    return "\n".join(page_text for page_text in page_texts if page_text)


def simple_clause_split(text):
    """Split text into sentence-like clauses.

    The text is cut at every run of whitespace that directly follows ``.``,
    ``?`` or ``!``. Each piece is stripped of surrounding whitespace and
    empty pieces are discarded.

    Args:
        text: The string to split.

    Returns:
        A list of non-empty, stripped clauses in their original order; an
        empty list when ``text`` is empty or contains only whitespace.
    """
    stripped_pieces = (piece.strip() for piece in _CLAUSE_BOUNDARY.split(text))
    return [piece for piece in stripped_pieces if piece]