Spaces:
Sleeping
Sleeping
File size: 326 Bytes
44a60f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
import pdfplumber
import re
def extract_text_from_pdf(file_obj):
    """Return the text of every page in a PDF as one string.

    Args:
        file_obj: Passed straight to ``pdfplumber.open``; presumably a path
            or a binary file-like object (confirm against the callers).

    Returns:
        The extracted page texts in document order, separated by a single
        newline. Pages for which ``extract_text()`` yields nothing are
        skipped, so the result is ``""`` when no page produces text.
    """
    with pdfplumber.open(file_obj) as pdf:
        # Extract while the document is still open; fall back to "" when a
        # page yields no text so the join below never sees a non-string.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join with a newline rather than concatenating directly: otherwise the
    # last word of one page is glued to the first word of the next, which
    # also hides the sentence boundary from whitespace-based splitters.
    return "\n".join(chunk for chunk in page_texts if chunk)
def simple_clause_split(text):
    """Split text into trimmed, non-empty sentence-like pieces.

    The text is cut at every run of whitespace that directly follows a
    ``.``, ``?`` or ``!``; the punctuation stays attached to the piece it
    ends. Each piece is stripped of surrounding whitespace, and pieces that
    end up empty are dropped.

    Args:
        text: The string to split.

    Returns:
        A list of the resulting pieces, in their original order.
    """
    clauses = []
    for fragment in re.split(r'(?<=[.?!])\s+', text):
        trimmed = fragment.strip()
        if trimmed:
            clauses.append(trimmed)
    return clauses
|