File size: 326 Bytes
44a60f7
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
import pdfplumber
import re

def extract_text_from_pdf(file_obj) -> str:
    """Extract the text of every page of a PDF as a single string.

    Args:
        file_obj: The PDF source, passed unchanged to ``pdfplumber.open``.

    Returns:
        The text of all pages that yielded any, in page order, separated
        by a single newline. Pages whose ``extract_text()`` result is
        ``None`` or empty are skipped. Returns ``""`` when no page yields
        text.
    """
    with pdfplumber.open(file_obj) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; pages without text contribute nothing.
    return "\n".join(chunk for chunk in page_texts if chunk)

def simple_clause_split(text):
    """Split *text* into trimmed, non-empty pieces at sentence punctuation.

    A break is made at any run of whitespace that directly follows ``.``,
    ``?`` or ``!``; the punctuation mark stays attached to the piece that
    precedes it.

    Args:
        text: The string to split.

    Returns:
        A list of the pieces with surrounding whitespace stripped. Pieces
        that are empty after stripping are left out.
    """
    clauses = []
    for fragment in re.split(r'(?<=[.?!])\s+', text):
        trimmed = fragment.strip()
        if trimmed:
            clauses.append(trimmed)
    return clauses