SathvikGanta's picture
Create utils.py
76cf667 verified
raw
history blame contribute delete
315 Bytes
import re

import pdfplumber
def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF as one string.

    Args:
        file_obj: Passed unchanged to ``pdfplumber.open``.

    Returns:
        The text of all pages, in page order, with a single newline between
        consecutive pages. Pages whose ``extract_text()`` result is falsy
        (``None`` or an empty string) are skipped. Returns ``""`` when no
        page yields any text.
    """
    with pdfplumber.open(file_obj) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        # Join with a newline so the last word of one page is not fused with
        # the first word of the next. The generator is consumed here, inside
        # the ``with`` block, while the PDF is still open.
        return "\n".join(chunk for chunk in page_texts if chunk)
def simple_clause_split(text):
    """Split text into clauses at each period followed by whitespace.

    The boundary is a ``.`` followed by one or more whitespace characters
    (space, tab, newline, ...), so sentences that end at a line break are
    split as well as those followed by a space. The period and whitespace at
    each boundary are removed; a period at the very end of the text is not
    followed by whitespace and therefore stays on the last clause.

    Args:
        text: The string to split. ``None`` or an empty string is accepted.

    Returns:
        A list of non-empty clauses with surrounding whitespace stripped.
        Returns ``[]`` when ``text`` is empty, ``None``, or contains only
        whitespace and boundaries.
    """
    if not text:
        return []
    pieces = re.split(r"\.\s+", text)
    # Strip once per piece and drop anything that is empty afterwards.
    return [clause for clause in map(str.strip, pieces) if clause]