final_year / ingestion /pdf_reader.py
jayasrees's picture
first commit
9d21edd
raw
history blame contribute delete
248 Bytes
import pdfplumber
def extract_text_from_pdf(path):
    """Return the text of every page of a PDF as a single string.

    Args:
        path: Location of the PDF; passed unchanged to ``pdfplumber.open``.

    Returns:
        The text of each page for which ``extract_text()`` returned a
        non-empty value, each followed by a newline, concatenated in page
        order. An empty string when no page yields any text.
    """
    page_texts = []
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            # Extract once and reuse the result, rather than running the
            # extraction a second time just to get the value.
            page_text = page.extract_text()
            # Skip pages with no text (falsy result) so they do not add
            # blank lines to the output.
            if page_text:
                page_texts.append(page_text + "\n")
    return "".join(page_texts)