summarizer_space / pdf_reader.py
Asanaly
Update pdf_reader.py
441273a verified
raw
history blame
366 Bytes
import io
import logging

from PyPDF2 import PdfReader
def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Return the text of every page of a PDF, or "" if it cannot be read.

    Each page whose extracted text is non-empty contributes that text
    followed by a newline; pages that yield no text are skipped.

    Args:
        pdf_bytes: Raw contents of the PDF document.

    Returns:
        The concatenated page texts, or an empty string when reading or
        text extraction fails for any reason (best-effort contract).
    """
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        page_texts = (page.extract_text() for page in reader.pages)
        # Join once instead of growing a string with += on every page.
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
    except Exception:
        # Deliberately broad: callers rely on getting "" back for any
        # unreadable input. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt/SystemExit propagate, and the failure is logged
        # rather than silently discarded.
        logging.getLogger(__name__).warning(
            "Failed to extract text from PDF", exc_info=True
        )
        return ""