Spaces:
Runtime error
Runtime error
File size: 885 Bytes
c9ed90a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | from pypdf import PdfReader
from io import BytesIO
from typing import Union
def extract_text_from_pdf(pdf_content: Union[bytes, BytesIO]) -> str:
    """
    Extract text content from a PDF file.

    Args:
        pdf_content: PDF file content as raw bytes or a BytesIO object.
            Other in-memory byte buffers (bytearray, memoryview) are
            wrapped in a BytesIO as well.

    Returns:
        str: Text of every page, in page order, joined with newlines.

    Raises:
        ValueError: If creating the reader or extracting text fails for
            any reason. The underlying exception is attached as
            ``__cause__`` so the original traceback is preserved.
    """
    try:
        # Wrap raw byte buffers so the reader always receives a
        # file-like object.
        if isinstance(pdf_content, (bytes, bytearray, memoryview)):
            pdf_content = BytesIO(pdf_content)

        pdf_reader = PdfReader(pdf_content)

        # One text chunk per page, separated by a newline.
        return "\n".join(page.extract_text() for page in pdf_reader.pages)
    except Exception as e:
        # Callers rely on a single ValueError type; chain the cause
        # explicitly instead of discarding it.
        raise ValueError(f"Error extracting text from PDF: {e}") from e