Spaces:

Siggmoid
/

ATS-Intelligence-Engine

Running

Deploy ATS Intelligence Engine to Hugging Face Space

d2b7a80 11 days ago

700 Bytes

	import fitz # PyMuPDF

	def extract_text_from_pdf(file_bytes: bytes) -> str:
	    """Extract plain text from a PDF given its raw bytes.

	    The text of every page is collected in page order, joined with
	    newlines into a single string, and stripped of leading/trailing
	    whitespace.

	    Args:
	        file_bytes: Raw content of the PDF file.

	    Returns:
	        The concatenated plain text of all pages.

	    Raises:
	        ValueError: If the PDF yields no text (e.g. a scanned,
	            image-only PDF).

	    Any exception raised by ``fitz.open`` itself (for instance on data it
	    cannot parse) is not caught here and propagates to the caller.
	    """
	    # Open from memory and use the document as a context manager so it is
	    # closed on every exit path, including the ValueError raised below.
	    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
	        # "text" = plain text mode
	        pages_text = [page.get_text("text") for page in doc]

	    full_text = "\n".join(pages_text).strip()

	    if not full_text:
	        raise ValueError(
	            "No text could be extracted from the PDF. "
	            "It may be a scanned image. Please upload a text-based PDF."
	        )

	    return full_text