Spaces:

abdullah090809
/

Job_Tracker_API

Sleeping

Backend i guess complete

cf25e9f 5 days ago

755 Bytes

	from pypdf import PdfReader
	from fastapi import HTTPException, status
	import io


	def extract_text_from_pdf(file_bytes: bytes) -> str:
	    """Extract the text of every page of an in-memory PDF.

	    Args:
	        file_bytes: Raw contents of the PDF file.

	    Returns:
	        The text of all pages concatenated in page order, with leading
	        and trailing whitespace removed.

	    Raises:
	        HTTPException: 400 if the bytes cannot be parsed or read as a
	            PDF, or if parsing succeeds but no text is found.
	    """
	    # Only the parsing work lives inside the try, so the "no text" error
	    # raised below can never be mistaken for a parser failure.
	    try:
	        reader = PdfReader(io.BytesIO(file_bytes))
	        # `or ""` guards against a page whose extraction result is falsy.
	        text = "".join(
	            page.extract_text() or "" for page in reader.pages
	        ).strip()
	    except Exception as exc:
	        # Any parser failure is reported as a client error; chaining keeps
	        # the underlying cause available in tracebacks and logs.
	        raise HTTPException(
	            status_code=status.HTTP_400_BAD_REQUEST,
	            detail="Invalid or corrupted PDF file",
	        ) from exc

	    if not text:
	        raise HTTPException(
	            status_code=status.HTTP_400_BAD_REQUEST,
	            detail="Could not extract text from PDF. The file may be scanned/image-based.",
	        )

	    return text