Spaces:

DreamStream-1
/

HR-Resume-Analyzer

Runtime error

HR-Resume-Analyzer / text_extraction.py

Create text_extraction.py

3dfe527 verified about 1 year ago

961 Bytes

	import PyPDF2
	import re

	def extract_text_from_pdf(pdf_file_path):
	    """Return the text of every page of a PDF as one stripped string.

	    The file is opened in binary mode and parsed with ``PyPDF2.PdfReader``.
	    The text of each page is collected in page order and joined with a
	    single space; a page whose extraction yields nothing contributes an
	    empty string.

	    Args:
	        pdf_file_path: Path of the PDF file to read.

	    Returns:
	        The combined page text with surrounding whitespace removed.  If
	        anything goes wrong, no exception propagates; a string of the form
	        ``"Error reading PDF: <details>"`` is returned instead.
	    """
	    try:
	        with open(pdf_file_path, 'rb') as stream:
	            page_texts = []
	            for page in PyPDF2.PdfReader(stream).pages:
	                content = page.extract_text()
	                # Substitute '' for a falsy result so the join cannot fail.
	                page_texts.append(content if content else '')
	            combined = ' '.join(page_texts)
	        return combined.strip()
	    except Exception as err:
	        # Callers receive the failure as text rather than as an exception.
	        return f"Error reading PDF: {err!s}"

	def extract_text_from_txt(txt_file_path):
	    """Extract text from a TXT file, trying several encodings in turn.

	    The file is decoded as UTF-8 first (a leading byte-order mark, if
	    present, is dropped) and, if that fails, as Latin-1.

	    Args:
	        txt_file_path: Path of the text file to read.

	    Returns:
	        The file contents with leading and trailing whitespace removed.
	        Failures are reported as a returned string instead of an exception:
	        ``"Error reading TXT: <details>"`` when the file cannot be opened
	        or read.
	    """
	    # 'utf-8-sig' reads plain UTF-8 and also strips a BOM, which str.strip()
	    # would otherwise leave at the start of the text.  'latin-1' maps every
	    # byte to a character and therefore never fails, so it must stay last:
	    # any encoding listed after it could never be tried.
	    encodings = ('utf-8-sig', 'latin-1')
	    for encoding in encodings:
	        try:
	            with open(txt_file_path, 'r', encoding=encoding) as txt_file:
	                return txt_file.read().strip()
	        except UnicodeDecodeError:
	            # Wrong guess; fall through to the next candidate encoding.
	            continue
	        except Exception as e:
	            return f"Error reading TXT: {e}"
	    # Safeguard: only reachable if the list above loses its catch-all entry.
	    return "Error: Unable to decode file with supported encodings"