Spaces:

marchji2415
/

resumematcher

Sleeping

March

first

46917c3 9 months ago

2.42 kB

	import glob
	import os

	from pypdf import PdfReader


	def get_pdf_files(file_path):
	"""
	Get all PDF files from the specified file path.

	Args:
	file_path (str): The directory path containing the PDF files.

	Returns:
	list: A list containing the paths of all the PDF files in the directory.
	"""
	if os.path.exists(file_path):
	return glob.glob(os.path.join(file_path, "*.pdf"))
	else:
	return []


	def read_multiple_pdf(file_path: str) -> list:
	"""
	Read multiple PDF files from the specified file path and extract the text from each page.

	Args:
	file_path (str): The directory path containing the PDF files.

	Returns:
	list: A list containing the extracted text from each page of the PDF files.
	"""
	pdf_files = get_pdf_files(file_path)
	output = []
	for file in pdf_files:
	try:
	with open(file, "rb") as f:
	pdf_reader = PdfReader(f)
	count = pdf_reader.getNumPages()
	for i in range(count):
	page = pdf_reader.getPage(i)
	output.append(page.extractText())
	except Exception as e:
	print(f"Error reading file '{file}': {str(e)}")
	return output


	def read_single_pdf(file_path: str) -> str:
	"""
	Read a single PDF file and extract the text from each page.

	Args:
	file_path (str): The path of the PDF file.

	Returns:
	list: A list containing the extracted text from each page of the PDF file.
	"""
	output = []
	try:
	with open(file_path, "rb") as f:
	pdf_reader = PdfReader(f)
	count = len(pdf_reader.pages)
	for i in range(count):
	page = pdf_reader.pages[i]
	output.append(page.extract_text())
	except Exception as e:
	print(f"Error reading file '{file_path}': {str(e)}")
	return str(" ".join(output))


	def get_pdf_files(file_path: str) -> list:
	"""
	Get a list of PDF files from the specified directory path.

	Args:
	file_path (str): The directory path containing the PDF files.

	Returns:
	list: A list of PDF file paths.
	"""
	pdf_files = []
	try:
	pdf_files = glob.glob(os.path.join(file_path, "*.pdf"))
	except Exception as e:
	print(f"Error getting PDF files from '{file_path}': {str(e)}")
	return pdf_files