Spaces:

marchji2415
/

resumematcher

Sleeping

resumematcher / scripts /LinkedinJobToPDF.py

March

first

46917c3 9 months ago

2.66 kB

	import logging
	from os import listdir
	from os.path import isfile, join

	import easygui
	import requests
	from bs4 import BeautifulSoup
	from pathvalidate import sanitize_filename
	from xhtml2pdf import pisa

	"""
	This script takes a LinkedIn job posting URL
	and converts the description to a PDF file.
	The PDF file is saved in the Data/JobDescription folder.
	The name will be OrgName__Job Title_X.pdf, where X is the number of files already in the folder when the PDF is created.

	IMPORTANT: Make sure the URL is to the actual job description,
	and not the job search page.
	"""


	def linkedin_to_pdf(job_url: str) -> None:
	    """Download a LinkedIn job posting and save its description as a PDF.

	    The page at ``job_url`` is fetched, the job title, organization name and
	    description markup are located by their CSS classes, and the description
	    HTML is rendered to ``Data/JobDescription/<Org>__<Title>_<N>.pdf``, where
	    ``N`` is the number of regular files already in that folder.

	    Args:
	        job_url: URL of the job posting page itself (not a search page).

	    Returns:
	        None. When the server answers with a non-200 status a message is
	        printed and the function returns without writing anything. When the
	        description element is missing, or the PDF renderer reports errors,
	        an error is logged and the function returns.

	    Raises:
	        SystemExit: If fetching, parsing or writing fails with an exception;
	            the error is logged first and the exit status is 1.
	    """
	    # Local import so this block does not depend on the file-level imports
	    # being extended.
	    from os import makedirs

	    job_path = "Data/JobDescription/"

	    try:
	        # Create the output folder on first use instead of failing with an
	        # unlogged FileNotFoundError from listdir().
	        makedirs(job_path, exist_ok=True)
	        files_number = sum(
	            1 for name in listdir(job_path) if isfile(join(job_path, name))
	        )

	        # Without a timeout requests may block forever on a stalled server.
	        page = requests.get(job_url, timeout=30)

	        if page.status_code != 200:
	            print(
	                f"Failed to retrieve the job posting at {job_url}. Status code: {page.status_code}"
	            )
	            return

	        # Parse the HTML content of the job posting using BeautifulSoup
	        soup = BeautifulSoup(page.text, "html.parser")

	        # Find the job title element; fail with a readable message rather
	        # than an AttributeError on None when the page layout differs.
	        title_element = soup.find("h1", {"class": "topcard__title"})
	        if title_element is None:
	            raise ValueError("Job title element not found in the page")
	        job_title = title_element.text.strip()

	        # Find the organization name element (try both selectors)
	        organization_element = soup.find("span", {"class": "topcard__flavor"})
	        if organization_element is None:
	            organization_element = soup.find(
	                "a", {"class": "topcard__org-name-link"}
	            )
	        if organization_element is None:
	            raise ValueError("Organization name element not found in the page")
	        organization = organization_element.text.strip()

	        # Find the job description element
	        job_description_element = soup.find(
	            "div", {"class": "show-more-less-html__markup"}
	        )
	        if job_description_element is None:
	            # Nothing to render: skip writing an empty, misleading PDF.
	            logging.error(f"No job description found at the URL: {job_url}")
	            return

	        # Keep the inner HTML of the description so pisa can render it.
	        job_description = "".join(
	            str(element) for element in job_description_element.contents
	        )

	        # Set file_path and sanitize organization name and job title
	        file_path = f"{job_path}{sanitize_filename(organization + '__' + job_title)}_{files_number}.pdf"

	        # Create a PDF file and write the job description to it
	        with open(file_path, "wb") as pdf_file:
	            pisa_status = pisa.CreatePDF(
	                job_description, dest=pdf_file, encoding="utf-8"
	            )

	        # pisa reports rendering problems through the status object instead
	        # of raising, so check it before claiming success.
	        if pisa_status.err:
	            logging.error(f"PDF conversion reported errors for {file_path}")
	            return

	        logging.info("PDF saved to " + file_path)

	    except Exception as e:
	        logging.error(f"Could not get the description from the URL: {job_url}")
	        logging.error(e)
	        # Exit with a failure status; the bare exit() helper comes from the
	        # site module and would report success (status 0).
	        raise SystemExit(1)


	if __name__ == "__main__":
	    # Ask for the posting URL in a GUI dialog. enterbox() returns None when
	    # the dialog is cancelled, so check before calling .strip() on it.
	    url = easygui.enterbox("Enter the URL of the LinkedIn Job Posting:")
	    if url is None:
	        logging.info("No URL entered; nothing to do.")
	    else:
	        linkedin_to_pdf(url.strip())