File size: 2,662 Bytes
46917c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import logging
from os import listdir
from os.path import isfile, join

import easygui
import requests
from bs4 import BeautifulSoup
from pathvalidate import sanitize_filename
from xhtml2pdf import pisa

"""
This script takes a LinkedIn job posting URL
and converts the description to a PDF file.
The PDF file is saved in the Data/JobDescription folder.
The name will be OrgName__Job Title_X.pdf, where X is the number of files in the folder.

IMPORTANT: Make sure the URL is to the actual job description,
and not the job search page.
"""


def linkedin_to_pdf(
    job_url: str,
    job_path: str = "Data/JobDescription/",
    timeout: float = 30.0,
) -> None:
    """Download a LinkedIn job posting and save its description as a PDF.

    The PDF is written to ``job_path`` as ``<Org>__<Title>_<X>.pdf``, where
    the ``<Org>__<Title>`` part is passed through ``sanitize_filename`` and
    ``X`` is the number of regular files already present in ``job_path``.

    Args:
        job_url: URL of the job posting page to fetch.
        job_path: Folder the PDF is written to. It is created if missing.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. When the server answers with a non-200 status the failure is
        logged and the function returns without writing a file.

    Raises:
        SystemExit: With exit status 1 when fetching, parsing or writing
            fails for any other reason (the error is logged first).
    """
    # Function-scope import: the file only brings `listdir` in from `os`.
    from os import makedirs

    try:
        # Create the output folder on first use; listdir() would otherwise
        # raise FileNotFoundError before any work is done.
        makedirs(job_path, exist_ok=True)
        files_number = sum(1 for f in listdir(job_path) if isfile(join(job_path, f)))

        # A timeout keeps an unresponsive server from hanging the script.
        page = requests.get(job_url, timeout=timeout)

        if page.status_code != 200:
            logging.error(
                "Failed to retrieve the job posting at %s. Status code: %s",
                job_url,
                page.status_code,
            )
            return

        # Parse the HTML content of the job posting using BeautifulSoup
        soup = BeautifulSoup(page.text, "html.parser")

        # Find the job title element; fail with a clear message when absent
        title_element = soup.find("h1", {"class": "topcard__title"})
        if title_element is None:
            raise ValueError("Job title element (h1.topcard__title) not found")
        job_title = title_element.text.strip()

        # Find the organization name element (try both selectors)
        organization_element = soup.find("span", {"class": "topcard__flavor"})
        if organization_element is None:
            organization_element = soup.find("a", {"class": "topcard__org-name-link"})
        if organization_element is None:
            raise ValueError("Organization name element not found")
        organization = organization_element.text.strip()

        # Find the job description element and concatenate its children
        job_description_element = soup.find(
            "div", {"class": "show-more-less-html__markup"}
        )
        if job_description_element is not None:
            job_description = "".join(
                str(element) for element in job_description_element.contents
            )
        else:
            # Best effort, as before: still produce a (blank) PDF.
            logging.warning("No job description found at %s", job_url)
            job_description = ""

        # Set file_path and sanitize organization name and job title
        file_name = f"{sanitize_filename(organization + '__' + job_title)}_{files_number}.pdf"
        file_path = join(job_path, file_name)

        # Create a PDF file and write the job description to it
        with open(file_path, "wb") as pdf_file:
            pisa_status = pisa.CreatePDF(job_description, dest=pdf_file, encoding="utf-8")

        # Do not report success when the converter flagged errors.
        if pisa_status.err:
            raise RuntimeError(f"PDF conversion reported errors for {file_path}")

        logging.info("PDF saved to %s", file_path)

    except Exception as e:
        logging.error(f"Could not get the description from the URL: {job_url}")
        logging.error(e)
        # SystemExit directly: the `exit` builtin comes from `site` and may be
        # missing; a non-zero status signals the failure to the caller.
        raise SystemExit(1)


if __name__ == "__main__":
    # enterbox() returns None when the dialog is cancelled, so check the
    # result before calling strip() on it.
    url = easygui.enterbox("Enter the URL of the LinkedIn Job Posting:")
    url = url.strip() if url else ""
    if url:
        linkedin_to_pdf(url)
    else:
        logging.error("No URL entered; nothing to do.")