import random
import time

import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
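

# Launch with: streamlit run linkedin_job_scraper.py
# (the filename is an assumption; substitute whatever this script is saved as)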
def scrape_linkedin_jobs(keyword, location, num_pages=1):
    """Scrape LinkedIn's public job-search results and return a list of job dicts."""
    base_url = "https://www.linkedin.com/jobs/search/"
    jobs = []
    # A browser-like User-Agent makes the request less likely to be rejected.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        )
    }
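
    # LinkedIn's public search returns 25 postings per page, so the
    # `start` offset below advances in steps of 25.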
    for page in range(num_pages):
        params = {
            "keywords": keyword,
            "location": location,
            "start": page * 25,
        }

        try:
            # A timeout keeps a stalled request from hanging the app indefinitely.
            response = requests.get(base_url, params=params, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")

            # This long class string matches LinkedIn's job-card markup at the
            # time of writing; it is brittle and will break if the site changes.
            job_cards = soup.find_all(
                "div",
                class_="base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card",
            )

            if not job_cards:
                st.warning(f"No job cards found on page {page + 1}. The page structure might have changed.")
                continue

            for card in job_cards:
                title = card.find("h3", class_="base-search-card__title")
                company = card.find("h4", class_="base-search-card__subtitle")
                # Named job_location so this loop does not overwrite the
                # `location` search parameter still needed for later pages.
                job_location = card.find("span", class_="job-search-card__location")
                link = card.find("a", class_="base-card__full-link")

                # Keep only cards where every expected field was found.
                if title and company and job_location and link:
                    jobs.append({
                        "Title": title.text.strip(),
                        "Company": company.text.strip(),
                        "Location": job_location.text.strip(),
                        "Link": link["href"],
                    })

            # Random delay between page requests to scrape politely.
            time.sleep(random.uniform(1, 3))

        except requests.RequestException as e:
            st.error(f"An error occurred while fetching page {page + 1}: {e}")
            break

    return jobs
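

# Streamlit UI: collect the search parameters and trigger the scrape.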
st.title("LinkedIn Job Scraper")

keyword = st.text_input("Enter job keyword:")
location = st.text_input("Enter location:")
num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)

if st.button("Scrape Jobs"):
    if keyword and location:
        with st.spinner("Scraping jobs... This may take a moment."):
            jobs = scrape_linkedin_jobs(keyword, location, num_pages)
        if jobs:
            df = pd.DataFrame(jobs)
            st.success(f"Found {len(jobs)} jobs!")
            st.dataframe(df)

            # Offer the results as a CSV download.
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name="linkedin_jobs.csv",
                mime="text/csv",
            )
        else:
            st.warning("No jobs found. Try different keywords or location.")
    else:
        st.warning("Please enter both keyword and location.")

st.markdown("---")
st.markdown("Note: This scraper is for educational purposes only. Please respect LinkedIn's terms of service.")