Ashar086 commited on
Commit
12c2ccf
·
verified ·
1 Parent(s): 5a906f8

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +83 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+ import time
6
+ import random
7
+
8
def scrape_linkedin_jobs(keyword, location, num_pages=1):
    """Scrape LinkedIn's public job-search results pages.

    Args:
        keyword: Search term for the job (sent as the ``keywords`` query param).
        location: Location string to filter the search by.
        num_pages: Number of result pages to fetch; LinkedIn paginates in
            steps of 25 results per page.

    Returns:
        A list of dicts with keys 'Title', 'Company', 'Location', 'Link'.
        On a request error the partial results collected so far are returned.
    """
    base_url = "https://www.linkedin.com/jobs/search/"
    jobs = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    for page in range(num_pages):
        params = {
            "keywords": keyword,
            "location": location,
            "start": page * 25  # LinkedIn offsets results in steps of 25
        }

        try:
            # timeout= prevents the Streamlit app from hanging forever if
            # LinkedIn stalls (requests has no default timeout).
            response = requests.get(base_url, params=params, headers=headers, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            job_cards = soup.find_all('div', class_='base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card')

            if not job_cards:
                st.warning(f"No job cards found on page {page + 1}. The page structure might have changed.")
                continue

            for card in job_cards:
                title = card.find('h3', class_='base-search-card__title')
                company = card.find('h4', class_='base-search-card__subtitle')
                # BUG FIX: the original assigned this to `location`, shadowing
                # the function parameter — every page after the first would send
                # a BeautifulSoup Tag (or None) as the search location.
                job_location = card.find('span', class_='job-search-card__location')
                link = card.find('a', class_='base-card__full-link')

                # Skip cards missing any expected element rather than crashing.
                if title and company and job_location and link:
                    jobs.append({
                        'Title': title.text.strip(),
                        'Company': company.text.strip(),
                        'Location': job_location.text.strip(),
                        'Link': link['href']
                    })

            time.sleep(random.uniform(1, 3))  # Random delay between requests

        except requests.RequestException as e:
            st.error(f"An error occurred while fetching page {page + 1}: {str(e)}")
            break

    return jobs
54
+
55
# ---------------------------------------------------------------------------
# Streamlit UI: collect search inputs, run the scraper, and render results.
# ---------------------------------------------------------------------------
st.title("LinkedIn Job Scraper")

keyword = st.text_input("Enter job keyword:")
location = st.text_input("Enter location:")
num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)

if st.button("Scrape Jobs"):
    if not (keyword and location):
        # Both fields are required before a scrape can be started.
        st.warning("Please enter both keyword and location.")
    else:
        with st.spinner('Scraping jobs... This may take a moment.'):
            results = scrape_linkedin_jobs(keyword, location, num_pages)
            if not results:
                st.warning("No jobs found. Try different keywords or location.")
            else:
                frame = pd.DataFrame(results)
                st.success(f"Found {len(results)} jobs!")
                st.dataframe(frame)

                # Offer the scraped table as a downloadable CSV file.
                st.download_button(
                    label="Download CSV",
                    data=frame.to_csv(index=False),
                    file_name="linkedin_jobs.csv",
                    mime="text/csv",
                )

st.markdown("---")
st.markdown("Note: This scraper is for educational purposes only. Please respect LinkedIn's terms of service.")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ requests
3
+ beautifulsoup4
4
+ pandas