Spaces:

Ashar086
/

Linkedin_Scrapper

Paused

App Files Files Community

Ashar086 committed on Aug 6, 2024

Commit

12c2ccf

verified ·

1 Parent(s): 5a906f8

Upload 2 files

Browse files

Files changed (2) hide show

app.py +83 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+import time
+import random
+def scrape_linkedin_jobs(keyword, location, num_pages=1):
+    base_url = "https://www.linkedin.com/jobs/search/"
+    jobs = []
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    }
+    for page in range(num_pages):
+        params = {
+            "keywords": keyword,
+            "location": location,
+            "start": page * 25
+        }
+        try:
+            response = requests.get(base_url, params=params, headers=headers)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.content, 'html.parser')
+            job_cards = soup.find_all('div', class_='base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card')
+            if not job_cards:
+                st.warning(f"No job cards found on page {page + 1}. The page structure might have changed.")
+                continue
+            for card in job_cards:
+                title = card.find('h3', class_='base-search-card__title')
+                company = card.find('h4', class_='base-search-card__subtitle')
+                location = card.find('span', class_='job-search-card__location')
+                link = card.find('a', class_='base-card__full-link')
+                if title and company and location and link:
+                    jobs.append({
+                        'Title': title.text.strip(),
+                        'Company': company.text.strip(),
+                        'Location': location.text.strip(),
+                        'Link': link['href']
+                    })
+            time.sleep(random.uniform(1, 3))  # Random delay between requests
+        except requests.RequestException as e:
+            st.error(f"An error occurred while fetching page {page + 1}: {str(e)}")
+            break
+    return jobs
+st.title("LinkedIn Job Scraper")
+keyword = st.text_input("Enter job keyword:")
+location = st.text_input("Enter location:")
+num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)
+if st.button("Scrape Jobs"):
+    if keyword and location:
+        with st.spinner('Scraping jobs... This may take a moment.'):
+            jobs = scrape_linkedin_jobs(keyword, location, num_pages)
+        if jobs:
+            df = pd.DataFrame(jobs)
+            st.success(f"Found {len(jobs)} jobs!")
+            st.dataframe(df)
+            csv = df.to_csv(index=False)
+            st.download_button(
+                label="Download CSV",
+                data=csv,
+                file_name="linkedin_jobs.csv",
+                mime="text/csv",
+            )
+        else:
+            st.warning("No jobs found. Try different keywords or location.")
+    else:
+        st.warning("Please enter both keyword and location.")
+st.markdown("---")
+st.markdown("Note: This scraper is for educational purposes only. Please respect LinkedIn's terms of service.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+requests
+beautifulsoup4
+pandas