Spaces:
Sleeping
Sleeping
| import requests | |
| # Shell command, not Python — run `pip install beautifulsoup4` in a terminal before executing this script. | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
def get_job_details(job_url, timeout=10):
    """Fetch one job posting page and extract its description and criteria text.

    Args:
        job_url: URL of the individual job posting page.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``(job_description, skills)`` tuple.

        * ``job_description`` is the stripped text of the first
          ``div.show-more-less-html__markup`` element, or ``''`` if the page
          has none.
        * ``skills`` is a list holding the stripped text of every
          ``span.job-criteria__text`` found inside the first
          ``div.job-criteria__list`` element, or ``[]`` if the page has none.

        ``('', [])`` is returned when ``job_url`` is empty, the request
        fails, or the response status is not 200.
    """
    if not job_url:
        return '', []

    try:
        response = requests.get(job_url, timeout=timeout)
    except requests.RequestException:
        # Treat a network failure (timeout, DNS, refused connection, bad URL)
        # the same as a non-200 response so one bad posting cannot abort a
        # caller that is looping over many postings.
        return '', []

    if response.status_code != 200:
        return '', []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract job description
    description_elem = soup.find('div', {'class': 'show-more-less-html__markup'})
    job_description = description_elem.text.strip() if description_elem is not None else ''

    # Extract skills
    skills_elem = soup.find('div', {'class': 'job-criteria__list'})
    if skills_elem is None:
        skills = []
    else:
        skills = [
            skill.text.strip()
            for skill in skills_elem.find_all('span', {'class': 'job-criteria__text'})
        ]

    return job_description, skills
# LinkedIn job-search results URL; the query string searches for the keywords
# "power bi developer".
url = 'https://www.linkedin.com/jobs/search/?currentJobId=3834896045&f_WT=3%2C1%2C2&geoId=102713980&keywords=power%20bi%20developer&location=&origin=JOB_SEARCH_PAGE_KEYWORD_AUTOCOMPLETE&refresh=true&sortBy=R'

# The timeout keeps a stalled connection from blocking module import forever.
# NOTE(review): nothing visible in this section reads `response` (scrap_data
# issues its own request) — confirm whether this import-time fetch is needed.
response = requests.get(url, timeout=10)
def _tag_text(tag):
    """Return the stripped text of a BeautifulSoup tag, or '' when tag is None."""
    return tag.text.strip() if tag is not None else ''


def scrap_data(url, timeout=10):
    """Scrape a job-search results page into a DataFrame, one row per job card.

    Every ``div.job-search-card`` on the page becomes one row. For each card
    the linked posting page is fetched through ``get_job_details`` to obtain
    the description and skills. Nothing is written to disk; the caller
    receives the DataFrame.

    Args:
        url: URL of the job-search results page.
        timeout: Seconds to wait for the results page before giving up;
            passed to ``requests.get``. It applies to the listing request
            only — the per-job requests are made by ``get_job_details``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Title``, ``Company``,
        ``Location``, ``Job Link``, ``Job Description`` and ``Skills`` (the
        skills joined with ``', '``). A field whose element is missing from a
        card is stored as ``''``. Returns ``None``, after printing a message,
        when the results page cannot be fetched or answers with a non-200
        status.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        response = None

    if response is None or response.status_code != 200:
        print("Failed to fetch job listings.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    columns = ['Title', 'Company', 'Location', 'Job Link', 'Job Description', 'Skills']
    rows = []

    for job in soup.find_all('div', {'class': 'job-search-card'}):
        # find() returns None for a missing element, so every lookup goes
        # through _tag_text instead of dereferencing .text directly.
        title = _tag_text(job.find('h3', {'class': 'base-search-card__title'}))
        company = _tag_text(job.find('a', {'class': 'hidden-nested-link'}))
        location = _tag_text(job.find('span', {'class': 'job-search-card__location'}))

        anchor_tag = job.find('a', class_='base-card__full-link')
        href_link = anchor_tag.get('href', '') if anchor_tag is not None else ''

        # Only fetch the detail page when the card actually carries a link.
        if href_link:
            job_description, skills = get_job_details(href_link)
        else:
            job_description, skills = '', []

        rows.append({
            'Title': title,
            'Company': company,
            'Location': location,
            'Job Link': href_link,
            'Job Description': job_description,
            'Skills': ', '.join(skills),
        })

    # Passing `columns` keeps the header present even when no cards were found.
    return pd.DataFrame(rows, columns=columns)