|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
import pandas as pd |
|
|
import gradio as gr |
|
|
|
|
|
def _element_text(parent, tag, css_class):
    """Return the stripped text of the first matching child, or '' if absent."""
    node = parent.find(tag, class_=css_class)
    # find() returns None when nothing matches; accessing .text on it would
    # raise AttributeError and abort the whole scrape for one bad listing.
    if node is None:
        return ''
    return node.text.strip()


def scrape_jobs(link, timeout=10):
    """Download a web page and extract the job listings it contains.

    Every ``<div class="job-listing">`` element in the page becomes one
    record built from its ``h2.job-title``, ``span.job-location`` and
    ``div.job-description`` children.

    Args:
        link: URL of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Passed to
            ``requests.get`` so a stalled server cannot hang the caller.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'Location'`` and
        ``'Description'``. A field whose element is missing from a listing
        is reported as an empty string. The list is empty when the page has
        no matching listings.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    response = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page as if it
    # were a page with zero listings.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    return [
        {
            'Title': _element_text(listing, 'h2', 'job-title'),
            'Location': _element_text(listing, 'span', 'job-location'),
            'Description': _element_text(listing, 'div', 'job-description'),
        }
        for listing in soup.find_all('div', class_='job-listing')
    ]
|
|
|
|
|
def export_to_excel(jobs, filename):
    """Write job records to an Excel file.

    Args:
        jobs: Records to export, in any form accepted by ``pd.DataFrame``
            (the rest of this file passes a list of dicts).
        filename: Path of the Excel file to write.
    """
    # index=False keeps the DataFrame's row index out of the spreadsheet.
    pd.DataFrame(jobs).to_excel(filename, index=False)
|
|
|
|
|
def job_listing_scraper(link):
    """Scrape job listings from ``link`` and save them to an Excel file.

    Args:
        link: URL of the page to scrape.

    Returns:
        A status message naming the file the listings were written to.
    """
    output_file = "job_listings.xlsx"
    # Scrape and export in one step; the intermediate list is not needed
    # anywhere else.
    export_to_excel(scrape_jobs(link), output_file)
    return f"Job listings scraped successfully! Saved to {output_file}"
|
|
|
|
|
|
|
|
# Gradio UI: one text input for the URL, one text output for the status
# message returned by job_listing_scraper.
interface = gr.Interface(
    fn=job_listing_scraper,
    inputs="text",
    outputs="text",
    title="Job Listing Scraper",
    description="Enter the link to the carrier website and click 'Submit' to scrape job listings and save them to an Excel file.",
    examples=[["https://example.com/carrier"]],
)

# Start the web server only when this file is executed as a script, so that
# importing the module (for reuse or testing) does not launch the app.
if __name__ == "__main__":
    interface.launch()