# scrapper / app.py
# Author: brackozi — "Create app.py" (commit 4b926ea)
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
def scrape_jobs(link):
    """Fetch *link* and extract job listings from the page.

    Parameters
    ----------
    link : str
        URL of the careers page to scrape.

    Returns
    -------
    list[dict]
        One dict per listing with 'Title', 'Location' and 'Description'
        keys. Listings missing any of those elements are skipped.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status.
    """
    # Timeout keeps the Gradio worker from hanging forever on a dead host;
    # raise_for_status avoids silently parsing an HTTP error page.
    response = requests.get(link, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): these selectors assume a specific markup
    # (div.job-listing > h2.job-title / span.job-location /
    # div.job-description) and will match nothing on other sites.
    jobs = []
    for job_element in soup.find_all('div', class_='job-listing'):
        title_el = job_element.find('h2', class_='job-title')
        location_el = job_element.find('span', class_='job-location')
        description_el = job_element.find('div', class_='job-description')
        # The original code raised AttributeError when any sub-element was
        # absent; skip incomplete listings instead of crashing the scrape.
        if not (title_el and location_el and description_el):
            continue
        jobs.append({
            'Title': title_el.text.strip(),
            'Location': location_el.text.strip(),
            'Description': description_el.text.strip(),
        })
    return jobs
def export_to_excel(jobs, filename):
    """Write the scraped *jobs* records to *filename* as an Excel sheet.

    Parameters
    ----------
    jobs : list[dict]
        Job records as produced by ``scrape_jobs``.
    filename : str
        Destination path of the .xlsx file (row index is omitted).
    """
    pd.DataFrame(jobs).to_excel(filename, index=False)
def job_listing_scraper(link):
    """Gradio handler: scrape *link* and save results to an Excel file.

    Parameters
    ----------
    link : str
        Careers-page URL entered by the user.

    Returns
    -------
    str
        Human-readable status message shown in the Gradio output box.
    """
    output_file = "job_listings.xlsx"
    # Top-level UI boundary: surface failures as a readable message rather
    # than letting a raw traceback bubble up into the Gradio interface.
    try:
        job_listings = scrape_jobs(link)
    except Exception as exc:
        return f"Failed to scrape job listings: {exc}"
    # The selectors in scrape_jobs are site-specific; tell the user when
    # nothing matched instead of reporting success on an empty spreadsheet.
    if not job_listings:
        return "No job listings found at the given link."
    export_to_excel(job_listings, output_file)
    return f"Job listings scraped successfully! Saved to {output_file}"
# Define the Gradio interface: one text input (the careers-page URL)
# mapped to one text output (the status message from job_listing_scraper).
interface = gr.Interface(
    fn=job_listing_scraper,
    inputs="text",
    outputs="text",
    title="Job Listing Scraper",
    description="Enter the link to the carrier website and click 'Submit' to scrape job listings and save them to an Excel file.",
    examples=[["https://example.com/carrier"]],
)

# Launch the web server only when executed as a script, so importing this
# module (e.g. for testing) does not start a server as a side effect.
if __name__ == "__main__":
    interface.launch()