Spaces:
Paused
Paused
| import gradio as gr | |
| import logging | |
| from linkedin_jobs_scraper import LinkedinScraper | |
| from linkedin_jobs_scraper.events import Events, EventData, EventMetrics | |
| from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters | |
| from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters | |
| import pandas as pd | |
# Route log records to a file, one timestamped line per record.
logging.basicConfig(
    filename="job_scraper.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Module-level accumulator: on_data appends one dict per scraped job and
# scrape_jobs rebinds it to a fresh list at the start of every run.
job_data = []
| # Event Handlers | |
def on_data(data: EventData):
    """Store one scraped job posting in the module-level ``job_data`` list.

    Reads the ``date``, ``title``, ``company``, ``location``, ``link`` and
    ``description`` attributes of ``data`` and appends them as a dict whose
    keys become the column names of the results table.
    """
    description = data.description
    record = {
        'Date Posted': data.date,
        'Title': data.title,
        'Company': data.company,
        'Location': data.location,
        'Job Link': data.link,
        'Description Length': len(description),
        'Description': description,
    }
    job_data.append(record)
def on_end():
    """Write an INFO log record noting that the scraping run has finished."""
    completion_note = "[ON_END] Scraping completed."
    logging.info(completion_note)
| # Scraper function | |
def scrape_jobs(query, locations, time_filter):
    """Run a LinkedIn job search and return the collected postings.

    Args:
        query: Search text passed to the scraper's ``Query``.
        locations: Comma-separated location names, e.g.
            ``"United States, India"``. Whitespace around each name is
            stripped, and blank entries (such as the one left by a trailing
            comma) are dropped unless the whole input is blank.
        time_filter: UI label for the posting-age filter.
            ``"From Last 24 Hours"`` selects ``TimeFilters.DAY``; any other
            value selects ``TimeFilters.MONTH``.

    Returns:
        A ``(table, message)`` pair. ``table`` is a DataFrame built from the
        rows gathered by ``on_data``, an empty DataFrame when no rows were
        gathered, or ``None`` when an exception was raised. ``message`` is a
        human-readable status string.
    """
    global job_data
    try:
        # Rebind to a fresh list so rows from a previous run are not reported again.
        job_data = []

        scraper = LinkedinScraper(
            chrome_executable_path=None,
            chrome_binary_location=None,
            chrome_options=None,
            headless=True,
            max_workers=5,
            slow_mo=0.8,
            page_load_timeout=100,
        )
        scraper.on(Events.DATA, on_data)
        scraper.on(Events.END, on_end)

        # Only the 24-hour label maps to DAY; every other label (including
        # unknown ones) falls back to MONTH.
        if time_filter == "From Last 24 Hours":
            selected_time = TimeFilters.DAY
        else:
            selected_time = TimeFilters.MONTH

        # "A, B" must become ["A", "B"], not ["A", " B"]: the text typed
        # after each comma usually starts with a space.
        stripped = [name.strip() for name in locations.split(',')]
        # Drop blank entries; if every entry is blank keep the list as it is
        # so an entirely empty input is passed through unchanged.
        location_list = [name for name in stripped if name] or stripped

        queries = [
            Query(
                query=query,
                options=QueryOptions(
                    locations=location_list,
                    apply_link=True,
                    skip_promoted_jobs=False,
                    page_offset=0,
                    limit=100,
                    filters=QueryFilters(
                        time=selected_time,
                    ),
                ),
            ),
        ]
        scraper.run(queries)

        if not job_data:
            logging.warning("No job data found.")
            return pd.DataFrame(), 'No jobs found.'

        df = pd.DataFrame(job_data)
        message = f"Jobs ({len(job_data)}) data successfully scraped."
        logging.info(message)
        return df, message
    except Exception as e:
        # Top-level boundary for the UI: log the traceback and report the
        # failure as a message instead of letting it propagate.
        logging.exception("An error occurred during scraping: %s", e)
        message = f"An error occurred during scraping: {e}. Please check the logs for more details."
        return None, message
def gradio_interface(query, locations, time_filter):
    """Gradio callback: forward the form values to ``scrape_jobs``.

    Returns the ``(table, message)`` pair produced by ``scrape_jobs`` so it
    can be mapped onto the interface's two output components.
    """
    return scrape_jobs(query, locations, time_filter)
# App layout: three inputs (query, locations, time filter) feed
# gradio_interface, which fills a results table and a status message.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(
            label="Job Query",
            placeholder="e.g., Data Scientist",
            value="Blockchain developers",
        ),
        gr.Textbox(
            label="Locations (comma-separated)",
            placeholder="e.g., United States, India",
            value="United States, United Kingdom, India",
        ),
        gr.Dropdown(
            label="Time Filter",
            choices=["From Past Month", "From Last 24 Hours"],
            value="From Last 24 Hours",  # Default option
            type="value",
        ),
    ],
    outputs=[
        gr.Dataframe(
            label="Job Results",
            # Mirror the keys written by on_data so the empty table shown
            # before the first run has the same columns as the results.
            headers=[
                'Date Posted',
                'Title',
                'Company',
                'Location',
                'Job Link',
                'Description Length',
                'Description',
            ],
            interactive=True,
        ),
        gr.Textbox(label="Message"),
    ],
    title="Job Scraper",
    description="Enter a job query and locations to scrape job postings and display the results in a table.",
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()