# NOTE: The following lines are Hugging Face web-UI residue captured when this
# file was saved from the Spaces "raw" view; kept as comments so the file parses.
# OppaAI's picture
# Update app.py
# 5638525 verified
# raw
# history blame
# 4.56 kB
import asyncio
import os
from typing import Optional
from urllib.parse import urlencode

import gradio as gr
from crawl4ai import AsyncWebCrawler, BrowserConfig, CacheMode, CrawlerRunConfig
from fastmcp import FastMCP
# Initialize FastMCP agent; tools registered via @mcp.tool become callable by MCP clients
mcp = FastMCP("Indeed Web Scraper Agent")
@mcp.tool(name="search_jobs")
def search_jobs_tool(query: str, location: str, limit: int = 10,
                     salary: Optional[str] = None, job_type: Optional[str] = None):
    """
    Scrape job postings from Indeed.ca with crawl4ai.

    Args:
        query: Job title or keywords to search for.
        location: Location (city, region) to search jobs in.
        limit: Maximum number of job results to return (default 10).
        salary: Not used by the scraper; reserved for future filtering.
        job_type: Not used by the scraper; reserved for future filtering.

    Returns:
        dict: ``{"jobs": [{title, company, location, url}, ...]}`` on success,
        or ``{"error": message}`` when the crawl fails.
    """
    # Gradio's Number component can deliver a float; slicing below needs an int.
    try:
        limit = max(0, int(limit))
    except (TypeError, ValueError):
        limit = 10

    base_url = "https://ca.indeed.com/jobs?"
    params = {
        "q": query,
        "l": location,
        "sort": "date",  # sort by most recent
    }
    url = base_url + urlencode(params)

    async def crawl_indeed():
        browser_cfg = BrowserConfig(headless=True, text_mode=True)
        crawler_cfg = CrawlerRunConfig(
            scan_full_page=True,
            delay_before_return_html=2.0,  # give JS-rendered cards time to appear
            cache_mode=CacheMode.BYPASS,   # always fetch fresh results
            remove_overlay_elements=True,
            exclude_external_links=True,
            exclude_social_media_links=True
        )
        async with AsyncWebCrawler(config=browser_cfg) as crawler:
            result = await crawler.arun(url, config=crawler_cfg)
            if not result.success:
                return {"error": result.error_message}
            # result.html can be None on some failures; guard before parsing.
            html = result.html or ""
            # Parse job cards manually with BeautifulSoup from crawl4ai html.
            # NOTE(review): the "tapItem" / "jobTitle" class names mirror Indeed's
            # markup at time of writing — verify against the live page if results
            # come back empty.
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(html, "html.parser")
            jobs = []
            cards = soup.find_all("a", class_="tapItem")
            for card in cards[:limit]:
                title_elem = card.find("h2", class_="jobTitle")
                company_elem = card.find("span", class_="companyName")
                location_elem = card.find("div", class_="companyLocation")
                link = card.get("href")
                # Card hrefs are usually relative; make them absolute.
                if link and not link.startswith("http"):
                    link = "https://ca.indeed.com" + link
                job = {
                    "title": title_elem.get_text(strip=True) if title_elem else "No Title",
                    "company": company_elem.get_text(strip=True) if company_elem else "Unknown Company",
                    "location": location_elem.get_text(strip=True) if location_elem else "Unknown Location",
                    "url": link or "#",
                }
                jobs.append(job)
            return {"jobs": jobs}

    # Run async crawl in sync context (FastMCP expects sync)
    return asyncio.run(crawl_indeed())
def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
    """
    Gradio handler: run the Indeed scraper tool and render results as Markdown.

    Args:
        query (str): Job title or keyword.
        location (str): Job location.
        limit (int, optional): Number of jobs to retrieve (default 10).
        salary (str, optional): Ignored by the scraper.
        job_type (str, optional): Ignored by the scraper.

    Returns:
        str: Markdown-formatted job list, or an error / empty-result message.
    """
    result = search_jobs_tool(query, location, limit, salary, job_type)

    # Surface crawl failures directly to the user.
    if "error" in result:
        return f"โŒ Error: {result['error']}"

    jobs = result.get("jobs", [])
    if not jobs:
        return "No jobs found for your search."

    # Build one Markdown entry per job and join them in a single pass.
    entries = [
        f"**{job['title']}** at *{job['company']}*\n๐Ÿ“ {job['location']}\n[Apply Here]({job['url']})\n\n"
        for job in jobs
    ]
    return "".join(entries)
# Gradio Interface: simple form UI over search_jobs_ui.
app = gr.Interface(
    fn=search_jobs_ui,
    inputs=[
        gr.Textbox(label="Job Title / Keyword"),
        gr.Textbox(label="Location"),
        # precision=0 asks Gradio for an integer count of results
        gr.Number(value=10, label="Number of Results (limit)", precision=0),
        # The next two inputs are collected but ignored by the scraper (reserved).
        gr.Textbox(label="Salary (optional, ignored)"),
        gr.Textbox(label="Job Type (optional, ignored)")
    ],
    outputs="markdown",  # search_jobs_ui returns Markdown text
    title="Indeed Job Search (Web Scraping with Crawl4AI) + FastMCP",
    description="Search jobs by scraping Indeed.ca with crawl4ai. Results sorted by most recent."
)
if __name__ == "__main__":
    # mcp_server=True additionally exposes the app's tools over Gradio's MCP endpoint
    app.launch(mcp_server=True)