OppaAI committed on
Commit
4316eb0
·
verified ·
1 Parent(s): d9d8e3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -27
app.py CHANGED
@@ -4,9 +4,6 @@ from urllib.parse import urlencode
4
  import requests
5
  from fastmcp import FastMCP
6
  import logging
7
- from selenium import webdriver
8
- from selenium.webdriver.chrome.options import Options
9
- import time
10
 
11
  # Set up logging
12
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -18,7 +15,7 @@ mcp = FastMCP("Canada Job Bank Scraper Agent")
18
  @mcp.tool(name="search_jobs")
19
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
20
  """
21
- Scrape job listings from the Canada Job Bank website.
22
 
23
  Args:
24
  query (str): Job title or keyword to search for.
@@ -42,43 +39,31 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
42
  "User-Agent": (
43
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
44
  "AppleWebKit/537.36 (KHTML, like Gecko) "
45
- "Chrome/120.0.0.0 Safari/537.36"
46
  ),
47
  "Accept-Language": "en-US,en;q=0.9",
48
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9",
 
 
49
  }
50
 
51
  try:
52
- # Try requests first
53
  logger.info(f"Attempting to scrape: {url}")
54
  response = requests.get(url, headers=headers, timeout=10)
55
  response.raise_for_status()
56
  soup = BeautifulSoup(response.text, "html.parser")
57
- cards = soup.find_all("article", class_="job-result") # Updated class name (verify)
58
 
59
  if not cards:
60
- logger.warning("No job cards found with requests. Trying Selenium...")
61
- # Fallback to Selenium for dynamic content
62
- chrome_options = Options()
63
- chrome_options.add_argument("--headless")
64
- chrome_options.add_argument("--no-sandbox")
65
- chrome_options.add_argument("--disable-dev-shm-usage")
66
- driver = webdriver.Chrome(options=chrome_options)
67
- driver.get(url)
68
- time.sleep(3) # Wait for JavaScript to load
69
- soup = BeautifulSoup(driver.page_source, "html.parser")
70
- driver.quit()
71
- cards = soup.find_all("article", class_="job-result")
72
-
73
- if not cards:
74
- logger.error("No job listings found. Possible website structure change.")
75
- return {"error": "No job listings found or website structure changed."}
76
 
77
  jobs = []
78
  for card in cards[:limit]:
79
- title_elem = card.find("span", class_="job-title") # Updated class
80
- company_elem = card.find("li", class_="employer") # Updated class
81
- location_elem = card.find("li", class_="job-location") # Updated class
82
  link_elem = card.find("a", href=True)
83
 
84
  link = link_elem.get("href") if link_elem else None
 
4
  import requests
5
  from fastmcp import FastMCP
6
  import logging
 
 
 
7
 
8
  # Set up logging
9
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
15
  @mcp.tool(name="search_jobs")
16
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
17
  """
18
+ Scrape job listings from the Canada Job Bank website using requests only.
19
 
20
  Args:
21
  query (str): Job title or keyword to search for.
 
39
  "User-Agent": (
40
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
41
  "AppleWebKit/537.36 (KHTML, like Gecko) "
42
+ "Chrome/126.0.0.0 Safari/537.36"
43
  ),
44
  "Accept-Language": "en-US,en;q=0.9",
45
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
46
+ "Referer": "https://www.jobbank.gc.ca/",
47
+ "Connection": "keep-alive",
48
  }
49
 
50
  try:
 
51
  logger.info(f"Attempting to scrape: {url}")
52
  response = requests.get(url, headers=headers, timeout=10)
53
  response.raise_for_status()
54
  soup = BeautifulSoup(response.text, "html.parser")
55
+ cards = soup.find_all("article", class_="job-result") # Verify class name
56
 
57
  if not cards:
58
+ logger.warning("No job cards found. The website may use JavaScript or the HTML structure may have changed.")
59
+ logger.debug(f"HTML sample: {soup.prettify()[:1000]}")
60
+ return {"error": "No job listings found. The website may use JavaScript or the HTML structure may have changed."}
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  jobs = []
63
  for card in cards[:limit]:
64
+ title_elem = card.find("span", class_="job-title")
65
+ company_elem = card.find("li", class_="employer")
66
+ location_elem = card.find("li", class_="job-location")
67
  link_elem = card.find("a", href=True)
68
 
69
  link = link_elem.get("href") if link_elem else None