Spaces:

OppaAI
/

Job-Search-MCP-Server

Running

App Files Community

OppaAI committed on Jun 8, 2025

Commit

4316eb0

verified ·

1 Parent(s): d9d8e3b

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -27

app.py CHANGED Viewed

@@ -4,9 +4,6 @@ from urllib.parse import urlencode
 import requests
 from fastmcp import FastMCP
 import logging
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-import time
 # Set up logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -18,7 +15,7 @@ mcp = FastMCP("Canada Job Bank Scraper Agent")
 @mcp.tool(name="search_jobs")
 def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
     """
-    Scrape job listings from the Canada Job Bank website.
     Args:
         query (str): Job title or keyword to search for.
@@ -42,43 +39,31 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
         "User-Agent": (
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
             "AppleWebKit/537.36 (KHTML, like Gecko) "
-            "Chrome/120.0.0.0 Safari/537.36"
         ),
         "Accept-Language": "en-US,en;q=0.9",
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9",
     }
     try:
-        # Try requests first
         logger.info(f"Attempting to scrape: {url}")
         response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, "html.parser")
-        cards = soup.find_all("article", class_="job-result")  # Updated class name (verify)
         if not cards:
-            logger.warning("No job cards found with requests. Trying Selenium...")
-            # Fallback to Selenium for dynamic content
-            chrome_options = Options()
-            chrome_options.add_argument("--headless")
-            chrome_options.add_argument("--no-sandbox")
-            chrome_options.add_argument("--disable-dev-shm-usage")
-            driver = webdriver.Chrome(options=chrome_options)
-            driver.get(url)
-            time.sleep(3)  # Wait for JavaScript to load
-            soup = BeautifulSoup(driver.page_source, "html.parser")
-            driver.quit()
-            cards = soup.find_all("article", class_="job-result")
-        if not cards:
-            logger.error("No job listings found. Possible website structure change.")
-            return {"error": "No job listings found or website structure changed."}
         jobs = []
         for card in cards[:limit]:
-            title_elem = card.find("span", class_="job-title")  # Updated class
-            company_elem = card.find("li", class_="employer")   # Updated class
-            location_elem = card.find("li", class_="job-location")  # Updated class
             link_elem = card.find("a", href=True)
             link = link_elem.get("href") if link_elem else None

 import requests
 from fastmcp import FastMCP
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 @mcp.tool(name="search_jobs")
 def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
     """
+    Scrape job listings from the Canada Job Bank website using requests only.
     Args:
         query (str): Job title or keyword to search for.
         "User-Agent": (
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
             "AppleWebKit/537.36 (KHTML, like Gecko) "
+            "Chrome/126.0.0.0 Safari/537.36"
         ),
         "Accept-Language": "en-US,en;q=0.9",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Referer": "https://www.jobbank.gc.ca/",
+        "Connection": "keep-alive",
     }
     try:
         logger.info(f"Attempting to scrape: {url}")
         response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, "html.parser")
+        cards = soup.find_all("article", class_="job-result")  # Verify class name
         if not cards:
+            logger.warning("No job cards found. The website may use JavaScript or the HTML structure may have changed.")
+            logger.debug(f"HTML sample: {soup.prettify()[:1000]}")
+            return {"error": "No job listings found. The website may use JavaScript or the HTML structure may have changed."}
         jobs = []
         for card in cards[:limit]:
+            title_elem = card.find("span", class_="job-title")
+            company_elem = card.find("li", class_="employer")
+            location_elem = card.find("li", class_="job-location")
             link_elem = card.find("a", href=True)
             link = link_elem.get("href") if link_elem else None