OppaAI committed on
Commit
8ae67db
·
verified ·
1 Parent(s): f77befc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -6
app.py CHANGED
@@ -3,6 +3,14 @@ from bs4 import BeautifulSoup
3
  from urllib.parse import urlencode
4
  import requests
5
  from fastmcp import FastMCP
 
 
 
 
 
 
 
 
6
 
7
  # Initialize FastMCP server
8
  mcp = FastMCP("Canada Job Bank Scraper Agent")
@@ -41,20 +49,36 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
41
  }
42
 
43
  try:
 
 
44
  response = requests.get(url, headers=headers, timeout=10)
45
  response.raise_for_status()
46
-
47
  soup = BeautifulSoup(response.text, "html.parser")
48
- cards = soup.find_all("article", class_="resultJobItem")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  if not cards:
 
51
  return {"error": "No job listings found or website structure changed."}
52
 
53
  jobs = []
54
  for card in cards[:limit]:
55
- title_elem = card.find("span", class_="noctitle")
56
- company_elem = card.find("li", class_="business")
57
- location_elem = card.find("li", class_="location")
58
  link_elem = card.find("a", href=True)
59
 
60
  link = link_elem.get("href") if link_elem else None
@@ -69,15 +93,20 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
69
  }
70
  jobs.append(job)
71
 
 
72
  return {"jobs": jobs}
73
 
74
  except requests.exceptions.HTTPError as http_err:
 
75
  return {"error": f"HTTP error occurred: {http_err}"}
76
  except requests.exceptions.Timeout:
 
77
  return {"error": "Request timed out. Please try again later."}
78
  except requests.exceptions.RequestException as req_err:
 
79
  return {"error": f"Request error: {req_err}"}
80
  except Exception as e:
 
81
  return {"error": f"Unexpected error: {str(e)}"}
82
 
83
  def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
@@ -125,4 +154,4 @@ app = gr.Interface(
125
  )
126
 
127
  if __name__ == "__main__":
128
- app.launch(mcp_server=True)
 
3
  from urllib.parse import urlencode
4
  import requests
5
  from fastmcp import FastMCP
6
+ import logging
7
+ from selenium import webdriver
8
+ from selenium.webdriver.chrome.options import Options
9
+ import time
10
+
11
+ # Set up logging
12
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
13
+ logger = logging.getLogger(__name__)
14
 
15
  # Initialize FastMCP server
16
  mcp = FastMCP("Canada Job Bank Scraper Agent")
 
49
  }
50
 
51
  try:
52
+ # Try requests first
53
+ logger.info(f"Attempting to scrape: {url}")
54
  response = requests.get(url, headers=headers, timeout=10)
55
  response.raise_for_status()
 
56
  soup = BeautifulSoup(response.text, "html.parser")
57
+ cards = soup.find_all("article", class_="job-result") # Updated class name (verify)
58
+
59
+ if not cards:
60
+ logger.warning("No job cards found with requests. Trying Selenium...")
61
+ # Fallback to Selenium for dynamic content
62
+ chrome_options = Options()
63
+ chrome_options.add_argument("--headless")
64
+ chrome_options.add_argument("--no-sandbox")
65
+ chrome_options.add_argument("--disable-dev-shm-usage")
66
+ driver = webdriver.Chrome(options=chrome_options)
67
+ driver.get(url)
68
+ time.sleep(3) # Wait for JavaScript to load
69
+ soup = BeautifulSoup(driver.page_source, "html.parser")
70
+ driver.quit()
71
+ cards = soup.find_all("article", class_="job-result")
72
 
73
  if not cards:
74
+ logger.error("No job listings found. Possible website structure change.")
75
  return {"error": "No job listings found or website structure changed."}
76
 
77
  jobs = []
78
  for card in cards[:limit]:
79
+ title_elem = card.find("span", class_="job-title") # Updated class
80
+ company_elem = card.find("li", class_="employer") # Updated class
81
+ location_elem = card.find("li", class_="job-location") # Updated class
82
  link_elem = card.find("a", href=True)
83
 
84
  link = link_elem.get("href") if link_elem else None
 
93
  }
94
  jobs.append(job)
95
 
96
+ logger.info(f"Found {len(jobs)} job listings.")
97
  return {"jobs": jobs}
98
 
99
  except requests.exceptions.HTTPError as http_err:
100
+ logger.error(f"HTTP error: {http_err}")
101
  return {"error": f"HTTP error occurred: {http_err}"}
102
  except requests.exceptions.Timeout:
103
+ logger.error("Request timed out.")
104
  return {"error": "Request timed out. Please try again later."}
105
  except requests.exceptions.RequestException as req_err:
106
+ logger.error(f"Request error: {req_err}")
107
  return {"error": f"Request error: {req_err}"}
108
  except Exception as e:
109
+ logger.error(f"Unexpected error: {str(e)}")
110
  return {"error": f"Unexpected error: {str(e)}"}
111
 
112
  def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
 
154
  )
155
 
156
  if __name__ == "__main__":
157
+ app.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)