OppaAI committed on
Commit
69d8214
·
verified ·
1 Parent(s): 41387a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -34
app.py CHANGED
@@ -2,55 +2,47 @@ import gradio as gr
2
  from bs4 import BeautifulSoup
3
  from urllib.parse import urlencode
4
  from fastmcp import FastMCP
5
- import undetected_chromedriver as uc
6
- from selenium.webdriver.chrome.options import Options
7
- import time
8
 
9
  # Initialize FastMCP agent
10
- mcp = FastMCP("Indeed Web Scraper Agent")
11
 
12
  @mcp.tool(name="search_jobs")
13
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None):
14
- base_url = "https://ca.indeed.com/jobs?"
15
 
16
  params = {
17
- "q": query,
18
- "l": location,
19
- "sort": "date",
20
  }
21
  url = base_url + urlencode(params)
22
 
23
- options = Options()
24
- options.headless = True
25
- options.add_argument("--no-sandbox")
26
- options.add_argument("--disable-dev-shm-usage")
27
- options.add_argument("--disable-blink-features=AutomationControlled")
28
- options.add_argument("--disable-gpu")
29
-
30
- # 你電腦 Chrome 路徑(下面係 Windows 舉例)
31
- options.binary_location = "C:/Program Files/Google/Chrome/Application/chrome.exe"
32
- # macOS 路徑範例:'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
33
- # Linux 路徑範例:'/usr/bin/google-chrome'
34
 
35
  try:
36
- driver = uc.Chrome(options=options)
37
- driver.get(url)
38
- time.sleep(5)
39
-
40
- html = driver.page_source
41
- driver.quit()
42
 
43
- soup = BeautifulSoup(html, "html.parser")
44
 
45
  jobs = []
46
- cards = soup.find_all("a", class_="tapItem")
 
47
  for card in cards[:limit]:
48
- title_elem = card.find("h2", class_="jobTitle")
49
- company_elem = card.find("span", class_="companyName")
50
- location_elem = card.find("div", class_="companyLocation")
51
- link = card.get("href")
52
  if link and not link.startswith("http"):
53
- link = "https://ca.indeed.com" + link
54
 
55
  job = {
56
  "title": title_elem.get_text(strip=True) if title_elem else "No Title",
@@ -65,6 +57,7 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
65
  except Exception as e:
66
  return {"error": str(e)}
67
 
 
68
  def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
69
  result = search_jobs_tool(query, location, limit, salary, job_type)
70
 
@@ -93,8 +86,8 @@ app = gr.Interface(
93
  gr.Textbox(label="Job Type (optional, ignored)")
94
  ],
95
  outputs="markdown",
96
- title="Indeed Job Search (with Selenium) + FastMCP",
97
- description="Search jobs by scraping Indeed.ca using Selenium and BeautifulSoup."
98
  )
99
 
100
  if __name__ == "__main__":
 
2
  from bs4 import BeautifulSoup
3
  from urllib.parse import urlencode
4
  from fastmcp import FastMCP
5
+ import requests
 
 
6
 
7
  # Initialize FastMCP agent
8
+ mcp = FastMCP("Canada Job Bank Scraper Agent")
9
 
10
  @mcp.tool(name="search_jobs")
11
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None):
12
+ base_url = "https://www.jobbank.gc.ca/jobsearch/jobsearch?"
13
 
14
  params = {
15
+ "searchstring": query,
16
+ "locationstring": location,
17
+ "sort": "M", # Sort by most recent
18
  }
19
  url = base_url + urlencode(params)
20
 
21
+ headers = {
22
+ "User-Agent": (
23
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
24
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
25
+ "Chrome/114.0.0.0 Safari/537.36"
26
+ ),
27
+ "Accept-Language": "en-US,en;q=0.9",
28
+ }
 
 
 
29
 
30
  try:
31
+ response = requests.get(url, headers=headers, timeout=15)
32
+ response.raise_for_status()
 
 
 
 
33
 
34
+ soup = BeautifulSoup(response.text, "html.parser")
35
 
36
  jobs = []
37
+ cards = soup.find_all("article", class_="resultJobItem") # Job listings container
38
+
39
  for card in cards[:limit]:
40
+ title_elem = card.find("a", class_="resultJobItem-title")
41
+ company_elem = card.find("span", class_="resultJobItem-employer")
42
+ location_elem = card.find("li", class_="resultJobItem-location")
43
+ link = title_elem.get("href") if title_elem else None
44
  if link and not link.startswith("http"):
45
+ link = "https://www.jobbank.gc.ca" + link
46
 
47
  job = {
48
  "title": title_elem.get_text(strip=True) if title_elem else "No Title",
 
57
  except Exception as e:
58
  return {"error": str(e)}
59
 
60
+
61
  def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
62
  result = search_jobs_tool(query, location, limit, salary, job_type)
63
 
 
86
  gr.Textbox(label="Job Type (optional, ignored)")
87
  ],
88
  outputs="markdown",
89
+ title="Canada Job Bank Job Search (with requests & BeautifulSoup) + FastMCP",
90
+ description="Search jobs by scraping Canada Job Bank using requests and BeautifulSoup."
91
  )
92
 
93
  if __name__ == "__main__":