OppaAI committed on
Commit
84a279b
·
verified ·
1 Parent(s): 1d4d544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -30
app.py CHANGED
@@ -1,27 +1,16 @@
1
  import gradio as gr
2
- import cloudscraper
3
  from bs4 import BeautifulSoup
4
  from urllib.parse import urlencode
5
  from fastmcp import FastMCP
 
 
 
6
 
7
  # Initialize FastMCP agent
8
  mcp = FastMCP("Indeed Web Scraper Agent")
9
 
10
  @mcp.tool(name="search_jobs")
11
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None):
12
- """
13
- Scrape jobs from Indeed website using cloudscraper + BeautifulSoup.
14
-
15
- Args:
16
- query (str): Job title or keywords to search for.
17
- location (str): Location (city, region) to search jobs in.
18
- limit (int): Number of job results to return (default 10).
19
- salary (str, optional): Not used in scraper, reserved for future.
20
- job_type (str, optional): Not used in scraper, reserved for future.
21
-
22
- Returns:
23
- dict: Contains a list of jobs with title, company, location, and url.
24
- """
25
  base_url = "https://ca.indeed.com/jobs?"
26
 
27
  params = {
@@ -29,23 +18,25 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
29
  "l": location,
30
  "sort": "date",
31
  }
32
-
33
  url = base_url + urlencode(params)
34
- headers = {
35
- "User-Agent": (
36
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
37
- "AppleWebKit/537.36 (KHTML, like Gecko) "
38
- "Chrome/114.0.0.0 Safari/537.36"
39
- ),
40
- "Accept-Language": "en-US,en;q=0.9",
41
- }
42
 
43
  try:
44
- scraper = cloudscraper.create_scraper()
45
- response = scraper.get(url, headers=headers, timeout=15)
46
- response.raise_for_status()
 
47
 
48
- soup = BeautifulSoup(response.text, "html.parser")
 
 
 
49
 
50
  jobs = []
51
  cards = soup.find_all("a", class_="tapItem")
@@ -70,7 +61,6 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
70
  except Exception as e:
71
  return {"error": str(e)}
72
 
73
-
74
  def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
75
  result = search_jobs_tool(query, location, limit, salary, job_type)
76
 
@@ -99,8 +89,8 @@ app = gr.Interface(
99
  gr.Textbox(label="Job Type (optional, ignored)")
100
  ],
101
  outputs="markdown",
102
- title="Indeed Job Search (with cloudscraper) + FastMCP",
103
- description="Search jobs by scraping Indeed.ca using cloudscraper and BeautifulSoup."
104
  )
105
 
106
  if __name__ == "__main__":
 
1
  import gradio as gr
 
2
  from bs4 import BeautifulSoup
3
  from urllib.parse import urlencode
4
  from fastmcp import FastMCP
5
+ import undetected_chromedriver as uc
6
+ from selenium.webdriver.chrome.options import Options
7
+ import time
8
 
9
  # Initialize FastMCP agent
10
  mcp = FastMCP("Indeed Web Scraper Agent")
11
 
12
  @mcp.tool(name="search_jobs")
13
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None):
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  base_url = "https://ca.indeed.com/jobs?"
15
 
16
  params = {
 
18
  "l": location,
19
  "sort": "date",
20
  }
 
21
  url = base_url + urlencode(params)
22
+
23
+ options = Options()
24
+ options.headless = True # 如要睇實際畫面,改 False
25
+ options.add_argument("--no-sandbox")
26
+ options.add_argument("--disable-dev-shm-usage")
27
+ options.add_argument("--disable-blink-features=AutomationControlled")
28
+ options.add_argument("--disable-gpu")
 
29
 
30
  try:
31
+ # undetected_chromedriver 產生一個 selenium ChromeDriver
32
+ driver = uc.Chrome(options=options)
33
+ driver.get(url)
34
+ time.sleep(5) # 等頁面載入好
35
 
36
+ html = driver.page_source
37
+ driver.quit()
38
+
39
+ soup = BeautifulSoup(html, "html.parser")
40
 
41
  jobs = []
42
  cards = soup.find_all("a", class_="tapItem")
 
61
  except Exception as e:
62
  return {"error": str(e)}
63
 
 
64
  def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
65
  result = search_jobs_tool(query, location, limit, salary, job_type)
66
 
 
89
  gr.Textbox(label="Job Type (optional, ignored)")
90
  ],
91
  outputs="markdown",
92
+ title="Indeed Job Search (with Selenium) + FastMCP",
93
+ description="Search jobs by scraping Indeed.ca using Selenium and BeautifulSoup."
94
  )
95
 
96
  if __name__ == "__main__":