OppaAI committed on
Commit
51a4078
·
verified ·
1 Parent(s): fb1c8c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from bs4 import BeautifulSoup
3
  from urllib.parse import urlencode
4
- import requests
5
  from fastmcp import FastMCP
6
  import logging
7
 
@@ -15,7 +15,7 @@ mcp = FastMCP("Canada Job Bank Scraper Agent")
15
  @mcp.tool(name="search_jobs")
16
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
17
  """
18
- Scrape job listings from the Canada Job Bank website using requests only.
19
 
20
  Args:
21
  query (str): Job title or keyword to search for.
@@ -49,14 +49,15 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
49
 
50
  try:
51
  logger.info(f"Attempting to scrape: {url}")
52
- response = requests.get(url, headers=headers, timeout=10)
 
53
  response.raise_for_status()
54
  soup = BeautifulSoup(response.text, "html.parser")
55
  cards = soup.find_all("article", class_="job-result") # Verify class name
56
 
57
  if not cards:
58
  logger.warning("No job cards found. The website may use JavaScript or the HTML structure may have changed.")
59
- logger.debug(f"HTML sample: {soup.prettify()[:1000]}")
60
  return {"error": "No job listings found. The website may use JavaScript or the HTML structure may have changed."}
61
 
62
  jobs = []
@@ -81,6 +82,9 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
81
  logger.info(f"Found {len(jobs)} job listings.")
82
  return {"jobs": jobs}
83
 
 
 
 
84
  except requests.exceptions.HTTPError as http_err:
85
  logger.error(f"HTTP error: {http_err}")
86
  return {"error": f"HTTP error occurred: {http_err}"}
@@ -134,7 +138,7 @@ app = gr.Interface(
134
  ],
135
  outputs=gr.Markdown(),
136
  title="Canada Job Bank Job Search",
137
- description="Search jobs by scraping Canada Job Bank using FastMCP and Gradio.",
138
  theme="huggingface"
139
  )
140
 
 
1
  import gradio as gr
2
  from bs4 import BeautifulSoup
3
  from urllib.parse import urlencode
4
+ import cloudscraper
5
  from fastmcp import FastMCP
6
  import logging
7
 
 
15
  @mcp.tool(name="search_jobs")
16
  def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
17
  """
18
+ Scrape job listings from the Canada Job Bank website using cloudscraper.
19
 
20
  Args:
21
  query (str): Job title or keyword to search for.
 
49
 
50
  try:
51
  logger.info(f"Attempting to scrape: {url}")
52
+ scraper = cloudscraper.create_scraper()
53
+ response = scraper.get(url, headers=headers, timeout=10)
54
  response.raise_for_status()
55
  soup = BeautifulSoup(response.text, "html.parser")
56
  cards = soup.find_all("article", class_="job-result") # Verify class name
57
 
58
  if not cards:
59
  logger.warning("No job cards found. The website may use JavaScript or the HTML structure may have changed.")
60
+ logger.debug(f"HTML sample: {soup.prettify()[:2000]}") # Increased sample size for debugging
61
  return {"error": "No job listings found. The website may use JavaScript or the HTML structure may have changed."}
62
 
63
  jobs = []
 
82
  logger.info(f"Found {len(jobs)} job listings.")
83
  return {"jobs": jobs}
84
 
85
+ except cloudscraper.exceptions.CloudflareChallengeError as cf_err:
86
+ logger.error(f"Cloudflare challenge error: {cf_err}")
87
+ return {"error": f"Cloudflare challenge error: {cf_err}"}
88
  except requests.exceptions.HTTPError as http_err:
89
  logger.error(f"HTTP error: {http_err}")
90
  return {"error": f"HTTP error occurred: {http_err}"}
 
138
  ],
139
  outputs=gr.Markdown(),
140
  title="Canada Job Bank Job Search",
141
+ description="Search jobs by scraping Canada Job Bank using FastMCP and cloudscraper.",
142
  theme="huggingface"
143
  )
144