Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from bs4 import BeautifulSoup
|
| 3 |
from urllib.parse import urlencode
|
| 4 |
-
import requests
|
| 5 |
from fastmcp import FastMCP
|
| 6 |
import logging
|
| 7 |
|
|
@@ -15,7 +15,7 @@ mcp = FastMCP("Canada Job Bank Scraper Agent")
|
|
| 15 |
@mcp.tool(name="search_jobs")
|
| 16 |
def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
|
| 17 |
"""
|
| 18 |
-
Scrape job listings from the Canada Job Bank website using requests.
|
| 19 |
|
| 20 |
Args:
|
| 21 |
query (str): Job title or keyword to search for.
|
|
@@ -49,14 +49,15 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
|
|
| 49 |
|
| 50 |
try:
|
| 51 |
logger.info(f"Attempting to scrape: {url}")
|
| 52 |
-
|
|
|
|
| 53 |
response.raise_for_status()
|
| 54 |
soup = BeautifulSoup(response.text, "html.parser")
|
| 55 |
cards = soup.find_all("article", class_="job-result") # Verify class name
|
| 56 |
|
| 57 |
if not cards:
|
| 58 |
logger.warning("No job cards found. The website may use JavaScript or the HTML structure may have changed.")
|
| 59 |
-
logger.debug(f"HTML sample: {soup.prettify()[:
|
| 60 |
return {"error": "No job listings found. The website may use JavaScript or the HTML structure may have changed."}
|
| 61 |
|
| 62 |
jobs = []
|
|
@@ -81,6 +82,9 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
|
|
| 81 |
logger.info(f"Found {len(jobs)} job listings.")
|
| 82 |
return {"jobs": jobs}
|
| 83 |
|
|
|
|
|
|
|
|
|
|
| 84 |
except requests.exceptions.HTTPError as http_err:
|
| 85 |
logger.error(f"HTTP error: {http_err}")
|
| 86 |
return {"error": f"HTTP error occurred: {http_err}"}
|
|
@@ -134,7 +138,7 @@ app = gr.Interface(
|
|
| 134 |
],
|
| 135 |
outputs=gr.Markdown(),
|
| 136 |
title="Canada Job Bank Job Search",
|
| 137 |
-
description="Search jobs by scraping Canada Job Bank using FastMCP and requests.",
|
| 138 |
theme="huggingface"
|
| 139 |
)
|
| 140 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from bs4 import BeautifulSoup
|
| 3 |
from urllib.parse import urlencode
|
| 4 |
+
import cloudscraper
|
| 5 |
from fastmcp import FastMCP
|
| 6 |
import logging
|
| 7 |
|
|
|
|
| 15 |
@mcp.tool(name="search_jobs")
|
| 16 |
def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None) -> dict:
|
| 17 |
"""
|
| 18 |
+
Scrape job listings from the Canada Job Bank website using cloudscraper.
|
| 19 |
|
| 20 |
Args:
|
| 21 |
query (str): Job title or keyword to search for.
|
|
|
|
| 49 |
|
| 50 |
try:
|
| 51 |
logger.info(f"Attempting to scrape: {url}")
|
| 52 |
+
scraper = cloudscraper.create_scraper()
|
| 53 |
+
response = scraper.get(url, headers=headers, timeout=10)
|
| 54 |
response.raise_for_status()
|
| 55 |
soup = BeautifulSoup(response.text, "html.parser")
|
| 56 |
cards = soup.find_all("article", class_="job-result") # Verify class name
|
| 57 |
|
| 58 |
if not cards:
|
| 59 |
logger.warning("No job cards found. The website may use JavaScript or the HTML structure may have changed.")
|
| 60 |
+
logger.debug(f"HTML sample: {soup.prettify()[:2000]}") # Increased sample size for debugging
|
| 61 |
return {"error": "No job listings found. The website may use JavaScript or the HTML structure may have changed."}
|
| 62 |
|
| 63 |
jobs = []
|
|
|
|
| 82 |
logger.info(f"Found {len(jobs)} job listings.")
|
| 83 |
return {"jobs": jobs}
|
| 84 |
|
| 85 |
+
except cloudscraper.exceptions.CloudflareChallengeError as cf_err:
|
| 86 |
+
logger.error(f"Cloudflare challenge error: {cf_err}")
|
| 87 |
+
return {"error": f"Cloudflare challenge error: {cf_err}"}
|
| 88 |
except requests.exceptions.HTTPError as http_err:
|
| 89 |
logger.error(f"HTTP error: {http_err}")
|
| 90 |
return {"error": f"HTTP error occurred: {http_err}"}
|
|
|
|
| 138 |
],
|
| 139 |
outputs=gr.Markdown(),
|
| 140 |
title="Canada Job Bank Job Search",
|
| 141 |
+
description="Search jobs by scraping Canada Job Bank using FastMCP and cloudscraper.",
|
| 142 |
theme="huggingface"
|
| 143 |
)
|
| 144 |
|