Spaces:

OppaAI
/

Job-Search-MCP-Server

Sleeping

App Files Files Community

OppaAI committed on Jun 8, 2025

Commit

064e5aa

verified ·

1 Parent(s): 55479d7

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -60

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import os
 import gradio as gr
-import asyncio
 from urllib.parse import urlencode
 from fastmcp import FastMCP
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
 # Initialize FastMCP agent
 mcp = FastMCP("Indeed Web Scraper Agent")
@@ -11,7 +11,7 @@ mcp = FastMCP("Indeed Web Scraper Agent")
 @mcp.tool(name="search_jobs")
 def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None):
     """
-    Scrape jobs from Indeed website using crawl4ai based on query, location, and optional filters.
     Args:
         query (str): Job title or keywords to search for.
@@ -25,71 +25,48 @@ def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = N
     """
     base_url = "https://ca.indeed.com/jobs?"
-    # Build query parameters
     params = {
         "q": query,
         "l": location,
-        "sort": "date",  # sort by most recent
     }
     url = base_url + urlencode(params)
-    async def crawl_indeed():
-        browser_cfg = BrowserConfig(headless=True, text_mode=True)
-        crawler_cfg = CrawlerRunConfig(
-            scan_full_page=True,
-            delay_before_return_html=2.0,
-            cache_mode=CacheMode.BYPASS,
-            remove_overlay_elements=True,
-            exclude_external_links=True,
-            exclude_social_media_links=True
-        )
-        async with AsyncWebCrawler(config=browser_cfg) as crawler:
-            result = await crawler.arun(url, config=crawler_cfg)
-            if not result.success:
-                return {"error": result.error_message}
-            html = result.html
-            # Parse job cards manually with BeautifulSoup from crawl4ai html
-            from bs4 import BeautifulSoup
-            soup = BeautifulSoup(html, "html.parser")
-            jobs = []
-            cards = soup.find_all("a", class_="tapItem")
-            for card in cards[:limit]:
-                title_elem = card.find("h2", class_="jobTitle")
-                company_elem = card.find("span", class_="companyName")
-                location_elem = card.find("div", class_="companyLocation")
-                link = card.get("href")
-                if link and not link.startswith("http"):
-                    link = "https://ca.indeed.com" + link
-                job = {
-                    "title": title_elem.get_text(strip=True) if title_elem else "No Title",
-                    "company": company_elem.get_text(strip=True) if company_elem else "Unknown Company",
-                    "location": location_elem.get_text(strip=True) if location_elem else "Unknown Location",
-                    "url": link or "#",
-                }
-                jobs.append(job)
-            return {"jobs": jobs}
-    # Run async crawl in sync context (FastMCP expects sync)
-    return asyncio.run(crawl_indeed())
-def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
-    """
-    Gradio UI handler for scraping Indeed jobs using FastMCP.
-    Args:
-        query (str): Job title or keyword.
-        location (str): Job location.
-        limit (int, optional): Number of jobs to retrieve (default 10).
-        salary (str, optional): Not used.
-        job_type (str, optional): Not used.
-    Returns:
-        str: Markdown-formatted list of jobs or error message.
-    """
     result = search_jobs_tool(query, location, limit, salary, job_type)
     if "error" in result:
@@ -117,8 +94,8 @@ app = gr.Interface(
         gr.Textbox(label="Job Type (optional, ignored)")
     ],
     outputs="markdown",
-    title="Indeed Job Search (Web Scraping with Crawl4AI) + FastMCP",
-    description="Search jobs by scraping Indeed.ca with crawl4ai. Results sorted by most recent."
 )
 if __name__ == "__main__":

 import os
 import gradio as gr
+import requests
+from bs4 import BeautifulSoup
 from urllib.parse import urlencode
 from fastmcp import FastMCP
 # Initialize FastMCP agent
 mcp = FastMCP("Indeed Web Scraper Agent")
 @mcp.tool(name="search_jobs")
 def search_jobs_tool(query: str, location: str, limit: int = 10, salary: str = None, job_type: str = None):
     """
+    Scrape jobs from Indeed website using requests + BeautifulSoup.
     Args:
         query (str): Job title or keywords to search for.
     """
     base_url = "https://ca.indeed.com/jobs?"
     params = {
         "q": query,
         "l": location,
+        "sort": "date",
     }
     url = base_url + urlencode(params)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
+    }
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, "html.parser")
+        jobs = []
+        cards = soup.find_all("a", class_="tapItem")
+        for card in cards[:limit]:
+            title_elem = card.find("h2", class_="jobTitle")
+            company_elem = card.find("span", class_="companyName")
+            location_elem = card.find("div", class_="companyLocation")
+            link = card.get("href")
+            if link and not link.startswith("http"):
+                link = "https://ca.indeed.com" + link
+            job = {
+                "title": title_elem.get_text(strip=True) if title_elem else "No Title",
+                "company": company_elem.get_text(strip=True) if company_elem else "Unknown Company",
+                "location": location_elem.get_text(strip=True) if location_elem else "Unknown Location",
+                "url": link or "#"
+            }
+            jobs.append(job)
+        return {"jobs": jobs}
+    except Exception as e:
+        return {"error": str(e)}
+def search_jobs_ui(query, location, limit=10, salary=None, job_type=None):
     result = search_jobs_tool(query, location, limit, salary, job_type)
     if "error" in result:
         gr.Textbox(label="Job Type (optional, ignored)")
     ],
     outputs="markdown",
+    title="Indeed Job Search (with BeautifulSoup) + FastMCP",
+    description="Search jobs by scraping Indeed.ca using requests and BeautifulSoup."
 )
 if __name__ == "__main__":