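"""Greenhouse job collector.

Fetches a company's postings from the public Greenhouse boards API and
normalizes them into JobPosting records.
"""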
from __future__ import annotations

from typing import List

import requests

from src.collectors.common import dedupe_jobs, normalize_job_posting
from src.detectors.ats_detector import extract_ats_identifier
from src.models import CompanyRecord, JobPosting
from src.resolver.jobs_page_resolver import ResolvedJobsPage

HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}


def collect(company_record: CompanyRecord, resolved_page: ResolvedJobsPage) -> List[JobPosting]:
    """Collect Greenhouse jobs via the public boards API."""
    # Prefer an identifier already stored on the company record; otherwise
    # try to extract the board token from the resolved careers page.
    identifier = company_record.ats_identifier or extract_ats_identifier(
        "greenhouse", resolved_page.url, resolved_page.html
    )
    if not identifier:
        return []

    api_url = f"https://boards-api.greenhouse.io/v1/boards/{identifier}/jobs?content=true"
    try:
        response = requests.get(api_url, headers=HEADERS, timeout=20)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, non-2xx status, or a non-JSON body: treat the
        # board as unavailable instead of aborting the collection run.
        return []
    jobs = [
        normalize_job_posting(
            company_record,
            title=item.get("title", ""),
            location=((item.get("location") or {}).get("name") or ""),
            job_url=item.get("absolute_url", resolved_page.url),
            description=(item.get("content") or ""),
            # With content=true, the boards API returns "departments" as a
            # list of {id, name} objects; take the first name when present.
            department=((item.get("departments") or [{}])[0].get("name") or ""),
            source_ats="greenhouse",
            resolved_url=resolved_page.url,
            posted_date=str(item.get("updated_at") or ""),
            raw_payload=item,
        )
        for item in payload.get("jobs", [])
        if item.get("title")  # skip malformed entries without a title
    ]
    return dedupe_jobs(jobs)
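

# --- Usage sketch (illustrative only) --------------------------------------
# A minimal way to drive collect() by hand. The constructor arguments for
# CompanyRecord and ResolvedJobsPage below are assumptions: only the
# attributes collect() actually reads (ats_identifier, url, html) come from
# this module, and the real models may require additional fields. The board
# token "exampleco" and the printed JobPosting attributes are hypothetical.
if __name__ == "__main__":
    record = CompanyRecord(name="Example Co", ats_identifier="exampleco")
    page = ResolvedJobsPage(url="https://boards.greenhouse.io/exampleco", html="")
    for posting in collect(record, page):
        print(posting.title, posting.job_url)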