from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, List, Sequence

from src.models import JobPosting


def _empty_cache() -> dict:
    """Return a fresh empty cache payload (a new object on every call)."""
    return {"generated_at": "", "companies": [], "jobs": []}


def _as_list(value: object) -> list:
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _write_payload(path: Path, jobs: list, companies: Iterable[str]) -> Path:
    """Serialize a cache payload and write it to *path*.

    The payload has three keys: ``generated_at`` (current UTC time in ISO
    format), ``companies`` (de-duplicated and sorted) and ``jobs`` (stored
    as given).

    The JSON text is written to a sibling ``<name>.tmp`` file which is then
    moved over *path*, so an interrupted write does not leave a truncated
    cache behind. If the write itself fails, a partial ``.tmp`` file may
    remain; it is overwritten by the next successful write.

    Returns:
        *path*, unchanged.
    """
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "companies": sorted(set(companies)),
        "jobs": jobs,
    }
    # Serialize before touching the disk so a serialization error cannot
    # leave a half-written file.
    serialized = json.dumps(payload, indent=2)

    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(serialized, encoding="utf-8")
    tmp_path.replace(path)
    return path


def load_cached_jobs(cache_path: str | Path) -> dict:
    """Load the cache payload stored at *cache_path*.

    Loading is best-effort: if the file is missing, unreadable, not valid
    UTF-8/JSON, or its top-level JSON value is not an object, an empty
    payload (``generated_at`` ``""``, no companies, no jobs) is returned
    instead of raising.

    Returns:
        The decoded payload dict, or a fresh empty payload.
    """
    path = Path(cache_path)
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: bad UTF-8 or bad
        # JSON (JSONDecodeError and UnicodeDecodeError both derive from it).
        # RecursionError: pathologically nested JSON.
        return _empty_cache()

    # Callers use dict methods on the result; a list/str/null payload would
    # otherwise fail later with AttributeError.
    if not isinstance(payload, dict):
        return _empty_cache()
    return payload


def write_cached_jobs(
    cache_path: str | Path,
    jobs: Sequence[JobPosting],
    companies: Iterable[str],
) -> Path:
    """Overwrite the cache at *cache_path* with *jobs* and *companies*.

    Each job is stored as the result of its ``to_dict()`` method (assumed
    to be JSON-serializable; ``JobPosting`` is defined in ``src.models``).
    Parent directories are created as needed.

    Returns:
        The cache path as a ``Path``.
    """
    return _write_payload(
        Path(cache_path),
        [job.to_dict() for job in jobs],
        companies,
    )


def merge_cached_jobs(
    cache_path: str | Path,
    refreshed_jobs: Sequence[JobPosting],
    refreshed_companies: Iterable[str],
) -> Path:
    """Merge freshly fetched jobs into the existing cache.

    Cached jobs whose ``"company"`` value is in *refreshed_companies* are
    dropped and replaced by *refreshed_jobs*; all other cached jobs are
    kept, in their original order, ahead of the refreshed ones. The stored
    company list becomes the union of the cached and refreshed companies.

    Malformed cache content (a non-list ``jobs``/``companies`` value,
    non-dict job entries, non-string company names) is ignored rather than
    raising.

    Returns:
        The cache path as a ``Path``.
    """
    cached = load_cached_jobs(cache_path)
    refreshed_set = set(refreshed_companies)

    retained_jobs = [
        job
        for job in _as_list(cached.get("jobs"))
        if isinstance(job, dict) and job.get("company") not in refreshed_set
    ]
    cached_companies = {
        name
        for name in _as_list(cached.get("companies"))
        if isinstance(name, str)
    }

    return _write_payload(
        Path(cache_path),
        retained_jobs + [job.to_dict() for job in refreshed_jobs],
        cached_companies | refreshed_set,
    )