# src/cache.py
"""JSON cache helpers for scraped job postings."""

from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, Sequence

from src.models import JobPosting

def _empty_cache() -> dict:
    """Return a fresh, empty cache payload."""
    return {"generated_at": "", "companies": [], "jobs": []}


def load_cached_jobs(cache_path: str | Path) -> dict:
    """Read the JSON cache at ``cache_path``, or an empty payload if missing or corrupt."""
    path = Path(cache_path)
    if not path.exists():
        return _empty_cache()
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Treat an unreadable or malformed cache file as an empty cache.
        return _empty_cache()

def write_cached_jobs(
    cache_path: str | Path,
    jobs: Sequence[JobPosting],
    companies: Iterable[str],
) -> Path:
    """Serialise ``jobs`` to ``cache_path``, replacing any existing cache file."""
    path = Path(cache_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "companies": sorted(set(companies)),
        "jobs": [job.to_dict() for job in jobs],
    }
    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return path
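
# For reference, the payload written above has this shape (field values are
# illustrative, not taken from a real cache file):
#
# {
#   "generated_at": "2024-01-01T00:00:00+00:00",
#   "companies": ["Acme"],
#   "jobs": [{"company": "Acme", ...}]
# }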

def merge_cached_jobs(
    cache_path: str | Path,
    refreshed_jobs: Sequence[JobPosting],
    refreshed_companies: Iterable[str],
) -> Path:
    """Replace cached jobs for ``refreshed_companies`` and keep all others."""
    cached = load_cached_jobs(cache_path)
    refreshed_set = set(refreshed_companies)
    # Keep only jobs from companies that were not part of this refresh.
    retained_jobs = [
        job for job in cached.get("jobs", []) if job.get("company") not in refreshed_set
    ]
    refreshed_payload = [job.to_dict() for job in refreshed_jobs]
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "companies": sorted(set(cached.get("companies", [])) | refreshed_set),
        "jobs": retained_jobs + refreshed_payload,
    }
    path = Path(cache_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return path
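

# Minimal usage sketch, assuming src.models is importable. The cache path and
# the "Acme" company name below are hypothetical, and the refresh passes an
# empty job list so no particular JobPosting constructor signature is assumed.
if __name__ == "__main__":
    demo_path = Path("jobs_cache.json")  # hypothetical cache location
    write_cached_jobs(demo_path, [], ["Acme"])
    # Refreshing "Acme" with zero postings removes its stale jobs while
    # retaining every other company's cached entries.
    merge_cached_jobs(demo_path, [], ["Acme"])
    print(load_cached_jobs(demo_path)["companies"])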