Spaces:
Sleeping
Sleeping
from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, List, Sequence

from src.models import JobPosting
def load_cached_jobs(cache_path: str | Path) -> dict:
    """Load the cached job payload from *cache_path*.

    Args:
        cache_path: Location of the JSON cache file.

    Returns:
        The parsed cache payload, or an empty payload
        (``{"generated_at": "", "companies": [], "jobs": []}``) when the
        file is missing, unreadable, or contains invalid JSON.
    """
    path = Path(cache_path)
    # EAFP: read directly and handle failure rather than pre-checking
    # path.exists() — the pre-check was redundant and race-prone.
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON
        # (json.JSONDecodeError is a ValueError subclass). Narrower than
        # the original bare `except Exception`, which masked real bugs.
        return {"generated_at": "", "companies": [], "jobs": []}
def write_cached_jobs(cache_path: str | Path, jobs: Sequence[JobPosting], companies: Iterable[str]) -> Path:
    """Write *jobs* and *companies* to the JSON cache at *cache_path*.

    The payload records a UTC timestamp, the deduplicated/sorted company
    names, and one serialized dict per job posting. Parent directories
    are created as needed.

    Args:
        cache_path: Destination of the JSON cache file.
        jobs: Job postings to serialize via their ``to_dict`` method.
        companies: Company names covered by this write (deduplicated).

    Returns:
        The resolved cache path as a ``Path``.
    """
    target = Path(cache_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    unique_companies = sorted(set(companies))
    serialized_jobs = [posting.to_dict() for posting in jobs]
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "companies": unique_companies,
        "jobs": serialized_jobs,
    }
    target.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return target
def merge_cached_jobs(cache_path: str | Path, refreshed_jobs: Sequence[JobPosting], refreshed_companies: Iterable[str]) -> Path:
    """Merge freshly scraped jobs into the existing cache at *cache_path*.

    Cached jobs from companies that were re-scraped this run are dropped
    and replaced by the refreshed postings; jobs from all other companies
    are kept untouched. The company list becomes the union of cached and
    refreshed companies.

    Args:
        cache_path: Location of the JSON cache file to merge into.
        refreshed_jobs: Newly scraped postings, serialized via ``to_dict``.
        refreshed_companies: Companies covered by this scrape run.

    Returns:
        The resolved cache path as a ``Path``.
    """
    existing = load_cached_jobs(cache_path)
    fresh = set(refreshed_companies)
    # Keep only cached entries whose company was NOT re-scraped this run.
    kept = [entry for entry in existing.get("jobs", []) if entry.get("company") not in fresh]
    new_entries = [posting.to_dict() for posting in refreshed_jobs]
    all_companies = set(existing.get("companies", [])) | fresh
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "companies": sorted(all_companies),
        "jobs": kept + new_entries,
    }
    target = Path(cache_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return target