# src/cache.py
"""JSON cache helpers for scraped job postings."""

from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, Sequence

from src.models import JobPosting

def _empty_cache() -> dict:
    """Return a fresh, empty cache payload."""
    return {"generated_at": "", "companies": [], "jobs": []}


def load_cached_jobs(cache_path: str | Path) -> dict:
    """Read the JSON cache at ``cache_path``, or an empty payload if missing or corrupt."""
    path = Path(cache_path)
    if not path.exists():
        return _empty_cache()
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Treat an unreadable or malformed cache file as an empty cache.
        return _empty_cache()

def write_cached_jobs(
    cache_path: str | Path,
    jobs: Sequence[JobPosting],
    companies: Iterable[str],
) -> Path:
    """Serialise ``jobs`` to ``cache_path``, replacing any existing cache file."""
    path = Path(cache_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "companies": sorted(set(companies)),
        "jobs": [job.to_dict() for job in jobs],
    }
    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return path
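
# For reference, the payload written above has this shape (field values are
# illustrative, not taken from a real cache file):
#
# {
#   "generated_at": "2024-01-01T00:00:00+00:00",
#   "companies": ["Acme"],
#   "jobs": [{"company": "Acme", ...}]
# }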

def merge_cached_jobs(
    cache_path: str | Path,
    refreshed_jobs: Sequence[JobPosting],
    refreshed_companies: Iterable[str],
) -> Path:
    """Replace cached jobs for ``refreshed_companies`` and keep all others."""
    cached = load_cached_jobs(cache_path)
    refreshed_set = set(refreshed_companies)
    # Keep only jobs from companies that were not part of this refresh.
    retained_jobs = [
        job for job in cached.get("jobs", []) if job.get("company") not in refreshed_set
    ]
    refreshed_payload = [job.to_dict() for job in refreshed_jobs]
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "companies": sorted(set(cached.get("companies", [])) | refreshed_set),
        "jobs": retained_jobs + refreshed_payload,
    }
    path = Path(cache_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return path
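

# Minimal usage sketch, assuming src.models is importable. The cache path and
# the "Acme" company name below are hypothetical, and the refresh passes an
# empty job list so no particular JobPosting constructor signature is assumed.
if __name__ == "__main__":
    demo_path = Path("jobs_cache.json")  # hypothetical cache location
    write_cached_jobs(demo_path, [], ["Acme"])
    # Refreshing "Acme" with zero postings removes its stale jobs while
    # retaining every other company's cached entries.
    merge_cached_jobs(demo_path, [], ["Acme"])
    print(load_cached_jobs(demo_path)["companies"])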