from __future__ import annotations import json import os from datetime import datetime, timezone from pathlib import Path from .config import events_path DEFAULT_EVENTS_PATH = events_path() SCHEMA_VERSION = 6 def load_payload(path: str | Path = DEFAULT_EVENTS_PATH) -> dict: return json.loads(Path(path).read_text(encoding="utf-8")) def ensure_payload_schema(payload: dict) -> dict: meta = payload.setdefault("meta", {}) meta["schema_version"] = SCHEMA_VERSION return payload def write_payload(payload: dict, path: str | Path = DEFAULT_EVENTS_PATH) -> None: ensure_payload_schema(payload) Path(path).write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") def parse_dt_utc(value: str) -> datetime: dt = datetime.fromisoformat(value) if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) return dt.astimezone(timezone.utc) def infer_category(*, title: str, org_name: str, event_url: str) -> str: text = " ".join((title, org_name, event_url)).lower().replace("-", " ").replace("_", " ").replace("/", " ") if "thesis defense" in text or "phd defense" in text: return "PhD defense" if "thesis proposal" in text or "phd proposal" in text: return "PhD proposal" if "lecture" in text: return "Lecture" if "talk" in text: return "Talk" if "podcast" in text or "episode" in text: return "Podcast" if "seminar" in text: return "Seminar" return ""