"""ICSAC publications registry — single source of truth for /publications. Maintains src/data/accepted.json on the icsacinstitute.org repo. Three writers populate the registry: - Zenodo community watcher (action.register_accepted_paper) — accepts in the icsac Zenodo community. - Submission intake DOI route (icsac-submission-intake/) — papers submitted via author-supplied DOI and accepted by the panel. - Submission intake PDF route post-publish (publish_watcher) — operator publishes the staged Zenodo draft and the watcher registers the now- live DOI. Every entry powers /publications/; entries with `record_id` also power the legacy /accepted/ share landings. Filename note: the on-disk file is still `accepted.json` for back-compat with TS imports that already reference it; semantically it's the publications registry. """ from __future__ import annotations import datetime import json import os import re import subprocess from typing import Any, Optional WEBSITE_REPO = os.environ.get("ICSAC_WEBSITE_REPO", "") REGISTRY_PATH = os.path.join(WEBSITE_REPO, "src/data/accepted.json") PUBLICATIONS_BASE_URL = "https://icsacinstitute.org/publications" VALID_SOURCES = {"zenodo-community", "submission-doi", "submission-pdf"} def make_slug(title: str, existing_slugs: Optional[set[str]] = None) -> str: """Slugify a title to a kebab-case URL fragment. Splits on first colon or em/en-dash so subtitles don't bloat the URL, then lowercases + reduces non-alphanumerics to single hyphens. Caps at 80 chars. On collision with `existing_slugs`, appends -2, -3, ... """ src = (title or "").strip() if not src: src = "paper" base = re.split(r"\s*[:—–]\s*", src, maxsplit=1)[0].strip() or src slug = re.sub(r"[^a-z0-9]+", "-", base.lower()).strip("-") slug = slug[:80].rstrip("-") or "paper" if not existing_slugs: return slug candidate = slug n = 2 while candidate in existing_slugs: candidate = f"{slug}-{n}" n += 1 return candidate def publications_url(slug: str) -> str: return f"{PUBLICATIONS_BASE_URL}/{slug}" def _load_registry() -> list[dict]: if not os.path.exists(REGISTRY_PATH): raise FileNotFoundError(f"Registry missing: {REGISTRY_PATH}") with open(REGISTRY_PATH) as f: return json.load(f) def _save_registry(registry: list[dict]) -> None: with open(REGISTRY_PATH, "w") as f: json.dump(registry, f, indent=2, ensure_ascii=False) f.write("\n") def _match_existing(registry: list[dict], proto: dict) -> Optional[int]: """Find an existing registry entry by record_id then doi. Returns its index or None.""" rid = proto.get("record_id") doi = proto.get("doi") for i, e in enumerate(registry): if rid and e.get("record_id") == rid: return i if doi and e.get("doi") == doi: return i return None def upsert_entry(proto: dict) -> dict: """Insert or update a publications entry. Returns the final entry. `proto` must carry: title, authors (list[str]), doi, source. Optional: abstract, source_ref, record_id, accepted_date (defaults to today), slug (auto-derived from title if absent). Existing entries (matched by record_id or doi) are updated in place; the existing slug is preserved for URL stability. New entries get a fresh slug, deduped against the current registry. Caller is responsible for staging any ancillary files (public-review HTML, etc.) and then calling commit_and_push(). Returns an empty dict when ICSAC_WEBSITE_REPO is not configured (the Zenodo accept itself still proceeds; the registry publish is skipped). """ if not WEBSITE_REPO: print(" Registry publish skipped: ICSAC_WEBSITE_REPO not configured") return {} if proto.get("source") not in VALID_SOURCES: raise ValueError( f"invalid source {proto.get('source')!r}; want one of {sorted(VALID_SOURCES)}" ) if not proto.get("title"): raise ValueError("proto.title is required") if not proto.get("doi"): raise ValueError("proto.doi is required") if not proto.get("authors"): raise ValueError("proto.authors must be a non-empty list") registry = _load_registry() existing_idx = _match_existing(registry, proto) final: dict[str, Any] = {} if existing_idx is not None: prior = registry[existing_idx] final["slug"] = prior.get("slug") or make_slug( proto["title"], {e.get("slug") for e in registry if e is not prior and e.get("slug")}, ) final["accepted_date"] = ( proto.get("accepted_date") or prior.get("accepted_date") or datetime.date.today().isoformat() ) else: existing_slugs = {e.get("slug") for e in registry if e.get("slug")} final["slug"] = proto.get("slug") or make_slug(proto["title"], existing_slugs) final["accepted_date"] = ( proto.get("accepted_date") or datetime.date.today().isoformat() ) if proto.get("record_id"): final["record_id"] = str(proto["record_id"]) final["title"] = proto["title"] final["authors"] = list(proto["authors"]) final["doi"] = proto["doi"] final["source"] = proto["source"] if proto.get("source_ref"): final["source_ref"] = proto["source_ref"] if proto.get("abstract"): final["abstract"] = proto["abstract"] # Re-key in canonical insert order so the JSON stays diff-friendly. ordered_keys = [ "slug", "record_id", "title", "authors", "doi", "accepted_date", "source", "source_ref", "abstract", ] canonical = {k: final[k] for k in ordered_keys if k in final} if existing_idx is not None: registry[existing_idx] = canonical else: registry.append(canonical) _save_registry(registry) return canonical def stage_public_review_for_slug( review_key: str, slug: str, reviews_dir: str, ) -> tuple[Optional[str], Optional[str]]: """Redact the panel review + RQC keyed by `review_key`, then rename the generated public-reviews/.{md,html} files to .{md,html} so /publications/ can find them. `review_key` is the prefix redaction.publish_public_review searches for under reviews_dir — record_id for Zenodo-watcher-path papers, sub_id (e.g. ICSAC-SUB-00006) for intake-path papers. Returns (review_md_path, rqc_md_path) — either may be None if no matching review was found. RedactionLeak from the underlying redaction bubbles up; callers gate it the same way action.accept_request does. Returns (None, None) when ICSAC_WEBSITE_REPO is not configured. """ if not WEBSITE_REPO: print(" Public-review stage skipped: ICSAC_WEBSITE_REPO not configured") return (None, None) import redaction # editorial system module review_md_orig = redaction.publish_public_review( review_key, reviews_dir, WEBSITE_REPO, ) rqc_md_orig = redaction.publish_public_rqc( review_key, reviews_dir, WEBSITE_REPO, ) out_dir = os.path.join(WEBSITE_REPO, "src", "data", "public-reviews") def _rename_pair(orig_md: Optional[str], src_base: str, dst_base: str) -> Optional[str]: if not orig_md or src_base == dst_base: return orig_md final_md: Optional[str] = None for ext in (".md", ".html"): src = os.path.join(out_dir, f"{src_base}{ext}") dst = os.path.join(out_dir, f"{dst_base}{ext}") if not os.path.exists(src): continue if os.path.exists(dst): os.remove(dst) os.rename(src, dst) if ext == ".md": final_md = dst return final_md final_review = _rename_pair(review_md_orig, review_key, slug) final_rqc = _rename_pair( rqc_md_orig, f"{review_key}_review_quality_control", f"{slug}_review_quality_control", ) return final_review, final_rqc def commit_and_push(message: str, extra_paths: Optional[list[str]] = None) -> None: """Stage accepted.json (+ any extras), commit, pull --rebase, push. No-op when the working tree is clean. Best-effort `git pull --rebase`; push failures raise so callers can surface a /pain signal. No-op when ICSAC_WEBSITE_REPO is not configured. """ if not WEBSITE_REPO: return def run(*cmd, check=True): return subprocess.run( cmd, cwd=WEBSITE_REPO, capture_output=True, text=True, check=check ) run("git", "add", "src/data/accepted.json") for p in extra_paths or []: if not p: continue if os.path.isabs(p): rel = os.path.relpath(p, WEBSITE_REPO) else: rel = p full = os.path.join(WEBSITE_REPO, rel) if os.path.exists(full): run("git", "add", rel) # If the path is a markdown file, also stage the sibling .html # (redaction writes pairs). if rel.endswith(".md"): html_rel = rel[:-3] + ".html" if os.path.exists(os.path.join(WEBSITE_REPO, html_rel)): run("git", "add", html_rel) status = run("git", "status", "--porcelain").stdout if not status.strip(): return run("git", "commit", "-m", message) try: run("git", "pull", "--rebase", "--autostash", "origin", "main") except subprocess.CalledProcessError as e: print(f" git pull --rebase warning: {e.stderr.strip()}") run("git", "push", "origin", "HEAD:main")