"""Accept/reject Zenodo community requests via API.
Accept also writes the paper to the icsacinstitute.org website registry
(src/data/accepted.json) and commits+pushes the change, which triggers
CF Pages to rebuild. That rebuild publishes an ICSAC-branded landing page
at https://icsacinstitute.org/accepted/<record_id> so LinkedIn and Facebook
shares show ICSAC metadata rather than generic Zenodo cards.
The accept path also redacts the internal review (reviews/<id>_*.md) and
writes a publication-safe copy to the website repo at
src/data/public-reviews/<record_id>.md, embedded on the landing page. The
redaction's grep-gate aborts publication if any forbidden token survives —
a redaction leak fires /pain and leaves the Zenodo accept intact but the
registry + review unpushed.
"""
import datetime
import html
import json
import os
import re
import subprocess
import urllib.request
import urllib.error
import config
import publications
import redaction
import stats as stats_mod
# Website-repo root and registry file path, taken verbatim from the
# publications module so both modules resolve the same locations.
# NOTE(review): the module docstring suggests REGISTRY_PATH points at
# src/data/accepted.json inside the website repo; confirm in publications.
WEBSITE_REPO = publications.WEBSITE_REPO
REGISTRY_PATH = publications.REGISTRY_PATH
# Directory inside the website repo for the public-review markdown files.
PUBLIC_REVIEWS_DIR = os.path.join(WEBSITE_REPO, "src/data/public-reviews")
# Process-wide memo of the community UUID; populated on first use by
# _resolve_community_uuid() and reused by later calls.
_COMMUNITY_UUID_CACHE: str | None = None
def _resolve_community_uuid() -> str:
    """Return the community UUID for ``config.COMMUNITY_ID``, memoised per process.

    The first call issues an authenticated GET against
    ``{config.ZENODO_API}/communities/<slug>`` and stores the ``id`` field of
    the JSON response in ``_COMMUNITY_UUID_CACHE``; later calls return the
    stored value without touching the network.

    The UUID is needed because /api/user/requests identifies the receiving
    community by UUID rather than by slug.

    Raises:
        Whatever ``urllib.request.urlopen`` / ``json.loads`` raise on a failed
        lookup, and ``KeyError`` if the response has no ``id`` field. Nothing
        is caught here.
    """
    global _COMMUNITY_UUID_CACHE
    if not _COMMUNITY_UUID_CACHE:
        lookup = urllib.request.Request(
            f"{config.ZENODO_API}/communities/{config.COMMUNITY_ID}",
            headers={"Authorization": f"Bearer {config.ZENODO_TOKEN}"},
        )
        with urllib.request.urlopen(lookup, timeout=30) as resp:
            community = json.loads(resp.read().decode())
        _COMMUNITY_UUID_CACHE = community["id"]
    return _COMMUNITY_UUID_CACHE
def get_community_requests(open_only: bool = True) -> list[dict]:
    """Collect community-inclusion requests addressed to the ICSAC community.

    Pages through ``{config.ZENODO_API}/user/requests`` (100 items per page,
    at most 20 pages) and keeps only the entries that

    * have ``type == "community-inclusion"``,
    * name the ICSAC community UUID as ``receiver.community``, and
    * are open, when ``open_only`` is true.

    /api/user/requests is used instead of /api/communities/<id>/requests
    because the latter needs a curator scope that personal access tokens
    cannot carry; the price is that filtering happens client-side.

    Args:
        open_only: When true (default), drop requests whose ``is_open`` is falsy.

    Returns:
        The matching request dicts in the order the API returned them. A
        ``URLError`` on any page is printed and ends paging early, so the
        result may be partial.
    """
    icsac_uuid = _resolve_community_uuid()
    auth_header = {"Authorization": f"Bearer {config.ZENODO_TOKEN}"}
    matched: list[dict] = []

    # 20 pages * 100 items = hard ceiling
    for page in range(1, 21):
        req = urllib.request.Request(
            f"{config.ZENODO_API}/user/requests?size=100&page={page}",
            headers=auth_header,
        )
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                body = json.loads(resp.read().decode())
        except urllib.error.URLError as e:
            print(f" Error fetching user requests page {page}: {e}")
            break

        hits = body.get("hits", {}).get("hits", [])
        if not hits:
            break

        matched.extend(
            r for r in hits
            if r.get("type") == "community-inclusion"
            and (r.get("receiver") or {}).get("community") == icsac_uuid
            and (not open_only or r.get("is_open"))
        )

        # A short page means there is nothing further to fetch.
        if len(hits) < 100:
            break
    return matched
def accept_request(request_id: str, comment: str = "",
                   review_data: dict | None = None) -> bool:
    """Accept a community inclusion request and publish it to the registry.

    If ``review_data`` is supplied and no explicit ``comment`` was given, an
    ICSAC-branded acceptance comment is rendered with
    ``email_render.render_accept_comment`` and posted with the action. The
    comment links to ``https://icsacinstitute.org/accepted/<record_id>`` when
    a record id can be determined, else to the site root.

    After a successful accept, the record id is looked up from the request
    and ``register_accepted_paper`` is run. A failure in that step does NOT
    fail the accept: it is printed, a /pain alert is fired, and the function
    still returns the accept result.

    Args:
        request_id: Zenodo request identifier.
        comment: Comment to post with the action; overrides auto-rendering.
        review_data: Review payload used to render the comment when
            ``comment`` is empty. Its ``record_id`` key, if present, is used
            for the landing URL.

    Returns:
        True if the Zenodo accept action succeeded, False otherwise.
    """
    if review_data and not comment:
        # Imported here so the module is only needed when a comment is rendered.
        import email_render

        record_id_hint = review_data.get("record_id") or _get_request_record_id(request_id)
        landing_url = (
            f"https://icsacinstitute.org/accepted/{record_id_hint}"
            if record_id_hint else "https://icsacinstitute.org"
        )
        comment = email_render.render_accept_comment(review_data, landing_url=landing_url)

    ok = _action_request(request_id, "accept", comment)
    if not ok:
        return ok

    # Bound before the try so the RedactionLeak handler below can always
    # reference it, even if the lookup itself were to raise.
    record_id: str | None = None
    try:
        record_id = _get_request_record_id(request_id)
        if record_id:
            register_accepted_paper(record_id)
        else:
            print(f" Could not derive record_id from request {request_id} — registry not updated.")
    except redaction.RedactionLeak as e:
        # The accept already happened on Zenodo; only publication is blocked.
        print(f" Accept succeeded on Zenodo BUT redaction leak blocked publication: {e}")
        _fire_pain(
            title="ICSAC Pipeline: Review Redaction Leak",
            body=(
                f"Zenodo accept succeeded for request {request_id} but the "
                f"redaction blocked publication: {e}. The Zenodo acceptance "
                f"is in effect; the landing page + public review are NOT "
                f"published. Inspect the raw review, edit out the leak, "
                f"then rerun `python3 redaction.py {record_id or '<id>'}` "
                f"and commit manually."
            ),
        )
    except Exception as e:
        # Deliberately broad: any registry failure is reported, never raised,
        # because the Zenodo accept cannot be rolled back from here.
        print(f" Accept succeeded on Zenodo but registry update failed: {e}")
        print(f" (paper is accepted; add to {REGISTRY_PATH} manually)")
        _fire_pain(
            title="ICSAC Pipeline: Registry Push Failed",
            body=(f"Zenodo accept succeeded for request {request_id} but the "
                  f"icsacinstitute.org landing-page registry update failed: {e}. "
                  f"Paper is accepted on Zenodo; add the entry to "
                  f"{REGISTRY_PATH} manually to publish the landing page."),
        )
    return ok
def _fire_pain(title: str, body: str) -> None:
    """Send a best-effort alert to the ntfy /pain endpoint; never raises on send.

    Reads the target URL from ``config.NTFY_PAIN_URL`` and does nothing when
    it is missing or empty. Otherwise POSTs ``body`` (UTF-8 encoded) with
    ``title`` in the ``Title`` header, using a 5-second timeout.

    Args:
        title: Alert title, sent as the ``Title`` request header.
        body: Alert text, sent as the request body.
    """
    url = getattr(config, "NTFY_PAIN_URL", "")
    if not url:
        return
    try:
        req = urllib.request.Request(url, data=body.encode())
        req.add_header("Title", title)
        # Use the response as a context manager so the connection is closed
        # right away instead of lingering until garbage collection.
        with urllib.request.urlopen(req, timeout=5):
            pass
    except Exception:
        # Alerting is best-effort by design: a failed alert must not mask or
        # replace the failure that triggered it.
        pass
def decline_request(request_id: str, comment: str = "",
                    review_data: dict | None = None,
                    review_summary: str = "",
                    specific_concerns: str = "",
                    verdict: str = "revise_and_resubmit") -> bool:
    """Decline a community inclusion request, optionally with a branded comment.

    The Zenodo action posted is always "decline". ``verdict`` only selects
    which comment template is rendered when ``review_data`` is given and
    ``comment`` is empty:

    * ``"reject"`` — ``email_render.render_scope_reject_comment(review_data)``;
      the summary and concerns arguments are not used.
    * anything else (default ``"revise_and_resubmit"``) —
      ``email_render.render_revise_and_resubmit_comment`` with
      ``review_summary`` and ``specific_concerns``.

    Args:
        request_id: Zenodo request identifier.
        comment: Explicit comment; when non-empty no template is rendered.
        review_data: Review payload passed to the template renderer.
        review_summary: Summary text for the revise-and-resubmit template.
        specific_concerns: Concerns text for the revise-and-resubmit template.
        verdict: Template selector, see above.

    Returns:
        The result of ``_action_request(request_id, "decline", comment)``.
    """
    if not comment and review_data:
        import email_render

        if verdict != "reject":
            comment = email_render.render_revise_and_resubmit_comment(
                review_data,
                review_summary=review_summary,
                specific_concerns=specific_concerns,
            )
        else:
            comment = email_render.render_scope_reject_comment(review_data)
    return _action_request(request_id, "decline", comment)
# Backwards-compatible alias for any caller still using the old name.
# It is the same function object as decline_request, so it takes the same
# arguments and still defaults to the "revise_and_resubmit" verdict.
reject_request = decline_request
def _action_request(request_id: str, action: str, comment: str) -> bool:
    """POST ``action`` (e.g. accept/decline) on a community request.

    The request body is ``{}`` when ``comment`` is empty, otherwise
    ``{"payload": {"content": comment}}``.

    Args:
        request_id: Zenodo request identifier.
        action: Action name, placed in the URL path.
        comment: Optional comment text to attach to the action.

    Returns:
        True when the response status is 200, 201 or 204. False for any other
        status, or when ``urlopen`` raises ``URLError`` (which is printed).
    """
    body = {"payload": {"content": comment}} if comment else {}
    req = urllib.request.Request(
        f"{config.ZENODO_API}/requests/{request_id}/actions/{action}",
        data=json.dumps(body).encode(),
        headers={
            "Authorization": f"Bearer {config.ZENODO_TOKEN}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            succeeded = resp.status in (200, 201, 204)
    except urllib.error.URLError as e:
        print(f" Error {action}ing request {request_id}: {e}")
        return False
    return succeeded
def post_request_comment(request_id: str, content: str,
                         fmt: str = "html") -> bool:
    """POST a standalone comment on a Zenodo request.

    Intended for the case where the curator already accepted or declined in
    the Zenodo UI and the branded message has to be added afterwards.

    Args:
        request_id: Zenodo request identifier.
        content: Comment text.
        fmt: Value sent as the payload's ``format`` field (default "html").
            NOTE(review): the original notes say Zenodo renders "html"
            payloads as rich text so rendered markdown passes through
            cleanly; that is not verifiable from this file.

    Returns:
        True when the response status is 200, 201 or 204. False for any other
        status, or when ``urlopen`` raises ``URLError`` (which is printed).
    """
    req = urllib.request.Request(
        f"{config.ZENODO_API}/requests/{request_id}/comments",
        data=json.dumps({"payload": {"content": content, "format": fmt}}).encode(),
        headers={
            "Authorization": f"Bearer {config.ZENODO_TOKEN}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            posted = resp.status in (200, 201, 204)
    except urllib.error.URLError as e:
        print(f" Error posting comment to {request_id}: {e}")
        return False
    return posted
def _get_request_record_id(request_id: str) -> str | None:
    """Return the record id referenced by a community request, if any.

    Fetches ``{config.ZENODO_API}/requests/<request_id>`` and reads the
    request's ``topic``: the ``record`` key is preferred, ``record_id`` is the
    fallback, and a dict value is unwrapped to its ``id`` field.

    Returns:
        The record id as a string, or None when the topic carries no usable
        id or when the fetch or parse raises (the error is printed).
    """
    req = urllib.request.Request(
        f"{config.ZENODO_API}/requests/{request_id}",
        headers={"Authorization": f"Bearer {config.ZENODO_TOKEN}"},
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            payload = json.loads(resp.read().decode())
        topic = payload.get("topic") or {}
        ref = topic.get("record") or topic.get("record_id")
        if isinstance(ref, dict):
            ref = ref.get("id")
        if not ref:
            return None
        return str(ref)
    except Exception as e:
        print(f" _get_request_record_id failed: {e}")
        return None
def _fetch_record(record_id: str) -> dict:
    """Fetch ``{config.ZENODO_API}/records/<record_id>`` and return the parsed JSON.

    The request is authenticated with ``config.ZENODO_TOKEN`` and uses a
    30-second timeout. Network and JSON errors are not caught here.
    """
    record_request = urllib.request.Request(
        f"{config.ZENODO_API}/records/{record_id}",
        headers={"Authorization": f"Bearer {config.ZENODO_TOKEN}"},
    )
    with urllib.request.urlopen(record_request, timeout=30) as resp:
        raw = resp.read()
    return json.loads(raw.decode())
def _extract_registry_entry(record_id: str, metadata: dict,
*, source: str = "zenodo-community") -> dict:
"""Shape a Zenodo record dict into the publications-registry schema.
Returns a proto-entry suitable for publications.upsert_entry β slug
+ accepted_date are filled in by the upsert helper.
"""
m = metadata.get("metadata", metadata)
# Zenodo returns the description as HTML (tags + entity-escaped glyphs).
# Strip tags first, THEN html.unescape so /—/& collapse
# to their literal characters β Astro's {} interpolation then renders
# them as proper text instead of leaking escape sequences to the reader.
raw_desc = m.get("description", "") or ""
abstract = html.unescape(re.sub(r"<[^>]+>", "", raw_desc)).strip()
abstract = re.sub(r"[ \t]+", " ", abstract) # collapse whitespace runs from former etc.
authors = []
for c in m.get("creators", []):
name = c.get("name", c.get("person_or_org", {}).get("name", "Unknown"))
if "," in name:
last, after = [s.strip() for s in name.split(",", 1)]
name = f"{after} {last}".strip() if after else last
authors.append(name)
return {
"record_id": str(record_id),
"title": m.get("title", "Untitled"),
"authors": authors or ["Unknown"],
"doi": m.get("doi", f"10.5281/zenodo.{record_id}"),
"abstract": abstract[:2000] if abstract else "",
"source": source,
"source_ref": f"https://zenodo.org/records/{record_id}",
}
def _publish_public_review(record_id: str) -> str | None:
    """Stage the redacted public copy of a record's internal review.

    The reviews directory is ``config.REVIEWS_DIR`` when set, otherwise a
    ``reviews`` folder next to this file. A review counts as present when a
    file there starts with ``<record_id>_``, ends in ``.md`` and is not the
    ``_review_quality_control.md`` audit file.

    Returns:
        Whatever ``redaction.publish_public_review`` returns (the written
        path, per the original notes), or None when the reviews directory or
        a matching review file does not exist.

    Raises:
        redaction.RedactionLeak: propagated from the redaction step, per the
            original notes; accept_request turns it into a /pain alert.
    """
    fallback_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "reviews"
    )
    reviews_dir = getattr(config, "REVIEWS_DIR", fallback_dir)
    if not os.path.isdir(reviews_dir):
        return None

    prefix = f"{record_id}_"
    has_review = any(
        name.startswith(prefix)
        and name.endswith(".md")
        and not name.endswith("_review_quality_control.md")
        for name in os.listdir(reviews_dir)
    )
    if not has_review:
        print(f" No internal review for {record_id}; public review not staged.")
        return None
    return redaction.publish_public_review(record_id, reviews_dir, WEBSITE_REPO)
def _publish_public_rqc(record_id: str) -> str | None:
    """Stage the redacted public copy of a record's internal RQC audit.

    The reviews directory is ``config.REVIEWS_DIR`` when set, otherwise a
    ``reviews`` folder next to this file.

    Returns:
        None when the reviews directory does not exist; otherwise whatever
        ``redaction.publish_public_rqc`` returns. Per the original notes that
        is the written path, or None for reviews with no RQC file.

    Raises:
        redaction.RedactionLeak: propagated from the redaction step, per the
            original notes, when a forbidden token survives. This includes a
            reference to the redacted injection_indicators dimension.
    """
    fallback_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "reviews"
    )
    reviews_dir = getattr(config, "REVIEWS_DIR", fallback_dir)
    if os.path.isdir(reviews_dir):
        return redaction.publish_public_rqc(record_id, reviews_dir, WEBSITE_REPO)
    return None
def register_accepted_paper(record_id: str) -> None:
    """Add an accepted paper to the registry, stage its public files, and push.

    Steps, in order:
      1. Fetch the Zenodo record and upsert the registry entry.
      2. Redact the internal review -> public review file (gated).
      3. Redact the internal RQC audit -> public RQC file (gated).
      4. Refresh the panel stats snapshot (non-fatal on failure).
      5. Hand all staged paths to ``publications.commit_and_push``.

    Paths from steps 2-4 may be None and are passed through as-is. Errors from
    steps 1-3 and 5 are not caught here, so they reach the caller.
    """
    record = _fetch_record(record_id)
    entry = publications.upsert_entry(
        _extract_registry_entry(record_id, record, source="zenodo-community")
    )

    review_path = _publish_public_review(record_id)
    rqc_path = _publish_public_rqc(record_id)
    stats_path = _refresh_panel_stats()

    staged_paths = [review_path, rqc_path, stats_path]
    publications.commit_and_push(
        message=f"accepted: {entry['title']} ({record_id})",
        extra_paths=staged_paths,
    )

    print(
        f" Registered paper {record_id} -> "
        f"{publications.publications_url(entry['slug'])} "
        f"(legacy /accepted/{record_id} also live)"
    )
    if review_path:
        print(f" Redacted review staged at {review_path}")
    if rqc_path:
        print(f" Redacted RQC staged at {rqc_path}")
    if stats_path:
        print(f" Panel stats snapshot refreshed at {stats_path}")
def _refresh_panel_stats() -> str | None:
    """Rewrite the stats snapshot at ``<WEBSITE_REPO>/src/data/stats.json``.

    Reads reviews from ``config.REVIEWS_DIR`` when set, otherwise from a
    ``reviews`` folder next to this file.

    Returns:
        Whatever ``stats_mod.write_stats`` returns, or None if it raises.
        The failure is printed and treated as non-fatal.
    """
    fallback_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "reviews"
    )
    reviews_dir = getattr(config, "REVIEWS_DIR", fallback_dir)
    snapshot_path = os.path.join(WEBSITE_REPO, "src/data/stats.json")
    try:
        written = stats_mod.write_stats(reviews_dir, snapshot_path)
    except Exception as e:
        print(f" panel stats refresh failed (non-fatal): {e}")
        return None
    return written
|