ICSAC Claude Opus 4.7 (1M context) committed on
Commit
afc6e1c
·
1 Parent(s): 470bcea

tools: wrap-paper-pdf.py — ICSAC cover prepend for accepted papers

Browse files

New tool at tools/wrap-paper-pdf.py that takes an accepted paper from
icsacinstitute.org/src/data/accepted.json, renders a 1-page Letter-trim
ICSAC cover via WeasyPrint, and prepends it to the author's source PDF
via pypdf. Output: <record_id>-icsac.pdf alongside the original.

Cover content (designed to never overlay author work):
- Logo + "Institute for Complexity Science and Advanced Computing"
wordmark in bronze caps
- Paper title, authors
- DOI, accepted date, license (CC-BY 4.0 default), submission ID
- "Peer-reviewed at Persistence" venue marker in bronze callout
- Editorial record URL (icsacinstitute.org/publications/<slug>)
- Gray footer note clarifying the cover is added by ICSAC, does not
modify the author's manuscript, and the author retains copyright

Usage:
wrap-paper-pdf.py --slug <slug>
wrap-paper-pdf.py --all
wrap-paper-pdf.py --slug X --fetch # download source from Zenodo

Backfilled all 5 currently-published papers; outputs committed to
icsacinstitute.org repo at public/papers/<rid>-icsac.pdf.

requirements.txt gains pypdf==6.11.0.

TODO (next): wire the worker to call wrap-paper-pdf.py after a curator
verdict so future accepts auto-generate the branded PDF + auto-commit
to icsacinstitute.org.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. requirements.txt +1 -0
  2. tools/wrap-paper-pdf.py +345 -0
requirements.txt CHANGED
@@ -11,3 +11,4 @@ fastapi>=0.115,<1.0
11
  uvicorn[standard]>=0.30,<1.0
12
  python-multipart>=0.0.20,<1.0
13
  weasyprint==68.1
 
 
11
  uvicorn[standard]>=0.30,<1.0
12
  python-multipart>=0.0.20,<1.0
13
  weasyprint==68.1
14
+ pypdf==6.11.0
tools/wrap-paper-pdf.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Wrap an ICSAC-accepted paper with a 1-page ICSAC cover.
3
+
4
+ Reads paper metadata from icsacinstitute.org/src/data/accepted.json,
5
+ renders a Letter-trim cover via WeasyPrint, and prepends it to the
6
+ author's PDF via pypdf. Output is written alongside the source PDF
7
+ with an `-icsac.pdf` suffix.
8
+
9
+ The author's manuscript is NEVER modified — the cover is a new first
10
+ page only. The unwrapped PDF stays in place at <record_id>.pdf so
11
+ Scholar / Zenodo / archival tooling can still grab the bare manuscript.
12
+
13
+ Usage:
14
+ wrap-paper-pdf.py --slug the-dynamic-existence-threshold
15
+ wrap-paper-pdf.py --all
16
+ wrap-paper-pdf.py --slug X --fetch # download source from Zenodo if missing
17
+ """
18
+ from __future__ import annotations
19
+ import argparse
20
+ import io
21
+ import json
22
+ import os
23
+ import sys
24
+ import urllib.request
25
+ from datetime import date
26
+ from pathlib import Path
27
+
28
+ import weasyprint
29
+ import pypdf
30
+
31
# Root of the icsacinstitute.org site checkout. The default is the original
# deployment path; set the ICSAC_SITE environment variable to point the tool
# at a checkout somewhere else without editing this file.
SITE = Path(
    os.environ.get("ICSAC_SITE")
    or "/home/orangepi/Desktop/icsac/icsacinstitute.org"
)
# Logo image embedded in the cover header.
LOGO = SITE / "public" / "logo_trans_ICSAC_notext.png"
# JSON file holding the accepted-paper metadata records.
ACCEPTED = SITE / "src" / "data" / "accepted.json"
# Directory holding both the source PDFs and the wrapped "-icsac.pdf" outputs.
OUT_DIR = SITE / "public" / "papers"

# License default per project policy
DEFAULT_LICENSE = "CC-BY 4.0"
38
+
39
+
40
def _accepted_us(iso: str) -> str:
    """Convert an ISO date to the US ``MM/DD/YYYY`` form used on the site.

    2026-04-19 → 04/19/2026. A trailing time component
    (``2026-04-19T12:30:00Z`` or ``2026-04-19 08:00:00-05:00``) is ignored,
    so a full timestamp does not leak into the day field or break parsing.

    Args:
        iso: ``YYYY-MM-DD`` date string, optionally followed by a time.

    Returns:
        The reformatted date, or ``""`` when *iso* is empty or None.

    Raises:
        ValueError: if the date part is not exactly three ``-``-separated
            fields.
    """
    if not iso:
        return ""
    # Keep only the calendar date: everything before a "T" or whitespace.
    tokens = iso.replace("T", " ").split()
    fields = tokens[0].split("-") if tokens else []
    if len(fields) != 3:
        raise ValueError(f"expected an ISO date (YYYY-MM-DD), got {iso!r}")
    y, m, d = fields
    return f"{m}/{d}/{y}"
46
+
47
+
48
def _authors_str(authors: list[str]) -> str:
    """Format the author list for the cover byline.

    An empty (or missing) list yields "Anonymous". Up to three names are
    comma-joined in full. Longer lists show the first two names followed by
    "et al." and the total author count.
    """
    count = len(authors) if authors else 0
    if count == 0:
        return "Anonymous"
    if count > 3:
        lead = ", ".join(authors[:2])
        return lead + f", et al. ({count} authors)"
    return ", ".join(authors)
54
+
55
+
56
def _load_papers() -> list[dict]:
    """Load the accepted-paper records from the ACCEPTED JSON file.

    The file may be either a bare JSON list of paper objects or a JSON
    object holding that list under a ``"papers"`` key.

    Returns:
        The list of paper dicts (empty when the object has no ``"papers"``
        key).
    """
    # JSON is UTF-8; decode explicitly so non-ASCII titles and author names
    # do not depend on the platform's locale encoding.
    data = json.loads(ACCEPTED.read_text(encoding="utf-8"))
    return data if isinstance(data, list) else data.get("papers", [])
59
+
60
+
61
def _find(slug: str) -> dict:
    """Return the first accepted-paper record whose ``slug`` equals *slug*.

    Exits the program via SystemExit with a message when nothing matches.
    """
    match = next(
        (rec for rec in _load_papers() if rec.get("slug") == slug),
        None,
    )
    if match is None:
        raise SystemExit(f"slug not found in accepted.json: {slug}")
    return match
66
+
67
+
68
def _zenodo_fetch_pdf(record_id: str, dest: Path) -> None:
    """Download the primary PDF file from a Zenodo record to *dest*.

    The record's metadata is read from the Zenodo records API. Among the
    files whose name ends in ``.pdf``, the largest one is downloaded.

    The body is streamed to a sibling ``<dest>.part`` file and renamed onto
    *dest* only after the transfer completes. An interrupted download
    therefore never leaves a truncated PDF at *dest* that a later run would
    mistake for a valid source.

    Args:
        record_id: Zenodo record identifier.
        dest: Path the PDF is saved to.

    Raises:
        SystemExit: if the record lists no PDF file.
        urllib.error.URLError: on network or HTTP failure (propagated).
    """
    dest = Path(dest)
    api = f"https://zenodo.org/api/records/{record_id}"
    print(f" fetching Zenodo metadata: {api}")
    with urllib.request.urlopen(api, timeout=30) as r:
        meta = json.loads(r.read())
    # "files" may be absent or null; treat both as "no files".
    files = meta.get("files") or []
    pdfs = [f for f in files if f.get("key", "").lower().endswith(".pdf")]
    if not pdfs:
        raise SystemExit(f"no PDF file on Zenodo record {record_id}")
    # Pick the largest (most likely the manuscript over supplementary)
    pdf = max(pdfs, key=lambda f: f.get("size", 0))
    url = pdf["links"]["self"]
    print(f" downloading: {url}")

    partial = dest.with_name(dest.name + ".part")
    try:
        with urllib.request.urlopen(url, timeout=120) as r, open(partial, "wb") as out:
            # Stream in chunks instead of holding the whole PDF in memory.
            while True:
                chunk = r.read(1 << 16)
                if not chunk:
                    break
                out.write(chunk)
        # Atomic on the same filesystem: dest is either absent or complete.
        partial.replace(dest)
    except BaseException:
        # Do not leave a half-written file behind (also on Ctrl-C).
        try:
            partial.unlink()
        except FileNotFoundError:
            pass
        raise
    print(f" saved → {dest} ({dest.stat().st_size} bytes)")
85
+
86
+
87
# Stylesheet for the one-page cover. _render_cover embeds it verbatim in the
# cover's <style> element. The @page rule sets Letter size and the margins;
# the .note footer is pinned with `position: fixed`.
_COVER_CSS = """
@page { size: Letter; margin: 0.75in 0.85in 1.1in 0.85in; }

body {
font-family: "EB Garamond", Georgia, "Times New Roman", serif;
font-size: 11pt;
line-height: 1.5;
color: #0A1929;
margin: 0;
}

.header {
display: flex;
align-items: center;
gap: 14pt;
padding-bottom: 12pt;
border-bottom: 1pt solid #8B5E3C;
}

.header img {
width: 44pt;
height: 44pt;
}

.header-text {
display: flex;
flex-direction: column;
}

.header-org {
font-family: "Inter", "Helvetica Neue", Arial, sans-serif;
font-size: 9.5pt;
letter-spacing: 1.2pt;
text-transform: uppercase;
color: #8B5E3C;
font-weight: 600;
line-height: 1.3;
}

.header-tag {
font-family: "Inter", "Helvetica Neue", Arial, sans-serif;
font-size: 8.5pt;
color: #6E5C42;
margin-top: 3pt;
letter-spacing: 0.3pt;
}

.title-block {
margin: 32pt 0 0;
}

.title {
font-size: 20pt;
font-weight: 600;
line-height: 1.22;
margin: 0 0 10pt;
color: #0A1929;
}

.authors {
font-size: 11.5pt;
color: #4B5563;
font-style: italic;
margin: 0 0 26pt;
}

.meta {
display: grid;
grid-template-columns: 115pt 1fr;
gap: 7pt 16pt;
font-size: 10pt;
margin-bottom: 24pt;
}

.meta dt {
font-family: "Inter", "Helvetica Neue", Arial, sans-serif;
color: #8B5E3C;
font-weight: 600;
letter-spacing: 0.6pt;
text-transform: uppercase;
font-size: 8pt;
margin: 0;
}

.meta dd {
color: #1f1f1f;
margin: 0;
word-break: break-word;
}

.meta dd a {
color: #1f1f1f;
text-decoration: none;
}

.venue {
padding: 12pt 14pt;
background-color: rgba(139, 94, 60, 0.06);
border-left: 2pt solid #8B5E3C;
font-size: 10pt;
line-height: 1.55;
margin-bottom: 22pt;
}

.venue strong {
color: #8B5E3C;
}

.record {
font-family: "Inter", "Helvetica Neue", Arial, sans-serif;
font-size: 9pt;
color: #4B5563;
margin-bottom: 18pt;
}

.record-label {
letter-spacing: 0.6pt;
text-transform: uppercase;
font-size: 8pt;
color: #8B5E3C;
font-weight: 600;
display: block;
margin-bottom: 3pt;
}

.record a {
color: #8B5E3C;
text-decoration: none;
word-break: break-all;
}

.note {
position: fixed;
bottom: 0.5in;
left: 0.85in;
right: 0.85in;
padding-top: 8pt;
border-top: 0.5pt solid #ccc;
font-family: "Inter", "Helvetica Neue", Arial, sans-serif;
font-size: 7.5pt;
line-height: 1.45;
color: #888;
}
"""
231
+
232
+
233
def _doi_url(doi: str) -> str:
    """Return the https://doi.org/ resolver URL for *doi*, or "" if empty.

    Accepts a bare DOI, a ``doi:``-prefixed DOI, or an existing doi.org URL.
    The prefix is removed as a prefix; ``str.lstrip("doi:")`` would strip a
    *set of characters* instead.
    """
    bare = (doi or "").strip()
    lowered = bare.lower()
    for prefix in (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ):
        if lowered.startswith(prefix):
            bare = bare[len(prefix):].strip()
            break
    return f"https://doi.org/{bare}" if bare else ""


def _render_cover(paper: dict) -> bytes:
    """Render the one-page ICSAC cover for *paper* and return it as PDF bytes.

    Args:
        paper: Accepted-paper record. ``slug`` and ``title`` are required;
            ``authors``, ``doi``, ``accepted_date`` and ``sub_id`` are
            optional.

    Returns:
        The cover PDF produced by WeasyPrint.

    Raises:
        KeyError: if ``slug`` or ``title`` is missing from *paper*.
    """
    slug = paper["slug"]
    title = paper["title"]
    authors = paper.get("authors") or []
    doi = paper.get("doi") or ""
    accepted = paper.get("accepted_date") or ""
    # str(): a numeric submission ID in the JSON must not break escaping.
    sub_id = str(paper.get("sub_id") or "—")

    def attr(value: str) -> str:
        # Attribute values also need the double quote escaped.
        return _escape(value).replace('"', "&quot;")

    doi_url = _doi_url(doi)
    record_url = f"https://icsacinstitute.org/publications/{slug}"

    # Logo as file:// URI so WeasyPrint loads it without fetching.
    # as_uri() percent-encodes spaces and other special characters.
    logo_uri = LOGO.resolve().as_uri()

    # Link the DOI when there is one; the ".meta dd a" rule keeps it looking
    # like plain text while making it clickable in the PDF.
    if doi_url:
        doi_html = f'<a href="{attr(doi_url)}">{_escape(doi)}</a>'
    else:
        doi_html = _escape(doi)

    # NOTE(review): the License row always shows DEFAULT_LICENSE; no per-paper
    # license field is read here. Confirm whether accepted.json carries one.
    html = f"""<!DOCTYPE html>
<html><head><meta charset="utf-8"><title>{_escape(title)}</title>
<style>{_COVER_CSS}</style>
</head><body>
<div class="header">
<img src="{attr(logo_uri)}" alt="" />
<div class="header-text">
<div class="header-org">Institute for Complexity Science and Advanced Computing</div>
<div class="header-tag">Open editorial record &middot; No fees &middot; Author retains copyright</div>
</div>
</div>

<div class="title-block">
<div class="title">{_escape(title)}</div>
<div class="authors">{_escape(_authors_str(authors))}</div>
</div>

<dl class="meta">
<dt>DOI</dt><dd>{doi_html}</dd>
<dt>Accepted</dt><dd>{_escape(_accepted_us(accepted))}</dd>
<dt>License</dt><dd>{DEFAULT_LICENSE}</dd>
<dt>Submission ID</dt><dd>{_escape(sub_id)}</dd>
</dl>

<div class="venue">
<strong>Peer-reviewed at <em>Persistence</em></strong> &mdash; the Institute's annual transdisciplinary journal. The full editorial record &mdash; AI panel reviews, Review Quality Control audit, and curator verdict &mdash; is publicly available at the URL below.
</div>

<div class="record">
<span class="record-label">Editorial record</span>
<a href="{attr(record_url)}">{_escape(record_url)}</a>
</div>

<div class="note">
This cover page is added by the Institute for Complexity Science and Advanced Computing. It does not modify the author's manuscript that follows. The author retains copyright under the license indicated; ICSAC publishes the editorial record but does not claim ownership of the work.
</div>
</body></html>"""
    return weasyprint.HTML(string=html).write_pdf()
286
+
287
+
288
def _escape(s: str) -> str:
    """Escape *s* for inclusion in HTML text or a quoted attribute value.

    ``&``, ``<``, ``>`` and both quote characters are replaced by entities.
    Falsy input (``None``, ``""``) yields ``""``. Other non-string values
    are converted with ``str()`` first rather than raising.

    Args:
        s: Text to escape.

    Returns:
        The HTML-safe string.
    """
    # Function-scope import keeps the file's import block untouched.
    from html import escape as _html_escape

    return _html_escape(str(s or ""), quote=True)
290
+
291
+
292
def wrap_one(paper: dict, fetch: bool = False) -> Path:
    """Build ``<record_id>-icsac.pdf``: the ICSAC cover followed by the source PDF.

    The source manuscript is expected at ``OUT_DIR/<record_id>.pdf``. When it
    is missing it is downloaded from Zenodo if *fetch* is true; otherwise the
    program exits.

    Args:
        paper: Accepted-paper record; needs ``slug`` and ``record_id``.
        fetch: Download the source PDF from Zenodo when it is not on disk.

    Returns:
        Path of the wrapped PDF that was written.

    Raises:
        SystemExit: if the record has no ``record_id``, or the source PDF is
            missing and *fetch* is false.
    """
    slug = paper["slug"]
    record_id = paper.get("record_id")
    if not record_id:
        raise SystemExit(f"paper has no record_id (upload route not yet supported): {slug}")

    manuscript = OUT_DIR / f"{record_id}.pdf"
    if not manuscript.exists():
        if not fetch:
            raise SystemExit(f"source PDF missing: {manuscript} (use --fetch to download)")
        print(f" source PDF missing, fetching from Zenodo: {manuscript}")
        _zenodo_fetch_pdf(record_id, manuscript)

    print(f" rendering cover for {slug}…")
    cover_stream = io.BytesIO(_render_cover(paper))

    wrapped = OUT_DIR / f"{record_id}-icsac.pdf"
    merger = pypdf.PdfWriter()
    # Cover first, then the untouched manuscript pages.
    for part in (cover_stream, str(manuscript)):
        merger.append(fileobj=part)
    with open(wrapped, "wb") as handle:
        merger.write(handle)
    print(f" wrote → {wrapped} ({wrapped.stat().st_size} bytes)")
    return wrapped
317
+
318
+
319
def main():
    """Command-line entry point: wrap one paper (--slug) or all of them (--all).

    With --all, records that have no ``record_id`` are reported and skipped.
    When both --all and --slug are given, --all takes precedence.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--slug")
    parser.add_argument("--all", action="store_true")
    parser.add_argument(
        "--fetch",
        action="store_true",
        help="Download source PDF from Zenodo if missing",
    )
    opts = parser.parse_args()

    OUT_DIR.mkdir(parents=True, exist_ok=True)

    if opts.all:
        for entry in _load_papers():
            name = entry["slug"]
            if entry.get("record_id"):
                print(f"\n→ {name}")
                wrap_one(entry, fetch=opts.fetch)
            else:
                print(f"SKIP {name} (no record_id)")
        return

    if not opts.slug:
        # parser.error() prints usage and exits; nothing below runs.
        parser.error("specify --slug or --all")

    print(f"→ {opts.slug}")
    wrap_one(_find(opts.slug), fetch=opts.fetch)
342
+
343
+
344
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()