Commit ·
dbfc012
1
Parent(s): 68be85f
feat: overlay-based header+label on content pages only (pypdf merge)
Browse files
- Render base PDF with no header/label via Playwright
- Build single-page overlay PDF with header image + label
- Stamp overlay onto content pages (skip cover/toc + final image pages)
- Header flush at top edge, label at right edge (1.65cm x 23.42cm)
- Pages 1-2: clean (no header, no label)
- Final image pages: clean (no header, no label)
- Added pypdf>=4.0.0 dependency
- app/services/pdf_renderer.py +157 -63
- app/static/css/print.css +11 -16
- requirements.txt +1 -0
app/services/pdf_renderer.py
CHANGED
|
@@ -63,6 +63,12 @@ async def render_pdf_from_html(
|
|
| 63 |
) -> bytes:
|
| 64 |
"""Render HTML string to PDF bytes using Playwright Chromium.
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
Args:
|
| 67 |
html_content: Complete HTML document string.
|
| 68 |
format: Page format (default A4).
|
|
@@ -96,91 +102,59 @@ async def render_pdf_from_html(
|
|
| 96 |
file_url = Path(tmp_path).as_uri()
|
| 97 |
await page.goto(file_url, wait_until="networkidle", timeout=wait_timeout)
|
| 98 |
|
| 99 |
-
# Wait for fonts to be fully loaded
|
| 100 |
-
await page.evaluate(""
|
| 101 |
-
() => document.fonts.ready
|
| 102 |
-
""")
|
| 103 |
-
|
| 104 |
-
# Wait for all images to complete loading
|
| 105 |
await page.evaluate("""
|
| 106 |
() => {
|
| 107 |
const images = Array.from(document.querySelectorAll('img'));
|
| 108 |
return Promise.all(images.map(img => {
|
| 109 |
if (img.complete) return Promise.resolve();
|
| 110 |
-
return new Promise(
|
| 111 |
-
img.addEventListener('load',
|
| 112 |
-
img.addEventListener('error',
|
| 113 |
});
|
| 114 |
}));
|
| 115 |
}
|
| 116 |
""")
|
| 117 |
-
|
| 118 |
-
# Small delay for final layout settle
|
| 119 |
await page.wait_for_timeout(500)
|
| 120 |
|
| 121 |
-
#
|
| 122 |
-
label_info = await page.evaluate("""
|
| 123 |
-
() => {
|
| 124 |
-
const el = document.querySelector('.hb-right-label');
|
| 125 |
-
if (!el) return 'NO .hb-right-label element found';
|
| 126 |
-
const img = el.querySelector('img');
|
| 127 |
-
const src = img ? img.src.substring(0, 80) : 'NO img';
|
| 128 |
-
const loaded = img ? img.complete : false;
|
| 129 |
-
const natW = img ? img.naturalWidth : 0;
|
| 130 |
-
const natH = img ? img.naturalHeight : 0;
|
| 131 |
-
return `label OK: loaded=${loaded}, natural=${natW}x${natH}, src=${src}...`;
|
| 132 |
-
}
|
| 133 |
-
""")
|
| 134 |
-
logger.info("Label check: %s", label_info)
|
| 135 |
-
|
| 136 |
-
# Extract header image src from DOM for Playwright header_template
|
| 137 |
header_src = await page.evaluate("""
|
| 138 |
() => {
|
| 139 |
const img = document.querySelector('.page-header img');
|
| 140 |
return img ? img.src : '';
|
| 141 |
}
|
| 142 |
""")
|
| 143 |
-
|
| 144 |
-
# Build header template — image scales to fit the top margin zone.
|
| 145 |
-
# Playwright margin.top = 2.54cm; header image fills that entire zone.
|
| 146 |
-
if header_src:
|
| 147 |
-
header_tpl = (
|
| 148 |
-
'<div style="width:100%;height:100%;margin:0;padding:0;overflow:hidden;">'
|
| 149 |
-
f'<img src="{header_src}" '
|
| 150 |
-
'style="display:block;width:100%;height:100%;margin:0;padding:0;object-fit:fill;" />'
|
| 151 |
-
'</div>'
|
| 152 |
-
)
|
| 153 |
-
else:
|
| 154 |
-
header_tpl = '<span></span>'
|
| 155 |
-
|
| 156 |
-
# Hide in-page header and footer divs before PDF generation.
|
| 157 |
-
# .hb-right-label stays visible (position:fixed, repeats every page).
|
| 158 |
-
await page.evaluate("""
|
| 159 |
() => {
|
| 160 |
-
document.
|
| 161 |
-
|
| 162 |
}
|
| 163 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
-
# Hide
|
| 166 |
-
# These pages expand to fill the full page so the label
|
| 167 |
-
# should not be visible on them.
|
| 168 |
await page.evaluate("""
|
| 169 |
() => {
|
| 170 |
-
document.querySelectorAll('.
|
| 171 |
-
|
| 172 |
-
overlay.style.cssText = 'position:absolute;top:0;right:0;width:2cm;height:100%;background:#fff;z-index:20;';
|
| 173 |
-
el.style.position = 'relative';
|
| 174 |
-
el.appendChild(overlay);
|
| 175 |
-
});
|
| 176 |
}
|
| 177 |
""")
|
| 178 |
|
| 179 |
-
#
|
| 180 |
-
|
| 181 |
-
# Footer: page number rendered by footer_template in the bottom margin.
|
| 182 |
-
# Content: sits inside the margin box — no CSS padding needed.
|
| 183 |
-
pdf_bytes = await page.pdf(
|
| 184 |
format=format,
|
| 185 |
print_background=print_background,
|
| 186 |
prefer_css_page_size=prefer_css_page_size,
|
|
@@ -191,7 +165,7 @@ async def render_pdf_from_html(
|
|
| 191 |
"left": "2.54cm",
|
| 192 |
},
|
| 193 |
display_header_footer=True,
|
| 194 |
-
header_template=
|
| 195 |
footer_template=(
|
| 196 |
'<div style="width:100%;text-align:center;font-size:10px;'
|
| 197 |
'font-family:Century Gothic,Segoe UI,sans-serif;color:#333;'
|
|
@@ -199,12 +173,132 @@ async def render_pdf_from_html(
|
|
| 199 |
'<span class="pageNumber"></span></div>'
|
| 200 |
),
|
| 201 |
)
|
| 202 |
-
|
| 203 |
-
logger.info("PDF rendered via Playwright, size=%d bytes", len(pdf_bytes))
|
| 204 |
-
return pdf_bytes
|
| 205 |
|
| 206 |
finally:
|
| 207 |
os.unlink(tmp_path)
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
finally:
|
| 210 |
await context.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
) -> bytes:
|
| 64 |
"""Render HTML string to PDF bytes using Playwright Chromium.
|
| 65 |
|
| 66 |
+
Generates a base PDF (content only, no decorative header/label),
|
| 67 |
+
then creates a one-page overlay with the header image and right-side
|
| 68 |
+
label, and stamps the overlay onto content pages (page 3 → last
|
| 69 |
+
content page) using pypdf. Pages 1-2 (cover/TOC) and trailing
|
| 70 |
+
full-page image pages get no overlay.
|
| 71 |
+
|
| 72 |
Args:
|
| 73 |
html_content: Complete HTML document string.
|
| 74 |
format: Page format (default A4).
|
|
|
|
| 102 |
file_url = Path(tmp_path).as_uri()
|
| 103 |
await page.goto(file_url, wait_until="networkidle", timeout=wait_timeout)
|
| 104 |
|
| 105 |
+
# Wait for fonts and images to be fully loaded
|
| 106 |
+
await page.evaluate("() => document.fonts.ready")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
await page.evaluate("""
|
| 108 |
() => {
|
| 109 |
const images = Array.from(document.querySelectorAll('img'));
|
| 110 |
return Promise.all(images.map(img => {
|
| 111 |
if (img.complete) return Promise.resolve();
|
| 112 |
+
return new Promise(r => {
|
| 113 |
+
img.addEventListener('load', r);
|
| 114 |
+
img.addEventListener('error', r);
|
| 115 |
});
|
| 116 |
}));
|
| 117 |
}
|
| 118 |
""")
|
|
|
|
|
|
|
| 119 |
await page.wait_for_timeout(500)
|
| 120 |
|
| 121 |
+
# ── Collect info from DOM before hiding elements ──
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
header_src = await page.evaluate("""
|
| 123 |
() => {
|
| 124 |
const img = document.querySelector('.page-header img');
|
| 125 |
return img ? img.src : '';
|
| 126 |
}
|
| 127 |
""")
|
| 128 |
+
label_src = await page.evaluate("""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
() => {
|
| 130 |
+
const img = document.querySelector('.hb-right-label img');
|
| 131 |
+
return img ? img.src : '';
|
| 132 |
}
|
| 133 |
""")
|
| 134 |
+
num_bottom_pages = await page.evaluate("""
|
| 135 |
+
() => document.querySelectorAll('.fullpage-img-wrap').length
|
| 136 |
+
""")
|
| 137 |
+
# Cover page count: cover + TOC image (each is a .cover-page)
|
| 138 |
+
num_cover_pages = await page.evaluate("""
|
| 139 |
+
() => document.querySelectorAll('.cover-page').length
|
| 140 |
+
""")
|
| 141 |
+
|
| 142 |
+
logger.info(
|
| 143 |
+
"Overlay info: header=%s, label=%s, covers=%d, bottoms=%d",
|
| 144 |
+
bool(header_src), bool(label_src),
|
| 145 |
+
num_cover_pages, num_bottom_pages,
|
| 146 |
+
)
|
| 147 |
|
| 148 |
+
# ── Hide header, footer, and label from the base PDF ──
|
|
|
|
|
|
|
| 149 |
await page.evaluate("""
|
| 150 |
() => {
|
| 151 |
+
document.querySelectorAll('.page-header, .page-footer, .hb-right-label')
|
| 152 |
+
.forEach(el => el.style.display = 'none');
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
""")
|
| 155 |
|
| 156 |
+
# ── Render BASE PDF (no header, no label) ──
|
| 157 |
+
base_pdf = await page.pdf(
|
|
|
|
|
|
|
|
|
|
| 158 |
format=format,
|
| 159 |
print_background=print_background,
|
| 160 |
prefer_css_page_size=prefer_css_page_size,
|
|
|
|
| 165 |
"left": "2.54cm",
|
| 166 |
},
|
| 167 |
display_header_footer=True,
|
| 168 |
+
header_template='<span></span>',
|
| 169 |
footer_template=(
|
| 170 |
'<div style="width:100%;text-align:center;font-size:10px;'
|
| 171 |
'font-family:Century Gothic,Segoe UI,sans-serif;color:#333;'
|
|
|
|
| 173 |
'<span class="pageNumber"></span></div>'
|
| 174 |
),
|
| 175 |
)
|
| 176 |
+
logger.info("Base PDF rendered, size=%d bytes", len(base_pdf))
|
|
|
|
|
|
|
| 177 |
|
| 178 |
finally:
|
| 179 |
os.unlink(tmp_path)
|
| 180 |
|
| 181 |
+
# ── Build overlay (header + label) and stamp onto content pages ──
|
| 182 |
+
if not header_src and not label_src:
|
| 183 |
+
logger.info("No header or label to overlay, returning base PDF")
|
| 184 |
+
return base_pdf
|
| 185 |
+
|
| 186 |
+
overlay_pdf = await _build_overlay_pdf(
|
| 187 |
+
page, header_src, label_src, format, wait_timeout
|
| 188 |
+
)
|
| 189 |
+
|
| 190 |
+
merged = _stamp_overlay(
|
| 191 |
+
base_pdf, overlay_pdf,
|
| 192 |
+
skip_front=num_cover_pages,
|
| 193 |
+
skip_back=num_bottom_pages,
|
| 194 |
+
)
|
| 195 |
+
logger.info("Final PDF with overlay, size=%d bytes", len(merged))
|
| 196 |
+
return merged
|
| 197 |
+
|
| 198 |
finally:
|
| 199 |
await context.close()
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
async def _build_overlay_pdf(
    page, header_src: str, label_src: str,
    format: str, timeout: int,
) -> bytes:
    """Render an overlay PDF holding the header image and/or right-side label.

    A minimal HTML document is written to a temporary file, ``page`` is
    navigated to it, and the result is printed to PDF with zero margins and
    no Playwright header/footer. The document background is declared
    transparent in CSS so that only the images are drawn.

    Args:
        page: Browser page to reuse. It is navigated to the overlay document,
            so whatever it displayed before is replaced.
        header_src: URL for the header image; an empty string omits the header.
        label_src: URL for the label image; an empty string omits the label.
        format: Paper format forwarded to ``page.pdf``.
        timeout: Navigation timeout forwarded to ``page.goto``.

    Returns:
        The rendered overlay PDF as bytes.
    """
    import html

    parts = []
    if header_src:
        # The src values originate from the rendered DOM and may contain
        # characters such as '"' or '&' (e.g. inline SVG data: URLs), so they
        # must be escaped before being placed inside a quoted attribute.
        safe_header_src = html.escape(header_src, quote=True)
        parts.append(
            '<div style="position:fixed;top:0;left:0;width:100%;height:2.54cm;'
            'margin:0;padding:0;overflow:hidden;z-index:1;">'
            f'<img src="{safe_header_src}" style="display:block;width:100%;'
            'height:100%;object-fit:fill;margin:0;padding:0;" /></div>'
        )
    if label_src:
        safe_label_src = html.escape(label_src, quote=True)
        parts.append(
            '<div style="position:fixed;top:0;right:0;width:1.65cm;'
            'height:23.42cm;z-index:2;overflow:hidden;">'
            f'<img src="{safe_label_src}" style="display:block;width:100%;'
            'height:100%;object-fit:fill;" /></div>'
        )

    # NOTE(review): the stylesheet hard-codes an A4 page and the PDF call
    # below sets prefer_css_page_size=True, so `format` presumably has no
    # effect on the overlay size — confirm before using non-A4 formats.
    overlay_html = (
        '<!doctype html><html><head><meta charset="utf-8">'
        '<style>'
        '@page{size:A4;margin:0}'
        'html,body{margin:0;padding:0;background:transparent}'
        '</style></head><body>'
        + '\n'.join(parts)
        + '<div style="height:297mm;width:210mm;"></div>'
        '</body></html>'
    )

    # delete=False: the file has to outlive the `with` block so the browser
    # can load it by URL; it is removed in the `finally` below.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".html", delete=False, encoding="utf-8",
    ) as tmp:
        tmp.write(overlay_html)
        tmp_path = tmp.name

    try:
        await page.goto(
            Path(tmp_path).as_uri(),
            wait_until="networkidle",
            timeout=timeout,
        )
        await page.evaluate("() => document.fonts.ready")
        # Resolve once every <img> has either loaded or failed, so a broken
        # image cannot stall the render.
        await page.evaluate("""
            () => {
                const images = Array.from(document.querySelectorAll('img'));
                return Promise.all(images.map(img => {
                    if (img.complete) return Promise.resolve();
                    return new Promise(r => {
                        img.addEventListener('load', r);
                        img.addEventListener('error', r);
                    });
                }));
            }
        """)
        # Short pause to let layout settle before printing.
        await page.wait_for_timeout(300)

        overlay_bytes = await page.pdf(
            format=format,
            print_background=True,
            prefer_css_page_size=True,
            margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
            display_header_footer=False,
        )
        logger.info("Overlay PDF rendered, size=%d bytes", len(overlay_bytes))
        return overlay_bytes
    finally:
        os.unlink(tmp_path)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def _stamp_overlay(
    base_pdf: bytes,
    overlay_pdf: bytes,
    skip_front: int = 2,
    skip_back: int = 4,
) -> bytes:
    """Stamp the first overlay page onto the middle pages of a PDF.

    The first ``skip_front`` pages and the last ``skip_back`` pages of
    ``base_pdf`` are copied through unchanged; every page in between has
    page 0 of ``overlay_pdf`` merged onto it.

    Args:
        base_pdf: Bytes of the document to decorate.
        overlay_pdf: Bytes of the overlay document; only its first page is used.
        skip_front: Number of leading pages left without the overlay.
        skip_back: Number of trailing pages left without the overlay.

    Returns:
        Bytes of the resulting PDF, with the same page order as ``base_pdf``.
    """
    import io
    from pypdf import PdfReader, PdfWriter

    source = PdfReader(io.BytesIO(base_pdf))
    stamp_reader = PdfReader(io.BytesIO(overlay_pdf))
    stamp = stamp_reader.pages[0]
    output = PdfWriter()

    # Half-open index range [content_start, content_stop) of stamped pages.
    content_start = skip_front
    content_stop = len(source.pages) - skip_back

    for index, current in enumerate(source.pages):
        if content_start <= index < content_stop:
            current.merge_page(stamp)
        output.add_page(current)

    sink = io.BytesIO()
    output.write(sink)
    return sink.getvalue()
|
app/static/css/print.css
CHANGED
|
@@ -65,7 +65,7 @@ body {
|
|
| 65 |
width: 100%;
|
| 66 |
max-width: 100%;
|
| 67 |
margin: 0;
|
| 68 |
-
padding: 0
|
| 69 |
position: relative;
|
| 70 |
z-index: 1;
|
| 71 |
overflow: visible;
|
|
@@ -90,28 +90,23 @@ body {
|
|
| 90 |
|
| 91 |
/* ------------------------------
|
| 92 |
DECORATIVE RIGHT-SIDE LABEL
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
Original: 43.28cm × 3.31cm →
|
| 97 |
-
= 23.42cm × 1.65cm
|
| 98 |
------------------------------ */
|
| 99 |
.hb-right-label {
|
| 100 |
-
position:
|
| 101 |
-
top:
|
| 102 |
-
|
| 103 |
-
width:
|
| 104 |
-
height:
|
| 105 |
-
z-index: 10;
|
| 106 |
-
pointer-events: none;
|
| 107 |
overflow: hidden;
|
|
|
|
| 108 |
}
|
| 109 |
|
| 110 |
.hb-right-label img {
|
| 111 |
display: block;
|
| 112 |
-
width: 100%;
|
| 113 |
-
height: 100%;
|
| 114 |
-
object-fit: fill;
|
| 115 |
}
|
| 116 |
|
| 117 |
/* ------------------------------
|
|
|
|
| 65 |
width: 100%;
|
| 66 |
max-width: 100%;
|
| 67 |
margin: 0;
|
| 68 |
+
padding: 0;
|
| 69 |
position: relative;
|
| 70 |
z-index: 1;
|
| 71 |
overflow: visible;
|
|
|
|
| 90 |
|
| 91 |
/* ------------------------------
|
| 92 |
DECORATIVE RIGHT-SIDE LABEL
|
| 93 |
+
Hidden in the base HTML. Rendered as a PDF overlay
|
| 94 |
+
by pdf_renderer.py onto content pages only.
|
| 95 |
+
Kept here so the <img> loads for src extraction.
|
| 96 |
+
Original: 43.28cm × 3.31cm → 54% × 50% = 23.42cm × 1.65cm
|
|
|
|
| 97 |
------------------------------ */
|
| 98 |
.hb-right-label {
|
| 99 |
+
position: absolute;
|
| 100 |
+
top: -9999px;
|
| 101 |
+
left: -9999px;
|
| 102 |
+
width: 1px;
|
| 103 |
+
height: 1px;
|
|
|
|
|
|
|
| 104 |
overflow: hidden;
|
| 105 |
+
pointer-events: none;
|
| 106 |
}
|
| 107 |
|
| 108 |
.hb-right-label img {
|
| 109 |
display: block;
|
|
|
|
|
|
|
|
|
|
| 110 |
}
|
| 111 |
|
| 112 |
/* ------------------------------
|
requirements.txt
CHANGED
|
@@ -6,4 +6,5 @@ httpx>=0.27.0
|
|
| 6 |
jinja2>=3.1.0
|
| 7 |
markupsafe>=2.1.0
|
| 8 |
playwright>=1.40.0
|
|
|
|
| 9 |
python-dotenv>=1.0.0
|
|
|
|
| 6 |
jinja2>=3.1.0
|
| 7 |
markupsafe>=2.1.0
|
| 8 |
playwright>=1.40.0
|
| 9 |
+
pypdf>=4.0.0
|
| 10 |
python-dotenv>=1.0.0
|