MatteoScript commited on
Commit
7687049
·
verified ·
1 Parent(s): 039524a

Upload 6 files

Browse files
src/reporter/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """BDC report generator package."""
src/reporter/docx_fill.py ADDED
@@ -0,0 +1,591 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import textwrap
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Dict, Iterable, List, Tuple
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ import matplotlib
13
+ matplotlib.use("Agg")
14
+ import matplotlib.pyplot as plt
15
+
16
+ from docx import Document
17
+ from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT, WD_TABLE_ALIGNMENT
18
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
19
+ from docx.oxml import OxmlElement
20
+ from docx.oxml.ns import qn
21
+ from docx.shared import Cm, Pt
22
+
23
+ SCORE_MAP: Dict[str, int] = {
24
+ "Absent": 1,
25
+ "Minimum": 2,
26
+ "Sufficient": 3,
27
+ "Good": 4,
28
+ "Excellent": 5,
29
+ "Top": 6,
30
+ }
31
+
32
+
33
def to_score(x) -> float:
    """Map a survey cell to its numeric score.

    Numbers pass through unchanged; verbal labels are looked up in
    SCORE_MAP; anything missing or unrecognized becomes NaN.
    """
    if isinstance(x, (int, float, np.integer, np.floating)):
        return float(x)  # NaN floats pass through as NaN
    if pd.isna(x):
        return float("nan")
    label = str(x).strip()
    return float(SCORE_MAP.get(label, np.nan))
40
+
41
+
42
def label_color(label) -> str:
    """Return hex fill for a verbal label (no '#')."""
    if pd.isna(label):
        return "FFFFFF"
    text = str(label).strip()
    if text in {"Top", "Excellent"}:
        return "C6EFCE"  # light green
    if text in {"Good", "Sufficient"}:
        return "FFEB9C"  # light yellow
    if text in {"Minimum", "Absent"}:
        return "FFC7CE"  # light red
    # Unknown labels fall back to white (no highlight).
    return "FFFFFF"
54
+
55
+
56
def extract_competence_blocks(columns: Iterable[str]) -> List[dict]:
    """Infer competences from 'Commento qualitativo - ...' blocks.

    For each competence the 4 indicator columns are assumed to sit
    immediately before the comment column.

    Fix: the slice start is clamped at 0 — previously a comment column
    appearing among the first four columns produced a negative slice start
    and an empty indicator list instead of the columns actually available.

    Args:
        columns: the DataFrame's column labels, in order.

    Returns:
        One dict per competence with keys ``name`` / ``indicator_cols`` /
        ``comment_col``.
    """
    cols = list(columns)
    comment_cols = [
        c
        for c in cols
        if isinstance(c, str) and c.strip().lower().startswith("commento qualitativo -")
    ]
    blocks = []
    for c in comment_cols:
        idx = cols.index(c)
        # Clamp so a comment column near the start cannot wrap around.
        indicator_cols = cols[max(idx - 4, 0) : idx]
        # Everything after the first '-' is the competence name.
        name = c.split("-", 1)[1].strip()
        blocks.append({"name": name, "indicator_cols": indicator_cols, "comment_col": c})
    return blocks
75
+
76
+
77
def wrap_label(s: str, width: int = 14) -> str:
    """Wrap *s* onto lines of at most *width* chars, keeping long words whole."""
    pieces = textwrap.wrap(str(s), width=width, break_long_words=False)
    return "\n".join(pieces)
79
+
80
+
81
def radar_chart(names: List[str], auto_vals: List[float], valut_vals: List[float], out_png: Path) -> None:
    """Radar chart with just two series (AUTO vs VALUT), saved to *out_png*.

    Aesthetic note: lines only, no filled areas (avoids the "too many areas"
    effect with 11 competences); large legend placed outside the axes.

    NOTE(review): assumes non-empty input lists — an empty *auto_vals*
    raises IndexError below; confirm callers guarantee this.
    """

    labels = [wrap_label(n, 18) for n in names]
    n = len(labels)
    # One spoke per competence; repeat the first angle to close the polygon.
    angles = np.linspace(0, 2 * np.pi, n, endpoint=False).tolist()
    angles += angles[:1]

    a = list(auto_vals) + [auto_vals[0]]
    v = list(valut_vals) + [valut_vals[0]]

    # Wider figure to make room for the legend outside the plot area.
    fig = plt.figure(figsize=(9.0, 7.2), dpi=220)
    ax = plt.subplot(111, polar=True)

    # Start at 12 o'clock and run clockwise.
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)

    ax.set_thetagrids(np.degrees(angles[:-1]), labels, fontsize=9)
    ax.tick_params(axis='x', pad=28)
    ax.set_ylim(0, 6)
    ax.set_yticks([1, 2, 3, 4, 5, 6])
    ax.set_yticklabels(["1", "2", "3", "4", "5", "6"], fontsize=8)

    # Lines only (no fill) for a cleaner read.
    ax.plot(angles, v, linewidth=2.4, color="#1f77b4", label="Valutazione")
    ax.plot(angles, a, linewidth=2.4, color="#ff7f0e", label="Autovalutazione")

    # Slightly lighter grid.
    ax.grid(alpha=0.35)

    # Legend: large and outside the axes, inside the figure (to the right).
    ax.legend(
        loc="center left",
        bbox_to_anchor=(1.04, 0.5),
        frameon=False,
        fontsize=12,
    )

    # Leave room on the right for the legend.
    fig.subplots_adjust(left=0.05, right=0.80, top=0.95, bottom=0.07)

    fig.savefig(out_png, transparent=True, bbox_inches="tight", pad_inches=0.25)
    plt.close(fig)
129
+
130
+
131
def bar_chart(auto_mean: float, valut_mean: float, out_png: Path) -> None:
    """AUTO vs VALUT bars on the 0-6 scale, legend outside the plot, saved to *out_png*."""

    fig = plt.figure(figsize=(7.2, 3.4), dpi=220)
    ax = plt.gca()

    ax.bar([0], [valut_mean], width=0.42, color="#1f77b4", label="Valutazione")
    ax.bar([0.5], [auto_mean], width=0.42, color="#ff7f0e", label="Autovalutazione")

    ax.set_ylim(0, 6)
    # Single (empty) tick centered between the two bars.
    ax.set_xticks([0.25])
    ax.set_xticklabels([""], fontsize=10)
    ax.set_yticks([1, 2, 3, 4, 5, 6])
    ax.grid(axis="y", alpha=0.28)

    # Numeric value printed just above each bar.
    for x, y in [(0, valut_mean), (0.5, auto_mean)]:
        ax.text(x, y + 0.12, f"{y:.2f}", ha="center", va="bottom", fontsize=10)

    # Legend outside (to the right), larger.
    ax.legend(
        loc="center left",
        bbox_to_anchor=(1.01, 0.8),
        frameon=False,
        fontsize=11,
    )

    fig.subplots_adjust(left=0.08, right=0.80, top=0.92, bottom=0.18)
    fig.savefig(out_png, transparent=True, bbox_inches="tight", pad_inches=0.18)
    plt.close(fig)
160
def _set_cell_shading(cell, fill: str) -> None:
    """Apply a solid background *fill* (hex RGB, no '#') to a table cell.

    python-docx exposes no shading API, so the ``w:shd`` element is written
    directly into the cell's ``tcPr``.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    shd = OxmlElement("w:shd")
    shd.set(qn("w:val"), "clear")
    shd.set(qn("w:color"), "auto")
    shd.set(qn("w:fill"), fill)
    tcPr.append(shd)
167
+
168
+
169
def _set_cell_text(cell, text, *, bold=False, align="left", font_size=9) -> None:
    """Replace a cell's content with a single formatted, vertically-centered run."""
    alignments = {
        "center": WD_ALIGN_PARAGRAPH.CENTER,
        "right": WD_ALIGN_PARAGRAPH.RIGHT,
    }
    cell.text = ""
    para = cell.paragraphs[0]
    run = para.add_run("" if text is None else str(text))
    run.bold = bold
    run.font.size = Pt(font_size)
    # Any unknown alignment keyword falls back to left, as before.
    para.alignment = alignments.get(align, WD_ALIGN_PARAGRAPH.LEFT)
    cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER
182
+
183
+
184
def _insert_table_after(paragraph, rows: int, cols: int, width_cm: float = 17.0):
    """Create a rows x cols table and move it directly after *paragraph*.

    python-docx can only append tables at the end of the parent, so the
    table is created there and its XML element is relocated with ``addnext``.
    """
    tbl = paragraph._parent.add_table(rows=rows, cols=cols, width=Cm(width_cm))
    paragraph._p.addnext(tbl._tbl)
    return tbl
188
+
189
+
190
def _delete_paragraph(paragraph) -> None:
    """Remove *paragraph* from the document tree."""
    p = paragraph._element
    p.getparent().remove(p)
    # Drop the dangling lxml references so accidental reuse fails fast.
    paragraph._p = paragraph._element = None
194
+
195
+
196
def _clear_paragraph(paragraph) -> None:
    """Blank every run's text while keeping the runs (and their formatting)."""
    for r in paragraph.runs:
        r.text = ""
199
+
200
+
201
def _replace_paragraph_with_picture(paragraph, image_path: Path, *, width_cm: float) -> None:
    """Replace the paragraph's text with a centered picture scaled to *width_cm*."""
    _clear_paragraph(paragraph)
    run = paragraph.add_run()
    run.add_picture(str(image_path), width=Cm(width_cm))
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
206
+
207
+
208
def _table_header(tbl, headers: List[str]) -> None:
    """Write bold, centered, grey-shaded header labels into row 0."""
    for col, title in enumerate(headers):
        cell = tbl.cell(0, col)
        _set_cell_text(cell, title, bold=True, align="center", font_size=9)
        _set_cell_shading(cell, "D9D9D9")
213
+
214
+
215
def _build_table_3_2(paragraph, comp_df: pd.DataFrame) -> None:
    """Section 3.2: per-competence mean scores (self vs manager).

    Expects columns competenza / auto / valut; replaces the placeholder
    paragraph with the table.
    """
    tbl = _insert_table_after(paragraph, rows=len(comp_df) + 1, cols=3)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"
    _table_header(tbl, ["Competenza", "Autovalutazione", "Valutazione"])

    for row_idx, (_, record) in enumerate(comp_df.iterrows(), start=1):
        _set_cell_text(tbl.cell(row_idx, 0), record["competenza"], align="left", font_size=9)
        _set_cell_text(tbl.cell(row_idx, 1), f"{record['auto']:.2f}", align="center")
        _set_cell_text(tbl.cell(row_idx, 2), f"{record['valut']:.2f}", align="center")

    for column, width in zip(tbl.columns, (Cm(12.5), Cm(2.5), Cm(2.5))):
        column.width = width
    _delete_paragraph(paragraph)
230
+
231
+
232
def _build_table_gap_4_1(paragraph, df: pd.DataFrame) -> None:
    """Section 4.1: self- vs manager-score gap per competence, with trend arrow.

    *df* must have columns competenza / auto / valut / diff / trend.
    Replaces the placeholder paragraph with the table.
    """
    tbl = _insert_table_after(paragraph, rows=len(df) + 1, cols=5)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"
    _table_header(tbl, ["Competenza", "Autoval.", "Valut.", "Gap", "Trend"])

    for i, (_, r) in enumerate(df.iterrows(), start=1):
        _set_cell_text(tbl.cell(i, 0), r["competenza"], align="left", font_size=9)
        _set_cell_text(tbl.cell(i, 1), f"{r['auto']:.2f}", align="center")
        _set_cell_text(tbl.cell(i, 2), f"{r['valut']:.2f}", align="center")
        # Gap keeps its sign so over-/under-estimation is visible.
        _set_cell_text(tbl.cell(i, 3), f"{r['diff']:+.2f}", align="center")
        _set_cell_text(tbl.cell(i, 4), r["trend"], align="center", font_size=11)

    tbl.columns[0].width = Cm(10.8)
    for j in range(1, 5):
        tbl.columns[j].width = Cm(1.9)
    _delete_paragraph(paragraph)
249
+
250
+
251
def _build_table_gap_4_2(paragraph, df: pd.DataFrame) -> None:
    """Section 4.2: distance of each manager score from the maximum ("Top").

    *df* must have columns competenza / valut / gap_top.
    Replaces the placeholder paragraph with the table.
    """
    tbl = _insert_table_after(paragraph, rows=len(df) + 1, cols=3)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"
    _table_header(tbl, ["Competenza", "Valut.", "Gap da Top"])

    for i, (_, r) in enumerate(df.iterrows(), start=1):
        _set_cell_text(tbl.cell(i, 0), r["competenza"], align="left", font_size=9)
        _set_cell_text(tbl.cell(i, 1), f"{r['valut']:.2f}", align="center")
        _set_cell_text(tbl.cell(i, 2), f"{r['gap_top']:.2f}", align="center")

    tbl.columns[0].width = Cm(12.5)
    tbl.columns[1].width = Cm(2.5)
    tbl.columns[2].width = Cm(2.5)
    _delete_paragraph(paragraph)
266
+
267
+
268
def _build_table_indicators(paragraph, indicators: List[dict]) -> None:
    """Section 5 (per competence): indicator labels with color-coded ratings.

    Each dict needs keys text / auto_label / valut_label; rating cells are
    shaded by label via label_color. Replaces the placeholder paragraph.
    """
    tbl = _insert_table_after(paragraph, rows=len(indicators) + 1, cols=3)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"
    _table_header(tbl, ["Comportamento osservabile", "Autovalutazione", "Valutazione"])

    for i, ind in enumerate(indicators, start=1):
        _set_cell_text(tbl.cell(i, 0), ind["text"], align="left", font_size=8.5)

        cA = tbl.cell(i, 1)
        _set_cell_text(cA, ind["auto_label"], align="center")
        _set_cell_shading(cA, label_color(ind["auto_label"]))

        cV = tbl.cell(i, 2)
        _set_cell_text(cV, ind["valut_label"], align="center")
        _set_cell_shading(cV, label_color(ind["valut_label"]))

    tbl.columns[0].width = Cm(12.0)
    tbl.columns[1].width = Cm(2.6)
    tbl.columns[2].width = Cm(2.6)
    _delete_paragraph(paragraph)
289
+
290
+
291
def _build_table_comments(paragraph, auto_comment, valut_comment) -> None:
    """Section 5 (per competence): qualitative comments from both sources.

    NaN comments are rendered as empty cells. Replaces the placeholder
    paragraph with the table.
    """
    tbl = _insert_table_after(paragraph, rows=3, cols=2)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"

    _table_header(tbl, ["Fonte", "Commento qualitativo"])
    _set_cell_text(tbl.cell(1, 0), "Autovalutazione", bold=True, align="left")
    _set_cell_text(tbl.cell(1, 1), auto_comment if pd.notna(auto_comment) else "", align="left")
    _set_cell_text(tbl.cell(2, 0), "Valutazione", bold=True, align="left")
    _set_cell_text(tbl.cell(2, 1), valut_comment if pd.notna(valut_comment) else "", align="left")

    tbl.columns[0].width = Cm(4.0)
    tbl.columns[1].width = Cm(13.4)
    _delete_paragraph(paragraph)
305
+
306
+
307
def _build_table_behaviors(paragraph, rows: List[dict]) -> None:
    """Sections 6.1/6.2: observable behaviors with color-coded rating.

    Each row dict needs keys indicator / competenza / label; the rating cell
    is shaded by label. Replaces the placeholder paragraph with the table.
    """
    tbl = _insert_table_after(paragraph, rows=len(rows) + 1, cols=3)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"
    _table_header(tbl, ["Comportamento osservabile", "Competenza", "Valutazione"])

    for i, r in enumerate(rows, start=1):
        _set_cell_text(tbl.cell(i, 0), r["indicator"], align="left", font_size=8.5)
        _set_cell_text(tbl.cell(i, 1), r["competenza"], align="left", font_size=8.5)
        c = tbl.cell(i, 2)
        _set_cell_text(c, r["label"], align="center")
        _set_cell_shading(c, label_color(r["label"]))

    tbl.columns[0].width = Cm(9.5)
    tbl.columns[1].width = Cm(5.8)
    tbl.columns[2].width = Cm(2.8)
    _delete_paragraph(paragraph)
324
+
325
+
326
def _build_table_tech(paragraph, auto_text, valut_text) -> None:
    """Section 7.1 (collaboratori): technical-competence free-text answers.

    NaN answers are rendered as empty cells. Replaces the placeholder
    paragraph with the table.
    """
    tbl = _insert_table_after(paragraph, rows=2, cols=2)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"

    _table_header(tbl, ["Autovalutazione", "Valutazione manager"])
    _set_cell_text(tbl.cell(1, 0), auto_text if pd.notna(auto_text) else "", align="left")
    _set_cell_text(tbl.cell(1, 1), valut_text if pd.notna(valut_text) else "", align="left")

    tbl.columns[0].width = Cm(8.6)
    tbl.columns[1].width = Cm(8.6)
    _delete_paragraph(paragraph)
338
+
339
+
340
def _build_table_feedback(paragraph, qas: List[Tuple[str, str]]) -> None:
    """Feedback Q&A table: one row per (question, answer) pair.

    NaN answers are rendered as empty cells. Replaces the placeholder
    paragraph with the table.
    """
    tbl = _insert_table_after(paragraph, rows=len(qas) + 1, cols=2)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"
    _table_header(tbl, ["Domanda", "Risposta"])

    for i, (q, a) in enumerate(qas, start=1):
        _set_cell_text(tbl.cell(i, 0), q, align="left", font_size=8.5)
        _set_cell_text(tbl.cell(i, 1), a if pd.notna(a) else "", align="left")

    tbl.columns[0].width = Cm(6.5)
    tbl.columns[1].width = Cm(10.7)
    _delete_paragraph(paragraph)
353
+
354
+
355
def _build_table_priority(paragraph, priorities: List[str], valut_by_comp: Dict[str, float]) -> None:
    """Section 8.1: fixed development priorities with the person's manager score.

    *valut_by_comp* is keyed by lowercase competence name; priorities are
    matched case-insensitively and unmatched entries are skipped (their
    original rank number is preserved). Replaces the placeholder paragraph.
    """
    rows = []
    for rank, comp in enumerate(priorities, start=1):
        key = comp.lower()
        if key in valut_by_comp:
            rows.append((rank, comp, valut_by_comp[key]))

    tbl = _insert_table_after(paragraph, rows=len(rows) + 1, cols=3)
    tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
    tbl.style = "Table Grid"
    _table_header(tbl, ["Priorità", "Competenza", "Valutazione"])

    for i, (rank, comp, val) in enumerate(rows, start=1):
        _set_cell_text(tbl.cell(i, 0), str(rank), align="center")
        _set_cell_text(tbl.cell(i, 1), comp, align="left")
        _set_cell_text(tbl.cell(i, 2), f"{val:.2f}", align="center")

    tbl.columns[0].width = Cm(2.0)
    tbl.columns[1].width = Cm(12.5)
    tbl.columns[2].width = Cm(2.5)
    _delete_paragraph(paragraph)
376
+
377
+
378
@dataclass
class PersonData:
    """Everything needed to fill one person's report."""

    name: str  # value of 'Nome e cognome'
    comps: List[dict]  # per-competence dicts built by build_person_data
    auto_row: pd.Series  # self-assessment row (all-NaN Series if missing)
    valut_row: pd.Series  # manager-assessment row (all-NaN Series if missing)
384
+
385
+
386
def build_person_data(df_auto: pd.DataFrame, df_valut: pd.DataFrame, name: str) -> PersonData:
    """Collect per-competence labels, scores and comments for *name*.

    Robust selection: if the person's row is missing in AUTO or VALUT, an
    all-NaN row is used; a column missing from a row also yields NaN instead
    of raising (the two exports may not share identical columns — previously
    the AUTO side used item access and raised KeyError in that case while
    the VALUT side already tolerated it via ``.get``).

    Raises:
        ValueError: if either frame lacks the 'Nome e cognome' column.
    """
    if "Nome e cognome" not in df_auto.columns:
        raise ValueError("Colonna 'Nome e cognome' non trovata nel file AUTO.")
    if "Nome e cognome" not in df_valut.columns:
        raise ValueError("Colonna 'Nome e cognome' non trovata nel file VALUT.")

    auto_match = df_auto[df_auto["Nome e cognome"] == name]
    valut_match = df_valut[df_valut["Nome e cognome"] == name]

    # Keep the last matching row (frames are timestamp-sorted upstream).
    auto_row = auto_match.iloc[-1] if len(auto_match) else pd.Series({c: np.nan for c in df_auto.columns})
    valut_row = valut_match.iloc[-1] if len(valut_match) else pd.Series({c: np.nan for c in df_valut.columns})

    blocks = extract_competence_blocks(df_auto.columns)
    comps = []

    for b in blocks:
        # .get on BOTH rows: tolerate columns missing from either export.
        auto_labels = [auto_row.get(c, np.nan) for c in b["indicator_cols"]]
        valut_labels = [valut_row.get(c, np.nan) for c in b["indicator_cols"]]

        auto_scores = [to_score(x) for x in auto_labels]
        valut_scores = [to_score(x) for x in valut_labels]

        comps.append(
            {
                "name": b["name"],
                "indicator_texts": b["indicator_cols"],
                "auto_labels": auto_labels,
                "valut_labels": valut_labels,
                "auto_scores": auto_scores,
                "valut_scores": valut_scores,
                # Guard the empty case explicitly instead of relying on
                # np.nanmean's "mean of empty slice" warning; all-NaN input
                # still yields NaN either way.
                "auto_mean": float(np.nanmean(auto_scores)) if auto_scores else float("nan"),
                "valut_mean": float(np.nanmean(valut_scores)) if valut_scores else float("nan"),
                "auto_comment": auto_row.get(b["comment_col"], np.nan),
                "valut_comment": valut_row.get(b["comment_col"], np.nan),
            }
        )

    return PersonData(name=name, comps=comps, auto_row=auto_row, valut_row=valut_row)
425
+
426
+
427
def fill_template(
    template_path: Path,
    out_docx: Path,
    df_auto: pd.DataFrame,
    df_valut: pd.DataFrame,
    person_name: str,
    kind: str,
    *,
    workdir: Path,
) -> Path:
    """Fill a Word template replacing only placeholders (template formatting stays intact).

    Args:
        template_path: DOCX template containing [@...] placeholder paragraphs.
        out_docx: destination path for the filled document.
        df_auto: self-assessment export.
        df_valut: manager-assessment export.
        person_name: value matched against 'Nome e cognome'.
        kind: "collaboratori" or "manager" — selects section variants.
        workdir: scratch directory where chart PNGs are written.

    Returns:
        out_docx.
    """

    doc = Document(str(template_path))
    pdata = build_person_data(df_auto, df_valut, person_name)
    comps = pdata.comps

    # Per-competence mean scores, one row per competence.
    comp_df = pd.DataFrame(
        [{"competenza": c["name"], "auto": c["auto_mean"], "valut": c["valut_mean"]} for c in comps]
    )
    comp_df_sorted = comp_df.sort_values("valut", ascending=False).reset_index(drop=True)

    # Section 4.1: gap between manager score and self score.
    gap_df = comp_df.copy()
    gap_df["diff"] = gap_df["valut"] - gap_df["auto"]

    def trend(diff: float) -> str:
        # NOTE(review): "↑" is emitted when valut < auto - 0.5 (self-score
        # higher than manager's) — confirm the intended arrow direction.
        if -0.5 <= diff <= 0.5:
            return "↔"
        if diff < -0.5:
            return "↑"
        return "↓"

    gap_df["trend"] = gap_df["diff"].apply(trend)
    # Order by gap magnitude; positive gaps rank first among equal magnitudes.
    gap_df["abs"] = gap_df["diff"].abs()
    gap_df = gap_df.sort_values(["abs", "diff"], ascending=[False, False]).drop(columns=["abs"]).reset_index(drop=True)

    # Section 4.2: distance from the maximum score (6).
    gtop = comp_df.copy()
    gtop["gap_top"] = 6 - gtop["valut"]
    gtop = gtop.sort_values("gap_top", ascending=False).reset_index(drop=True)

    # Sections 6.1/6.2: best/worst individual behaviors (manager side only).
    behaviors = []
    for c in comps:
        for txt, label, score in zip(c["indicator_texts"], c["valut_labels"], c["valut_scores"]):
            if pd.notna(score):
                behaviors.append({"indicator": txt, "competenza": c["name"], "label": label, "score": float(score)})
    beh_df = pd.DataFrame(behaviors)
    # NOTE(review): if behaviors is empty, beh_df has no "score" column and
    # sort_values raises KeyError — confirm whether that case can occur.
    beh_top = beh_df.sort_values("score", ascending=False).head(10).to_dict("records")
    beh_bot = beh_df.sort_values("score", ascending=True).head(10).to_dict("records")

    # Charts: one folder per person, name sanitized for the filesystem.
    img_dir = workdir / re.sub(r"[^A-Za-z0-9_-]+", "_", person_name)
    img_dir.mkdir(parents=True, exist_ok=True)

    radar_png = img_dir / "radar.png"
    radar_chart([c["name"] for c in comps], [c["auto_mean"] for c in comps], [c["valut_mean"] for c in comps], radar_png)

    # One AUTO-vs-VALUT bar chart per competence, keyed by 1-based index.
    comp_bar: Dict[int, Path] = {}
    for idx, c in enumerate(comps, start=1):
        png = img_dir / f"bar_{idx}.png"
        bar_chart(c["auto_mean"], c["valut_mean"], png)
        comp_bar[idx] = png

    # Qualitative feedback questions, answered in the AUTO form.
    fb_qs = [
        "Quale comportamento/atteggiamento dovrebbe continuare ad agire il mio responsabile?",
        "Quale comportamento/atteggiamento dovrebbe iniziare ad agire?",
        "Quale comportamento/atteggiamento suggerisco di smettere di agire?",
    ]
    qas = [(q, pdata.auto_row.get(q, "")) for q in fb_qs]

    # Technical-competence free-text answers (collaboratori reports only).
    auto_tech = ""
    val_tech = ""
    if kind == "collaboratori":
        auto_tech_q = [
            c for c in df_auto.columns if isinstance(c, str) and c.strip().lower().startswith("indica 1 competenza tecnica")
        ]
        val_tech_q = [
            c for c in df_valut.columns if isinstance(c, str) and c.strip().lower().startswith("indica 1 competenza tecnica")
        ]
        if auto_tech_q:
            auto_tech = pdata.auto_row.get(auto_tech_q[0], "")
        if val_tech_q:
            val_tech = pdata.valut_row.get(val_tech_q[0], "")

    # Fixed development priorities, matched case-insensitively below.
    priorities = [
        "Attenzione alla qualità",
        "Capacità di comunicazione efficace e ascolto attivo",
        "Spirito di iniziativa e orientamento al risultato",
        "Proporre decisioni e lavorare con senso di responsabilità",
        "Orientamento al cliente (interno/esterno)",
    ]
    valut_by_comp = {c["name"].lower(): float(c["valut_mean"]) for c in comps}

    # Replace placeholders. Iterate a snapshot of the paragraph list because
    # the table builders delete placeholder paragraphs while we loop.
    done_radar = False
    for p in list(doc.paragraphs):
        t = p.text.strip().replace("\t", "")

        if t == "[@NomeCognome]":
            # Keep the template's style: replace only the placeholder text.
            _clear_paragraph(p)
            p.add_run(person_name)

        elif t == "[@GraficoSezione3.2]":
            # For manager reports, any radar placeholder after the first is
            # removed; the first occurrence receives the chart.
            if kind == "manager" and done_radar:
                _delete_paragraph(p)
            else:
                _replace_paragraph_with_picture(p, radar_png, width_cm=16.2)
                done_radar = True

        elif t == "[@TabellaSezione3.2]":
            _build_table_3_2(p, comp_df_sorted)

        elif t == "[@TabellaSezione4.1]":
            _build_table_gap_4_1(p, gap_df)

        elif t == "[@TabellaSezione4.2]":
            _build_table_gap_4_2(p, gtop)

        else:
            # Numbered section-5 placeholders: bar chart / indicator table /
            # comment table per competence (1-based index in the placeholder).
            m = re.fullmatch(r"\[@GraficoSezione5\.(\d+)\]", t)
            if m:
                idx = int(m.group(1))
                if idx in comp_bar:
                    _replace_paragraph_with_picture(p, comp_bar[idx], width_cm=15.6)
                continue

            m = re.fullmatch(r"\[@Tabella1Sezione5\.(\d+)\]", t)
            if m:
                idx = int(m.group(1))
                if 1 <= idx <= len(comps):
                    c = comps[idx - 1]
                    indicators = [
                        {"text": txt, "auto_label": al, "valut_label": vl}
                        for txt, al, vl in zip(c["indicator_texts"], c["auto_labels"], c["valut_labels"])
                    ]
                    _build_table_indicators(p, indicators)
                continue

            m = re.fullmatch(r"\[@Tabella2Sezione5\.(\d+)\]", t)
            if m:
                idx = int(m.group(1))
                if 1 <= idx <= len(comps):
                    c = comps[idx - 1]
                    _build_table_comments(p, c["auto_comment"], c["valut_comment"])
                continue

            if t == "[@TabellaSezione6.1]":
                _build_table_behaviors(p, beh_top)
            elif t == "[@TabellaSezione6.2]":
                _build_table_behaviors(p, beh_bot)
            elif t == "[@TabellaSezione7.1]":
                # Section 7.1 differs per population: tech competences for
                # collaboratori, feedback Q&A for managers.
                if kind == "collaboratori":
                    _build_table_tech(p, auto_tech, val_tech)
                else:
                    _build_table_feedback(p, qas)
            elif t == "[@TabellaSezione7.2]":
                _build_table_feedback(p, qas)
            elif t == "[@TabellaSezione8.1]":
                # Priorities exist only in collaboratori reports; managers
                # get the placeholder removed.
                if kind == "collaboratori":
                    _build_table_priority(p, priorities, valut_by_comp)
                else:
                    _delete_paragraph(p)

    doc.save(str(out_docx))
    return out_docx
src/reporter/generate.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Iterable, List, Optional, Tuple, Dict
7
+
8
+ import pandas as pd
9
+
10
+ from .docx_fill import fill_template
11
+ from .io import load_pair
12
+
13
+
14
@dataclass
class GeneratedArtifact:
    """One report produced for one person."""

    kind: str  # "collaboratori" | "manager"
    person: str  # person's display name ('Nome e cognome')
    docx_path: Path  # filled Word report
    pdf_path: Optional[Path] = None  # None when PDF was skipped or failed
    notes: str = ""  # e.g. reason the PDF was not generated
21
+
22
+
23
@dataclass
class GenerateResult:
    """Outcome of a generation run."""

    produced: List[GeneratedArtifact]  # successfully generated reports
    warnings: List[str]  # non-fatal per-person / per-group problems
27
+
28
+
29
+ def _safe_filename(name: str) -> str:
30
+ name = str(name).strip()
31
+ name = re.sub(r"\s+", " ", name)
32
+ name = re.sub(r"[^A-Za-z0-9 _-]+", "", name)
33
+ return name.replace(" ", "_")
34
+
35
+
36
def list_people(auto_path: Path, valut_path: Path) -> List[str]:
    """Return UNION of names found in AUTO and VALUT (dedup)."""
    df_auto, df_val = load_pair(auto_path, valut_path)
    seen = set(df_auto.get("Nome e cognome", [])) | set(df_val.get("Nome e cognome", []))
    # Drop non-string / blank entries, sort for a stable UI listing.
    return sorted(n for n in seen if isinstance(n, str) and n.strip())
43
+
44
+
45
def _generate_one(
    *,
    kind: str,
    person_name: str,
    df_auto: pd.DataFrame,
    df_val: pd.DataFrame,
    template_path: Path,
    output_dir: Path,
    workdir: Path,
    make_pdf: bool,
) -> GeneratedArtifact:
    """Produce the DOCX (and optionally PDF) report for a single person.

    PDF conversion failures are non-fatal: the artifact is returned with
    ``pdf_path=None`` and the reason recorded in ``notes``.
    """
    base = f"REPORT_{kind}_{_safe_filename(person_name)}"
    out_docx = output_dir / f"{base}.docx"
    out_pdf = output_dir / f"{base}.pdf"

    notes = ""
    # Fill template
    fill_template(
        template_path=template_path,
        out_docx=out_docx,
        df_auto=df_auto,
        df_valut=df_val,
        person_name=person_name,
        kind=kind,
        workdir=workdir,
    )

    pdf_path: Optional[Path] = None
    if make_pdf:
        # Imported lazily so DOCX-only runs do not need the PDF toolchain.
        from .pdf_convert import docx_to_pdf

        try:
            docx_to_pdf(out_docx, out_pdf)
            pdf_path = out_pdf
        except Exception as e:
            notes = f"PDF non generato: {e}"
            pdf_path = None

    return GeneratedArtifact(kind=kind, person=person_name, docx_path=out_docx, pdf_path=pdf_path, notes=notes)
84
+
85
+
86
def generate_selected(
    *,
    collab_auto: Optional[Path],
    collab_valut: Optional[Path],
    collab_template: Optional[Path],
    manager_auto: Optional[Path],
    manager_valut: Optional[Path],
    manager_template: Optional[Path],
    selected_collaboratori: Iterable[str],
    selected_manager: Iterable[str],
    output_dir: Path,
    make_pdf: bool,
) -> GenerateResult:
    """Generate reports for the selected people of both populations.

    For each population (collaboratori / manager): if all three of its files
    (AUTO, VALUT, template) are provided, one report per selected person is
    produced; per-person failures are recorded as warnings, not raised. If
    only some of the files are provided, a warning is emitted; if none are,
    the population is silently skipped.

    The two populations previously ran through two verbatim-duplicated
    loops; the logic is now factored into a single helper.

    Returns:
        GenerateResult with the produced artifacts and accumulated warnings.
    """
    produced: List[GeneratedArtifact] = []
    warnings: List[str] = []

    output_dir.mkdir(parents=True, exist_ok=True)
    workdir = output_dir / "_work"
    workdir.mkdir(parents=True, exist_ok=True)

    def _run_group(
        kind: str,
        label: str,
        auto_path: Optional[Path],
        valut_path: Optional[Path],
        template_path: Optional[Path],
        selected: Iterable[str],
    ) -> None:
        # One pass of the identical per-population pipeline.
        if not (auto_path and valut_path and template_path):
            if any([auto_path, valut_path, template_path]):
                warnings.append(f"{label}: mancano uno o più file (AUTO/VALUT/TEMPLATE).")
            return
        df_auto, df_val = load_pair(auto_path, valut_path)
        for person in selected:
            try:
                produced.append(
                    _generate_one(
                        kind=kind,
                        person_name=person,
                        df_auto=df_auto,
                        df_val=df_val,
                        template_path=template_path,
                        output_dir=output_dir,
                        workdir=workdir / kind,
                        make_pdf=make_pdf,
                    )
                )
            except Exception as e:
                warnings.append(f"[{label}] {person}: errore generazione ({e})")

    _run_group("collaboratori", "Collaboratori", collab_auto, collab_valut, collab_template, selected_collaboratori)
    _run_group("manager", "Manager", manager_auto, manager_valut, manager_template, selected_manager)

    return GenerateResult(produced=produced, warnings=warnings)
src/reporter/io.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Tuple
5
+
6
+ import pandas as pd
7
+
8
+
9
+ def _parse_ts(df: pd.DataFrame) -> pd.DataFrame:
10
+ # Google Forms export uses this column name
11
+ if "Informazioni cronologiche" in df.columns:
12
+ df = df.copy()
13
+ df["_ts"] = pd.to_datetime(df["Informazioni cronologiche"], errors="coerce")
14
+ df = df.sort_values("_ts")
15
+ else:
16
+ df = df.copy()
17
+ df["_ts"] = pd.NaT
18
+ return df
19
+
20
+
21
def load_and_dedup(excel_path: Path) -> pd.DataFrame:
    """Load the first sheet and keep only the newest row per person.

    Rows are ordered by submission timestamp first, so ``keep="last"``
    retains each person's most recent answers.
    """
    frame = pd.read_excel(excel_path, sheet_name=0)
    frame = _parse_ts(frame)
    if "Nome e cognome" in frame.columns:
        frame = frame.drop_duplicates(subset=["Nome e cognome"], keep="last")
    return frame.reset_index(drop=True)
27
+
28
+
29
def load_pair(auto_path: Path, valut_path: Path) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load and deduplicate the AUTO and VALUT exports as a (auto, valut) pair."""
    return load_and_dedup(auto_path), load_and_dedup(valut_path)
src/reporter/pdf_convert.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DOCX -> PDF conversion (no Aspose).
2
+
3
+ Obiettivo: ottenere un PDF *fedele* al template Word (grafica, tabelle, immagini, header/footer).
4
+
5
+ Scelta converter:
6
+ 1) LibreOffice headless (consigliato su Linux / HF Spaces) ✅ alta fedeltà
7
+ 2) docx2pdf (solo Windows/macOS con MS Word)
8
+ 3) fallback PyMuPDF (bassa fedeltà: solo come ultima spiaggia)
9
+
10
+ Su HuggingFace Spaces puoi installare LibreOffice con `packages.txt`:
11
+ - libreoffice
12
+ - libreoffice-writer
13
+ - fonts-dejavu-core (o altri font richiesti dal template)
14
+
15
+ Questo non richiede Aspose.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import shutil
21
+ import subprocess
22
+ from dataclasses import dataclass
23
+ from pathlib import Path
24
+ from typing import Optional
25
+
26
+ import fitz # PyMuPDF
27
+
28
+
29
@dataclass
class PdfConvertResult:
    """Summary of a DOCX -> PDF conversion."""

    backend: str  # "libreoffice" | "docx2pdf" | "pymupdf_fallback"
    removed_blank_pages: int = 0  # pages dropped by the blank-page cleanup
33
+
34
+
35
+ def _loffice_available() -> Optional[str]:
36
+ return shutil.which("soffice") or shutil.which("libreoffice")
37
+
38
+
39
def _convert_with_libreoffice(docx_path: Path, pdf_path: Path) -> None:
    """Convert using LibreOffice headless.

    Raises:
        RuntimeError: if no LibreOffice binary is on PATH, or if no PDF was
            produced.
        subprocess.CalledProcessError: if LibreOffice exits non-zero.
    """
    soffice = _loffice_available()
    if not soffice:
        # Previously a missing binary put None into argv and surfaced as an
        # opaque TypeError from subprocess; fail with a clear message instead.
        raise RuntimeError("LibreOffice (soffice) non trovato nel PATH.")

    docx_path = Path(docx_path)
    pdf_path = Path(pdf_path)
    outdir = pdf_path.parent
    outdir.mkdir(parents=True, exist_ok=True)

    cmd = [
        soffice,
        "--headless",
        "--nologo",
        "--nofirststartwizard",
        "--convert-to",
        "pdf",
        "--outdir",
        str(outdir),
        str(docx_path),
    ]
    # Run (output captured so LibreOffice noise does not pollute the console).
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # LibreOffice names the output after the input stem; extension case varies.
    produced = outdir / (docx_path.stem + ".pdf")
    if not produced.exists():
        # LibreOffice sometimes produces .PDF
        produced = outdir / (docx_path.stem + ".PDF")
    if not produced.exists():
        raise RuntimeError("LibreOffice non ha prodotto il PDF atteso.")
    if produced != pdf_path:
        produced.replace(pdf_path)
67
+
68
+
69
def _convert_with_docx2pdf(docx_path: Path, pdf_path: Path) -> None:
    """Convert via docx2pdf (requires MS Word; Windows/macOS only)."""
    # Imported lazily: docx2pdf is only installable/usable where Word exists.
    from docx2pdf import convert  # type: ignore

    pdf_path.parent.mkdir(parents=True, exist_ok=True)
    convert(str(docx_path), str(pdf_path))
74
+
75
+
76
def _convert_with_pymupdf(docx_path: Path, pdf_path: Path) -> None:
    """Low-fidelity fallback conversion via PyMuPDF.

    Only used when neither LibreOffice nor MS Word is available.
    Documents are now closed in ``finally`` blocks — previously both the
    source and the output document leaked if conversion or save raised.
    """
    src = fitz.open(str(docx_path))
    try:
        pdf_bytes = src.convert_to_pdf()
    finally:
        src.close()
    out = fitz.open("pdf", pdf_bytes)
    try:
        out.save(str(pdf_path))
    finally:
        out.close()
83
+
84
+
85
def _drop_blank_pages(pdf_path: Path) -> int:
    """Remove visually blank pages (rare, but can happen with complex templates).

    Each page is rasterized at half resolution; a page counts as blank when
    fewer than 0.2% of its pixels differ from near-white (any RGB channel
    below 245). Returns the number of pages removed.
    """
    import numpy as np  # local import, matching the style used in pdf_post.py

    pdf = fitz.open(str(pdf_path))
    blanks = []
    for i in range(pdf.page_count):
        page = pdf.load_page(i)
        pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5), alpha=False)
        # Vectorized near-white test: the previous per-pixel Python loop was
        # O(width*height) interpreter work per page. Same 245 threshold and
        # 0.2% cutoff as before.
        img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, 3)
        nonwhite = int((img < 245).any(axis=2).sum())
        total = pix.height * pix.width
        if total and (nonwhite / total) < 0.002:
            blanks.append(i)
    # Delete back-to-front so earlier indices stay valid.
    for i in reversed(blanks):
        pdf.delete_page(i)
    removed = len(blanks)
    if removed:
        # Save to a sibling temp file, then atomically replace the original.
        tmp = pdf_path.with_suffix(".tmp.pdf")
        pdf.save(str(tmp))
        pdf.close()
        tmp.replace(pdf_path)
    else:
        pdf.close()
    return removed
111
+
112
+
113
def docx_to_pdf(docx_path: Path, pdf_path: Path, *, drop_blank_pages: bool = True) -> PdfConvertResult:
    """Convert a DOCX to PDF using the best available backend.

    Backend preference: LibreOffice headless, then docx2pdf (MS Word),
    then a low-fidelity PyMuPDF fallback.

    Args:
        docx_path: source document.
        pdf_path: destination PDF path.
        drop_blank_pages: when True, visually blank pages are removed
            afterwards (best-effort; cleanup failures are swallowed).

    Returns:
        PdfConvertResult with the backend used and pages removed.
    """
    docx_path = Path(docx_path)
    pdf_path = Path(pdf_path)

    backend = ""
    if _loffice_available():
        # NOTE(review): if LibreOffice is present but its conversion raises,
        # no fallback backend is attempted — confirm this is intended.
        backend = "libreoffice"
        _convert_with_libreoffice(docx_path, pdf_path)
    else:
        # docx2pdf works only with MS Word
        try:
            backend = "docx2pdf"
            _convert_with_docx2pdf(docx_path, pdf_path)
        except Exception:
            backend = "pymupdf_fallback"
            _convert_with_pymupdf(docx_path, pdf_path)

    removed = 0
    if drop_blank_pages:
        # Best-effort cleanup: never fail the conversion over it.
        try:
            removed = _drop_blank_pages(pdf_path)
        except Exception:
            removed = 0

    return PdfConvertResult(backend=backend, removed_blank_pages=removed)
+ return PdfConvertResult(backend=backend, removed_blank_pages=removed)
src/reporter/pdf_post.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Post-processing PDF (pip-only).
2
+
3
+ Obiettivo:
4
+ - rimuovere pagine *visivamente* vuote (tipicamente solo footer/logo e numero pagina)
5
+ che possono comparire in template DOCX complessi dopo la conversione.
6
+
7
+ Implementazione:
8
+ - usa PyMuPDF (fitz) per rasterizzare ogni pagina a bassa risoluzione
9
+ - calcola la frazione di pixel "quasi bianchi" e il numero di pixel non bianchi
10
+ - se la pagina è "troppo bianca" e con pochissimi pixel scuri => considerata vuota
11
+ - crea un nuovo PDF senza quelle pagine.
12
+
13
+ Nota:
14
+ La soglia è tarata per il caso tipico "pagina bianca con solo numero pagina".
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from pathlib import Path
20
+ from typing import List
21
+
22
+
23
def remove_blank_pages_pdf(
    pdf_path: Path,
    *,
    zoom: float = 0.35,
    white_thr: int = 245,
    white_frac_thr: float = 0.995,
    max_nonwhite_pixels: int = 3500,
) -> int:
    """Remove visually blank pages from a PDF, rewriting it in place.

    Each page is rasterized at low resolution (*zoom*); a page is dropped when
    at least *white_frac_thr* of its pixels are >= *white_thr* on every channel
    AND at most *max_nonwhite_pixels* pixels fall below that threshold (tuned
    for the "blank page with only a page number" case).

    Returns the number of pages removed (0 when nothing changed).
    """

    import fitz  # PyMuPDF
    import numpy as np

    pdf_path = Path(pdf_path)
    doc = fitz.open(str(pdf_path))
    try:
        # Bug fix: the original early return leaked the open document.
        if doc.page_count == 0:
            return 0

        keep: List[int] = []
        for i in range(doc.page_count):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
            img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, 3)
            white = np.all(img >= white_thr, axis=2)
            white_frac = float(white.mean())
            nonwhite = int((~white).sum())

            # Keep the page unless it is almost entirely white.
            if not (white_frac >= white_frac_thr and nonwhite <= max_nonwhite_pixels):
                keep.append(i)

        removed = doc.page_count - len(keep)
        if removed <= 0:
            return 0

        # Document.select() keeps exactly the listed pages.  This replaces the
        # original two-step insert_pdf() approach, which crashed with
        # ValueError on min([]) when *every* page was blank, leaked an interim
        # document for non-contiguous keeps, and did redundant copy work.
        doc.select(keep)
        tmp = pdf_path.with_suffix(".tmp.pdf")
        doc.save(str(tmp))
    finally:
        doc.close()
    tmp.replace(pdf_path)
    return removed