"""Create thumbnail grids from PowerPoint presentation slides.

Creates a grid layout of slide thumbnails for quick visual analysis.
Labels each thumbnail with its XML filename (e.g., slide1.xml).
Hidden slides are shown with a placeholder pattern.

Usage:
    python thumbnail.py input.pptx [output_prefix] [--cols N]

Examples:
    python thumbnail.py presentation.pptx
    # Creates: thumbnails.jpg (or thumbnails-1.jpg, thumbnails-2.jpg for large decks)

    python thumbnail.py template.pptx grid --cols 4
    # Creates: grid.jpg (or grid-1.jpg, grid-2.jpg for large decks)
"""
|
|
| import argparse |
| import subprocess |
| import sys |
| import tempfile |
| import zipfile |
| from pathlib import Path |
|
|
| import defusedxml.minidom |
| from office.soffice import get_soffice_env |
| from PIL import Image, ImageDraw, ImageFont |
|
|
# --- Rendering -------------------------------------------------------------
THUMBNAIL_WIDTH = 300  # pixel width of each thumbnail cell in the grid
CONVERSION_DPI = 100  # resolution handed to pdftoppm via "-r"
JPEG_QUALITY = 95  # "quality" argument used when saving each grid image

# --- Grid layout -----------------------------------------------------------
DEFAULT_COLS = 3  # column count when --cols is not given
MAX_COLS = 6  # upper bound applied to --cols
GRID_PADDING = 20  # pixels between cells and around the grid edge
BORDER_WIDTH = 2  # pixel width of the gray outline; 0 disables it

# --- Labels ----------------------------------------------------------------
FONT_SIZE_RATIO = 0.10  # label font size as a fraction of the thumbnail width
LABEL_PADDING_RATIO = 0.4  # padding above/below a label, as a fraction of font size
|
|
|
|
def main():
    """Parse the command line and write the thumbnail grid image(s).

    Prints the created file names on stdout. Exits with status 1 and a
    message on stderr when ``--cols`` is less than 1, when the input is not
    an existing ``.pptx`` file, when no slides are found, or when any
    conversion step raises.
    """
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )

    args = parser.parse_args()

    # A column count below 1 cannot describe a grid. Reject it here with a
    # clear message instead of letting it surface later as an unrelated
    # arithmetic error from the layout code.
    if args.cols < 1:
        print(
            f"Error: --cols must be at least 1 (got {args.cols})",
            file=sys.stderr,
        )
        sys.exit(1)

    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS}")

    input_path = Path(args.input)
    if not input_path.exists() or input_path.suffix.lower() != ".pptx":
        print(f"Error: Invalid PowerPoint file: {args.input}", file=sys.stderr)
        sys.exit(1)

    output_path = Path(f"{args.output_prefix}.jpg")

    try:
        slide_info = get_slide_info(input_path)

        # All intermediate files (PDF, page JPEGs, placeholders) live in a
        # temporary directory, so the grids must be written before it closes.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            visible_images = convert_to_images(input_path, temp_path)

            if not visible_images and not any(s["hidden"] for s in slide_info):
                print("Error: No slides found", file=sys.stderr)
                sys.exit(1)

            slides = build_slide_list(slide_info, visible_images, temp_path)

            grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path)

            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f" {grid_file}")

    # Top-level boundary: report any failure as a one-line error. SystemExit
    # is not an Exception subclass, so the sys.exit(1) above passes through.
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
def _slide_is_hidden(zf: zipfile.ZipFile, slide_name: str) -> bool:
    """Return True when the slide part's root element carries a false ``show``."""
    try:
        slide_xml = zf.read(f"ppt/slides/{slide_name}").decode("utf-8")
    except KeyError:
        # Part missing from the archive: nothing says the slide is hidden.
        return False
    root = defusedxml.minidom.parseString(slide_xml).documentElement
    # ``show`` is an XML boolean, so both lexical forms of "false" count.
    return root.getAttribute("show") in ("0", "false")


def get_slide_info(pptx_path: Path) -> list[dict]:
    """List the deck's slides in presentation order with their hidden state.

    Args:
        pptx_path: Path to the ``.pptx`` package (a zip archive).

    Returns:
        One ``{"name": <slide file name>, "hidden": <bool>}`` dict for each
        ``p:sldId`` entry of ``ppt/presentation.xml`` whose relationship id
        resolves to a slide part, in document order.

    Raises:
        KeyError: If ``ppt/_rels/presentation.xml.rels`` or
            ``ppt/presentation.xml`` is missing from the archive.
        Exception: Errors from ``zipfile`` or the XML parser propagate
            unchanged (archive unreadable, malformed XML).
    """
    with zipfile.ZipFile(pptx_path, "r") as zf:
        rels_content = zf.read("ppt/_rels/presentation.xml.rels").decode("utf-8")
        rels_dom = defusedxml.minidom.parseString(rels_content)

        rid_to_slide = {}
        for rel in rels_dom.getElementsByTagName("Relationship"):
            # Targets are normally relative ("slides/slide1.xml"), but an
            # absolute form ("/ppt/slides/slide1.xml") is accepted as well.
            target = rel.getAttribute("Target").removeprefix("/ppt/")
            rel_type = rel.getAttribute("Type")
            # Match the slide relationship type exactly; a substring test
            # would also match slideMaster / slideLayout relationship types.
            if rel_type.endswith("/slide") and target.startswith("slides/"):
                rid_to_slide[rel.getAttribute("Id")] = target.removeprefix("slides/")

        pres_content = zf.read("ppt/presentation.xml").decode("utf-8")
        pres_dom = defusedxml.minidom.parseString(pres_content)

        slides = []
        for sld_id in pres_dom.getElementsByTagName("p:sldId"):
            name = rid_to_slide.get(sld_id.getAttribute("r:id"))
            if name is None:
                continue
            # PresentationML stores the hidden flag as the ``show`` attribute
            # on the slide part's root ``p:sld`` element. The ``p:sldId``
            # check is kept so anything flagged before is still flagged.
            hidden = sld_id.getAttribute("show") == "0" or _slide_is_hidden(zf, name)
            slides.append({"name": name, "hidden": hidden})

    return slides
|
|
|
|
def build_slide_list(
    slide_info: list[dict],
    visible_images: list[Path],
    temp_dir: Path,
) -> list[tuple[Path, str]]:
    """Pair every slide with the image to show for it and its grid label.

    Args:
        slide_info: Dicts with ``"name"`` and ``"hidden"`` keys, in
            presentation order.
        visible_images: Rendered images for the non-hidden slides, in order.
        temp_dir: Directory where placeholder images for hidden slides are
            written.

    Returns:
        ``(image_path, label)`` tuples in presentation order. Hidden slides
        get a placeholder image and a ``" (hidden)"`` label suffix. A
        non-hidden slide is omitted once the rendered images run out.
    """
    # Placeholders take the size of the first rendered slide, falling back
    # to 1920x1080 when nothing was rendered.
    placeholder_size = (1920, 1080)
    if visible_images:
        with Image.open(visible_images[0]) as first_image:
            placeholder_size = first_image.size

    remaining = iter(visible_images)
    _exhausted = object()
    entries = []

    for entry in slide_info:
        if not entry["hidden"]:
            rendered = next(remaining, _exhausted)
            if rendered is not _exhausted:
                entries.append((rendered, entry["name"]))
            continue

        target = temp_dir / f"hidden-{entry['name']}.jpg"
        create_hidden_placeholder(placeholder_size).save(target, "JPEG")
        entries.append((target, f"{entry['name']} (hidden)"))

    return entries
|
|
|
|
def create_hidden_placeholder(size: tuple[int, int]) -> Image.Image:
    """Build the light-gray, crossed-out image that stands in for a hidden slide.

    Args:
        size: ``(width, height)`` of the placeholder in pixels.

    Returns:
        A new RGB image filled with ``#F0F0F0`` and both diagonals drawn in
        ``#CCCCCC``.
    """
    canvas = Image.new("RGB", size, color="#F0F0F0")
    pen = ImageDraw.Draw(canvas)

    # Stroke scales with the shorter side but never drops below 5 pixels.
    stroke = max(5, min(size) // 100)

    diagonals = (
        [(0, 0), size],
        [(size[0], 0), (0, size[1])],
    )
    for diagonal in diagonals:
        pen.line(diagonal, fill="#CCCCCC", width=stroke)

    return canvas
|
|
|
|
def _failure_message(summary: str, stderr: str) -> str:
    """Append the tool's stderr output to *summary* when there is any."""
    detail = (stderr or "").strip()
    return f"{summary}: {detail}" if detail else summary


def convert_to_images(pptx_path: Path, temp_dir: Path) -> list[Path]:
    """Render the deck to JPEG images, one per page of the exported PDF.

    Runs ``soffice`` to convert the presentation to PDF inside *temp_dir*,
    then ``pdftoppm`` to rasterize that PDF at ``CONVERSION_DPI``.

    Args:
        pptx_path: Presentation to convert.
        temp_dir: Existing directory that receives the PDF and the
            ``slide-<n>.jpg`` page images.

    Returns:
        Paths of the generated page images in page order.

    Raises:
        RuntimeError: If either tool exits non-zero, or ``soffice`` does not
            produce the expected PDF. The message includes the tool's
            stderr output when there is any.
    """
    pdf_path = temp_dir / f"{pptx_path.stem}.pdf"

    result = subprocess.run(
        [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(temp_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
        env=get_soffice_env(),
    )
    # The PDF's existence is checked in addition to the exit status.
    if result.returncode != 0 or not pdf_path.exists():
        raise RuntimeError(_failure_message("PDF conversion failed", result.stderr))

    result = subprocess.run(
        [
            "pdftoppm",
            "-jpeg",
            "-r",
            str(CONVERSION_DPI),
            str(pdf_path),
            str(temp_dir / "slide"),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(_failure_message("Image conversion failed", result.stderr))

    # Shorter names first, then alphabetical: this is numeric page order
    # whether or not the page numbers are zero-padded ("slide-9" sorts
    # before "slide-10").
    return sorted(
        temp_dir.glob("slide-*.jpg"),
        key=lambda page: (len(page.name), page.name),
    )
|
|
|
|
def create_grids(
    slides: list[tuple[Path, str]],
    cols: int,
    width: int,
    output_path: Path,
) -> list[str]:
    """Split the slides into grid-sized chunks and save one image per chunk.

    Each grid holds at most ``cols * (cols + 1)`` slides. When everything
    fits in one grid it is written to *output_path*; otherwise the files are
    numbered ``<stem>-1<suffix>``, ``<stem>-2<suffix>``, ... next to it.

    Args:
        slides: ``(image_path, label)`` tuples in display order.
        cols: Number of columns per grid; must be at least 1.
        width: Thumbnail width in pixels.
        output_path: Destination for a single grid, and the naming template
            for numbered grids. Missing parent directories are created.

    Returns:
        The written file names as strings, in order. Empty when *slides* is
        empty.

    Raises:
        ValueError: If *cols* is less than 1.
    """
    # A non-positive column count gives a zero or meaningless chunk size.
    # Fail with a clear message instead of a cryptic range() error or a
    # silently empty result.
    if cols < 1:
        raise ValueError(f"cols must be at least 1, got {cols}")

    max_per_grid = cols * (cols + 1)
    needs_numbering = len(slides) > max_per_grid
    grid_files = []

    for chunk_idx, start_idx in enumerate(range(0, len(slides), max_per_grid)):
        chunk_slides = slides[start_idx : start_idx + max_per_grid]
        grid = create_grid(chunk_slides, cols, width)

        if needs_numbering:
            numbered_name = f"{output_path.stem}-{chunk_idx + 1}{output_path.suffix}"
            grid_filename = output_path.parent / numbered_name
        else:
            grid_filename = output_path

        grid_filename.parent.mkdir(parents=True, exist_ok=True)
        grid.save(str(grid_filename), quality=JPEG_QUALITY)
        grid_files.append(str(grid_filename))

    return grid_files
|
|
|
|
def create_grid(
    slides: list[tuple[Path, str]],
    cols: int,
    width: int,
) -> Image.Image:
    """Compose one grid image from labeled slide thumbnails.

    Each cell is a centered text label stacked above the slide thumbnail.
    Thumbnail height is derived from the aspect ratio of the first slide
    image, and every thumbnail is centered inside its cell.

    Args:
        slides: ``(image_path, label)`` tuples to place, row by row.
        cols: Number of columns in the grid.
        width: Thumbnail width in pixels.

    Returns:
        The composed RGB image on a white background.
    """
    font_size = int(width * FONT_SIZE_RATIO)
    label_padding = int(font_size * LABEL_PADDING_RATIO)

    with Image.open(slides[0][0]) as first:
        height = int(width * (first.height / first.width))

    # A cell is the label band (padding, text, padding) plus the thumbnail.
    label_band = font_size + label_padding * 2
    cell_h = height + label_band

    rows = (len(slides) + cols - 1) // cols
    canvas_size = (
        cols * width + (cols + 1) * GRID_PADDING,
        rows * cell_h + (rows + 1) * GRID_PADDING,
    )
    grid = Image.new("RGB", canvas_size, "white")
    draw = ImageDraw.Draw(grid)

    # Fall back to the unsized default font if a sized one cannot be loaded.
    try:
        font = ImageFont.load_default(size=font_size)
    except Exception:
        font = ImageFont.load_default()

    for index, (img_path, slide_name) in enumerate(slides):
        row, col = divmod(index, cols)
        cell_x = GRID_PADDING + col * (width + GRID_PADDING)
        cell_y = GRID_PADDING + row * (cell_h + GRID_PADDING)

        # Label, horizontally centered over the cell.
        left, _, right, _ = draw.textbbox((0, 0), slide_name, font=font)
        label_x = cell_x + (width - (right - left)) // 2
        draw.text(
            (label_x, cell_y + label_padding),
            slide_name,
            fill="black",
            font=font,
        )

        thumb_top = cell_y + label_band

        with Image.open(img_path) as thumb:
            thumb.thumbnail((width, height), Image.Resampling.LANCZOS)
            thumb_w, thumb_h = thumb.size
            paste_x = cell_x + (width - thumb_w) // 2
            paste_y = thumb_top + (height - thumb_h) // 2
            grid.paste(thumb, (paste_x, paste_y))

            if BORDER_WIDTH > 0:
                # Outline sits just outside the pasted thumbnail.
                top_left = (paste_x - BORDER_WIDTH, paste_y - BORDER_WIDTH)
                bottom_right = (
                    paste_x + thumb_w + BORDER_WIDTH - 1,
                    paste_y + thumb_h + BORDER_WIDTH - 1,
                )
                draw.rectangle(
                    [top_left, bottom_right],
                    outline="gray",
                    width=BORDER_WIDTH,
                )

    return grid
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
|
|