#!/usr/bin/env python3
"""
Resize Math Vision images to reduce memory usage while maintaining readability.

This script resizes images in dataset/math_vision/images/ to a maximum dimension
while preserving aspect ratio. Original images are backed up by default
(pass --no-backup to skip the backup).

Usage:
    python scripts/resize_math_vision_images.py --max_size 600 --backup
    python scripts/resize_math_vision_images.py --max_size 800 --no-backup --quality 95
"""

import os
import argparse
import shutil
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def resize_image(image_path: str, max_size: int, quality: int = 90) -> tuple:
    """
    Resize an image in place so its longest side is at most max_size pixels.

    The aspect ratio is preserved. Images that already fit within max_size in
    both dimensions are left untouched on disk.

    Args:
        image_path: Path to the image; the file is overwritten when resized
        max_size: Maximum dimension (width or height) in pixels
        quality: JPEG quality (1-100); not used for PNG output

    Returns:
        (original_size, new_size) tuple of (width, height) pairs; both entries
        are equal when no resize was needed

    Raises:
        Any exception raised by Pillow or the OS while opening, resizing or
        saving the image is propagated to the caller.
    """
    # Context manager guarantees the source file handle is released, even on error.
    with Image.open(image_path) as img:
        original_size = img.size
        width, height = original_size

        if width <= max_size and height <= max_size:
            # Image is already small enough
            return original_size, original_size

        # Scale the longer side to max_size; clamp the shorter side to at
        # least 1px so extreme aspect ratios do not produce a zero dimension.
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * max_size / width))
        else:
            new_height = max_size
            new_width = max(1, int(width * max_size / height))

        # resize() returns a new in-memory image, so it stays valid after the
        # source file is closed.
        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Write to a temporary sibling file first and swap it in afterwards, so a
    # failed save never leaves the original image truncated.
    tmp_path = f"{image_path}.tmp"
    try:
        if image_path.lower().endswith('.png'):
            # PNG: optimize for size while maintaining quality
            img_resized.save(tmp_path, 'PNG', optimize=True)
        else:
            # Everything else is written as JPEG
            img_resized.save(tmp_path, 'JPEG', quality=quality, optimize=True)
        os.replace(tmp_path, image_path)
    except Exception:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    return original_size, (new_width, new_height)


def backup_images(image_dir: str, backup_dir: str):
    """
    Copy the whole image directory to backup_dir.

    If backup_dir already exists nothing is copied and a warning is logged, so
    an earlier backup is never overwritten.

    Args:
        image_dir: Directory to back up
        backup_dir: Destination directory for the copy
    """
    if os.path.exists(backup_dir):
        logging.warning(f"Backup directory {backup_dir} already exists, skipping backup")
        return

    logging.info(f"Creating backup: {image_dir} -> {backup_dir}")
    shutil.copytree(image_dir, backup_dir)
    logging.info("Backup completed")


def main():
    """Parse command-line arguments, optionally back up, then resize all images."""
    parser = argparse.ArgumentParser(description="Resize Math Vision images")
    parser.add_argument(
        "--image_dir",
        type=str,
        default="dataset/math_vision/images",
        help="Directory containing images to resize"
    )
    parser.add_argument(
        "--max_size",
        type=int,
        default=600,
        help="Maximum dimension (width or height) for resized images (default: 600)"
    )
    parser.add_argument(
        "--quality",
        type=int,
        default=90,
        help="Image quality for JPEG (1-100, default: 90)"
    )
    parser.add_argument(
        "--backup",
        action="store_true",
        help="Create backup of original images before resizing"
    )
    parser.add_argument(
        "--no-backup",
        dest="backup",
        action="store_false",
        help="Do not create backup (faster but risky)"
    )
    parser.set_defaults(backup=True)

    args = parser.parse_args()

    image_dir = args.image_dir
    max_size = args.max_size
    quality = args.quality

    # A non-positive target size would make every resize fail; reject it early.
    if max_size < 1:
        parser.error("--max_size must be a positive integer")

    if not os.path.isdir(image_dir):
        logging.error(f"Image directory not found: {image_dir}")
        return

    logging.info("=" * 80)
    logging.info("Math Vision Image Resizing")
    logging.info("=" * 80)
    logging.info(f"Image directory: {image_dir}")
    logging.info(f"Max dimension: {max_size}px")
    logging.info(f"Quality: {quality}")
    logging.info(f"Backup: {args.backup}")

    # Create backup if requested
    if args.backup:
        # Normalize first: with a trailing slash ("images/") the backup path
        # would otherwise land inside the directory being copied.
        backup_dir = f"{os.path.normpath(image_dir)}_backup_original"
        backup_images(image_dir, backup_dir)

    # Find all images. Matching on the lower-cased suffix catches every case
    # variant exactly once, including on case-insensitive filesystems where
    # separate '*.png' / '*.PNG' globs would list the same file twice.
    image_suffixes = {'.png', '.jpg', '.jpeg'}
    image_files = sorted(
        path for path in Path(image_dir).iterdir()
        if path.is_file() and path.suffix.lower() in image_suffixes
    )

    logging.info(f"Found {len(image_files)} images to process")

    if not image_files:
        logging.warning("No images found!")
        return

    # Process images
    resized_count = 0
    skipped_count = 0
    error_count = 0
    total_original_pixels = 0
    total_new_pixels = 0

    for image_path in tqdm(image_files, desc="Resizing images"):
        try:
            original_size, new_size = resize_image(str(image_path), max_size, quality)
        except Exception as e:
            # Best-effort batch job: log the failure and continue with the rest.
            error_count += 1
            logging.error(f"Error processing {image_path}: {e}")
            continue

        if original_size != new_size:
            resized_count += 1
            total_original_pixels += original_size[0] * original_size[1]
            total_new_pixels += new_size[0] * new_size[1]
        else:
            skipped_count += 1

    # Summary
    logging.info("=" * 80)
    logging.info("Summary")
    logging.info("=" * 80)
    logging.info(f"Total images processed: {len(image_files)}")
    logging.info(f"Resized: {resized_count}")
    logging.info(f"Skipped (already small): {skipped_count}")
    if error_count > 0:
        logging.info(f"Failed: {error_count}")

    if resized_count > 0:
        # Reduction is computed over the resized images only.
        avg_reduction = (1 - total_new_pixels / total_original_pixels) * 100
        logging.info(f"Average pixel reduction: {avg_reduction:.1f}%")
        logging.info(f"Estimated memory reduction: {avg_reduction:.1f}%")

    logging.info("\nDone! You can now run training with reduced memory usage.")
    if args.backup:
        logging.info(f"\nOriginal images backed up to: {backup_dir}")
        logging.info(f"To restore: rm -rf {image_dir} && mv {backup_dir} {image_dir}")


if __name__ == "__main__":
    main()