"""
Resize Math Vision images to reduce memory usage while maintaining readability.

This script resizes the images in dataset/math_vision/images/ in place so that
neither dimension exceeds a maximum size, while preserving the aspect ratio.
Original images are backed up by default (disable with --no-backup).

Usage:
    python scripts/resize_math_vision_images.py --max_size 600 --backup
    python scripts/resize_math_vision_images.py --max_size 800 --no-backup --quality 95
"""
import argparse
import logging
import os
import shutil
from pathlib import Path
from typing import List, Optional, Sequence

from PIL import Image
from tqdm import tqdm
# Configure the root logger once at import time so that every message emitted
# by this script carries a timestamp and a severity level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def resize_image(image_path: str, max_size: int, quality: int = 90) -> tuple:
    """
    Resize an image in place so that neither side exceeds ``max_size`` pixels.

    The aspect ratio is preserved and the file at ``image_path`` is
    overwritten. Images that already fit within ``max_size`` are not rewritten.

    Args:
        image_path: Path to the image. A ``.png`` extension (any case) selects
            PNG output; every other file is written as JPEG.
        max_size: Maximum dimension (width or height) in pixels; must be >= 1.
        quality: JPEG quality (1-100). Not used for PNG files, which are only
            saved with ``optimize=True``.

    Returns:
        (original_size, new_size) tuple of ``(width, height)`` pairs. The two
        entries are equal when the image was already small enough.

    Raises:
        ValueError: If ``max_size`` is smaller than 1.
    """
    # Reject nonsensical targets up front instead of failing inside Pillow.
    if max_size < 1:
        raise ValueError(f"max_size must be a positive integer, got {max_size}")

    # The context manager closes the source file handle before the same path
    # is overwritten below; resize() returns an independent in-memory image.
    with Image.open(image_path) as img:
        original_size = img.size
        width, height = original_size

        if width <= max_size and height <= max_size:
            return original_size, original_size

        # Pin the longer side to max_size and scale the shorter one to match.
        # Clamp to 1 so extreme aspect ratios never truncate a side to 0 px.
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * max_size / width))
        else:
            new_height = max_size
            new_width = max(1, int(width * max_size / height))

        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    if image_path.lower().endswith('.png'):
        img_resized.save(image_path, 'PNG', optimize=True)
    else:
        img_resized.save(image_path, 'JPEG', quality=quality, optimize=True)

    return original_size, (new_width, new_height)
def backup_images(image_dir: str, backup_dir: str):
    """
    Create a backup copy of ``image_dir`` at ``backup_dir``.

    If ``backup_dir`` already exists the backup is skipped, so an earlier
    backup is never overwritten. The copy is first written to a sibling
    ``<backup_dir>.partial`` directory and renamed only once it is complete;
    an interrupted run therefore never leaves a half-written ``backup_dir``
    that a later run would mistake for a full backup.

    Args:
        image_dir: Directory whose contents are copied.
        backup_dir: Destination directory for the backup.
    """
    if os.path.exists(backup_dir):
        logging.warning(f"Backup directory {backup_dir} already exists, skipping backup")
        return

    logging.info(f"Creating backup: {image_dir} -> {backup_dir}")

    # normpath drops a trailing separator so the staging directory is a
    # sibling of backup_dir rather than a child of it.
    staging_dir = os.path.normpath(backup_dir) + ".partial"
    if os.path.exists(staging_dir):
        # Left over from an interrupted earlier run; it may be incomplete.
        shutil.rmtree(staging_dir)

    shutil.copytree(image_dir, staging_dir)
    os.rename(staging_dir, backup_dir)
    logging.info("Backup completed")
# File extensions (compared case-insensitively) that are treated as images.
_IMAGE_SUFFIXES = frozenset({".png", ".jpg", ".jpeg"})


def _find_images(image_dir: str) -> List[Path]:
    """Return the image files directly inside ``image_dir``, sorted by path."""
    # Filtering on the lower-cased suffix lists every file exactly once, even
    # on case-insensitive filesystems where '*.png' and '*.PNG' both match.
    return sorted(
        entry
        for entry in Path(image_dir).iterdir()
        if entry.is_file() and entry.suffix.lower() in _IMAGE_SUFFIXES
    )


def main(argv: Optional[Sequence[str]] = None):
    """
    Command-line entry point: optionally back up, then resize images in place.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the process command line.

    Returns:
        None. Problems are reported through logging; an invalid ``--max_size``
        ends the program through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="Resize Math Vision images")
    parser.add_argument(
        "--image_dir",
        type=str,
        default="dataset/math_vision/images",
        help="Directory containing images to resize"
    )
    parser.add_argument(
        "--max_size",
        type=int,
        default=600,
        help="Maximum dimension (width or height) for resized images (default: 600)"
    )
    parser.add_argument(
        "--quality",
        type=int,
        default=90,
        help="Image quality for JPEG (1-100, default: 90)"
    )
    parser.add_argument(
        "--backup",
        action="store_true",
        help="Create backup of original images before resizing"
    )
    parser.add_argument(
        "--no-backup",
        dest="backup",
        action="store_false",
        help="Do not create backup (faster but risky)"
    )
    parser.set_defaults(backup=True)

    args = parser.parse_args(argv)

    # Fail before anything is copied or rewritten if the target is unusable.
    if args.max_size < 1:
        parser.error("--max_size must be a positive integer")

    # Normalising strips a trailing slash; otherwise "images/" would yield the
    # backup path "images/_backup_original", i.e. inside the image directory.
    image_dir = os.path.normpath(args.image_dir)
    max_size = args.max_size
    quality = args.quality
    backup_dir = f"{image_dir}_backup_original"

    if not os.path.isdir(image_dir):
        logging.error(f"Image directory not found: {image_dir}")
        return

    logging.info("=" * 80)
    logging.info("Math Vision Image Resizing")
    logging.info("=" * 80)
    logging.info(f"Image directory: {image_dir}")
    logging.info(f"Max dimension: {max_size}px")
    logging.info(f"Quality: {quality}")
    logging.info(f"Backup: {args.backup}")

    if args.backup:
        backup_images(image_dir, backup_dir)

    image_files = _find_images(image_dir)
    logging.info(f"Found {len(image_files)} images to process")

    if not image_files:
        logging.warning("No images found!")
        return

    resized_count = 0
    skipped_count = 0
    failed_count = 0
    total_original_pixels = 0
    total_new_pixels = 0

    for image_path in tqdm(image_files, desc="Resizing images"):
        try:
            original_size, new_size = resize_image(str(image_path), max_size, quality)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            failed_count += 1
            logging.error(f"Error processing {image_path}: {e}")
            continue

        if original_size == new_size:
            skipped_count += 1
            continue

        resized_count += 1
        total_original_pixels += original_size[0] * original_size[1]
        total_new_pixels += new_size[0] * new_size[1]

    logging.info("=" * 80)
    logging.info("Summary")
    logging.info("=" * 80)
    logging.info(f"Total images processed: {len(image_files)}")
    logging.info(f"Resized: {resized_count}")
    logging.info(f"Skipped (already small): {skipped_count}")
    if failed_count:
        logging.info(f"Failed: {failed_count}")

    if resized_count > 0:
        avg_reduction = (1 - total_new_pixels / total_original_pixels) * 100
        logging.info(f"Average pixel reduction: {avg_reduction:.1f}%")
        logging.info(f"Estimated memory reduction: {avg_reduction:.1f}%")

    logging.info("\nDone! You can now run training with reduced memory usage.")

    if args.backup:
        logging.info(f"\nOriginal images backed up to: {backup_dir}")
        logging.info(f"To restore: rm -rf {image_dir} && mv {backup_dir} {image_dir}")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()