model111 / scripts /resize_math_vision_images.py
LCZZZZ's picture
Upload MemGen code and data
e34b94f verified
#!/usr/bin/env python3
"""
Resize Math Vision images to reduce memory usage while maintaining readability.
This script resizes images in dataset/math_vision/images/ to a maximum dimension
while preserving aspect ratio. Original images are backed up.
Usage:
python scripts/resize_math_vision_images.py --max_size 600 --backup
python scripts/resize_math_vision_images.py --max_size 800 --no-backup --quality 95
"""
import os
import argparse
import shutil
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import logging
# Configure the root logger once at import time: INFO level and above,
# each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def resize_image(image_path: str, max_size: int, quality: int = 90) -> tuple:
    """
    Resize an image in place so its longest side is at most ``max_size``.

    The aspect ratio is preserved and the file at ``image_path`` is replaced
    by the resized image. Images that already fit are left untouched.

    Args:
        image_path: Path to the image. The extension selects the output
            format: ``.png`` (case-insensitive) is written as PNG, anything
            else as JPEG.
        max_size: Maximum dimension (width or height) in pixels; must be >= 1.
        quality: JPEG quality (1-100). Not used for PNG output.

    Returns:
        (original_size, new_size) tuple of ``(width, height)`` pairs. Both
        entries are equal when the image was already small enough.

    Raises:
        ValueError: If ``max_size`` is smaller than 1.
        OSError: If the image cannot be read or written; the original file
            is left intact in that case.
    """
    if max_size < 1:
        raise ValueError(f"max_size must be >= 1, got {max_size}")

    # The context manager releases the file handle on every path, including
    # the early return for images that need no work.
    with Image.open(image_path) as img:
        original_size = img.size
        width, height = original_size

        if width <= max_size and height <= max_size:
            # Image is already small enough
            return original_size, original_size

        # Pin the longer side to max_size and scale the other one. Clamp to
        # 1px so extreme aspect ratios cannot round down to a zero-sized
        # (invalid) target.
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * max_size / width))
        else:
            new_height = max_size
            new_width = max(1, int(width * max_size / height))

        # resize() loads the pixel data and returns an independent image, so
        # the source file can be closed before anything is written.
        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Write to a sibling temp file and swap it in afterwards: saving straight
    # onto image_path would truncate the original before encoding starts, so
    # a failed save (e.g. an alpha-channel image with a .jpg name) would
    # destroy it.
    tmp_path = f"{image_path}.resize_tmp"
    try:
        if image_path.lower().endswith('.png'):
            # PNG is lossless; optimize only affects file size
            img_resized.save(tmp_path, 'PNG', optimize=True)
        else:
            img_resized.save(tmp_path, 'JPEG', quality=quality, optimize=True)
        os.replace(tmp_path, image_path)
    except BaseException:
        # Do not leave a partial temp file behind, then let the caller see
        # the original error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return original_size, (new_width, new_height)
def backup_images(image_dir: str, backup_dir: str):
    """
    Copy the whole image directory tree to ``backup_dir``.

    If ``backup_dir`` already exists nothing is copied and a warning is
    logged, so an earlier backup is never overwritten.

    Args:
        image_dir: Directory to back up.
        backup_dir: Destination path for the copy.
    """
    backup_present = os.path.exists(backup_dir)

    if not backup_present:
        logging.info(f"Creating backup: {image_dir} -> {backup_dir}")
        shutil.copytree(image_dir, backup_dir)
        logging.info("Backup completed")
    else:
        logging.warning(f"Backup directory {backup_dir} already exists, skipping backup")
def main():
    """
    Command-line entry point: back up and resize every image in a directory.

    Parses ``--image_dir``, ``--max_size``, ``--quality`` and
    ``--backup/--no-backup`` (backup is on by default), optionally copies the
    image directory to ``<image_dir>_backup_original``, resizes each
    PNG/JPEG file directly inside the directory via ``resize_image`` and logs
    a summary. Per-image failures are logged and counted; they do not abort
    the run.
    """
    parser = argparse.ArgumentParser(description="Resize Math Vision images")
    parser.add_argument(
        "--image_dir",
        type=str,
        default="dataset/math_vision/images",
        help="Directory containing images to resize"
    )
    parser.add_argument(
        "--max_size",
        type=int,
        default=600,
        help="Maximum dimension (width or height) for resized images (default: 600)"
    )
    parser.add_argument(
        "--quality",
        type=int,
        default=90,
        help="Image quality for JPEG (1-100, default: 90)"
    )
    parser.add_argument(
        "--backup",
        action="store_true",
        help="Create backup of original images before resizing"
    )
    parser.add_argument(
        "--no-backup",
        dest="backup",
        action="store_false",
        help="Do not create backup (faster but risky)"
    )
    parser.set_defaults(backup=True)
    args = parser.parse_args()

    image_dir = args.image_dir
    max_size = args.max_size
    quality = args.quality

    # A non-positive target size would make every single image fail later;
    # reject it up front instead.
    if max_size < 1:
        parser.error("--max_size must be a positive integer")

    # isdir (not exists): a regular file is not a usable image directory.
    if not os.path.isdir(image_dir):
        logging.error(f"Image directory not found: {image_dir}")
        return

    logging.info("=" * 80)
    logging.info("Math Vision Image Resizing")
    logging.info("=" * 80)
    logging.info(f"Image directory: {image_dir}")
    logging.info(f"Max dimension: {max_size}px")
    logging.info(f"Quality: {quality}")
    logging.info(f"Backup: {args.backup}")

    # Create backup if requested
    if args.backup:
        # normpath drops a trailing separator, so "images/" yields the
        # sibling "images_backup_original" rather than a directory nested
        # inside the folder being resized.
        backup_dir = f"{os.path.normpath(image_dir)}_backup_original"
        backup_images(image_dir, backup_dir)

    # Find all images. Matching on the lower-cased suffix avoids listing a
    # file twice on case-insensitive filesystems (where "*.png" and "*.PNG"
    # both match) and also picks up mixed-case extensions.
    image_suffixes = {'.png', '.jpg', '.jpeg'}
    image_files = sorted(
        entry for entry in Path(image_dir).iterdir()
        if entry.is_file() and entry.suffix.lower() in image_suffixes
    )
    logging.info(f"Found {len(image_files)} images to process")
    if not image_files:
        logging.warning("No images found!")
        return

    # Process images
    resized_count = 0
    skipped_count = 0
    error_count = 0
    total_original_pixels = 0
    total_new_pixels = 0
    for image_path in tqdm(image_files, desc="Resizing images"):
        try:
            original_size, new_size = resize_image(str(image_path), max_size, quality)
        except Exception as e:
            # Best effort: one unreadable image must not stop the whole batch.
            error_count += 1
            logging.error(f"Error processing {image_path}: {e}")
            continue

        if original_size != new_size:
            resized_count += 1
            total_original_pixels += original_size[0] * original_size[1]
            total_new_pixels += new_size[0] * new_size[1]
        else:
            skipped_count += 1

    # Summary
    logging.info("=" * 80)
    logging.info("Summary")
    logging.info("=" * 80)
    logging.info(f"Total images processed: {len(image_files)}")
    logging.info(f"Resized: {resized_count}")
    logging.info(f"Skipped (already small): {skipped_count}")
    if error_count > 0:
        logging.info(f"Failed: {error_count}")
    if resized_count > 0:
        # Pixel-count ratio over the resized images only.
        avg_reduction = (1 - total_new_pixels / total_original_pixels) * 100
        logging.info(f"Average pixel reduction: {avg_reduction:.1f}%")
        logging.info(f"Estimated memory reduction: {avg_reduction:.1f}%")
    logging.info("\nDone! You can now run training with reduced memory usage.")
    if args.backup:
        logging.info(f"\nOriginal images backed up to: {backup_dir}")
        logging.info(f"To restore: rm -rf {image_dir} && mv {backup_dir} {image_dir}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()