File size: 6,014 Bytes
e34b94f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python3
"""
Resize Math Vision images to reduce memory usage while maintaining readability.

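This script resizes images in dataset/math_vision/images/ (or the directory given
with --image_dir) to a maximum dimension while preserving aspect ratio. Images are
overwritten in place. Unless --no-backup is given, the whole directory is first
copied to <image_dir>_backup_original (skipped if that directory already exists).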

Usage:
    python scripts/resize_math_vision_images.py --max_size 600 --backup
    python scripts/resize_math_vision_images.py --max_size 800 --no-backup --quality 95
"""

import os
import argparse
import shutil
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import logging

# Configure the root logger when the module is loaded: INFO level, with each
# record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def resize_image(image_path: str, max_size: int, quality: int = 90) -> tuple:
    """
    Resize an image in place so its longest side is at most max_size pixels.

    The aspect ratio is preserved. Images whose width and height are both
    already <= max_size are left untouched on disk. The output format is chosen
    from the file extension: '.png' (case-insensitive) is written as PNG,
    anything else as JPEG.

    Args:
        image_path: Path to the image; the file is overwritten when resized
        max_size: Maximum dimension (width or height) in pixels, must be >= 1
        quality: JPEG quality passed to the encoder; not used for PNG output

    Returns:
        (original_size, new_size) tuple of (width, height) pairs. Both entries
        are equal when the image was not resized.

    Raises:
        ValueError: If max_size is smaller than 1.
        OSError: If the image cannot be read or written (raised by Pillow/os).
    """
    if max_size < 1:
        raise ValueError(f"max_size must be >= 1, got {max_size}")

    # Context manager guarantees the source file handle is released, both on
    # the early return and before the file is replaced below.
    with Image.open(image_path) as img:
        original_size = img.size
        width, height = original_size

        if width <= max_size and height <= max_size:
            # Image is already small enough
            return original_size, original_size

        # Scale the longer side to max_size; clamp the shorter side to 1 so
        # extreme aspect ratios never truncate to a zero-pixel dimension.
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * max_size / width))
        else:
            new_height = max_size
            new_width = max(1, int(width * max_size / height))

        # resize() returns a new, fully loaded image, so it stays usable after
        # the source file is closed.
        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Write to a sibling temp file first: saving directly onto image_path would
    # truncate the original before encoding, losing it if the save fails.
    tmp_path = f"{image_path}.resize_tmp"
    try:
        if image_path.lower().endswith('.png'):
            # PNG: optimize for size while maintaining quality
            img_resized.save(tmp_path, 'PNG', optimize=True)
        else:
            # The JPEG writer only accepts a few modes; anything else (e.g.
            # RGBA or palette data) is converted to RGB, discarding alpha.
            if img_resized.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
                img_resized = img_resized.convert('RGB')
            img_resized.save(tmp_path, 'JPEG', quality=quality, optimize=True)
        os.replace(tmp_path, image_path)
    finally:
        # After a successful replace the temp file no longer exists; this only
        # cleans up the partial output of a failed save.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return original_size, (new_width, new_height)


def backup_images(image_dir: str, backup_dir: str):
    """
    Copy the whole image directory tree to backup_dir, once.

    If backup_dir already exists nothing is copied and a warning is logged,
    so an existing backup is never overwritten.

    Args:
        image_dir: Directory to back up
        backup_dir: Destination directory for the copy
    """
    backup_missing = not os.path.exists(backup_dir)

    if backup_missing:
        logging.info(f"Creating backup: {image_dir} -> {backup_dir}")
        shutil.copytree(image_dir, backup_dir)
        logging.info("Backup completed")
    else:
        logging.warning(f"Backup directory {backup_dir} already exists, skipping backup")


def main():
    """
    Command-line entry point: parse options, optionally back up, then resize.

    Steps:
        1. Parse --image_dir, --max_size, --quality and --backup/--no-backup
           (backup is enabled by default).
        2. If backup is enabled, copy the image directory to
           "<image_dir>_backup_original" via backup_images().
        3. Resize every .png/.jpg/.jpeg file directly inside image_dir
           (extension matched case-insensitively, no recursion) in place.
        4. Log a summary of resized, skipped and failed images.

    Errors on individual images are logged and counted; they do not stop the run.
    """
    parser = argparse.ArgumentParser(description="Resize Math Vision images")
    parser.add_argument(
        "--image_dir",
        type=str,
        default="dataset/math_vision/images",
        help="Directory containing images to resize"
    )
    parser.add_argument(
        "--max_size",
        type=int,
        default=600,
        help="Maximum dimension (width or height) for resized images (default: 600)"
    )
    parser.add_argument(
        "--quality",
        type=int,
        default=90,
        help="Image quality for JPEG (1-100, default: 90)"
    )
    parser.add_argument(
        "--backup",
        action="store_true",
        help="Create backup of original images before resizing"
    )
    parser.add_argument(
        "--no-backup",
        dest="backup",
        action="store_false",
        help="Do not create backup (faster but risky)"
    )
    parser.set_defaults(backup=True)

    args = parser.parse_args()

    image_dir = args.image_dir
    max_size = args.max_size
    quality = args.quality

    # Reject an unusable size up front instead of copying a backup and then
    # failing on every single image.
    if max_size < 1:
        parser.error(f"--max_size must be a positive integer, got {max_size}")

    # isdir (not exists): a regular file at this path cannot be scanned or copied.
    if not os.path.isdir(image_dir):
        logging.error(f"Image directory not found: {image_dir}")
        return

    logging.info("=" * 80)
    logging.info("Math Vision Image Resizing")
    logging.info("=" * 80)
    logging.info(f"Image directory: {image_dir}")
    logging.info(f"Max dimension: {max_size}px")
    logging.info(f"Quality: {quality}")
    logging.info(f"Backup: {args.backup}")

    # Create backup if requested
    backup_dir = f"{image_dir}_backup_original"
    if args.backup:
        backup_images(image_dir, backup_dir)

    # Find all images. Matching on the lower-cased suffix lists each file exactly
    # once; separate '*.png' / '*.PNG' globs return the same file twice on
    # case-insensitive filesystems and miss mixed-case names such as '.Png'.
    image_suffixes = {'.png', '.jpg', '.jpeg'}
    image_files = sorted(
        entry for entry in Path(image_dir).iterdir()
        if entry.is_file() and entry.suffix.lower() in image_suffixes
    )

    logging.info(f"Found {len(image_files)} images to process")

    if not image_files:
        logging.warning("No images found!")
        return

    # Process images
    resized_count = 0
    skipped_count = 0
    failed_count = 0
    total_original_pixels = 0
    total_new_pixels = 0

    for image_path in tqdm(image_files, desc="Resizing images"):
        try:
            original_size, new_size = resize_image(str(image_path), max_size, quality)
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole batch.
            failed_count += 1
            logging.error(f"Error processing {image_path}: {e}")
            continue

        if original_size == new_size:
            skipped_count += 1
            continue

        resized_count += 1
        total_original_pixels += original_size[0] * original_size[1]
        total_new_pixels += new_size[0] * new_size[1]

    # Summary
    logging.info("=" * 80)
    logging.info("Summary")
    logging.info("=" * 80)
    logging.info(f"Total images processed: {len(image_files)}")
    logging.info(f"Resized: {resized_count}")
    logging.info(f"Skipped (already small): {skipped_count}")
    if failed_count:
        logging.warning(f"Failed: {failed_count}")

    if resized_count > 0:
        # Aggregate reduction over all resized images (pixel-weighted).
        avg_reduction = (1 - total_new_pixels / total_original_pixels) * 100
        logging.info(f"Average pixel reduction: {avg_reduction:.1f}%")
        logging.info(f"Estimated memory reduction: {avg_reduction:.1f}%")

    logging.info("\nDone! You can now run training with reduced memory usage.")

    if args.backup:
        logging.info(f"\nOriginal images backed up to: {backup_dir}")
        # f-string so the actual paths are shown instead of literal placeholders.
        logging.info(f"To restore: rm -rf {image_dir} && mv {backup_dir} {image_dir}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()