jebin2 committed on
Commit
9c32a2c
·
1 Parent(s): 5522d82
PDF/reduce_pdf_size.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import os
3
+
4
def extract_images_from_pdf(input_pdf: str, output_dir: str = "extracted_images") -> list:
    """
    Extract all images from a PDF and save them as individual image files.

    Files are named ``page<N>_img<M>.<ext>`` where N and M are 1-based page
    and per-page image indices and <ext> is the extension reported by
    PyMuPDF for the extracted image.

    Args:
        input_pdf (str): Path to the PDF file.
        output_dir (str): Directory to save extracted images. It is created
            if it does not exist. Default is 'extracted_images'.

    Returns:
        List of saved image file paths, in the order they were written.
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    saved_images = []

    # Open the PDF in a context manager so it is closed even when an
    # extraction or a file write raises part-way through.
    with fitz.open(input_pdf) as pdf:
        print(f"Extracting images from: {input_pdf}")

        for page_num, page in enumerate(pdf, start=1):
            images = page.get_images(full=True)

            for img_index, img in enumerate(images, start=1):
                # First item of each image entry is the xref used for extraction
                xref = img[0]
                base_image = pdf.extract_image(xref)
                image_bytes = base_image["image"]
                image_ext = base_image["ext"]
                image_filename = f"page{page_num}_img{img_index}.{image_ext}"

                output_path = os.path.join(output_dir, image_filename)
                with open(output_path, "wb") as img_file:
                    img_file.write(image_bytes)

                saved_images.append(output_path)
                print(f"Saved: {output_path}")

    if saved_images:
        print(f"✅ Extracted {len(saved_images)} images to: {output_dir}")
    else:
        print("⚠️ No images found in the PDF.")

    return saved_images
50
+
51
+ # Example usage
52
# Only run the example when executed as a script, so importing this module
# does not trigger an extraction as a side effect.
if __name__ == "__main__":
    extract_images_from_pdf("../CaptionCreator/media/Jebin passport.pdf")
image/remove_background_ai.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore", category=FutureWarning)
3
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
4
+
5
+ from PIL import Image
6
+ import matplotlib.pyplot as plt
7
+ import torch
8
+ from torchvision import transforms
9
+ from transformers import AutoModelForImageSegmentation
10
+ import numpy as np
11
+ from pathlib import Path
12
+ from tqdm import tqdm
13
+ import os
14
+ import gc
15
+ from .remove_background import RemoveBackground
16
+ from custom_logger import logger_config
17
+
18
+
19
class RemoveBackgroundAI(RemoveBackground):
    """
    Background remover that predicts a foreground mask with a pre-trained
    image-segmentation model and applies that mask to the input image.
    """

    def __init__(self, model_name='briaai/RMBG-2.0', device='cuda' if torch.cuda.is_available() else 'cpu', image_size=(1024, 1024)):
        """
        Initialize the background remover with a pre-trained model.

        Args:
            model_name (str): Identifier passed to
                AutoModelForImageSegmentation.from_pretrained.
            device (str): Device the model and input tensors are moved to.
                Defaults to 'cuda' when available, otherwise 'cpu'.
            image_size (tuple): Size every image is resized to before inference.
        """
        super().__init__("remove_background_ai")
        self.device = device
        self.image_size = image_size

        # Load the model.
        # NOTE: trust_remote_code=True lets the model repository supply the
        # Python code that is executed here; only use trusted model names.
        self.model = AutoModelForImageSegmentation.from_pretrained(model_name, trust_remote_code=True)
        if self._is_cuda(device):
            torch.set_float32_matmul_precision('high')
        self.model.to(device)
        self.model.eval()

        # Define image transformations
        self.transform = transforms.Compose([
            transforms.Resize(image_size, antialias=True),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    @staticmethod
    def _is_cuda(device):
        """Return True for any CUDA device spec ('cuda', 'cuda:0', torch.device('cuda'))."""
        return str(device).startswith('cuda')

    @staticmethod
    def _load_rgb(image_path):
        """Open an image, convert it to RGB and close the underlying file."""
        with Image.open(image_path) as source:
            # convert() returns an independent in-memory image
            return source.convert("RGB")

    @staticmethod
    def _as_str_path(output_path):
        """Convert path-like objects to str; leave every other value untouched."""
        if isinstance(output_path, os.PathLike):
            return os.fspath(output_path)
        return output_path

    @staticmethod
    def _as_png_path(output_path):
        """Return output_path with a '.png' extension (needed to keep transparency)."""
        if output_path.lower().endswith('.png'):
            return output_path
        return os.path.splitext(output_path)[0] + '.png'

    def _predict_mask(self, image):
        """Run the model on an RGB PIL image and return the mask as a PIL image at model resolution."""
        input_tensor = self.transform(image).unsqueeze(0).to(self.device)
        with torch.no_grad():
            # The last model output is turned into probabilities with sigmoid
            preds = self.model(input_tensor)[-1].sigmoid().cpu()
        return transforms.ToPILImage()(preds[0].squeeze())

    def process(self, image_path, output_path=None, save_alpha=True):
        """
        Remove background while preserving exact foreground position and size.

        Args:
            image_path (str or Path): Path to the input image
            output_path (str or Path, optional): Path to save the output image.
                When save_alpha is True the extension is forced to '.png'.
            save_alpha (bool): If True, save with transparency (PNG RGBA), else black background

        Returns:
            PIL.Image: Processed image with background removed
        """
        image = self._load_rgb(image_path)

        # The mask is produced at model resolution; scale it back to the source size
        mask = self._predict_mask(image).resize(image.size, Image.LANCZOS)

        if save_alpha:
            # RGBA copy of the source whose alpha channel is the predicted mask
            result_image = image.convert("RGBA")
            result_image.putalpha(mask)
        else:
            # Black canvas with the source pasted through the mask
            result_image = Image.new("RGB", image.size, (0, 0, 0))
            result_image.paste(image, mask=mask)

        # Save output if path provided
        if output_path:
            output_path = self._as_str_path(output_path)
            if save_alpha:
                result_image.save(self._as_png_path(output_path), format="PNG")
            else:
                result_image.save(output_path)

        return result_image

    def remove_background(self, image_path, output_path=None, crop=False, bg_color=(0, 0, 0), save_alpha=False, keep_size=True, skip_crop=False):
        """
        Original method - kept for backward compatibility.
        For preserving exact position/size, use process() instead.

        Args:
            image_path (str or Path): Path to the input image
            output_path (str or Path, optional): Path to save the output image
            crop (bool): If True (and skip_crop is False), crop to the
                bounding box of the non-transparent content
            bg_color (tuple): Background colour used when saving without alpha
            save_alpha (bool): If True, save as PNG with transparency;
                otherwise composite onto bg_color before saving
            keep_size (bool): After cropping, centre the cropped content on a
                transparent canvas of the original size
            skip_crop (bool): If True, never crop, regardless of `crop`

        Returns:
            PIL.Image: RGBA image with the background made transparent
        """
        image = self._load_rgb(image_path)
        original_size = image.size

        # Resize with Pillow's default resampling filter, as this method always has
        mask = self._predict_mask(image).resize(original_size)

        # Create RGBA image with transparency
        temp_image = Image.new("RGBA", original_size, (0, 0, 0, 0))
        temp_image.paste(image.convert("RGBA"), mask=mask)

        if skip_crop:
            print(f"Skipping cropping for {image_path}")
        elif crop:
            temp_image = self._crop_to_content(temp_image)
            if keep_size:
                # Centre the cropped content on a canvas of the original size.
                # (Without a crop the canvas would equal temp_image, so padding
                # is only needed on this path.)
                padded_image = Image.new("RGBA", original_size, (0, 0, 0, 0))
                offset_x = (original_size[0] - temp_image.size[0]) // 2
                offset_y = (original_size[1] - temp_image.size[1]) // 2
                padded_image.paste(temp_image, (offset_x, offset_y))
                temp_image = padded_image

        # Save output
        if output_path:
            output_path = self._as_str_path(output_path)
            if save_alpha:
                temp_image.save(self._as_png_path(output_path), format="PNG")
            else:
                # Flatten onto the requested background using the alpha channel
                bg_image = Image.new("RGB", temp_image.size, bg_color)
                bg_image.paste(temp_image, mask=temp_image.split()[3])
                bg_image.save(output_path)

        return temp_image

    def _crop_to_content(self, image):
        """
        Crop the image to the bounding box of the non-transparent content.

        Returns the image unchanged when every pixel is fully transparent.
        """
        alpha_channel = np.array(image)[:, :, 3]
        non_empty_columns = np.where(alpha_channel.max(axis=0) > 0)[0]
        non_empty_rows = np.where(alpha_channel.max(axis=1) > 0)[0]

        if len(non_empty_columns) == 0 or len(non_empty_rows) == 0:
            return image

        # Plain ints keep numpy scalar types out of the PIL call
        crop_box = (
            int(non_empty_columns.min()),
            int(non_empty_rows.min()),
            int(non_empty_columns.max()) + 1,
            int(non_empty_rows.max()) + 1,
        )
        return image.crop(crop_box)

    def cleanup(self):
        """
        Clean up resources used by the model.

        Safe to call more than once: the model is only released the first time.
        """
        if hasattr(self, "model"):
            if self._is_cuda(self.device):
                self.model.to('cpu')
            del self.model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        print("Model unloaded and resources cleaned up")
185
+
186
+
187
def remove_background_batch(folder, output_path=None, save_alpha=True, preserve_position=True):
    """
    Process all images in a folder and remove backgrounds while preserving position and size.

    Per-image failures are reported and skipped so one bad file does not
    abort the batch. The model is always released when processing ends.

    Args:
        folder (str): Folder containing images to process
        output_path (str): Output folder path. When omitted, each result is
            written next to (or over) its input file.
        save_alpha (bool): If True, saves output as PNG with transparency
        preserve_position (bool): If True, keeps foreground in exact original position
    """
    input_path = Path(folder)

    # Create output directory if it doesn't exist
    if output_path:
        Path(output_path).mkdir(parents=True, exist_ok=True)

    # Find all image files. A set avoids listing a file twice where the
    # lower- and upper-case patterns both match the same path.
    found = set()
    for ext in ['jpg', 'jpeg', 'png', 'bmp', 'tiff', 'webp']:
        found.update(input_path.glob(f"*.{ext}"))
        found.update(input_path.glob(f"*.{ext.upper()}"))
    image_files = sorted(found)

    print(f"Found {len(image_files)} images to process")
    if not image_files:
        # Nothing to do: skip loading the model altogether
        return

    remover = RemoveBackgroundAI()

    try:
        for img_path in tqdm(image_files, desc="Removing Background", unit="image"):
            try:
                # Determine output filename
                if output_path:
                    output_filename = os.path.basename(img_path)
                    if save_alpha and not output_filename.lower().endswith('.png'):
                        output_filename = os.path.splitext(output_filename)[0] + '.png'
                    output_file = os.path.join(output_path, output_filename)
                else:
                    output_file = img_path

                if preserve_position:
                    # Keeps the foreground at its exact original position
                    remover.process(
                        image_path=img_path,
                        output_path=output_file,
                        save_alpha=save_alpha
                    )
                else:
                    # Use original method with no cropping
                    remover.remove_background(
                        image_path=img_path,
                        output_path=output_file,
                        crop=False,  # No cropping to preserve position
                        bg_color=(0, 0, 0),
                        save_alpha=save_alpha,
                        skip_crop=True
                    )

            except Exception as e:
                # Best effort: report and continue with the next image
                print(f"✗ Error processing {img_path}: {e}")

    except KeyboardInterrupt:
        print("\nProcessing interrupted by user")
    finally:
        remover.cleanup()
251
+
252
+
253
+ # Single image processing function
254
def process(image_path, output_path=None, save_alpha=True):
    """
    Process a single image and remove background while preserving position and size.

    A fresh RemoveBackgroundAI is created for the call and always cleaned up
    afterwards, whether or not processing succeeded.

    Args:
        image_path (str): Path to input image
        output_path (str, optional): Path to save output image
        save_alpha (bool): If True, saves with transparency

    Returns:
        PIL.Image: Processed image, or None if processing failed
    """
    remover = RemoveBackgroundAI()

    try:
        result = remover.process(
            image_path=image_path,
            output_path=output_path,
            save_alpha=save_alpha
        )
        print(f"✓ Successfully processed: {os.path.basename(image_path)}")
        return result
    except Exception as e:
        # Best effort: report the failure and signal it with None
        print(f"✗ Error processing {image_path}: {e}")
        return None
    finally:
        remover.cleanup()
281
+
282
+
283
+ # Example usage
284
if __name__ == "__main__":
    # Batch-process a whole folder, keeping every subject at its original
    # position and size and writing transparent PNGs.
    batch_options = {
        "folder": "../CaptionCreator/media/puzzle_x_pic/",
        "output_path": "../CaptionCreator/media/processed/",
        "save_alpha": True,
        "preserve_position": True,
    }
    remove_background_batch(**batch_options)