jebin2 committed on
Commit
05b52e0
·
1 Parent(s): c82772a
comic_panel_extractor/llm_panel_extractor.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .config import Config
2
+ from ultralytics import YOLO
3
+ from PIL import Image
4
+ import cv2
5
+ from . import constant
6
+ from . import utils
7
+ import os
8
+ import shutil
9
+ import requests
10
+
11
class LLMPanelExtractor:
    """Detect comic panels with a YOLO model and save crops and debug images.

    Every file this class writes goes to ``config.output_folder`` and is
    numbered with the module-wide ``constant.INDEX`` counter, which is
    incremented before each save.
    """

    # Pretrained panel-detection weights, fetched when the local file is missing.
    YOLO_MODEL_URL = "https://huggingface.co/mosesb/best-comic-panel-detection/resolve/main/best.pt"

    def __init__(self, config: "Config | None" = None):
        """Load the YOLO model (downloading it if needed) and create the output folder.

        Args:
            config: Extractor configuration; a default ``Config()`` is built
                when omitted.

        Raises:
            requests.RequestException: If the model has to be downloaded and
                the download fails.
        """
        self.config = config or Config()

        # Check if YOLO model exists; if not, download it to the specified path
        yolo_model_path = self.config.yolo_model_path
        if not os.path.exists(yolo_model_path):
            self._download_yolo_model(yolo_model_path)

        self.yolo_model = YOLO(yolo_model_path)
        os.makedirs(self.config.output_folder, exist_ok=True)

    def _download_yolo_model(self, destination_path):
        """Stream the model weights to ``destination_path``.

        The data is written to a sibling ``.part`` file first and moved into
        place only after the download completed, so an interrupted download
        never leaves a truncated file that a later run would mistake for a
        valid model.
        """
        print(f"Downloading YOLO model to {destination_path}...")

        parent_folder = os.path.dirname(destination_path)
        if parent_folder:
            os.makedirs(parent_folder, exist_ok=True)

        partial_path = f"{destination_path}.part"
        try:
            # (connect timeout, read timeout) so a stalled server cannot hang forever.
            with requests.get(self.YOLO_MODEL_URL, stream=True, timeout=(10, 60)) as response:
                response.raise_for_status()  # Raise an error if the download fails
                with open(partial_path, "wb") as model_file:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        model_file.write(chunk)
            os.replace(partial_path, destination_path)
        finally:
            # Only still present when the download or the move failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

        print("YOLO model downloaded successfully.")

    def extract_bounding_boxes(self, detection_result_boxes):
        """Return the ``xyxy`` rows of a detection result as integer tuples.

        Args:
            detection_result_boxes: Object exposing an iterable ``xyxy``
                attribute whose rows each hold four numeric values.

        Returns:
            A list of ``(x_min, y_min, x_max, y_max)`` tuples, each value
            truncated with ``int``.
        """
        bounding_boxes = []
        for detection_box in detection_result_boxes.xyxy:
            # Unpacking enforces that every row has exactly four coordinates.
            x_min, y_min, x_max, y_max = map(int, detection_box)
            bounding_boxes.append((x_min, y_min, x_max, y_max))
        return bounding_boxes

    def crop_and_save_detected_panels(self, detected_boxes):
        """Crop each box out of the original image and save it as a panel file.

        Args:
            detected_boxes: Sequence of ``(x_min, y_min, x_max, y_max)``
                boxes; ``None`` or an empty sequence is reported and ignored.

        Raises:
            FileNotFoundError: If ``config.org_input_path`` cannot be read as
                an image.
        """
        if detected_boxes is None or len(detected_boxes) == 0:
            print(f"No boxes detected for {self.config.org_input_path}")
            return

        source_image = cv2.imread(self.config.org_input_path)
        if source_image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f"Could not read image: {self.config.org_input_path}")

        for box_coordinates in detected_boxes:
            x_min, y_min, x_max, y_max = box_coordinates

            # Negative indices would wrap around in a slice, so clamp them to
            # the image origin; values past the far edge are clipped by slicing.
            cropped_panel = source_image[max(y_min, 0):max(y_max, 0), max(x_min, 0):max(x_max, 0)]
            if cropped_panel.size == 0:
                # An empty crop cannot be written as an image.
                print(f"Skipping empty panel box {box_coordinates}")
                continue

            # Save cropped image
            constant.INDEX += 1
            panel_output_path = f"{self.config.output_folder}/{constant.INDEX:04d}_panel_{x_min, y_min, x_max, y_max}.jpg"
            cv2.imwrite(panel_output_path, cropped_panel)

    def pre_all_processed_boxes(self, all_processed_boxes, image_width, image_height):
        """Run the box post-processing pipeline from ``utils``.

        The boxes are passed through ``utils.extend_boxes_to_image_border``,
        sorted by ``y_min`` then ``x_min``, and finally passed through
        ``utils.extend_to_nearby_boxes``. Both helpers receive the image size
        and the configured minimum width/height ratios.

        Returns:
            The value returned by ``utils.extend_to_nearby_boxes``.
        """
        image_size = (image_width, image_height)
        min_width_ratio = self.config.min_width_ratio
        min_height_ratio = self.config.min_height_ratio

        bordered_boxes = utils.extend_boxes_to_image_border(
            all_processed_boxes,
            image_size,
            min_width_ratio,
            min_height_ratio,
        )
        # sort by y_min, then x_min
        ordered_boxes = sorted(bordered_boxes, key=lambda box: (box[1], box[0]))
        return utils.extend_to_nearby_boxes(
            ordered_boxes,
            image_size,
            min_width_ratio,
            min_height_ratio,
        )

    def detect_and_extract_panels(self, input_image_path=None, existing_boxes=None, confidence_threshold=0.9):
        """Run YOLO on an image, save newly found panels and write a mask image.

        Args:
            input_image_path: Image to run detection on; defaults to
                ``config.org_input_path``.
            existing_boxes: Boxes found earlier; they are combined with the
                new detections when the mask is drawn.
            confidence_threshold: Minimum detection confidence to keep a box.

        Returns:
            ``(masked_image_path, newly_detected_boxes)`` where the first item
            is the value returned by ``utils.draw_black`` and the second is
            the list of boxes that passed the threshold, or ``None`` when no
            box did.
        """
        if not input_image_path:
            input_image_path = self.config.org_input_path

        # Get image dimensions
        with Image.open(input_image_path) as input_image:
            image_width, image_height = input_image.size

        # Run YOLO detection
        detection_results = self.yolo_model.predict(source=input_image_path)
        first_detection_result = detection_results[0]
        newly_detected_boxes = None
        all_processed_boxes = list(existing_boxes) if existing_boxes else []

        # Filter boxes by confidence threshold
        if first_detection_result.boxes is not None:
            high_confidence_filter = first_detection_result.boxes.conf >= confidence_threshold
            if high_confidence_filter.sum() > 0:
                first_detection_result.boxes = first_detection_result.boxes[high_confidence_filter]
                newly_detected_boxes = self.extract_bounding_boxes(first_detection_result.boxes)
                all_processed_boxes.extend(newly_detected_boxes)

        # Process and extend boxes (also when there are no new detections)
        all_processed_boxes = self.pre_all_processed_boxes(all_processed_boxes, image_width, image_height)

        if newly_detected_boxes:
            # Crop and save detected panels
            self.crop_and_save_detected_panels(newly_detected_boxes)

            # Save prediction visualization; the channel order of the plotted
            # array is reversed before handing it to PIL.
            visualization_result = first_detection_result.plot()
            constant.INDEX += 1
            debug_output_path = f"{self.config.output_folder}/{constant.INDEX:04d}_debug.jpg"
            Image.fromarray(visualization_result[..., ::-1]).save(debug_output_path)

        # Draw the mask on the ORIGINAL image, not on the preprocessed input.
        constant.INDEX += 1
        masked_output_path = f"{self.config.output_folder}/{constant.INDEX:04d}_draw_black.jpg"
        masked_image_path = utils.draw_black(self.config.org_input_path, all_processed_boxes, masked_output_path, stripe=False)
        return masked_image_path, newly_detected_boxes

    def check_for_remaining_similarity(self, current_processed_image_path, existing_boxes):
        """Look for further panel-like regions outside the boxes found so far.

        ``utils.find_similar_remaining_regions`` is asked for additional
        boxes. When it returns any, they are cropped and saved, appended to
        ``existing_boxes`` (the caller's list is extended in place) and a new
        mask image is drawn.

        Args:
            current_processed_image_path: Path of the current mask image;
                returned unchanged when no additional region is found.
            existing_boxes: Boxes found so far.

        Returns:
            ``(current_processed_image_path, existing_boxes)``.
        """
        # Get image dimensions
        with Image.open(self.config.org_input_path) as input_image:
            image_width, image_height = input_image.size

        all_processed_boxes = self.pre_all_processed_boxes(existing_boxes, image_width, image_height)

        constant.INDEX += 1
        similar_remaining_regions_path = f"{self.config.output_folder}/{constant.INDEX:04d}_remaining_similarity_debug.jpg"
        similar_remaining_box = utils.find_similar_remaining_regions(all_processed_boxes, (image_width, image_height), similar_remaining_regions_path)
        if not similar_remaining_box:
            return current_processed_image_path, existing_boxes

        print(similar_remaining_box)
        self.crop_and_save_detected_panels(similar_remaining_box)
        existing_boxes.extend(similar_remaining_box)

        all_processed_boxes = self.pre_all_processed_boxes(existing_boxes, image_width, image_height)

        constant.INDEX += 1
        current_processed_image_path = f"{self.config.output_folder}/{constant.INDEX:04d}_remaining_similarity_draw_black.jpg"
        current_processed_image_path = utils.draw_black(self.config.org_input_path, all_processed_boxes, current_processed_image_path, stripe=False)

        return current_processed_image_path, existing_boxes
151
+
152
def extract_panel_via_llm(input_image_path, config=None):
    """Extract panels from an image by trying several preprocessing techniques.

    Each technique is applied repeatedly with a detection confidence threshold
    that drops by 0.1 per pass, until the processed boxes cover at least 95%
    of the image (as reported by ``utils.box_covered_ratio``) or the
    threshold reaches zero.

    NOTE: the configured output folder is deleted before processing starts.

    Args:
        input_image_path: Path of the image to process.
        config: Optional ``Config``; a default one is created when omitted.
            Its ``org_input_path`` is overwritten with ``input_image_path``.

    Returns:
        ``(all_path, accumulated_detected_boxes, all_processed_boxes)``:
        the names of the files in the output folder containing ``"_panel_"``,
        the raw boxes collected across all passes, and those boxes after
        ``pre_all_processed_boxes``.
    """
    required_coverage = 0.95
    confidence_step = 0.1

    # Initialize configuration
    extractor_config = config or Config()
    extractor_config.org_input_path = input_image_path

    # Clean output folder
    shutil.rmtree(extractor_config.output_folder, ignore_errors=True)

    # Initialize extractor (this recreates the output folder)
    panel_extractor = LLMPanelExtractor(extractor_config)

    current_processed_image_path = extractor_config.org_input_path
    accumulated_detected_boxes = []
    all_processed_boxes = []

    # Get original image dimensions
    with Image.open(current_processed_image_path) as original_image:
        original_width, original_height = original_image.size
    original_size = (original_width, original_height)

    def covered_ratio():
        """Coverage ratio of the boxes accumulated so far, after post-processing."""
        processed_boxes = panel_extractor.pre_all_processed_boxes(
            accumulated_detected_boxes, original_width, original_height
        )
        return utils.box_covered_ratio(processed_boxes, original_size)

    # Define image processing techniques to try
    processing_techniques = [
        {
            'name': 'clahe',
            'function': utils.convert_to_clahe,
            'confidence_level': 1.0,
            'description': 'CLAHE (Contrast Limited Adaptive Histogram Equalization)',
        },
        {
            'name': 'grayscale',
            'function': utils.convert_to_grayscale_pil,
            'confidence_level': 1.0,
            'description': 'Grayscale conversion',
        },
        {
            'name': 'lab_l',
            'function': utils.convert_to_lab_l,
            'confidence_level': 1.0,
            'description': 'LAB L-channel extraction',
        },
        {
            'name': 'group_color',
            'function': utils.convert_to_group_colors,
            'confidence_level': 0.1,
            'image_path': extractor_config.org_input_path,
            'description': 'Group Color extraction',
        },
    ]

    # Process with different techniques until the coverage threshold is met
    for technique in processing_techniques:
        iteration_count = 0
        confidence_level = technique["confidence_level"]

        # A technique may ask to start over from a specific image.
        restart_image_path = technique.get("image_path")
        if restart_image_path and covered_ratio() < required_coverage:
            current_processed_image_path = restart_image_path

        while covered_ratio() < required_coverage and confidence_level > 0:
            print(f"\n{technique['description']} process iteration: {iteration_count} confidence level: {confidence_level}")
            iteration_count += 1
            # Round after each step: repeated float subtraction of 0.1 from 1.0
            # ends at ~1.4e-16 instead of 0, which would trigger an extra pass
            # with a negative threshold.
            confidence_level = round(confidence_level - confidence_step, 10)

            # Apply image processing technique
            constant.INDEX += 1
            processed_output_path = f"{extractor_config.output_folder}/{constant.INDEX:04d}_convert_to_{technique['name']}.jpg"
            current_processed_image_path = technique['function'](current_processed_image_path, processed_output_path)

            # Run panel detection on processed image
            current_processed_image_path, newly_detected_boxes = panel_extractor.detect_and_extract_panels(
                input_image_path=current_processed_image_path,
                existing_boxes=accumulated_detected_boxes,
                confidence_threshold=confidence_level,
            )
            if newly_detected_boxes:
                accumulated_detected_boxes.extend(newly_detected_boxes)

            current_processed_image_path, accumulated_detected_boxes = panel_extractor.check_for_remaining_similarity(
                current_processed_image_path, accumulated_detected_boxes
            )
            all_processed_boxes = panel_extractor.pre_all_processed_boxes(
                accumulated_detected_boxes, original_width, original_height
            )

    all_path = [file for file in os.listdir(extractor_config.output_folder) if "_panel_" in file]

    print(f"Processing complete. Final result saved to: {extractor_config.output_folder}")
    print(f"Total panels detected: {len(all_path)}")
    return all_path, accumulated_detected_boxes, all_processed_boxes
236
+
237
+
238
if __name__ == "__main__":
    # Command-line entry point. The module uses package-relative imports, so
    # it has to be started as a module of its package (``python -m ...``).
    import argparse

    # Parse command-line arguments
    argument_parser = argparse.ArgumentParser(description="Run panel extractor on an image")
    argument_parser.add_argument("--input", type=str, required=True, help="Path to input image")
    parsed_arguments = argument_parser.parse_args()

    # extract_panel_via_llm returns three values; unpacking only two raised
    # "ValueError: too many values to unpack" after all the work was done.
    panel_file_names, total_detected_boxes, all_processed_boxes = extract_panel_via_llm(parsed_arguments.input)