"""Replace the "Group · N members" line of a screenshot with a new count.

The module OCRs the top half of an image with Tesseract, decides whether the
layout looks like the iPhone or the Android variant, paints over the original
"Grup/Group · N anggota/members" text with the sampled background colour and
draws the replacement text with a platform-specific font.

All images handled here are OpenCV arrays in BGR channel order (they come
from ``cv2.imread`` or from an explicit RGB->BGR conversion).
"""

import argparse
import base64
import io
import logging
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError

import cv2
import numpy as np
import pytesseract
from PIL import Image, ImageDraw, ImageFont

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Maximum time a single Tesseract run may take before the caller gives up.
_OCR_TIMEOUT_SECONDS = 30
# Perceived brightness (0-255) below which a background counts as dark mode.
_DARK_BRIGHTNESS_THRESHOLD = 128
# Upper bound on font-size refinement steps when fitting replacement text.
_MAX_FONT_FIT_ITERATIONS = 5


def _position_at(extracted_data, idx):
    """Return the OCR bounding box of token ``idx`` as a dict.

    ``extracted_data`` is the dict produced by ``pytesseract.image_to_data``
    with ``Output.DICT``; only its 'left', 'top', 'width' and 'height' lists
    are read.
    """
    return {
        "left": extracted_data['left'][idx],
        "top": extracted_data['top'][idx],
        "width": extracted_data['width'][idx],
        "height": extracted_data['height'][idx],
    }


def _sample_pixel(image, x, y):
    """Return ``image[y, x]`` with the coordinates clamped to the image.

    Clamping avoids an IndexError (or silent negative-index wrap-around) when
    the sampling point derived from OCR boxes falls outside the picture.
    """
    height, width = image.shape[:2]
    x = min(max(int(x), 0), width - 1)
    y = min(max(int(y), 0), height - 1)
    return image[y, x]


def _is_dark_bgr(pixel):
    """Return True when a BGR pixel is darker than the dark-mode threshold.

    Uses the 299/587/114 luma weights for red/green/blue. The pixel is read
    as (blue, green, red) because that is the channel order of OpenCV images.
    """
    b, g, r = (float(channel) for channel in pixel[:3])
    brightness = (r * 299 + g * 587 + b * 114) / 1000
    return brightness < _DARK_BRIGHTNESS_THRESHOLD


class IPhoneProcessor:
    """Rewrites the member-count line for the iPhone screenshot layout."""

    def __init__(self, font_path="SF-Pro-Display-Regular.otf"):
        self.font_path = font_path

    @staticmethod
    def _find_best_candidate(extracted_data, text_list):
        """Locate the "<Grup|Group> ... <N> <anggota|members>" token pattern.

        A candidate is a purely numeric token followed (within two tokens) by
        a member word, with a "Grup"/"Group" token among the nine tokens
        before the number. When several candidates exist, the one whose group
        and member tokens are vertically closest wins, i.e. the one most
        likely to sit on a single text line.

        Returns a dict with 'group_idx', 'split_idx' (may be None),
        'number_idx' and 'member_idx', or None when nothing matches.
        """
        candidates = []
        for i, text in enumerate(text_list):
            if not text.isdigit():
                continue
            for offset in (1, 2):
                idx = i + offset
                if idx >= len(text_list):
                    continue
                if text_list[idx].lower() not in ("anggota", "members", "member"):
                    continue

                # Find the nearest "Grup"/"Group" token before the number.
                group_idx = None
                for j in range(i - 1, max(-1, i - 10), -1):
                    if text_list[j] in ("Grup", "Group"):
                        group_idx = j
                        break
                if group_idx is None:
                    # Candidates without a group token can never be selected.
                    continue

                # Find the '-' separator between the group word and the number.
                split_idx = None
                for j in range(group_idx + 1, i):
                    if text_list[j] == "-":
                        split_idx = j
                        break

                candidates.append({
                    'group_idx': group_idx,
                    'split_idx': split_idx,
                    'number_idx': i,
                    'member_idx': idx,
                })

        tops = extracted_data['top']
        return min(
            candidates,
            key=lambda c: abs(tops[c['group_idx']] - tops[c['member_idx']]),
            default=None,
        )

    def _fit_font(self, draw, text, original_height, original_width):
        """Pick a font size whose rendered text roughly matches the original.

        Starts at 1.9x the original token height and refines within
        [1.4x, 2.3x] for at most ``_MAX_FONT_FIT_ITERATIONS`` measurements.
        Returns ``(font, bbox)`` for the last size that was actually measured,
        so the two values always belong together.
        """
        # max(1, ...) keeps a degenerate zero-height OCR box from requesting
        # a zero-sized font.
        min_size = max(1, int(original_height * 1.4))
        max_size = max(min_size, int(original_height * 2.3))
        font_size = max(1, int(original_height * 1.9))

        for _ in range(_MAX_FONT_FIT_ITERATIONS):
            font = ImageFont.truetype(self.font_path, font_size)
            bbox = draw.textbbox((0, 0), text, font=font)
            text_height = bbox[3] - bbox[1]
            text_width = bbox[2] - bbox[0]
            too_wide = text_width > original_width * 2
            if abs(text_height - original_height) <= 2 and not too_wide:
                break
            if text_height > original_height or too_wide:
                font_size = int(font_size * 0.95)
            else:
                font_size = int(font_size * 1.05)
            font_size = max(min_size, min(max_size, font_size))
        return font, bbox

    def process(self, image, anggota, extracted_data, text_list,
                show_preview=False, return_theme=False):
        """Replace the member-count line on an iPhone-style screenshot.

        Args:
            image: BGR image array; it is modified in place by the masking
                step and a new array is returned.
            anggota: Text to show as the new member count.
            extracted_data: Tesseract result dict (``Output.DICT``).
            text_list: The OCR tokens (``extracted_data['text']``).
            show_preview: Accepted for interface compatibility; unused.
            return_theme: When True, return ``(image, theme, "iPhone")``
                instead of just the image.

        Returns:
            The edited BGR image (or the tuple described above), or None when
            the expected text pattern is not found. The result is also
            written to 'output.png' in the working directory.
        """
        best = self._find_best_candidate(extracted_data, text_list)
        if not best:
            logger.error("No valid text pattern found")
            return None

        group_idx = best['group_idx']
        split_idx = best['split_idx']
        lang = 'id' if text_list[group_idx] == "Grup" else 'en'

        # Collect the bounding boxes of every token that will be replaced.
        group_position = _position_at(extracted_data, group_idx)
        member_position = _position_at(extracted_data, best['member_idx'])
        member_count_position = _position_at(extracted_data, best['number_idx'])
        split_position = None
        if split_idx is not None:
            split_position = _position_at(extracted_data, split_idx)

        # Sample the background colour just to the right of the member word.
        sample_x = member_position['left'] + member_position['width'] + 10
        sample_y = member_position['top'] + member_position['height'] // 2
        bg_color = _sample_pixel(image, sample_x, sample_y)
        # Kept in BGR order because it is used with cv2 drawing on a BGR image.
        fill_bgr = (int(bg_color[0]), int(bg_color[1]), int(bg_color[2]))

        # Theme detection drives the colour of the replacement text.
        is_dark = _is_dark_bgr(bg_color)
        theme = 'Dark Mode' if is_dark else 'Light Mode'
        font_color = (145, 144, 144, 255) if is_dark else (90, 94, 95, 255)

        # Paint over the original tokens with the background colour.
        margin = 10
        for pos in (group_position, split_position, member_position,
                    member_count_position):
            if pos:
                cv2.rectangle(
                    image,
                    (pos['left'] - margin, pos['top'] - margin),
                    (pos['left'] + pos['width'] + margin,
                     pos['top'] + pos['height'] + margin),
                    fill_bgr,
                    -1,
                )

        # Replacement text.
        updated_member_count = {
            'id': f"Grup · {anggota} anggota",
            'en': f"Group · {anggota} members",
        }.get(lang, f"Group · {anggota} members")

        # Pillow works in RGB, so convert before drawing and back afterwards.
        image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(image_pil)
        font, text_bbox = self._fit_font(
            draw,
            updated_member_count,
            member_count_position['height'],
            member_count_position['width'],
        )

        # Centre the new text on the area spanned by the group and member tokens.
        top_y = min(group_position['top'], member_position['top']) - margin
        bot_y = max(group_position['top'] + group_position['height'],
                    member_position['top'] + member_position['height']) + margin
        left_x = min(group_position['left'], member_position['left']) - margin
        right_x = max(member_position['left'] + member_position['width'],
                      group_position['left'] + group_position['width']) + margin
        center_x = (left_x + right_x) // 2
        center_y = (top_y + bot_y) // 2

        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]
        text_x = center_x - (text_width // 2)
        text_y = center_y - (text_height // 2)
        draw.text((text_x, text_y), updated_member_count, font=font,
                  fill=font_color)

        image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
        cv2.imwrite('output.png', image)

        return (image, theme, "iPhone") if return_theme else image


class AndroidProcessor:
    """Rewrites the member-count line(s) for the Android screenshot layout."""

    def __init__(self, font_path="Roboto-Regular.ttf"):
        self.font_path = font_path

    @staticmethod
    def find_text_position(text_list, target_text, start_idx=0):
        """Return the index of the first token containing ``target_text``.

        The search starts at ``start_idx`` and uses substring matching.
        Returns None when no token matches.
        """
        for i in range(start_idx, len(text_list)):
            if target_text in text_list[i]:
                return i
        return None

    @staticmethod
    def get_position_data(extracted_data, idx):
        """Return the bounding box of token ``idx``, or None if unavailable.

        None is returned when ``idx`` is None or past the end of the OCR data.
        """
        if idx is None or idx >= len(extracted_data['left']):
            return None
        return _position_at(extracted_data, idx)

    @staticmethod
    def is_dark_mode(bg_color):
        """Return True when the background pixel indicates dark mode.

        ``bg_color`` is a pixel taken from an OpenCV image, so it is read in
        BGR channel order when computing the perceived brightness.
        """
        return _is_dark_bgr(bg_color)

    def _adjust_font_size(self, draw, text, original_height, original_width,
                          font_size):
        """Refine ``font_size`` so ``text`` roughly matches the original box.

        The size is kept within [1.4x, 2.2x] of the original height and
        refined for at most ``_MAX_FONT_FIT_ITERATIONS`` measurements.

        Returns ``(font, bbox)`` where ``bbox`` was measured with exactly the
        returned font, so callers can position the text reliably.
        """
        min_size = max(1, int(original_height * 1.4))
        max_size = max(min_size, int(original_height * 2.2))
        font_size = max(1, font_size)

        for _ in range(_MAX_FONT_FIT_ITERATIONS):
            font = ImageFont.truetype(self.font_path, font_size)
            bbox = draw.textbbox((0, 0), text, font=font)
            text_height = bbox[3] - bbox[1]
            text_width = bbox[2] - bbox[0]
            too_wide = text_width > original_width * 1.4
            if abs(text_height - original_height) <= 4 and not too_wide:
                return font, bbox
            if text_height > original_height or too_wide:
                font_size = int(font_size * 0.961)
            else:
                font_size = int(font_size * 1.02)
            font_size = max(min_size, min(max_size, font_size))

        # The size was adjusted once more after the last measurement, so
        # measure again to keep the bounding box in sync with the font.
        font = ImageFont.truetype(self.font_path, font_size)
        return font, draw.textbbox((0, 0), text, font=font)

    def process(self, image, anggota, extracted_data, text_list,
                show_preview=False, return_theme=False):
        """Replace the member-count text on an Android-style screenshot.

        Args:
            image: BGR image array; it is modified in place by the masking
                step and a new array is returned.
            anggota: Text to show as the new member count.
            extracted_data: Tesseract result dict (``Output.DICT``).
            text_list: The OCR tokens (``extracted_data['text']``).
            show_preview: Accepted for interface compatibility; unused.
            return_theme: When True, return ``(image, theme, "Android")``
                instead of just the image.

        Returns:
            The edited BGR image (or the tuple described above), or None when
            the group/member text is missing or any error occurs. The result
            is also written to 'output.png' in the working directory.
        """
        try:
            group_idx = self.find_text_position(text_list, "Grup")
            if group_idx is None:
                group_idx = self.find_text_position(text_list, "Group")
            logger.info("Found group_idx: %s", group_idx)
            if group_idx is None:
                logger.error("No group text found in image")
                return None

            lang = 'id' if "Grup" in text_list[group_idx] else 'en'
            group_position = self.get_position_data(extracted_data, group_idx)

            # Separator: prefer the middle dot, fall back to a '-' that OCR
            # may have produced instead, within a few tokens of the group word.
            split_idx = self.find_text_position(text_list, "·", group_idx)
            if split_idx is None:
                for i in range(group_idx, min(group_idx + 4, len(text_list))):
                    if "-" in text_list[i]:
                        split_idx = i
                        break
            split_position = self.get_position_data(extracted_data, split_idx)

            member_idx = self.find_text_position(text_list, "anggota", group_idx)
            if member_idx is None:
                member_idx = self.find_text_position(text_list, "member", group_idx)
            member_position = self.get_position_data(extracted_data, member_idx)
            logger.info("Found member_idx: %s", member_idx)

            # The count is the first numeric token shortly after the group word.
            member_count_position = None
            for i in range(group_idx, min(group_idx + 5, len(text_list))):
                if text_list[i].isdigit():
                    member_count_position = self.get_position_data(extracted_data, i)
                    break

            # Optional second occurrence ("N Anggota") further down the list.
            second_member_position = None
            second_member_count_position = None
            second_member_idx = self.find_text_position(
                text_list, "Anggota", group_idx + 4)
            if second_member_idx is not None:
                second_member_position = self.get_position_data(
                    extracted_data, second_member_idx)
                for i in range(second_member_idx - 3, second_member_idx):
                    if i >= 0 and text_list[i].isdigit():
                        second_member_count_position = self.get_position_data(
                            extracted_data, i)
                        break

            if member_position is None:
                logger.error("No member text found in image")
                return None

            # Sample the background 50 px in from the right edge, one fifth
            # of the way down the image.
            image_height, image_width = image.shape[:2]
            bg_color = _sample_pixel(image, image_width - 50, image_height // 5)
            # Kept in BGR order because it is used with cv2 drawing.
            fill_bgr = (int(bg_color[0]), int(bg_color[1]), int(bg_color[2]))

            is_dark = self.is_dark_mode(bg_color)
            theme = 'Dark Mode' if is_dark else 'Light Mode'
            text_color = (147, 151, 154, 255) if is_dark else (90, 94, 95, 255)

            # Paint over every original token with the background colour.
            margin_horizontal = 10
            for position in (group_position, split_position, member_position,
                             member_count_position, second_member_position,
                             second_member_count_position):
                if position:
                    cv2.rectangle(
                        image,
                        (position['left'] - margin_horizontal, position['top'] - 5),
                        (position['left'] + position['width'] + margin_horizontal,
                         position['top'] + position['height']),
                        fill_bgr,
                        -1,
                    )

            # Pillow works in RGB; convert so text_color is not applied with
            # red and blue swapped, then convert back after drawing.
            image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(image_pil)

            if member_count_position:
                updated_member_count = {
                    'id': f"Grup · {anggota} anggota",
                    'en': f"Group · {anggota} members",
                }.get(lang)
                original_height = member_count_position['height']
                font, text_bbox = self._adjust_font_size(
                    draw, updated_member_count, original_height,
                    member_count_position['width'], int(original_height * 1.8))
                # The main line is centred horizontally on the whole image.
                text_width = text_bbox[2] - text_bbox[0]
                text_x = (image_width - text_width) // 2
                text_y = member_count_position['top'] - 5
                draw.text((text_x, text_y), updated_member_count, font=font,
                          fill=text_color)

            if second_member_count_position:
                updated_second_member_count = {
                    'id': f"{anggota} Anggota",
                    'en': f"{anggota} Members",
                }.get(lang)
                original_height = second_member_count_position['height']
                font, _ = self._adjust_font_size(
                    draw, updated_second_member_count, original_height,
                    second_member_count_position['width'],
                    int(original_height * 1.8))
                # The second line stays left-aligned with the original count.
                text_x = second_member_count_position['left']
                text_y = second_member_count_position['top'] - 5
                draw.text((text_x, text_y), updated_second_member_count,
                          font=font, fill=text_color)

            image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
            cv2.imwrite('output.png', image)

            return (image, theme, "Android") if return_theme else image
        except Exception as e:
            # Boundary handler: callers expect None on any failure.
            logger.error("Error in Android processor: %s", e)
            return None


class UnifiedEditor:
    """Front end that runs OCR, detects the platform and delegates editing."""

    def __init__(self, iphone_font_path="SF-Pro-Display-Regular.otf",
                 android_font_path="Roboto-Regular.ttf"):
        self.iphone_processor = IPhoneProcessor(iphone_font_path)
        self.android_processor = AndroidProcessor(android_font_path)

    @staticmethod
    def parse_anggota(anggota_str, ocr_count=None):
        """Interpret the ``anggota`` command-line value.

        * ``"+N"`` adds N to ``ocr_count`` and returns the sum; returns None
          when ``ocr_count`` is None or N is not an integer.
        * A plain integer string is returned as an ``int``.
        * Anything else is returned unchanged as a string.
        """
        if anggota_str.startswith('+'):
            if ocr_count is None:
                logger.error("OCR count is None, cannot perform addition")
                return None
            try:
                addition = int(anggota_str[1:])
            except ValueError:
                logger.error("Invalid number format in anggota: %s", anggota_str)
                return None
            result = ocr_count + addition
            logger.info("Adding %s to OCR count %s = %s",
                        addition, ocr_count, result)
            return result

        try:
            result = int(anggota_str)
        except ValueError:
            # Not a number: use the text verbatim.
            logger.info("Using string anggota value: %s", anggota_str)
            return anggota_str
        logger.info("Using direct anggota value: %s", result)
        return result

    def _perform_ocr(self, image_bytes):
        """Run Tesseract on the top half of an encoded image, with a timeout.

        Only the top half is OCR'd; because the crop keeps the image origin,
        the returned coordinates are still valid for the full image.

        Returns the ``pytesseract.Output.DICT`` result, or None on timeout or
        any OCR error.
        """
        def ocr_task():
            image_stream = io.BytesIO(image_bytes)
            pil_image = Image.open(image_stream).convert('RGB')
            image_bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
            mid_point = image_bgr.shape[0] // 2
            return pytesseract.image_to_data(
                image_bgr[:mid_point, :],
                output_type=pytesseract.Output.DICT,
            )

        # The executor is managed by hand: leaving a ``with`` block would call
        # shutdown(wait=True) and block until OCR finishes, which would defeat
        # the timeout below.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(ocr_task)
            return future.result(timeout=_OCR_TIMEOUT_SECONDS)
        except TimeoutError:
            logger.error("OCR operation timed out")
            return None
        except Exception as e:
            logger.error("OCR error: %s", e)
            return None
        finally:
            # Do not wait for a still-running OCR task; the worker thread
            # finishes (or is joined at interpreter exit) on its own.
            executor.shutdown(wait=False)

    def detect_platform(self, text_list):
        """Guess the platform from the OCR tokens.

        Returns "iPhone" when "info" and "grup"/"group" occur together within
        any window of five consecutive tokens (case-insensitive), otherwise
        "Android".
        """
        window_size = 5
        # max(1, ...) makes sure token lists shorter than one window are
        # still inspected instead of being skipped entirely.
        window_count = max(1, len(text_list) - window_size + 1)
        for i in range(window_count):
            window = " ".join(text_list[i:i + window_size]).lower()
            if "info" in window and ("grup" in window or "group" in window):
                return "iPhone"
        return "Android"

    def _find_text_position(self, text_list, target_text, start_idx=0):
        """Return the index of the first token containing ``target_text``.

        Returns None when no token from ``start_idx`` onwards matches.
        """
        return AndroidProcessor.find_text_position(text_list, target_text, start_idx)

    def _get_ocr_count(self, image, extracted_data, text_list):
        """Read the current member count from the OCR tokens.

        Looks for the first numeric token within five tokens of the first
        "Grup"/"Group" token. Returns that number, or 0 when none is found
        or an error occurs (so "+N" then behaves like "N").
        """
        try:
            group_idx = self._find_text_position(text_list, "Grup")
            if group_idx is None:
                group_idx = self._find_text_position(text_list, "Group")

            if group_idx is not None:
                for i in range(group_idx, min(group_idx + 5, len(text_list))):
                    if not text_list[i].isdigit():
                        continue
                    try:
                        ocr_count = int(text_list[i])
                    except ValueError:
                        # isdigit() accepts some characters int() rejects.
                        continue
                    logger.info("Found OCR count: %s", ocr_count)
                    return ocr_count

            logger.warning("No valid OCR count found, returning 0")
            return 0
        except Exception as e:
            logger.error("Error getting OCR count: %s", e)
            return 0

    def _ocr_image(self, image):
        """PNG-encode a BGR image and run OCR on it; None on failure."""
        _, img_encoded = cv2.imencode('.png', image)
        return self._perform_ocr(img_encoded.tobytes())

    def process_image(self, image_path, anggota):
        """Edit the image at ``image_path`` and return the result.

        Returns ``(image, theme, platform)`` where ``image`` is a BGR array,
        or ``(None, None, None)`` on any failure.
        """
        start_time = time.time()
        image = cv2.imread(image_path)
        if image is None:
            logger.error("Failed to read image")
            return None, None, None

        # OCR once; the result serves both the "+N" arithmetic and the edit.
        extracted_data = self._ocr_image(image)
        if extracted_data is None:
            return None, None, None
        text_list = extracted_data['text']
        ocr_count = self._get_ocr_count(image, extracted_data, text_list)

        parsed_anggota = self.parse_anggota(anggota, ocr_count)
        if parsed_anggota is None:
            logger.error("Invalid anggota parameter")
            return None, None, None

        result = self._process_core(image, str(parsed_anggota),
                                    show_preview=True, return_theme=True,
                                    extracted_data=extracted_data)
        if result is None:
            return None, None, None

        result, theme, platform = result
        logger.info("Total processing time: %.2f seconds",
                    time.time() - start_time)
        return result, theme, platform

    def process_image_bytes(self, image_bytes, anggota):
        """Edit an encoded image and return it as base64-encoded PNG.

        Returns ``(base64_png, theme, platform)`` or ``(None, None, None)``
        on any failure.
        """
        start_time = time.time()
        try:
            image_stream = io.BytesIO(image_bytes)
            pil_image = Image.open(image_stream).convert('RGB')
            image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

            # OCR once; the result serves both the "+N" arithmetic and the edit.
            extracted_data = self._ocr_image(image)
            if extracted_data is None:
                return None, None, None
            text_list = extracted_data['text']
            ocr_count = self._get_ocr_count(image, extracted_data, text_list)

            parsed_anggota = self.parse_anggota(anggota, ocr_count)
            if parsed_anggota is None:
                logger.error("Invalid anggota parameter")
                return None, None, None

            result = self._process_core(image, str(parsed_anggota),
                                        show_preview=False, return_theme=True,
                                        extracted_data=extracted_data)
            if result is None:
                return None, None, None

            result, theme, platform = result
            if result is None:
                logger.error("Result image is None")
                return None, None, None

            pil_result = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
            output_io = io.BytesIO()
            pil_result.save(output_io, format='PNG')
            img_b64 = base64.b64encode(output_io.getvalue()).decode('utf-8')
            logger.info("Total processing time: %.2f seconds",
                        time.time() - start_time)
            return img_b64, theme, platform
        except Exception as e:
            logger.error("Error in process_image_bytes: %s", e)
            return None, None, None

    def _process_core(self, image, anggota, show_preview=False,
                      return_theme=False, extracted_data=None):
        """Detect the platform and run the matching processor.

        Args:
            image: BGR image array to edit.
            anggota: Replacement member-count text.
            show_preview: Forwarded to the processor (currently unused there).
            return_theme: Forwarded to the processor.
            extracted_data: Optional OCR result for ``image``. When omitted,
                OCR is performed here; passing it avoids a second, identical
                Tesseract run.

        Returns:
            Whatever the selected processor returns, or None on failure.
        """
        try:
            if extracted_data is None:
                extracted_data = self._ocr_image(image)
                if extracted_data is None:
                    return None
            text_list = extracted_data['text']

            platform = self.detect_platform(text_list)
            logger.info("Detected platform: %s", platform)

            if platform == "iPhone":
                processor = self.iphone_processor
            else:
                processor = self.android_processor
            return processor.process(image, anggota, extracted_data, text_list,
                                     show_preview, return_theme)
        except Exception as e:
            logger.error("Error in _process_core: %s", e)
            return None


def main():
    """Command-line entry point: edit one image and report the outcome."""
    parser = argparse.ArgumentParser(description='Proses gambar grup unified')
    parser.add_argument('image_path', help='Path ke file gambar')
    parser.add_argument('anggota', help='Jumlah anggota (bisa menggunakan + untuk menambah ke jumlah yang ada, contoh: +5 untuk menambah 5)')
    args = parser.parse_args()

    editor = UnifiedEditor()
    result, theme, platform = editor.process_image(args.image_path, args.anggota)

    if result is not None:
        print("Processing completed successfully!")
        print(f"Detected platform: {platform}")
        print(f"Detected theme: {theme}")
    else:
        print("Processing failed!")


if __name__ == '__main__':
    main()

# Usage examples:
# python all.py image.png 10        # Set the member count to 10
# python all.py image.png +5        # Add 5 to the member count read by OCR
# python all.py image.png +10       # Add 10 to the member count read by OCR
# python all.py image.png "Banyak"  # Use the string "Banyak" as the count text
# python all.py image.png "100+"    # Use the string "100+" as the count text