"""Rewrite the member count shown in a group screenshot using OCR.

The module locates the "Grup"/"Group" header line in the top half of an
image with Tesseract, paints over the located words with a sampled
background colour, and draws the replacement text with a TrueType font.
"""

import argparse
import base64
import io
import logging
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError

import cv2
import numpy as np
import pytesseract
from PIL import Image, ImageDraw, ImageFont

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class AndroidEditor:
    """Replace the member count rendered in a group screenshot."""

    # Maximum time to wait for a single OCR pass.
    OCR_TIMEOUT_SECONDS = 30

    # Replacement strings per detected language ('id' or 'en').
    _HEADER_TEMPLATES = {
        'id': "Grup · {} anggota",
        'en': "Group · {} members",
    }
    _SECTION_TEMPLATES = {
        'id': "{} Anggota",
        'en': "{} Members",
    }

    def __init__(self, font_path="Roboto-Regular.ttf"):
        """Store the path of the TrueType font used to draw replacement text."""
        self.font_path = font_path

    def _perform_ocr(self, image_bytes):
        """Run Tesseract on the top half of an encoded image, with a timeout.

        Args:
            image_bytes: Encoded image data readable by ``PIL.Image.open``.

        Returns:
            The ``pytesseract.Output.DICT`` result, or ``None`` when OCR
            fails or does not finish within ``OCR_TIMEOUT_SECONDS``.
        """
        def ocr_task():
            pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            image_bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
            # Only the top half is scanned. Cropping from the top keeps the
            # reported coordinates valid for the full image.
            mid_point = image_bgr.shape[0] // 2
            return pytesseract.image_to_data(
                image_bgr[:mid_point, :],
                output_type=pytesseract.Output.DICT,
            )

        # The executor is managed by hand: leaving a ``with`` block calls
        # shutdown(wait=True), which would block until the OCR task finishes
        # and defeat the timeout.
        executor = ThreadPoolExecutor(max_workers=1)
        future = executor.submit(ocr_task)
        try:
            return future.result(timeout=self.OCR_TIMEOUT_SECONDS)
        except TimeoutError:
            logger.error("OCR operation timed out")
            return None
        except Exception as e:
            logger.error("OCR error: %s", e)
            return None
        finally:
            # NOTE: a timed-out task keeps running in its worker thread; it
            # is abandoned here rather than waited for.
            executor.shutdown(wait=False)

    @staticmethod
    def find_text_position(text_list, target_text, start_idx=0):
        """Return the index of the first entry containing ``target_text``.

        The search starts at ``start_idx`` and is a case-sensitive substring
        match. Returns ``None`` when nothing matches.
        """
        for i in range(start_idx, len(text_list)):
            if target_text in text_list[i]:
                return i
        return None

    @staticmethod
    def get_position_data(extracted_data, idx):
        """Return the bounding box of OCR entry ``idx`` as a dict.

        Returns ``None`` when ``idx`` is ``None`` or past the end of the data.
        """
        if idx is None or idx >= len(extracted_data['left']):
            return None
        return {
            "left": extracted_data['left'][idx],
            "top": extracted_data['top'][idx],
            "width": extracted_data['width'][idx],
            "height": extracted_data['height'][idx],
        }

    @staticmethod
    def is_dark_mode(bg_color):
        """Return True when the weighted brightness of ``bg_color`` is below 128.

        The weights treat index 0 as red, 1 as green and 2 as blue.
        """
        # NOTE(review): process_image passes pixels from cv2.imread, which
        # OpenCV documents as BGR, so red and blue weights are swapped on
        # that path - confirm whether that matters for the themes in use.
        r, g, b = float(bg_color[0]), float(bg_color[1]), float(bg_color[2])
        brightness = (r * 299 + g * 587 + b * 114) / 1000
        return brightness < 128

    @staticmethod
    def parse_anggota(anggota_str, ocr_count=None):
        """Parse the requested member count.

        Args:
            anggota_str: Either an integer string (absolute value) or a
                string starting with ``+`` (amount added to ``ocr_count``).
            ocr_count: Count read from the image; required for the ``+`` form.

        Returns:
            The resulting integer, or ``None`` when the string is not a
            valid number or ``ocr_count`` is missing for an addition.
        """
        is_addition = anggota_str.startswith('+')
        if is_addition and ocr_count is None:
            logger.error("OCR count is None, cannot perform addition")
            return None

        try:
            value = int(anggota_str[1:] if is_addition else anggota_str)
        except ValueError:
            logger.error("Invalid number format in anggota: %s", anggota_str)
            return None

        if is_addition:
            result = ocr_count + value
            logger.info("Adding %s to OCR count %s = %s", value, ocr_count, result)
            return result

        logger.info("Using direct anggota value: %s", value)
        return value

    def _resolve_anggota(self, image, anggota):
        """Turn the ``anggota`` argument into the final member count.

        The count-only OCR pass is run only for the ``+N`` form, because an
        absolute value never uses the count found in the image.
        """
        ocr_count = None
        if anggota.startswith('+'):
            ocr_count = self._process_core(
                image, "0", show_preview=False, get_ocr_count_only=True
            )
            if ocr_count is None:
                logger.warning("Could not extract OCR count, using 0 as default")
                ocr_count = 0
        return self.parse_anggota(anggota, ocr_count)

    def process_image(self, image_path, anggota):
        """Process an image file and return the edited image array.

        Args:
            image_path: Path passed to ``cv2.imread``.
            anggota: Absolute count (``"10"``) or relative count (``"+5"``).

        Returns:
            The edited image as a NumPy array, or ``None`` when the file
            cannot be read, ``anggota`` is invalid, or processing fails.
        """
        start_time = time.time()
        image = cv2.imread(image_path)
        if image is None:
            logger.error("Failed to read image")
            return None

        parsed_anggota = self._resolve_anggota(image, anggota)
        if parsed_anggota is None:
            logger.error("Invalid anggota parameter")
            return None

        result = self._process_core(image, str(parsed_anggota), show_preview=True)
        logger.info("Total processing time: %.2f seconds", time.time() - start_time)
        return result

    def process_image_bytes(self, image_bytes, anggota):
        """Process encoded image bytes and return a base64 PNG plus the theme.

        Args:
            image_bytes: Encoded image data readable by ``PIL.Image.open``.
            anggota: Absolute count (``"10"``) or relative count (``"+5"``).

        Returns:
            ``(base64_png, theme)`` on success, ``(None, None)`` otherwise.

        Raises:
            Whatever ``PIL.Image.open`` raises for unreadable data; that
            call is not guarded here.
        """
        start_time = time.time()
        pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        # NOTE(review): this array is RGB, while _process_core encodes it
        # with cv2.imencode (BGR convention), so OCR sees red/blue swapped.
        image = np.array(pil_image)

        parsed_anggota = self._resolve_anggota(image, anggota)
        if parsed_anggota is None:
            logger.error("Invalid anggota parameter")
            return None, None

        outcome = self._process_core(
            image, str(parsed_anggota), show_preview=False, return_theme=True
        )
        # _process_core returns a bare None on failure even when
        # return_theme=True, so it must be checked before unpacking.
        if outcome is None:
            return None, None

        result, theme = outcome
        output_io = io.BytesIO()
        Image.fromarray(result).save(output_io, format='PNG')
        img_b64 = base64.b64encode(output_io.getvalue()).decode('utf-8')
        logger.info("Total processing time: %.2f seconds", time.time() - start_time)
        return img_b64, theme

    def _locate_fields(self, extracted_data):
        """Find the header words and their boxes in the OCR output.

        Returns:
            A dict with ``lang``, ``group_idx``, ``positions`` (boxes to
            paint over, entries may be ``None``), ``member_count_position``
            and ``second_member_count_position``; or ``None`` when the group
            header or its member label cannot be found.
        """
        text_list = extracted_data['text']
        total = len(text_list)

        group_idx = self.find_text_position(text_list, "Grup")
        if group_idx is None:
            group_idx = self.find_text_position(text_list, "Group")
        if group_idx is None:
            return None

        lang = 'id' if "Grup" in text_list[group_idx] else 'en'

        # Separator between the group label and the count; fall back to a
        # hyphen within the next few tokens when the middle dot is missing.
        split_idx = self.find_text_position(text_list, "·", group_idx)
        if split_idx is None:
            split_idx = next(
                (i for i in range(group_idx, min(group_idx + 4, total))
                 if "-" in text_list[i]),
                None,
            )

        member_idx = self.find_text_position(text_list, "anggota", group_idx)
        if member_idx is None:
            member_idx = self.find_text_position(text_list, "member", group_idx)
        member_position = self.get_position_data(extracted_data, member_idx)
        if member_position is None:
            return None

        count_idx = next(
            (i for i in range(group_idx, min(group_idx + 5, total))
             if text_list[i].isdigit()),
            None,
        )

        # NOTE(review): only the capitalised Indonesian label is searched
        # for the second count, so the English template for it is never
        # used - confirm whether an English label should be searched too.
        second_member_idx = self.find_text_position(text_list, "Anggota", group_idx + 4)
        second_count_idx = None
        if second_member_idx is not None:
            second_count_idx = next(
                (i for i in range(max(second_member_idx - 3, 0), second_member_idx)
                 if text_list[i].isdigit()),
                None,
            )

        member_count_position = self.get_position_data(extracted_data, count_idx)
        second_member_count_position = self.get_position_data(
            extracted_data, second_count_idx
        )
        return {
            'lang': lang,
            'group_idx': group_idx,
            'positions': [
                self.get_position_data(extracted_data, group_idx),
                self.get_position_data(extracted_data, split_idx),
                member_position,
                member_count_position,
                self.get_position_data(extracted_data, second_member_idx),
                second_member_count_position,
            ],
            'member_count_position': member_count_position,
            'second_member_count_position': second_member_count_position,
        }

    @staticmethod
    def _read_ocr_count(text_list, group_idx):
        """Return the first integer within five tokens of the group label, else 0."""
        for i in range(group_idx, min(group_idx + 5, len(text_list))):
            if not text_list[i].isdigit():
                continue
            try:
                ocr_count = int(text_list[i])
            except ValueError:
                # str.isdigit() accepts characters that int() rejects.
                continue
            logger.info("Found OCR count: %s", ocr_count)
            return ocr_count
        logger.warning("No valid OCR count found, returning 0")
        return 0

    def _fit_font(self, draw, text, original_height, original_width):
        """Pick a font size for ``text`` and measure it at that size.

        Starts at 1.8x the original height and adjusts for at most five
        rounds, keeping the size within 1.4x-2.2x of the original height.

        Returns:
            ``(font, text_bbox)`` where ``text_bbox`` is measured with the
            returned font.
        """
        min_size = int(original_height * 1.4)
        max_size = int(original_height * 2.2)
        # NOTE(review): original_width is the width of the digit token only,
        # so this limit is usually exceeded by the full replacement text.
        width_limit = original_width * 1.4
        # Guard against a zero-height OCR box producing a zero font size.
        font_size = max(1, int(original_height * 1.8))

        def measure(size):
            font = ImageFont.truetype(self.font_path, size)
            return font, draw.textbbox((0, 0), text, font=font)

        font, text_bbox = measure(font_size)
        for _ in range(5):
            text_height = text_bbox[3] - text_bbox[1]
            text_width = text_bbox[2] - text_bbox[0]
            if abs(text_height - original_height) <= 4 and text_width <= width_limit:
                break

            too_big = text_height > original_height or text_width > width_limit
            scale = 0.961 if too_big else 1.02
            new_size = max(1, min_size, min(max_size, int(font_size * scale)))
            if new_size == font_size:
                # Converged: another round would repeat the same decision.
                break

            font_size = new_size
            # Re-measure so the returned bbox always matches the final size.
            font, text_bbox = measure(font_size)
        return font, text_bbox

    def _draw_replacement_text(self, image, text, position, text_color, centered):
        """Draw ``text`` near ``position`` and return the new image array.

        When ``centered`` is true the text is centred horizontally on the
        image; otherwise it starts at the left edge of ``position``.
        """
        image_pil = Image.fromarray(image)
        draw = ImageDraw.Draw(image_pil)
        font, text_bbox = self._fit_font(
            draw, text, position['height'], position['width']
        )
        if centered:
            text_width = text_bbox[2] - text_bbox[0]
            text_x = (image.shape[1] - text_width) // 2
        else:
            text_x = position['left']
        draw.text((text_x, position['top'] - 5), text, font=font, fill=text_color)
        return np.array(image_pil)

    def _process_core(self, image, anggota, show_preview=False, return_theme=False, get_ocr_count_only=False):
        """Run OCR on ``image`` and replace the member count with ``anggota``.

        Args:
            image: Three-channel image array; converted to uint8 if needed.
                The array may be drawn on in place.
            anggota: Count to render, already formatted as a string.
            show_preview: Kept for compatibility; currently has no effect.
            return_theme: When true, return ``(image, theme)`` on success.
            get_ocr_count_only: When true, return only the integer count
                read from the image (0 when no number is found).

        Returns:
            The edited image, ``(image, theme)``, or the OCR count depending
            on the flags; always a bare ``None`` on failure.
        """
        if len(image.shape) != 3 or image.shape[2] != 3:
            logger.error("Invalid image format")
            return None
        if image.dtype != np.uint8:
            image = image.astype(np.uint8)

        # Convert image to bytes for OCR
        encoded_ok, img_encoded = cv2.imencode('.png', image)
        if not encoded_ok:
            logger.error("Failed to encode image for OCR")
            return None

        extracted_data = self._perform_ocr(img_encoded.tobytes())
        if extracted_data is None:
            return None

        fields = self._locate_fields(extracted_data)
        if fields is None:
            return None

        if get_ocr_count_only:
            return self._read_ocr_count(extracted_data['text'], fields['group_idx'])

        # Sample the background colour near the right edge, one fifth of the
        # way down. The column is clamped so narrow images do not index
        # with a negative value.
        image_height, image_width = image.shape[:2]
        x = max(image_width - 50, 0)
        y = image_height // 5
        bg_color = image[y, x]
        fill_color = (int(bg_color[0]), int(bg_color[1]), int(bg_color[2]))

        is_dark = self.is_dark_mode(bg_color)
        theme = 'Dark Mode' if is_dark else 'Light Mode'
        text_color = (147, 151, 154, 255) if is_dark else (90, 94, 95, 255)

        # Paint over every located word with the sampled background colour.
        margin_horizontal = 10
        for position in fields['positions']:
            if position is None:
                continue
            cv2.rectangle(
                image,
                (position['left'] - margin_horizontal, position['top'] - 5),
                (position['left'] + position['width'] + margin_horizontal,
                 position['top'] + position['height']),
                fill_color,
                -1,
            )

        lang = fields['lang']
        member_count_position = fields['member_count_position']
        if member_count_position is not None:
            image = self._draw_replacement_text(
                image,
                self._HEADER_TEMPLATES[lang].format(anggota),
                member_count_position,
                text_color,
                centered=True,
            )

        second_member_count_position = fields['second_member_count_position']
        if second_member_count_position is not None:
            image = self._draw_replacement_text(
                image,
                self._SECTION_TEMPLATES[lang].format(anggota),
                second_member_count_position,
                text_color,
                centered=False,
            )

        # The interactive preview and fixed-name debug output are disabled;
        # callers decide what to do with the returned array.
        return (image, theme) if return_theme else image


def main():
    """Command-line entry point: process one image file."""
    parser = argparse.ArgumentParser(description='Proses gambar grup')
    parser.add_argument('image_path', help='Path ke file gambar')
    parser.add_argument('anggota', help='Jumlah anggota (bisa menggunakan + untuk menambah ke jumlah yang ada, contoh: +5 untuk menambah 5)')
    parser.add_argument('-o', '--output', default=None, help='Path untuk menyimpan gambar hasil (opsional)')
    args = parser.parse_args()

    editor = AndroidEditor()
    result = editor.process_image(args.image_path, args.anggota)
    if result is None:
        logger.error("Image processing failed; no output produced")
        return

    # Without --output nothing is written, matching the previous behaviour.
    if args.output:
        try:
            saved = cv2.imwrite(args.output, result)
        except cv2.error as e:
            logger.error("Failed to write output image: %s", e)
            return
        if saved:
            logger.info("Saved result to %s", args.output)
        else:
            logger.error("Failed to write output image: %s", args.output)


if __name__ == '__main__':
    main()

# Example usage:
# python android.py image.png 10              # Set the member count to 10
# python android.py image.png +5              # Add 5 to the member count read by OCR
# python android.py image.png +10             # Add 10 to the member count read by OCR
# python android.py image.png 10 -o out.png   # Also save the edited image