Spaces:
Sleeping
Sleeping
| import cv2 | |
| import pytesseract | |
| from PIL import Image, ImageDraw, ImageFont | |
| import numpy as np | |
| import argparse | |
| import io | |
| import base64 | |
| import time | |
| import logging | |
| from concurrent.futures import ThreadPoolExecutor, TimeoutError | |
# Configure the root logger at INFO level and create the module-level logger
# that every method in this file reports through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class AndroidEditor:
    """Rewrite the member count shown in a group-info image.

    The editor runs OCR on the top half of the image, locates the
    "Grup · N anggota" / "Group · N members" header (and, when present, a
    second "N Anggota" / "N Members" label), paints over the old text with a
    sampled background colour and draws the new count with a TrueType font.
    """

    # Maximum time, in seconds, a caller waits for the OCR result.
    OCR_TIMEOUT_SECONDS = 30

    def __init__(self, font_path="Roboto-Regular.ttf"):
        """Remember the TrueType font file used to draw the replacement text."""
        self.font_path = font_path

    def _perform_ocr(self, image_bytes):
        """Run Tesseract on the top half of an encoded image, with a timeout.

        Args:
            image_bytes: Encoded image data readable by PIL (PNG in practice).

        Returns:
            The ``pytesseract.image_to_data`` dictionary, or ``None`` when the
            OCR call times out or raises.
        """
        def ocr_task():
            pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            image_bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
            # Only the upper half is scanned.
            mid_point = image_bgr.shape[0] // 2
            image_bgr = image_bgr[:mid_point, :]
            return pytesseract.image_to_data(image_bgr, output_type=pytesseract.Output.DICT)

        # Deliberately not a ``with`` block: leaving a ``with`` calls
        # shutdown(wait=True), which would keep the caller blocked until the
        # worker finishes and so defeat the timeout below.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(ocr_task)
            return future.result(timeout=self.OCR_TIMEOUT_SECONDS)
        except TimeoutError:
            logger.error("OCR operation timed out")
            return None
        except Exception as e:
            logger.error("OCR error: %s", e)
            return None
        finally:
            # The worker thread cannot be interrupted; just stop waiting for it.
            executor.shutdown(wait=False)

    @staticmethod
    def find_text_position(text_list, target_text, start_idx=0):
        """Return the index of the first token containing ``target_text``.

        The search starts at ``start_idx``; ``None`` is returned when no token
        matches.
        """
        for i in range(start_idx, len(text_list)):
            if target_text in text_list[i]:
                return i
        return None

    @staticmethod
    def get_position_data(extracted_data, idx):
        """Return the bounding box of OCR token ``idx`` as a dict.

        Returns ``None`` when ``idx`` is ``None`` or outside the OCR result.
        """
        if idx is None or idx < 0 or idx >= len(extracted_data['left']):
            return None
        return {
            "left": extracted_data['left'][idx],
            "top": extracted_data['top'][idx],
            "width": extracted_data['width'][idx],
            "height": extracted_data['height'][idx],
        }

    @staticmethod
    def is_dark_mode(bg_color):
        """Return True when the weighted brightness of ``bg_color`` is below 128.

        The first three components are read as r, g, b and converted to float
        first so that uint8 pixel values cannot overflow in the weighted sum.
        """
        r, g, b = float(bg_color[0]), float(bg_color[1]), float(bg_color[2])
        brightness = (r * 299 + g * 587 + b * 114) / 1000
        return brightness < 128

    @staticmethod
    def parse_anggota(anggota_str, ocr_count=None):
        """Turn the ``anggota`` argument into an absolute member count.

        Args:
            anggota_str: Either a plain integer ("10") or a relative value
                prefixed with "+" ("+5"), which is added to ``ocr_count``.
                Surrounding whitespace is ignored.
            ocr_count: Count read from the image; required for the "+" form.

        Returns:
            The resulting integer, or ``None`` when the value is not a valid
            number or a relative value is given without ``ocr_count``.
        """
        text = str(anggota_str).strip()
        is_relative = text.startswith('+')
        if is_relative and ocr_count is None:
            logger.error("OCR count is None, cannot perform addition")
            return None
        try:
            value = int(text[1:] if is_relative else text)
        except ValueError:
            logger.error("Invalid number format in anggota: %s", anggota_str)
            return None
        if is_relative:
            result = ocr_count + value
            logger.info("Adding %s to OCR count %s = %s", value, ocr_count, result)
            return result
        logger.info("Using direct anggota value: %s", value)
        return value

    def _resolve_anggota(self, image, anggota):
        """Resolve ``anggota`` to an absolute count, reading the image if needed.

        The extra OCR pass that reads the current count is only run for the
        relative "+N" form; an unreadable count falls back to 0.
        Returns ``None`` when ``anggota`` is invalid.
        """
        ocr_count = None
        if str(anggota).strip().startswith('+'):
            ocr_count = self._process_core(image, "0", show_preview=False, get_ocr_count_only=True)
            if ocr_count is None:
                logger.warning("Could not extract OCR count, using 0 as default")
                ocr_count = 0
        parsed_anggota = self.parse_anggota(anggota, ocr_count)
        if parsed_anggota is None:
            logger.error("Invalid anggota parameter")
        return parsed_anggota

    def process_image(self, image_path, anggota):
        """Edit the image stored at ``image_path``.

        Returns:
            The edited image array, or ``None`` when the file cannot be read,
            ``anggota`` is invalid, or the labels cannot be located.
        """
        start_time = time.perf_counter()
        image = cv2.imread(image_path)
        if image is None:
            logger.error("Failed to read image")
            return None
        parsed_anggota = self._resolve_anggota(image, anggota)
        if parsed_anggota is None:
            return None
        result = self._process_core(image, str(parsed_anggota), show_preview=True)
        logger.info("Total processing time: %.2f seconds", time.perf_counter() - start_time)
        return result

    def process_image_bytes(self, image_bytes, anggota):
        """Edit an encoded image held in memory.

        Returns:
            ``(base64_png, theme)`` on success, where ``theme`` is
            'Dark Mode' or 'Light Mode'; ``(None, None)`` on any failure
            handled by the editor.
        """
        start_time = time.perf_counter()
        image = np.array(Image.open(io.BytesIO(image_bytes)).convert('RGB'))
        parsed_anggota = self._resolve_anggota(image, anggota)
        if parsed_anggota is None:
            return None, None
        core_result = self._process_core(image, str(parsed_anggota), show_preview=False, return_theme=True)
        # _process_core returns a bare None on failure even with
        # return_theme=True, so it must be checked before unpacking.
        if core_result is None:
            return None, None
        result, theme = core_result
        if result is None:
            return None, None
        output_io = io.BytesIO()
        Image.fromarray(result).save(output_io, format='PNG')
        img_b64 = base64.b64encode(output_io.getvalue()).decode('utf-8')
        logger.info("Total processing time: %.2f seconds", time.perf_counter() - start_time)
        return img_b64, theme

    @staticmethod
    def _first_digit_index(text_list, start, stop):
        """Return the first index in [start, stop) whose token is all digits."""
        for i in range(max(start, 0), min(stop, len(text_list))):
            if text_list[i].isdigit():
                return i
        return None

    def _fit_font(self, draw, text, original_height, original_width):
        """Pick a font size for ``text`` and measure it.

        Starts at 1.8x the original token height and nudges the size for at
        most five rounds, staying within 1.4x-2.2x of that height.

        Returns:
            ``(font, text_width)`` for the size finally chosen.
        """
        # A size of at least 1 keeps ImageFont.truetype from rejecting it.
        min_size = max(1, int(original_height * 1.4))
        max_size = max(min_size, int(original_height * 2.2))
        font_size = max(min_size, min(max_size, int(original_height * 1.8)))
        # NOTE(review): original_width is the width of the OCR'd count token,
        # while ``text`` is the whole label - confirm this limit is intended.
        max_width = original_width * 1.4
        for _ in range(5):
            font = ImageFont.truetype(self.font_path, font_size)
            left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
            text_height = bottom - top
            text_width = right - left
            if abs(text_height - original_height) <= 4 and text_width <= max_width:
                break
            if text_height > original_height or text_width > max_width:
                font_size = int(font_size * 0.961)
            else:
                font_size = int(font_size * 1.02)
            font_size = max(min_size, min(max_size, font_size))
        # Measure again with the final size: the last loop round may have
        # changed font_size after taking its measurement.
        font = ImageFont.truetype(self.font_path, font_size)
        left, _, right, _ = draw.textbbox((0, 0), text, font=font)
        return font, right - left

    def _process_core(self, image, anggota, show_preview=False, return_theme=False, get_ocr_count_only=False):
        """Locate the member-count labels and either read or replace them.

        Args:
            image: 3-channel array. The channel order is used as supplied;
                rectangles are drawn into this array in place.
            anggota: New member count, as text to be drawn.
            show_preview: Accepted for compatibility; currently ignored.
            return_theme: When True, a successful call returns
                ``(image, theme)`` instead of just the image.
            get_ocr_count_only: When True, nothing is drawn and the member
                count read from the image is returned (0 if none parses).

        Returns:
            See ``return_theme`` / ``get_ocr_count_only``. ``None`` on any
            failure (bad image shape, OCR failure, labels not found).
        """
        if len(image.shape) == 3 and image.shape[2] == 3:
            if image.dtype != np.uint8:
                image = image.astype(np.uint8)
        else:
            logger.error("Invalid image format")
            return None

        # _perform_ocr takes encoded bytes, so round-trip through PNG.
        encoded_ok, img_encoded = cv2.imencode('.png', image)
        if not encoded_ok:
            logger.error("Failed to encode image for OCR")
            return None
        extracted_data = self._perform_ocr(img_encoded.tobytes())
        if extracted_data is None:
            return None
        text_list = extracted_data['text']

        group_idx = self.find_text_position(text_list, "Grup")
        if group_idx is None:
            group_idx = self.find_text_position(text_list, "Group")
        if group_idx is None:
            return None

        lang = 'id' if "Grup" in text_list[group_idx] else 'en'
        group_position = self.get_position_data(extracted_data, group_idx)

        # Separator: the middle dot, or a "-" close to the group label.
        split_idx = self.find_text_position(text_list, "·", group_idx)
        if split_idx is None:
            for i in range(group_idx, min(group_idx + 4, len(text_list))):
                if "-" in text_list[i]:
                    split_idx = i
                    break
        split_position = self.get_position_data(extracted_data, split_idx)

        member_idx = self.find_text_position(text_list, "anggota", group_idx)
        if member_idx is None:
            member_idx = self.find_text_position(text_list, "member", group_idx)
        member_position = self.get_position_data(extracted_data, member_idx)
        if member_position is None:
            return None

        if get_ocr_count_only:
            for i in range(group_idx, min(group_idx + 5, len(text_list))):
                if text_list[i].isdigit():
                    try:
                        ocr_count = int(text_list[i])
                    except ValueError:
                        # isdigit() also accepts characters int() rejects.
                        continue
                    logger.info("Found OCR count: %s", ocr_count)
                    return ocr_count
            logger.warning("No valid OCR count found, returning 0")
            return 0

        member_count_position = self.get_position_data(
            extracted_data, self._first_digit_index(text_list, group_idx, group_idx + 5)
        )

        # Second label further down; the count is looked for in the three
        # tokens before it. "Members" mirrors the English template below.
        second_member_position = None
        second_member_count_position = None
        second_member_idx = self.find_text_position(text_list, "Anggota", group_idx + 4)
        if second_member_idx is None:
            second_member_idx = self.find_text_position(text_list, "Members", group_idx + 4)
        if second_member_idx is not None:
            second_member_position = self.get_position_data(extracted_data, second_member_idx)
            second_member_count_position = self.get_position_data(
                extracted_data,
                self._first_digit_index(text_list, second_member_idx - 3, second_member_idx),
            )

        # Sample the background near the right edge, one fifth of the way down.
        image_height, image_width = image.shape[0], image.shape[1]
        x = max(0, image_width - 50)
        y = image_height // 5
        bg_color = image[y, x]
        fill_color = (int(bg_color[0]), int(bg_color[1]), int(bg_color[2]))
        is_dark = self.is_dark_mode(bg_color)
        theme = 'Dark Mode' if is_dark else 'Light Mode'
        text_color = (147, 151, 154, 255) if is_dark else (90, 94, 95, 255)

        # Paint over every located token with the sampled background colour.
        margin_horizontal = 10
        for position in (group_position, split_position, member_position, member_count_position,
                         second_member_position, second_member_count_position):
            if position:
                cv2.rectangle(
                    image,
                    (position['left'] - margin_horizontal, position['top'] - 5),
                    (position['left'] + position['width'] + margin_horizontal,
                     position['top'] + position['height']),
                    fill_color,
                    -1,
                )

        if member_count_position or second_member_count_position:
            image_pil = Image.fromarray(image)
            draw = ImageDraw.Draw(image_pil)
            if member_count_position:
                updated_member_count = {
                    'id': f"Grup · {anggota} anggota",
                    'en': f"Group · {anggota} members",
                }[lang]
                font, text_width = self._fit_font(
                    draw, updated_member_count,
                    member_count_position['height'], member_count_position['width'],
                )
                # The header line is centred horizontally.
                text_x = (image_width - text_width) // 2
                text_y = member_count_position['top'] - 5
                draw.text((text_x, text_y), updated_member_count, font=font, fill=text_color)
            if second_member_count_position:
                updated_second_member_count = {
                    'id': f"{anggota} Anggota",
                    'en': f"{anggota} Members",
                }[lang]
                font, _ = self._fit_font(
                    draw, updated_second_member_count,
                    second_member_count_position['height'], second_member_count_position['width'],
                )
                # The second label stays left-aligned where the old count was.
                text_x = second_member_count_position['left']
                text_y = second_member_count_position['top'] - 5
                draw.text((text_x, text_y), updated_second_member_count, font=font, fill=text_color)
            image = np.array(image_pil)

        return (image, theme) if return_theme else image
if __name__ == '__main__':
    # Command-line entry point: edit one image file and save the result.
    parser = argparse.ArgumentParser(description='Proses gambar grup')
    parser.add_argument('image_path', help='Path ke file gambar')
    parser.add_argument('anggota', help='Jumlah anggota (bisa menggunakan + untuk menambah ke jumlah yang ada, contoh: +5 untuk menambah 5)')
    parser.add_argument('-o', '--output', default='output.png', help='Path file hasil (default: output.png)')
    args = parser.parse_args()
    editor = AndroidEditor()
    edited_image = editor.process_image(args.image_path, args.anggota)
    # process_image returns None on failure; it has already logged the reason.
    if edited_image is None:
        raise SystemExit(1)
    # Without this write the edited image would be computed and thrown away.
    if not cv2.imwrite(args.output, edited_image):
        logger.error("Failed to write %s", args.output)
        raise SystemExit(1)
    logger.info("Saved edited image to %s", args.output)
# Example usage:
# python android.py image.png 10           # Set the member count to 10
# python android.py image.png +5           # Add 5 to the member count read by OCR
# python android.py image.png +10          # Add 10 to the member count read by OCR
# python android.py image.png 10 -o a.png  # Same as the first, saved to a.png