# ocr / iphone.py
# Uploaded by kerenmasku with huggingface_hub (commit adc2d58, verified).
import cv2
import pytesseract
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import argparse
import io
import base64
import time
import logging
from functools import lru_cache
from concurrent.futures import ThreadPoolExecutor, TimeoutError
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class IPhoneEditor:
    """Rewrite the "Group <sep> N members" line found in a screenshot.

    The editor OCRs the top half of the image, locates a ``Grup``/``Group``
    token followed by a number and ``anggota``/``member(s)``, paints over
    those tokens with the sampled background colour and draws a new label
    with the requested member count.

    Two per-instance caches are kept: OCR output keyed by the encoded image
    bytes, and finished results keyed by image content plus call options.
    """

    # Maximum number of entries kept in each per-instance cache.
    _CACHE_LIMIT = 100
    # Seconds to wait for tesseract before giving up.
    _OCR_TIMEOUT_SECONDS = 30
    # Tokens (lower-cased) that may follow the member count.
    _MEMBER_WORDS = frozenset({"anggota", "members", "member"})
    # Tokens (case-sensitive) that start the label.
    _GROUP_WORDS = frozenset({"Grup", "Group"})
    # Padding in pixels around every masked token and around the label box.
    _MARGIN = 10

    def __init__(self, font_path="SF-Pro-Display-Regular.otf"):
        """Create an editor that renders text with the font at *font_path*."""
        self.font_path = font_path
        self._cache = {}
        self._ocr_cache = {}

    def _remember(self, cache, key, value):
        """Store *value* under *key*, evicting the oldest entry when full."""
        if key not in cache and len(cache) >= self._CACHE_LIMIT:
            # Dicts preserve insertion order, so the first key is the oldest.
            cache.pop(next(iter(cache)))
        cache[key] = value

    def _perform_ocr(self, image_bytes):
        """Run tesseract on the top half of an encoded image.

        Args:
            image_bytes: Encoded image (any format PIL can open).

        Returns:
            The ``pytesseract.image_to_data`` dict, or ``None`` when OCR
            fails or does not finish within ``_OCR_TIMEOUT_SECONDS``.
            Only successful results are cached, so failures are retried.
        """
        cached = self._ocr_cache.get(image_bytes)
        if cached is not None:
            return cached

        def ocr_task():
            pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            image_bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
            # Only the top half is scanned. Cropping from the top keeps the
            # origin, so reported coordinates remain valid for the full image.
            image_bgr = image_bgr[:image_bgr.shape[0] // 2, :]
            return pytesseract.image_to_data(
                image_bgr, output_type=pytesseract.Output.DICT
            )

        # Not used as a context manager: leaving a ``with`` block waits for
        # the worker to finish, which would defeat the timeout below.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            result = executor.submit(ocr_task).result(
                timeout=self._OCR_TIMEOUT_SECONDS
            )
        except TimeoutError:
            logger.error("OCR operation timed out")
            return None
        except Exception as e:
            logger.error(f"OCR error: {str(e)}")
            return None
        finally:
            # A timed-out worker cannot be cancelled; it is left to finish in
            # the background while the caller continues.
            executor.shutdown(wait=False)

        self._remember(self._ocr_cache, image_bytes, result)
        return result

    @staticmethod
    def parse_anggota(anggota_str, ocr_count=None):
        """Turn the ``anggota`` argument into an absolute member count.

        Args:
            anggota_str: Either a plain number (``"10"``) or a number with a
                leading ``+`` (``"+5"``) meaning "add to the OCR count".
                Surrounding whitespace is ignored and ints are accepted.
            ocr_count: Count read from the image; required for the ``+`` form.

        Returns:
            The resulting integer, or ``None`` when the value is not a valid
            number or a ``+`` value is given without an OCR count.
        """
        text = str(anggota_str).strip()
        is_addition = text.startswith('+')

        if is_addition and ocr_count is None:
            logger.error("OCR count is None, cannot perform addition")
            return None

        try:
            value = int(text[1:] if is_addition else text)
        except ValueError:
            logger.error(f"Invalid number format in anggota: {anggota_str}")
            return None

        if is_addition:
            result = ocr_count + value
            logger.info(f"Adding {value} to OCR count {ocr_count} = {result}")
            return result

        logger.info(f"Using direct anggota value: {value}")
        return value

    def _resolve_anggota(self, image, anggota):
        """Read the current count from *image* and resolve *anggota* against it.

        Returns the absolute member count, or ``None`` if *anggota* is invalid.
        A missing OCR count is treated as 0.
        """
        ocr_count = self._process_core(
            image, "0", show_preview=False, get_ocr_count_only=True
        )
        if ocr_count is None:
            logger.warning("Could not extract OCR count, using 0 as default")
            ocr_count = 0

        parsed_anggota = self.parse_anggota(anggota, ocr_count)
        if parsed_anggota is None:
            logger.error("Invalid anggota parameter")
        return parsed_anggota

    def process_image(self, image_path, anggota):
        """Edit the image stored at *image_path*.

        Args:
            image_path: Path of the screenshot to read with ``cv2.imread``.
            anggota: New member count, absolute or ``+N`` (see
                ``parse_anggota``).

        Returns:
            The edited BGR image array, or ``None`` on any failure. The
            edited image is also written to ``output.png`` by
            ``_process_core``.
        """
        start_time = time.time()
        image = cv2.imread(image_path)
        if image is None:
            logger.error("Failed to read image")
            return None

        parsed_anggota = self._resolve_anggota(image, anggota)
        if parsed_anggota is None:
            return None

        result = self._process_core(image, str(parsed_anggota), show_preview=True)
        end_time = time.time()
        logger.info(f"Total processing time: {end_time - start_time:.2f} seconds")
        return result

    def process_image_bytes(self, image_bytes, anggota):
        """Edit an encoded image held in memory.

        Args:
            image_bytes: Encoded image (any format PIL can open).
            anggota: New member count, absolute or ``+N`` (see
                ``parse_anggota``).

        Returns:
            ``(png_base64, theme)`` where *theme* is ``'Dark Mode'`` or
            ``'Light Mode'``; ``(None, None)`` on any failure.
        """
        start_time = time.time()
        try:
            pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

            parsed_anggota = self._resolve_anggota(image, anggota)
            if parsed_anggota is None:
                return None, None

            outcome = self._process_core(
                image, str(parsed_anggota), show_preview=False, return_theme=True
            )
            # _process_core returns a bare None on failure, so check before
            # unpacking instead of relying on the TypeError being swallowed.
            if outcome is None:
                logger.error("Result image is None")
                return None, None
            result, theme = outcome
            if result is None:
                logger.error("Result image is None")
                return None, None

            pil_result = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
            output_io = io.BytesIO()
            pil_result.save(output_io, format='PNG')
            img_b64 = base64.b64encode(output_io.getvalue()).decode('utf-8')
            end_time = time.time()
            logger.info(f"Total processing time: {end_time - start_time:.2f} seconds")
            return img_b64, theme
        except Exception as e:
            logger.error(f"Error in process_image_bytes: {str(e)}")
            return None, None

    @staticmethod
    def _box(data, idx):
        """Return the bounding box of OCR token *idx* as a dict."""
        return {key: data[key][idx] for key in ("left", "top", "width", "height")}

    def _find_label(self, data):
        """Locate the best "<Group> ... <number> <members>" token sequence.

        A candidate is a numeric token followed within two tokens by a member
        word and preceded within nine tokens by a group word. Among the
        candidates, the one whose group and member tokens are vertically
        closest wins (first one on ties).

        Returns a dict with ``group_idx``, ``split_idx`` (index of a ``-``
        token between group and number, or ``None``), ``number_idx`` and
        ``member_idx``; or ``None`` when nothing matches.
        """
        texts = data['text']
        tops = data['top']
        best = None
        best_dist = None

        for i, text in enumerate(texts):
            if not text.isdigit():
                continue
            for member_idx in (i + 1, i + 2):
                if member_idx >= len(texts):
                    continue
                if texts[member_idx].lower() not in self._MEMBER_WORDS:
                    continue
                # Nearest group word above the number.
                group_idx = next(
                    (
                        j
                        for j in range(i - 1, max(-1, i - 10), -1)
                        if texts[j] in self._GROUP_WORDS
                    ),
                    None,
                )
                if group_idx is None:
                    continue
                # Optional separator token between group word and number.
                split_idx = next(
                    (j for j in range(group_idx + 1, i) if texts[j] == "-"),
                    None,
                )
                dist = abs(tops[group_idx] - tops[member_idx])
                if best is None or dist < best_dist:
                    best_dist = dist
                    best = {
                        'group_idx': group_idx,
                        'split_idx': split_idx,
                        'number_idx': i,
                        'member_idx': member_idx,
                    }
        return best

    def _fit_font(self, draw, text, target_height, target_width):
        """Pick a font size for *text* close to the original token height.

        Starts at 1.9x the original height and adjusts for at most five
        rounds, clamped to [1.4x, 2.3x]. The font returned is the last one
        that was actually loaded and measured.
        NOTE(review): the adjustment made in the final round is never loaded;
        kept as-is because the rendered size depends on it.
        """
        font_size = int(target_height * 1.9)
        min_size = int(target_height * 1.4)
        max_size = int(target_height * 2.3)
        font = None

        for _ in range(5):
            font = ImageFont.truetype(self.font_path, font_size)
            bbox = draw.textbbox((0, 0), text, font=font)
            text_height = bbox[3] - bbox[1]
            text_width = bbox[2] - bbox[0]
            too_wide = text_width > target_width * 2
            if abs(text_height - target_height) <= 2 and not too_wide:
                break
            if text_height > target_height or too_wide:
                font_size = int(font_size * 0.963)
            else:
                font_size = int(font_size * 1.02)
            font_size = max(min_size, min(max_size, font_size))
        return font

    def _process_core(self, image, anggota, show_preview=False, return_theme=False, get_ocr_count_only=False):
        """Find the member-count label in *image* and optionally rewrite it.

        Args:
            image: BGR image array. It is not modified; drawing happens on a
                copy.
            anggota: Text to render as the new member count.
            show_preview: Unused; accepted for backward compatibility.
            return_theme: When true, return ``(image, theme)`` instead of the
                image alone.
            get_ocr_count_only: When true, only return the integer currently
                shown in the image (0 if the token is not a valid int).

        Returns:
            The edited BGR image, ``(image, theme)``, or the OCR count as
            described above; ``None`` when OCR fails, no label is found or an
            error occurs. Edited images are also written to ``output.png`` in
            the current working directory.
        """
        # return_theme is part of the key because it changes the result shape.
        image_hash = hash(image.tobytes())
        cache_key = f"{image_hash}_{anggota}_{get_ocr_count_only}_{return_theme}"
        if cache_key in self._cache:
            logger.info("Using cached result")
            return self._cache[cache_key]

        try:
            encoded_ok, img_encoded = cv2.imencode('.png', image)
            if not encoded_ok:
                logger.error("Failed to encode image for OCR")
                return None

            extracted_data = self._perform_ocr(img_encoded.tobytes())
            if extracted_data is None:
                return None
            text_list = extracted_data['text']

            best = self._find_label(extracted_data)
            if not best:
                logger.error("No valid text pattern found")
                return None
            group_idx = best['group_idx']
            split_idx = best['split_idx']
            number_idx = best['number_idx']
            member_idx = best['member_idx']
            lang = 'id' if text_list[group_idx] == "Grup" else 'en'

            if get_ocr_count_only:
                try:
                    ocr_count = int(text_list[number_idx])
                    logger.info(f"Found OCR count: {ocr_count}")
                    return ocr_count
                except ValueError:
                    logger.warning("No valid OCR count found, returning 0")
                    return 0

            group_position = self._box(extracted_data, group_idx)
            member_position = self._box(extracted_data, member_idx)
            member_count_position = self._box(extracted_data, number_idx)
            split_position = None
            if split_idx is not None:
                split_position = self._box(extracted_data, split_idx)

            # Work on a copy so the caller's array is left untouched.
            canvas = image.copy()
            img_height, img_width = canvas.shape[:2]

            # Sample the background just right of the member word, clamped so
            # a label near the edge cannot index outside the image.
            x = member_position['left'] + member_position['width'] + 10
            y = member_position['top'] + member_position['height'] // 2
            x = min(max(x, 0), img_width - 1)
            y = min(max(y, 0), img_height - 1)
            bg_color = canvas[y, x]
            # The array is BGR, so channel 0 is blue and channel 2 is red.
            b, g, r = int(bg_color[0]), int(bg_color[1]), int(bg_color[2])
            fill_bgr = (b, g, r)

            # Perceived brightness decides the theme and the text colour.
            brightness = (r * 299 + g * 587 + b * 114) / 1000
            is_dark = brightness < 128
            theme = 'Dark Mode' if is_dark else 'Light Mode'
            font_color = (145, 144, 144, 255) if is_dark else (90, 94, 95, 255)
            margin = self._MARGIN

            # Paint over every original token of the label.
            for pos in (group_position, split_position, member_position, member_count_position):
                if pos:
                    cv2.rectangle(
                        canvas,
                        (pos['left'] - margin, pos['top'] - margin),
                        (pos['left'] + pos['width'] + margin, pos['top'] + pos['height'] + margin),
                        fill_bgr,
                        -1,
                    )

            # Replacement label; \u00b7 is the middle-dot separator.
            if lang == 'id':
                updated_member_count = f"Grup \u00b7 {anggota} anggota"
            else:
                updated_member_count = f"Group \u00b7 {anggota} members"

            image_pil = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(image_pil)
            font = self._fit_font(
                draw,
                updated_member_count,
                member_count_position['height'],
                member_count_position['width'],
            )

            # Centre the new text in the box spanned by group and member words.
            top_y = min(group_position['top'], member_position['top']) - margin
            bot_y = max(
                group_position['top'] + group_position['height'],
                member_position['top'] + member_position['height'],
            ) + margin
            left_x = min(group_position['left'], member_position['left']) - margin
            right_x = max(
                member_position['left'] + member_position['width'],
                group_position['left'] + group_position['width'],
            ) + margin
            center_x = (left_x + right_x) // 2
            center_y = (top_y + bot_y) // 2

            text_bbox = draw.textbbox((0, 0), updated_member_count, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]
            text_x = center_x - (text_width // 2)
            text_y = center_y - (text_height // 2) - 5
            draw.text((text_x, text_y), updated_member_count, font=font, fill=font_color)

            # PIL holds RGB; convert back to OpenCV's BGR order.
            edited = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
            cv2.imwrite('output.png', edited)

            result = edited if not return_theme else (edited, theme)
            self._remember(self._cache, cache_key, result)
            return result
        except Exception as e:
            logger.error(f"Error in _process_core: {str(e)}")
            return None
def main():
    """Command-line entry point: edit one screenshot.

    Parses ``image_path`` and ``anggota`` from the command line and passes
    them to ``IPhoneEditor.process_image``.

    Returns:
        0 when an edited image was produced, 1 otherwise, so that shell
        callers can detect failure.
    """
    parser = argparse.ArgumentParser(description='Proses gambar grup iPhone')
    parser.add_argument('image_path', help='Path ke file gambar')
    parser.add_argument('anggota', help='Jumlah anggota (bisa menggunakan + untuk menambah ke jumlah yang ada, contoh: +5 untuk menambah 5)')
    args = parser.parse_args()

    editor = IPhoneEditor()
    result = editor.process_image(args.image_path, args.anggota)
    return 0 if result is not None else 1


if __name__ == '__main__':
    raise SystemExit(main())
# Usage examples:
# python iphone.py image.png 10    # Set the member count to 10
# python iphone.py image.png +5    # Add 5 to the member count read by OCR
# python iphone.py image.png +10   # Add 10 to the member count read by OCR