Spaces:

kerenmasku
/

ocr

Sleeping

App Files Files Community

ocr / iphone.py

kerenmasku

Upload iphone.py with huggingface_hub

adc2d58 verified 8 months ago

raw

history blame contribute delete

15.2 kB

	import cv2
	import pytesseract
	from PIL import Image, ImageDraw, ImageFont
	import numpy as np
	import argparse
	import io
	import base64
	import time
	import logging
	from functools import lru_cache
	from concurrent.futures import ThreadPoolExecutor, TimeoutError

	# Setup logging.
	# basicConfig configures the root logger at INFO level as a side effect of
	# importing this module; `logger` is the module-level logger used by every
	# function and method below.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	class IPhoneEditor:
	    """Replace the member count in a screenshot's "Group · N members" caption.

	    The caption is located with Tesseract OCR in the top half of the image,
	    the original words are painted over with a background colour sampled
	    next to them, and a new caption is drawn with the font at ``font_path``.
	    """

	    # Maximum number of rendered results memoised per instance. The oldest
	    # entry is evicted first so a long-running process cannot grow unbounded.
	    _MAX_CACHE_ENTRIES = 32
	    # Seconds to wait for the OCR worker before giving up on an image.
	    _OCR_TIMEOUT_SECONDS = 30
	    # Padding, in pixels, added around every masked word box.
	    _MARGIN = 10

	    def __init__(self, font_path="SF-Pro-Display-Regular.otf"):
	        """Create an editor.

	        Args:
	            font_path: Path of the TrueType/OpenType font used to draw the
	                replacement caption.
	        """
	        self.font_path = font_path
	        # Maps "<image hash>_<anggota>" to the rendered (image, theme) pair.
	        self._cache = {}

	    @staticmethod
	    @lru_cache(maxsize=100)
	    def _run_ocr(image_bytes):
	        """Decode ``image_bytes`` and OCR the top half of the picture.

	        Cached on the encoded bytes only (not on an instance), and failures
	        propagate as exceptions so that they are never memoised.
	        """
	        pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
	        image_bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
	        # Only the top half is scanned. Cropping from the top keeps the OCR
	        # coordinates valid for the full-size image.
	        image_bgr = image_bgr[:image_bgr.shape[0] // 2, :]
	        return pytesseract.image_to_data(image_bgr, output_type=pytesseract.Output.DICT)

	    def _perform_ocr(self, image_bytes):
	        """Run OCR on encoded image bytes with a timeout.

	        Args:
	            image_bytes: Encoded image (any format PIL can open).

	        Returns:
	            The pytesseract ``Output.DICT`` result, or ``None`` when OCR
	            fails or does not finish within ``_OCR_TIMEOUT_SECONDS``.
	        """
	        # The executor is deliberately not used as a context manager: leaving
	        # a ``with`` block waits for the worker, which would make the timeout
	        # below block until OCR finished anyway.
	        executor = ThreadPoolExecutor(max_workers=1)
	        try:
	            future = executor.submit(self._run_ocr, image_bytes)
	            return future.result(timeout=self._OCR_TIMEOUT_SECONDS)
	        except TimeoutError:
	            logger.error("OCR operation timed out")
	            return None
	        except Exception as e:
	            logger.error(f"OCR error: {str(e)}")
	            return None
	        finally:
	            # Do not wait: an overrunning worker finishes in the background.
	            executor.shutdown(wait=False)

	    @staticmethod
	    def parse_anggota(anggota_str, ocr_count=None):
	        """Parse the anggota (member count) argument.

	        Args:
	            anggota_str: Either an absolute count ("12") or an increment
	                prefixed with ``+`` ("+5"). Non-string values are converted
	                with ``str()`` and surrounding whitespace is ignored.
	            ocr_count: Count read from the image; required for the ``+`` form.

	        Returns:
	            The resulting count as an ``int``, or ``None`` when the value is
	            not a valid integer or ``+`` is used without ``ocr_count``.
	        """
	        # Strip first so that " +3" is still recognised as an increment
	        # (int() alone would silently read it as the absolute value 3).
	        text = str(anggota_str).strip()
	        is_addition = text.startswith('+')

	        if is_addition and ocr_count is None:
	            logger.error("OCR count is None, cannot perform addition")
	            return None

	        try:
	            number = int(text[1:] if is_addition else text)
	        except ValueError:
	            logger.error(f"Invalid number format in anggota: {anggota_str}")
	            return None

	        if is_addition:
	            result = ocr_count + number
	            logger.info(f"Adding {number} to OCR count {ocr_count} = {result}")
	            return result

	        logger.info(f"Using direct anggota value: {number}")
	        return number

	    def _resolve_anggota(self, image, anggota):
	        """Turn the user-supplied ``anggota`` into the absolute count to draw.

	        Reads the current count from ``image`` (falling back to 0 when it
	        cannot be found) so that the ``+N`` form can be applied. Returns
	        ``None`` when ``anggota`` is invalid.
	        """
	        original_result = self._process_core(image, "0", show_preview=False, get_ocr_count_only=True)
	        if original_result is None:
	            logger.warning("Could not extract OCR count, using 0 as default")
	            ocr_count = 0
	        else:
	            ocr_count = original_result

	        parsed_anggota = self.parse_anggota(anggota, ocr_count)
	        if parsed_anggota is None:
	            logger.error("Invalid anggota parameter")
	        return parsed_anggota

	    def process_image(self, image_path, anggota):
	        """Edit the image stored at ``image_path``.

	        Args:
	            image_path: Path of the screenshot to read with ``cv2.imread``.
	            anggota: New member count, absolute ("10") or relative ("+5").

	        Returns:
	            The edited BGR image array, or ``None`` when the file cannot be
	            read, ``anggota`` is invalid, or the caption cannot be rewritten.
	            The edited image is also written to ``output.png``.
	        """
	        start_time = time.time()
	        image = cv2.imread(image_path)
	        if image is None:
	            logger.error("Failed to read image")
	            return None

	        parsed_anggota = self._resolve_anggota(image, anggota)
	        if parsed_anggota is None:
	            return None

	        result = self._process_core(image, str(parsed_anggota), show_preview=True)
	        end_time = time.time()
	        logger.info(f"Total processing time: {end_time - start_time:.2f} seconds")
	        return result

	    def process_image_bytes(self, image_bytes, anggota):
	        """Edit an encoded image held in memory.

	        Args:
	            image_bytes: Encoded image (any format PIL can open).
	            anggota: New member count, absolute ("10") or relative ("+5").

	        Returns:
	            ``(base64_png, theme)`` on success, where ``theme`` is
	            ``'Dark Mode'`` or ``'Light Mode'``; ``(None, None)`` on any
	            failure. Exceptions are logged, never raised.
	        """
	        start_time = time.time()
	        try:
	            pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
	            image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

	            parsed_anggota = self._resolve_anggota(image, anggota)
	            if parsed_anggota is None:
	                return None, None

	            outcome = self._process_core(image, str(parsed_anggota), show_preview=False, return_theme=True)
	            # _process_core returns a bare None on failure; check before
	            # unpacking instead of relying on the resulting TypeError.
	            if outcome is None:
	                logger.error("Result image is None")
	                return None, None

	            result, theme = outcome
	            pil_result = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
	            output_io = io.BytesIO()
	            pil_result.save(output_io, format='PNG')
	            img_b64 = base64.b64encode(output_io.getvalue()).decode('utf-8')
	            end_time = time.time()
	            logger.info(f"Total processing time: {end_time - start_time:.2f} seconds")
	            return img_b64, theme
	        except Exception as e:
	            logger.error(f"Error in process_image_bytes: {str(e)}")
	            return None, None

	    @staticmethod
	    def _find_caption(extracted_data):
	        """Locate the "<Grup|Group> [-] <number> <anggota|member(s)>" caption.

	        Every number followed within two tokens by a member word and preceded
	        within nine tokens by a group word is a candidate. The candidate whose
	        group word and member word are vertically closest wins (first one on
	        ties).

	        Returns:
	            A dict with ``group_idx``, ``split_idx`` (may be ``None``),
	            ``number_idx`` and ``member_idx``, or ``None`` when nothing
	            matches.
	        """
	        text_list = extracted_data['text']
	        tops = extracted_data['top']
	        best = None
	        best_dist = None

	        for i, text in enumerate(text_list):
	            if not text.isdigit():
	                continue
	            for idx in (i + 1, i + 2):
	                if idx >= len(text_list):
	                    continue
	                if text_list[idx].lower() not in ("anggota", "members", "member"):
	                    continue
	                # Nearest group word above/before the number.
	                group_idx = next(
	                    (j for j in range(i - 1, max(-1, i - 10), -1) if text_list[j] in ("Grup", "Group")),
	                    None,
	                )
	                if group_idx is None:
	                    # Candidates without a group word are never selected.
	                    continue
	                # Optional '-' separator between the group word and the number.
	                split_idx = next((j for j in range(group_idx + 1, i) if text_list[j] == "-"), None)
	                dist = abs(tops[group_idx] - tops[idx])
	                if best_dist is None or dist < best_dist:
	                    best_dist = dist
	                    best = {
	                        'group_idx': group_idx,
	                        'split_idx': split_idx,
	                        'number_idx': i,
	                        'member_idx': idx,
	                    }
	        return best

	    @staticmethod
	    def _box(extracted_data, idx):
	        """Return the OCR bounding box of token ``idx`` as a dict."""
	        return {key: extracted_data[key][idx] for key in ("left", "top", "width", "height")}

	    def _fit_font(self, draw, text, original_height, original_width):
	        """Pick a font whose rendered ``text`` roughly matches the original size.

	        Starts at 1.9x the original glyph height and nudges the size for at
	        most five rounds, staying within 1.4x-2.3x of that height.
	        """
	        font_size = int(original_height * 1.9)
	        min_size = int(original_height * 1.4)
	        max_size = int(original_height * 2.3)
	        font = None

	        for _ in range(5):
	            font = ImageFont.truetype(self.font_path, font_size)
	            bbox = draw.textbbox((0, 0), text, font=font)
	            text_height = bbox[3] - bbox[1]
	            text_width = bbox[2] - bbox[0]
	            too_wide = text_width > original_width * 2
	            if abs(text_height - original_height) <= 2 and not too_wide:
	                break
	            if text_height > original_height or too_wide:
	                font_size = int(font_size * 0.963)
	            else:
	                font_size = int(font_size * 1.02)
	            font_size = max(min_size, min(max_size, font_size))
	        return font

	    def _process_core(self, image, anggota, show_preview=False, return_theme=False, get_ocr_count_only=False):
	        """Find the caption in ``image`` and either read or rewrite its count.

	        Args:
	            image: BGR image array. It is not modified; drawing happens on a
	                copy.
	            anggota: Count to draw, as a string.
	            show_preview: Accepted for backward compatibility; currently
	                unused.
	            return_theme: When true, return ``(image, theme)`` instead of just
	                the image.
	            get_ocr_count_only: When true, return the count currently shown in
	                the image as an ``int`` and draw nothing.

	        Returns:
	            Depending on the flags: the detected count, the edited BGR image,
	            or ``(edited image, theme)``. ``None`` when OCR fails, no caption
	            is found, or any error occurs (errors are logged). Rendering also
	            writes the edited image to ``output.png``.
	        """
	        cache_key = None
	        if not get_ocr_count_only:
	            # The cache always stores (image, theme) and the return shape is
	            # chosen per call, so a result cached with one value of
	            # return_theme can never be handed back in the other shape.
	            cache_key = f"{hash(image.tobytes())}_{anggota}"
	            if cache_key in self._cache:
	                logger.info("Using cached result")
	                cached_image, cached_theme = self._cache[cache_key]
	                return (cached_image, cached_theme) if return_theme else cached_image

	        try:
	            # OCR works on encoded bytes so that its result can be memoised.
	            _, img_encoded = cv2.imencode('.png', image)
	            extracted_data = self._perform_ocr(img_encoded.tobytes())
	            if extracted_data is None:
	                return None

	            best = self._find_caption(extracted_data)
	            if best is None:
	                logger.error("No valid text pattern found")
	                return None

	            text_list = extracted_data['text']
	            group_idx = best['group_idx']
	            split_idx = best['split_idx']
	            number_idx = best['number_idx']
	            member_idx = best['member_idx']

	            if get_ocr_count_only:
	                try:
	                    ocr_count = int(text_list[number_idx])
	                except ValueError:
	                    logger.warning("No valid OCR count found, returning 0")
	                    return 0
	                logger.info(f"Found OCR count: {ocr_count}")
	                return ocr_count

	            # The Indonesian UI says "Grup"; anything else is treated as English.
	            lang = 'id' if text_list[group_idx] == "Grup" else 'en'

	            group_position = self._box(extracted_data, group_idx)
	            member_position = self._box(extracted_data, member_idx)
	            member_count_position = self._box(extracted_data, number_idx)
	            split_position = None if split_idx is None else self._box(extracted_data, split_idx)

	            # Draw on a copy so the caller's array stays untouched.
	            canvas = image.copy()
	            img_height, img_width = canvas.shape[:2]

	            # Sample the background just right of the member word, clamped so
	            # that a caption touching the border cannot index out of bounds.
	            x = min(member_position['left'] + member_position['width'] + 10, img_width - 1)
	            y = min(member_position['top'] + member_position['height'] // 2, img_height - 1)
	            bg_color = canvas[y, x]
	            fill_bgr = tuple(int(channel) for channel in bg_color[:3])

	            # Theme detection. The pixel is in OpenCV's BGR order, so unpack
	            # it as b, g, r before applying the luma weights.
	            b, g, r = (float(channel) for channel in bg_color[:3])
	            brightness = (r * 299 + g * 587 + b * 114) / 1000
	            is_dark = brightness < 128
	            theme = 'Dark Mode' if is_dark else 'Light Mode'
	            font_color = (145, 144, 144, 255) if is_dark else (90, 94, 95, 255)
	            margin = self._MARGIN

	            # Paint over the original words with the background colour.
	            for pos in (group_position, split_position, member_position, member_count_position):
	                if pos:
	                    cv2.rectangle(
	                        canvas,
	                        (pos['left'] - margin, pos['top'] - margin),
	                        (pos['left'] + pos['width'] + margin, pos['top'] + pos['height'] + margin),
	                        fill_bgr,
	                        -1,
	                    )

	            # New caption text.
	            updated_member_count = {
	                'id': f"Grup · {anggota} anggota",
	                'en': f"Group · {anggota} members"
	            }.get(lang, f"Group · {anggota} members")

	            image_pil = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
	            draw = ImageDraw.Draw(image_pil)
	            font = self._fit_font(
	                draw,
	                updated_member_count,
	                member_count_position['height'],
	                member_count_position['width'],
	            )

	            # Centre the new caption on the area spanned by the group and
	            # member words.
	            top_y = min(group_position['top'], member_position['top']) - margin
	            bot_y = max(
	                group_position['top'] + group_position['height'],
	                member_position['top'] + member_position['height'],
	            ) + margin
	            left_x = min(group_position['left'], member_position['left']) - margin
	            right_x = max(
	                member_position['left'] + member_position['width'],
	                group_position['left'] + group_position['width'],
	            ) + margin
	            center_x = (left_x + right_x) // 2
	            center_y = (top_y + bot_y) // 2

	            text_bbox = draw.textbbox((0, 0), updated_member_count, font=font)
	            text_width = text_bbox[2] - text_bbox[0]
	            text_height = text_bbox[3] - text_bbox[1]
	            text_x = center_x - (text_width // 2)
	            text_y = center_y - (text_height // 2) - 5

	            draw.text((text_x, text_y), updated_member_count, font=font, fill=font_color)
	            # PIL holds RGB; convert back to OpenCV's BGR order.
	            rendered = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
	            cv2.imwrite('output.png', rendered)

	            # Evict the oldest entry (dicts keep insertion order) when full.
	            if len(self._cache) >= self._MAX_CACHE_ENTRIES:
	                self._cache.pop(next(iter(self._cache)))
	            self._cache[cache_key] = (rendered, theme)
	            return (rendered, theme) if return_theme else rendered

	        except Exception as e:
	            logger.error(f"Error in _process_core: {str(e)}")
	            return None

	if __name__ == '__main__':
	    # Command-line entry point: edit one screenshot; the edited image is
	    # written to output.png by the editor.
	    parser = argparse.ArgumentParser(description='Proses gambar grup iPhone')
	    parser.add_argument('image_path', help='Path ke file gambar')
	    parser.add_argument('anggota', help='Jumlah anggota (bisa menggunakan + untuk menambah ke jumlah yang ada, contoh: +5 untuk menambah 5)')
	    args = parser.parse_args()
	    editor = IPhoneEditor()
	    # process_image returns None on any failure (unreadable file, invalid
	    # anggota, caption not found); report that through the exit status
	    # instead of exiting 0.
	    if editor.process_image(args.image_path, args.anggota) is None:
	        raise SystemExit(1)

	# Example usage:
	# python iphone.py image.png 10 # Set the member count to 10
	# python iphone.py image.png +5 # Add 5 to the member count read by OCR
	# python iphone.py image.png +10 # Add 10 to the member count read by OCR