Spaces:

kerenmasku
/

ocr

Sleeping

App Files Files Community

ocr / bottom.py

kerenmasku

Upload bottom.py with huggingface_hub

d9212ab verified 7 months ago

raw

history blame contribute delete

14.9 kB

	import pytesseract
	import cv2
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	import argparse

	class AndroidBottom:
	    """Rewrite the "<number> anggota/members" caption on an Android-style screenshot.

	    The caption is located with Tesseract word boxes, painted over with the
	    sampled background colour and redrawn with a new number using a Roboto font.
	    """

	    # Label words accepted after the number (compared case-insensitively).
	    _LABELS = ("anggota", "members")

	    def __init__(self, font_path_medium="Roboto-Medium.ttf", font_path_regular="Roboto-Regular.ttf"):
	        """Store the font files used for bold-looking and regular-looking captions."""
	        self.font_path_medium = font_path_medium
	        self.font_path_regular = font_path_regular

	    @staticmethod
	    def is_dark_mode(bg_color):
	        """Return True when the BGR colour's weighted brightness is below 128."""
	        r, g, b = float(bg_color[2]), float(bg_color[1]), float(bg_color[0])
	        brightness = (r * 299 + g * 587 + b * 114) / 1000
	        return brightness < 128

	    @classmethod
	    def _find_label_index(cls, data):
	        """Return the index of the first label word preceded by an all-digit word, else None."""
	        words = data['text']
	        for i, text in enumerate(words):
	            if i > 0 and text.lower() in cls._LABELS and words[i - 1].isdigit():
	                return i
	        return None

	    def extract_anggota_count_from_ocr(self, data):
	        """Return the number found right before "anggota"/"members" in OCR data, or 0."""
	        idx = self._find_label_index(data)
	        return 0 if idx is None else int(data['text'][idx - 1])

	    def replace_anggota(self, image, jumlah, font_path=None, show_preview=False, return_theme=False, skip_ocr=False, extracted_data=None, get_ocr_count_only=False):
	        """Replace the member count drawn in ``image`` with ``jumlah``.

	        Args:
	            image: Path to an image file, or an already loaded BGR array. An
	                array passed in is drawn on directly by ``cv2.rectangle``.
	            jumlah: Value written in front of the detected label word.
	            font_path: Font file to use; ``None``, ``""`` or ``"auto"`` selects
	                one of the instance fonts from the measured stroke density.
	            show_preview: Accepted for compatibility; not used.
	            return_theme: When true, return ``(image, theme)`` where theme is
	                ``'Dark Mode'`` or ``'Light Mode'``.
	            skip_ocr: When true and ``extracted_data`` is given, reuse that data
	                instead of running Tesseract.
	            extracted_data: Dict shaped like ``pytesseract.image_to_data`` output
	                (keys ``text``, ``left``, ``top``, ``width``, ``height``).
	            get_ocr_count_only: When true, only return the detected count.

	        Returns:
	            The edited BGR image (or ``(image, theme)``), the detected count when
	            ``get_ocr_count_only`` is set, or ``None`` when the image cannot be
	            read, no "<digits> <label>" pair is found, or any error occurs.
	        """
	        try:
	            if isinstance(image, str):
	                # Keep the path in its own name so the error message can show it.
	                image_path = image
	                image = cv2.imread(image_path)
	                if image is None:
	                    print(f"Error: Tidak dapat membaca gambar {image_path}")
	                    return None

	            if skip_ocr and extracted_data is not None:
	                data = extracted_data
	            else:
	                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	                data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT)

	            if get_ocr_count_only:
	                return self.extract_anggota_count_from_ocr(data)

	            w = image.shape[1]
	            idx = self._find_label_index(data)
	            if idx is None:
	                print("Tidak ditemukan pasangan angka + 'anggota'/'members'.")
	                return None
	            num = idx - 1

	            # Reuse the label exactly as OCR read it so the language is kept.
	            replace_text = f"{jumlah} {data['text'][idx]}"

	            # Box covering both the number word and the label word.
	            x = min(data['left'][num], data['left'][idx])
	            y = min(data['top'][num], data['top'][idx])
	            w_box = data['width'][num] + data['width'][idx]
	            h_box = max(data['height'][num], data['height'][idx])
	            margin = 10
	            extra_margin_right = 20

	            # Sample the background 10% of the image width to the right of the
	            # caption, clamped to the last column, at the caption's mid height.
	            bg_x = min(int(x + w_box + (w * 0.1)), w - 1)
	            bg_color = image[y + h_box // 2, bg_x]

	            dark_mode = self.is_dark_mode(bg_color)
	            theme = 'Dark Mode' if dark_mode else 'Light Mode'
	            rect_color = tuple(int(c) for c in bg_color)
	            text_color = (88, 92, 98, 255) if dark_mode else (90, 94, 95, 255)

	            # Rough weight detection: share of pixels darker than 180 in the box.
	            roi = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)[y:y + h_box, x:x + w_box]
	            _, binary = cv2.threshold(roi, 180, 255, cv2.THRESH_BINARY_INV)
	            black_ratio = np.sum(binary == 255) / (w_box * h_box)
	            print(f"Rasio pixel hitam di area: {black_ratio:.2f}")
	            if black_ratio > 0.25:
	                print("Kemungkinan besar: Bold")
	                auto_font_path = self.font_path_medium
	            else:
	                print("Kemungkinan besar: Regular/Tipis")
	                auto_font_path = self.font_path_regular

	            # An explicit font path wins; anything else falls back to detection.
	            if isinstance(font_path, str) and font_path not in ("", "auto"):
	                effective_font_path = font_path
	            else:
	                effective_font_path = auto_font_path

	            # Paint over the old caption, with extra room on the right.
	            cv2.rectangle(
	                image,
	                (x - margin, y - margin),
	                (x + w_box + margin + extra_margin_right, y + h_box + margin),
	                rect_color,
	                -1,
	            )

	            # Draw the new caption with PIL so a custom font file can be used.
	            image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
	            draw = ImageDraw.Draw(image_pil)
	            try:
	                font = ImageFont.truetype(effective_font_path, int(h_box * 1.2))
	            except Exception as e:
	                print(f"Font error: {e}, fallback ke default font.")
	                font = ImageFont.load_default()
	            bbox = draw.textbbox((0, 0), replace_text, font=font)
	            text_height = bbox[3] - bbox[1]
	            # Anchor at the number word and centre vertically on its box.
	            text_x = data['left'][num]
	            text_y = data['top'][num] + (data['height'][num] - text_height) // 2
	            draw.text((text_x, text_y), replace_text, font=font, fill=text_color)
	            image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

	            return (image, theme) if return_theme else image

	        except Exception as e:
	            # Callers get None for any failure; the reason is only printed.
	            print(f"Error in replace_anggota: {str(e)}")
	            return None

	class IPhoneBottom:
	    """Rewrite the "<number> anggota/members" caption on an iPhone-style screenshot.

	    The caption is located with Tesseract word boxes (OCR runs on a 2x upscaled
	    grayscale copy), painted over and redrawn with a new number using an SF UI
	    Text font.
	    """

	    # Label words accepted after the number (compared case-insensitively).
	    _LABELS = ("anggota", "members")
	    # Factor by which the image is enlarged before it is handed to Tesseract.
	    _OCR_UPSCALE = 2

	    def __init__(self, font_path_medium="SFUIText-Bold.otf", font_path_regular="SFUIText-Semibold.otf"):
	        """Store the font files used for bold-looking and regular-looking captions."""
	        self.font_path_medium = font_path_medium
	        self.font_path_regular = font_path_regular

	    @staticmethod
	    def is_dark_mode(bg_color):
	        """Return True when the BGR colour's weighted brightness is below 128."""
	        r, g, b = float(bg_color[2]), float(bg_color[1]), float(bg_color[0])
	        brightness = (r * 299 + g * 587 + b * 114) / 1000
	        return brightness < 128

	    @classmethod
	    def _find_label_index(cls, data):
	        """Return the index of the first label word preceded by an all-digit word, else None."""
	        words = data['text']
	        for i, text in enumerate(words):
	            if i > 0 and text.lower() in cls._LABELS and words[i - 1].isdigit():
	                return i
	        return None

	    def extract_anggota_count_from_ocr(self, data):
	        """Return the number found right before "anggota"/"members" in OCR data, or 0."""
	        idx = self._find_label_index(data)
	        return 0 if idx is None else int(data['text'][idx - 1])

	    def replace_anggota(self, image, jumlah, font_path=None, show_preview=False, return_theme=False, skip_ocr=False, extracted_data=None, get_ocr_count_only=False):
	        """Replace the member count drawn in ``image`` with ``jumlah``.

	        Args:
	            image: Path to an image file, or an already loaded BGR array. An
	                array passed in is drawn on directly by ``cv2.rectangle``.
	            jumlah: Value written in front of the detected label word.
	            font_path: Font file to use; ``None``, ``""`` or ``"auto"`` selects
	                one of the instance fonts from the measured stroke density.
	            show_preview: Accepted for compatibility; not used.
	            return_theme: When true, return ``(image, theme)`` where theme is
	                ``'Dark Mode'`` or ``'Light Mode'``.
	            skip_ocr: When true and ``extracted_data`` is given, reuse that data
	                instead of running Tesseract. Its coordinates are used as-is,
	                i.e. they are taken to be in original-image pixels.
	            extracted_data: Dict shaped like ``pytesseract.image_to_data`` output
	                (keys ``text``, ``left``, ``top``, ``width``, ``height``).
	            get_ocr_count_only: When true, only return the detected count.

	        Returns:
	            The edited BGR image (or ``(image, theme)``), the detected count when
	            ``get_ocr_count_only`` is set, or ``None`` when the image cannot be
	            read, no "<digits> <label>" pair is found, or any error occurs.
	        """
	        try:
	            if isinstance(image, str):
	                # Keep the path in its own name so the error message can show it.
	                image_path = image
	                image = cv2.imread(image_path)
	                if image is None:
	                    print(f"Error: Tidak dapat membaca gambar {image_path}")
	                    return None

	            scale = self._OCR_UPSCALE
	            # Remember whether the OCR below ran on the enlarged copy: only then
	            # do the reported boxes need to be scaled back to image pixels.
	            upscaled = not (skip_ocr and extracted_data is not None)
	            if upscaled:
	                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	                processing = cv2.resize(
	                    gray,
	                    (gray.shape[1] * scale, gray.shape[0] * scale),
	                    interpolation=cv2.INTER_CUBIC,
	                )
	                data = pytesseract.image_to_data(processing, output_type=pytesseract.Output.DICT)
	            else:
	                data = extracted_data

	            if get_ocr_count_only:
	                return self.extract_anggota_count_from_ocr(data)

	            w = image.shape[1]
	            idx = self._find_label_index(data)
	            if idx is None:
	                print("Tidak ditemukan pasangan angka + 'anggota'/'members'.")
	                return None
	            num = idx - 1

	            # Reuse the label exactly as OCR read it so the language is kept.
	            replace_text = f"{jumlah} {data['text'][idx]}"

	            # Box covering both the number word and the label word, still in the
	            # coordinate space of whatever produced ``data``.
	            x = min(data['left'][num], data['left'][idx])
	            y = min(data['top'][num], data['top'][idx])
	            w_box = data['width'][num] + data['width'][idx]
	            h_box = max(data['height'][num], data['height'][idx])
	            if upscaled:
	                # Convert from upscaled-OCR coordinates to original-image pixels.
	                x = int(x / scale)
	                y = int(y / scale)
	                w_box = int(w_box / scale)
	                h_box = int(h_box / scale)
	            margin = 10
	            extra_margin_right = 20

	            # Sample the background 10% of the image width to the right of the
	            # caption, clamped to the last column, at the caption's mid height.
	            bg_x = min(int(x + w_box + (w * 0.1)), w - 1)
	            bg_color = image[y + h_box // 2, bg_x]

	            dark_mode = self.is_dark_mode(bg_color)
	            theme = 'Dark Mode' if dark_mode else 'Light Mode'
	            if dark_mode:
	                print("MANTAP MEN")
	                # Dark theme uses fixed colours instead of the sampled pixel.
	                rect_color = (10, 10, 10, 255)
	                text_color = (244, 244, 244, 255)
	            else:
	                rect_color = tuple(int(c) for c in bg_color)
	                text_color = (0, 0, 0, 255)

	            # Rough weight detection: share of pixels darker than 180 in the box.
	            roi = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)[y:y + h_box, x:x + w_box]
	            _, binary = cv2.threshold(roi, 180, 255, cv2.THRESH_BINARY_INV)
	            black_ratio = np.sum(binary == 255) / (w_box * h_box)
	            if black_ratio > 0.5:
	                print("Bold")
	                auto_font_path = self.font_path_medium
	            else:
	                print("Regular")
	                auto_font_path = self.font_path_regular

	            # An explicit font path wins; anything else falls back to detection.
	            if isinstance(font_path, str) and font_path not in ("", "auto"):
	                effective_font_path = font_path
	            else:
	                effective_font_path = auto_font_path

	            # Paint over the old caption, with extra room on the right.
	            cv2.rectangle(
	                image,
	                (x - margin, y - margin),
	                (x + w_box + margin + extra_margin_right, y + h_box + margin),
	                rect_color,
	                -1,
	            )

	            # Draw the new caption with PIL so a custom font file can be used.
	            image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
	            draw = ImageDraw.Draw(image_pil)
	            try:
	                font = ImageFont.truetype(effective_font_path, int(h_box * 1.2))
	            except Exception as e:
	                print(f"Font error: {e}, fallback ke default font.")
	                font = ImageFont.load_default()
	            bbox = draw.textbbox((0, 0), replace_text, font=font)
	            text_height = bbox[3] - bbox[1]
	            # Anchor at the box's left edge and centre vertically inside it.
	            text_x = x
	            text_y = y + (h_box - text_height) // 2
	            draw.text((text_x, text_y), replace_text, font=font, fill=text_color)
	            image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

	            return (image, theme) if return_theme else image

	        except Exception as e:
	            # Callers get None for any failure; the reason is only printed.
	            print(f"Error in replace_anggota: {str(e)}")
	            return None


	if __name__ == "__main__":
	    # Command-line entry point: parse the image path, the new count and an
	    # optional font override, then run the iPhone-style replacer on them.
	    cli = argparse.ArgumentParser(
	        description="OCR dan replace jumlah anggota/members dengan angka custom."
	    )
	    cli.add_argument("image_path", help="Path ke file gambar")
	    cli.add_argument("jumlah", help="Jumlah anggota/members yang diinginkan (angka saja)")
	    cli.add_argument("--font", default=None, help="Path font TTF (override, opsional)")
	    options = cli.parse_args()
	    # The returned image is not used here; the call only prints its diagnostics.
	    IPhoneBottom().replace_anggota(options.image_path, options.jumlah, options.font)