ocr / bottom.py
kerenmasku's picture
Upload bottom.py with huggingface_hub
d9212ab verified
import pytesseract
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import argparse
class AndroidBottom:
    """Rewrite the "<number> anggota/members" label found in a screenshot.

    The label is located from Tesseract word data, painted over with a colour
    sampled from the image, and redrawn with the requested number. The font
    is either the one supplied by the caller or one of the two configured
    fonts, chosen from the ink density of the original label.
    """

    # Lower-cased OCR tokens that identify the member-count label.
    _LABELS = ("anggota", "members")

    def __init__(self, font_path_medium="Roboto-Medium.ttf", font_path_regular="Roboto-Regular.ttf"):
        """Store the font paths used for bold-looking and regular-looking text."""
        self.font_path_medium = font_path_medium
        self.font_path_regular = font_path_regular

    @staticmethod
    def is_dark_mode(bg_color):
        """Return True when a BGR colour has a weighted brightness below 128."""
        r, g, b = float(bg_color[2]), float(bg_color[1]), float(bg_color[0])
        brightness = (r * 299 + g * 587 + b * 114) / 1000
        return brightness < 128

    @classmethod
    def _find_label_index(cls, data):
        """Return the index of the first label word preceded by a digit word, or None."""
        words = data['text']
        for i, text in enumerate(words):
            if text.lower() in cls._LABELS and i > 0 and words[i - 1].isdigit():
                return i
        return None

    def extract_anggota_count_from_ocr(self, data):
        """Return the number that precedes "anggota"/"members" in OCR data.

        Args:
            data: mapping with a 'text' list of OCR words.

        Returns:
            The integer value of the word right before the label, or 0 when
            no "<digits> <label>" pair exists.
        """
        idx = self._find_label_index(data)
        return int(data['text'][idx - 1]) if idx is not None else 0

    def replace_anggota(self, image, jumlah, font_path=None, show_preview=False, return_theme=False, skip_ocr=False, extracted_data=None, get_ocr_count_only=False):
        """Replace the member count drawn in an image.

        Args:
            image: path to an image file, or an image array (handled as BGR,
                since it is converted with cv2.COLOR_BGR2GRAY / COLOR_BGR2RGB).
            jumlah: value written in place of the detected number.
            font_path: font file to use; None, "" or "auto" selects a font
                automatically from the ink density of the original label.
            show_preview: accepted for backward compatibility; not used.
            return_theme: when True, return ``(image, theme)`` where theme is
                'Dark Mode' or 'Light Mode'.
            skip_ocr: when True and ``extracted_data`` is given, use that data
                instead of running Tesseract.
            extracted_data: OCR word data with 'text', 'left', 'top', 'width'
                and 'height' lists.
            get_ocr_count_only: when True, only return the detected count
                (see ``extract_anggota_count_from_ocr``).

        Returns:
            The edited image (or ``(image, theme)``), the detected count when
            ``get_ocr_count_only`` is set, or None when the image cannot be
            read, no "<digits> <label>" pair is found, or any error occurs.
        """
        try:
            if isinstance(image, str):
                # Keep the path: after imread() fails `image` is None and the
                # error message would otherwise print "None".
                image_path = image
                image = cv2.imread(image_path)
                if image is None:
                    print(f"Error: Tidak dapat membaca gambar {image_path}")
                    return None

            # Use provided OCR data or perform OCR.
            if skip_ocr and extracted_data is not None:
                data = extracted_data
            else:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT)

            if get_ocr_count_only:
                return self.extract_anggota_count_from_ocr(data)

            h, w = image.shape[:2]
            idx = self._find_label_index(data)
            if idx is None:
                print("Tidak ditemukan pasangan angka + 'anggota'/'members'.")
                return None

            num = idx - 1
            # Reuse the detected word so the label language is preserved.
            replace_text = f"{jumlah} {data['text'][idx]}"

            # Box covering both the number and the label word.
            x = min(data['left'][num], data['left'][idx])
            y = min(data['top'][num], data['top'][idx])
            w_box = data['width'][num] + data['width'][idx]
            h_box = max(data['height'][num], data['height'][idx])
            margin = 10
            extra_margin_right = 20

            # Sample the background 10% of the image width to the right of
            # the label, at the label's vertical centre, clamped to the image.
            bg_x = min(int(x + w_box + (w * 0.1)), w - 1)
            bg_y = min(y + h_box // 2, h - 1)
            bg_color = image[bg_y, bg_x]
            dark_mode = self.is_dark_mode(bg_color)
            theme = 'Dark Mode' if dark_mode else 'Light Mode'
            rect_color = tuple(int(c) for c in bg_color)
            text_color = (88, 92, 98, 255) if dark_mode else (90, 94, 95, 255)

            # Simple font-weight detection: share of pixels at or below the
            # threshold inside the label box.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            roi = gray[y:y + h_box, x:x + w_box]
            if roi.size:
                _, binary = cv2.threshold(roi, 180, 255, cv2.THRESH_BINARY_INV)
                # Divide by the pixels actually sampled; the slice is clipped
                # when the box extends past the image border.
                black_ratio = np.sum(binary == 255) / roi.size
            else:
                black_ratio = 0.0
            print(f"Rasio pixel hitam di area: {black_ratio:.2f}")
            if black_ratio > 0.25:
                print("Kemungkinan besar: Bold")
                auto_font_path = self.font_path_medium
            else:
                print("Kemungkinan besar: Regular/Tipis")
                auto_font_path = self.font_path_regular

            # A caller-supplied font wins unless it is None, "" or "auto".
            effective_font_path = font_path if font_path not in [None, "", "auto"] else auto_font_path
            if not isinstance(effective_font_path, str) or not effective_font_path:
                effective_font_path = auto_font_path

            # Draw on a copy so an array passed in by the caller is not left
            # with a rectangle painted on it.
            canvas = image.copy()
            cv2.rectangle(
                canvas,
                (x - margin, y - margin),
                (x + w_box + margin + extra_margin_right, y + h_box + margin),
                rect_color,
                -1
            )

            # Write the replacement text with PIL so a custom font can be used.
            image_pil = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(image_pil)
            try:
                font = ImageFont.truetype(effective_font_path, int(h_box * 1.2))
            except Exception as e:
                print(f"Font error: {e}, fallback ke default font.")
                font = ImageFont.load_default()
            bbox = draw.textbbox((0, 0), replace_text, font=font)
            text_height = bbox[3] - bbox[1]
            # Anchor at the original number and centre on its height.
            text_x = data['left'][num]
            text_y = data['top'][num] + (data['height'][num] - text_height) // 2
            draw.text((text_x, text_y), replace_text, font=font, fill=text_color)
            image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

            return (image, theme) if return_theme else image
        except Exception as e:
            print(f"Error in replace_anggota: {str(e)}")
            return None
class IPhoneBottom:
    """Rewrite the "<number> anggota/members" label found in a screenshot.

    Same flow as the Android variant in this file, with different default
    fonts, colours and bold threshold. When OCR is run here it is performed
    on a 2x upscaled grayscale copy, and the resulting coordinates are mapped
    back to the original image.
    """

    # Lower-cased OCR tokens that identify the member-count label.
    _LABELS = ("anggota", "members")

    # Upscaling factor applied to the image before it is handed to Tesseract.
    _OCR_SCALE = 2

    def __init__(self, font_path_medium="SFUIText-Bold.otf", font_path_regular="SFUIText-Semibold.otf"):
        """Store the font paths used for bold-looking and regular-looking text."""
        self.font_path_medium = font_path_medium
        self.font_path_regular = font_path_regular

    @staticmethod
    def is_dark_mode(bg_color):
        """Return True when a BGR colour has a weighted brightness below 128."""
        r, g, b = float(bg_color[2]), float(bg_color[1]), float(bg_color[0])
        brightness = (r * 299 + g * 587 + b * 114) / 1000
        return brightness < 128

    @classmethod
    def _find_label_index(cls, data):
        """Return the index of the first label word preceded by a digit word, or None."""
        words = data['text']
        for i, text in enumerate(words):
            if text.lower() in cls._LABELS and i > 0 and words[i - 1].isdigit():
                return i
        return None

    def extract_anggota_count_from_ocr(self, data):
        """Return the number that precedes "anggota"/"members" in OCR data.

        Args:
            data: mapping with a 'text' list of OCR words.

        Returns:
            The integer value of the word right before the label, or 0 when
            no "<digits> <label>" pair exists.
        """
        idx = self._find_label_index(data)
        return int(data['text'][idx - 1]) if idx is not None else 0

    def replace_anggota(self, image, jumlah, font_path=None, show_preview=False, return_theme=False, skip_ocr=False, extracted_data=None, get_ocr_count_only=False):
        """Replace the member count drawn in an image.

        Args:
            image: path to an image file, or an image array (handled as BGR,
                since it is converted with cv2.COLOR_BGR2GRAY / COLOR_BGR2RGB).
            jumlah: value written in place of the detected number.
            font_path: font file to use; None, "" or "auto" selects a font
                automatically from the ink density of the original label.
            show_preview: accepted for backward compatibility; not used.
            return_theme: when True, return ``(image, theme)`` where theme is
                'Dark Mode' or 'Light Mode'.
            skip_ocr: when True and ``extracted_data`` is given, use that data
                instead of running Tesseract.
            extracted_data: OCR word data with 'text', 'left', 'top', 'width'
                and 'height' lists; its coordinates are used as-is.
            get_ocr_count_only: when True, only return the detected count
                (see ``extract_anggota_count_from_ocr``).

        Returns:
            The edited image (or ``(image, theme)``), the detected count when
            ``get_ocr_count_only`` is set, or None when the image cannot be
            read, no "<digits> <label>" pair is found, or any error occurs.
        """
        try:
            if isinstance(image, str):
                # Keep the path: after imread() fails `image` is None and the
                # error message would otherwise print "None".
                image_path = image
                image = cv2.imread(image_path)
                if image is None:
                    print(f"Error: Tidak dapat membaca gambar {image_path}")
                    return None

            # Use provided OCR data or perform OCR. Remember which coordinate
            # scale `data` is in: only data produced here comes from the
            # upscaled image, regardless of whether `extracted_data` was also
            # passed.
            if skip_ocr and extracted_data is not None:
                data = extracted_data
                ocr_scale = 1
            else:
                ocr_scale = self._OCR_SCALE
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                processing = cv2.resize(
                    gray,
                    (gray.shape[1] * ocr_scale, gray.shape[0] * ocr_scale),
                    interpolation=cv2.INTER_CUBIC,
                )
                data = pytesseract.image_to_data(processing, output_type=pytesseract.Output.DICT)

            if get_ocr_count_only:
                return self.extract_anggota_count_from_ocr(data)

            h, w = image.shape[:2]
            idx = self._find_label_index(data)
            if idx is None:
                print("Tidak ditemukan pasangan angka + 'anggota'/'members'.")
                return None

            num = idx - 1
            # Reuse the detected word so the label language is preserved.
            replace_text = f"{jumlah} {data['text'][idx]}"

            def to_image(value):
                """Map an OCR coordinate back to original-image pixels."""
                return int(value / ocr_scale) if ocr_scale != 1 else value

            # Box covering both the number and the label word.
            x = to_image(min(data['left'][num], data['left'][idx]))
            y = to_image(min(data['top'][num], data['top'][idx]))
            w_box = to_image(data['width'][num] + data['width'][idx])
            h_box = to_image(max(data['height'][num], data['height'][idx]))
            margin = 10
            extra_margin_right = 20

            # Sample the background 10% of the image width to the right of
            # the label, at the label's vertical centre, clamped to the image.
            bg_x = min(int(x + w_box + (w * 0.1)), w - 1)
            bg_y = min(y + h_box // 2, h - 1)
            bg_color = image[bg_y, bg_x]
            dark_mode = self.is_dark_mode(bg_color)
            theme = 'Dark Mode' if dark_mode else 'Light Mode'
            if dark_mode:
                print("MANTAP MEN")
                rect_color = (10, 10, 10, 255)
                text_color = (244, 244, 244, 255)
            else:
                rect_color = tuple(int(c) for c in bg_color)
                text_color = (0, 0, 0, 255)

            # Simple font-weight detection: share of pixels at or below the
            # threshold inside the label box.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            roi = gray[y:y + h_box, x:x + w_box]
            if roi.size:
                _, binary = cv2.threshold(roi, 180, 255, cv2.THRESH_BINARY_INV)
                # Divide by the pixels actually sampled; the slice is clipped
                # when the box extends past the image border.
                black_ratio = np.sum(binary == 255) / roi.size
            else:
                black_ratio = 0.0
            if black_ratio > 0.5:
                print("Bold")
                auto_font_path = self.font_path_medium
            else:
                print("Regular")
                auto_font_path = self.font_path_regular

            # A caller-supplied font wins unless it is None, "" or "auto".
            effective_font_path = font_path if font_path not in [None, "", "auto"] else auto_font_path
            if not isinstance(effective_font_path, str) or not effective_font_path:
                effective_font_path = auto_font_path

            # Draw on a copy so an array passed in by the caller is not left
            # with a rectangle painted on it.
            canvas = image.copy()
            cv2.rectangle(
                canvas,
                (x - margin, y - margin),
                (x + w_box + margin + extra_margin_right, y + h_box + margin),
                rect_color,
                -1
            )

            # Write the replacement text with PIL so a custom font can be used.
            image_pil = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(image_pil)
            try:
                font = ImageFont.truetype(effective_font_path, int(h_box * 1.2))
            except Exception as e:
                print(f"Font error: {e}, fallback ke default font.")
                font = ImageFont.load_default()
            bbox = draw.textbbox((0, 0), replace_text, font=font)
            text_height = bbox[3] - bbox[1]
            # Anchor at the box origin and centre on the box height.
            text_x = x
            text_y = y + (h_box - text_height) // 2
            draw.text((text_x, text_y), replace_text, font=font, fill=text_color)
            image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

            return (image, theme) if return_theme else image
        except Exception as e:
            print(f"Error in replace_anggota: {str(e)}")
            return None
# Command-line entry point: run the iPhone replacer on one image.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="OCR dan replace jumlah anggota/members dengan angka custom.")
    parser.add_argument("image_path", help="Path ke file gambar")
    parser.add_argument("jumlah", help="Jumlah anggota/members yang diinginkan (angka saja)")
    parser.add_argument("--font", default=None, help="Path font TTF (override, opsional)")
    # Opt-in output path: without it the edited image was computed and then
    # discarded, so the script produced nothing but log lines.
    parser.add_argument("--output", default=None, help="Path file output untuk menyimpan hasil (opsional)")
    args = parser.parse_args()
    replacer = IPhoneBottom()
    result = replacer.replace_anggota(args.image_path, args.jumlah, args.font)
    if args.output and result is not None:
        # cv2.imwrite returns False when the image could not be written.
        if not cv2.imwrite(args.output, result):
            print(f"Error: Tidak dapat menyimpan gambar {args.output}")