""" cover_pipeline_prototype.py Updated prototype pipeline using EasyOCR for text detection and OCR. Requirements: - Python 3.8+ - Install packages: pip install opencv-python pillow easyocr numpy imutils Usage examples: python cover_pipeline_prototype.py --input-dir ./covers --output-dir ./out """ import os import json import argparse from PIL import Image, ImageDraw import cv2 import numpy as np from math import floor import easyocr # ---------------------- Utilities ---------------------- reader = easyocr.Reader(['en'], gpu=False) def mm_to_inches(mm): return mm / 25.4 def inches_to_pixels(inches, dpi): return int(round(inches * dpi)) def mm_to_pixels(mm, dpi): return inches_to_pixels(mm_to_inches(mm), dpi) def read_image(path): img = Image.open(path).convert('RGB') return img def get_image_dpi(img: Image.Image): try: info = img.info if 'dpi' in info and isinstance(info['dpi'], tuple): return int(info['dpi'][0]) except Exception: pass return None def normalize_to_dpi(img: Image.Image, current_dpi: int, target_dpi: int): if current_dpi is None: return img, target_dpi if current_dpi == target_dpi: return img, current_dpi scale = target_dpi / float(current_dpi) new_w = int(round(img.width * scale)) new_h = int(round(img.height * scale)) resized = img.resize((new_w, new_h), resample=Image.LANCZOS) return resized, target_dpi # ---------------------- Badge zone ---------------------- def compute_badge_zone(img_w_px, img_h_px, dpi, badge_height_mm=9): badge_h_px = mm_to_pixels(badge_height_mm, dpi) x1 = img_w_px // 2 y1 = img_h_px - badge_h_px x2 = img_w_px y2 = img_h_px return (x1, y1, x2, y2) # ---------------------- Image quality ---------------------- def variance_of_laplacian(img_cv_gray): return cv2.Laplacian(img_cv_gray, cv2.CV_64F).var() def check_blur_threshold(pil_img: Image.Image, threshold=100.0): cv = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2GRAY) var = variance_of_laplacian(cv) return float(var), bool(var >= threshold) # ---------------------- EasyOCR 
# ---------------------- EasyOCR text detection ----------------------

def detect_text_easyocr(pil_img: Image.Image, reader):
    """Run EasyOCR on *pil_img* and return the detected text lines.

    Each entry is {'text': str, 'conf': float, 'bbox': (x1, y1, x2, y2)},
    with the bbox being the axis-aligned envelope of EasyOCR's quadrilateral.
    """
    img_cv = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    lines = []
    for bbox, text, conf in reader.readtext(img_cv):
        pts = np.array(bbox).astype(int)
        x1, y1 = np.min(pts[:, 0]), np.min(pts[:, 1])
        x2, y2 = np.max(pts[:, 0]), np.max(pts[:, 1])
        lines.append({
            'text': text.strip(),
            'conf': float(conf),
            'bbox': (int(x1), int(y1), int(x2), int(y2)),
        })
    return lines

# ---------------------- Overlap math ----------------------

def rect_intersection_area(a, b):
    """Area of the intersection of two (x1, y1, x2, y2) rectangles (0 if disjoint)."""
    x1 = max(a[0], b[0])
    y1 = max(a[1], b[1])
    x2 = min(a[2], b[2])
    y2 = min(a[3], b[3])
    if x2 <= x1 or y2 <= y1:
        return 0
    return (x2 - x1) * (y2 - y1)


def rect_area(r):
    """Area of an (x1, y1, x2, y2) rectangle; 0 for degenerate rectangles."""
    return max(0, r[2] - r[0]) * max(0, r[3] - r[1])

# ---------------------- Overlay & report ----------------------

def draw_overlay_and_save(pil_img: Image.Image, badge_rect, text_lines, out_path):
    """Save a copy of *pil_img* with the badge zone outlined in red and each
    detected text bbox outlined in blue."""
    img_draw = pil_img.convert('RGBA')
    draw = ImageDraw.Draw(img_draw)
    draw.rectangle(badge_rect, outline='red', width=3)
    for ln in text_lines:
        draw.rectangle(ln['bbox'], outline='blue', width=2)
    img_draw.convert('RGB').save(out_path)

# ---------------------- Pipeline per image ----------------------

def process_image(path, reader=None, target_dpi=300, overlap_threshold=0.01,
                  blur_threshold=100.0):
    """Validate a single cover image and return a JSON-serializable report.

    Checks performed:
      - blur (variance of Laplacian against *blur_threshold*),
      - unauthorized text overlapping the award-badge zone,
      - any text inside the left/right/middle safe margins.

    *reader* defaults to the module-level shared EasyOCR reader; it is
    resolved lazily at call time instead of being bound at def time.
    """
    if reader is None:
        # Parameter name shadows the module global, so look it up explicitly.
        reader = globals()['reader']

    img_pil = read_image(path)
    orig_w, orig_h = img_pil.width, img_pil.height
    current_dpi = get_image_dpi(img_pil)
    img_norm, dpi_used = normalize_to_dpi(img_pil, current_dpi, target_dpi)
    w, h = img_norm.width, img_norm.height

    # Geometry: award-badge zone plus the three "keep clear" safe margins
    # (3 mm at each outer edge, a 6 mm band straddling the vertical centre).
    badge = compute_badge_zone(w, h, dpi_used)
    left_margin_px = mm_to_pixels(3, dpi_used)
    right_margin_px = mm_to_pixels(3, dpi_used)
    middle_margin_px = mm_to_pixels(6, dpi_used)
    left_margin = (0, 0, left_margin_px, h)
    right_margin = (w - right_margin_px, 0, w, h)
    middle_margin = ((w // 2) - (middle_margin_px // 2), 0,
                     (w // 2) + (middle_margin_px // 2), h)

    blur_var, blur_ok = check_blur_threshold(img_norm, threshold=blur_threshold)

    text_lines = detect_text_easyocr(img_norm, reader)

    # Mean OCR confidence; 0.0 when nothing was detected. (The original
    # divided by len() unconditionally — ZeroDivisionError on blank covers.)
    conf_values = [ln['conf'] for ln in text_lines]
    confidence_score = (round(sum(conf_values) / len(conf_values), 2)
                        if conf_values else 0.0)

    allowed_words = set("winner of the 21st century emily dickinson award".split())

    unauthorized_texts = []
    text_in_safe_margin = []
    # Flag by line index, not by text equality, so two different lines that
    # happen to contain the same text are not both marked when only one is bad.
    flagged_ids = set()
    for idx, ln in enumerate(text_lines):
        bbox = ln['bbox']
        a = rect_area(bbox)
        if a <= 0:
            continue
        # Fraction of this line's bbox that falls inside the award zone.
        ratio_award = rect_intersection_area(bbox, badge) / a
        in_safe_margin = any(
            rect_intersection_area(bbox, margin) > 0
            for margin in (left_margin, right_margin, middle_margin)
        )
        text_words = set(ln['text'].lower().split())
        # Unauthorized: overlaps the award zone and is not wholly composed
        # of the permitted award wording.
        if ratio_award >= overlap_threshold and not text_words.issubset(allowed_words):
            unauthorized_texts.append(ln['text'])
            flagged_ids.add(idx)
        if in_safe_margin:
            text_in_safe_margin.append(ln['text'])
            flagged_ids.add(idx)

    cover_valid = not unauthorized_texts and not text_in_safe_margin
    validation_message = (
        "Cover is valid."
        if cover_valid
        else "Cover invalid due to unauthorized text in award zone or safe margins."
    )

    # Best-effort overlay rendering; failure (e.g. unwritable CWD) is non-fatal.
    overlay_path = None
    try:
        name = os.path.splitext(os.path.basename(path))[0]
        overlay_path = name + '_overlay.jpg'
        draw_overlay_and_save(img_norm, badge, text_lines, overlay_path)
    except Exception:
        overlay_path = None

    results = []
    for idx, ln in enumerate(text_lines):
        bbox = ln['bbox']
        a = rect_area(bbox)
        inter = rect_intersection_area(bbox, badge)
        results.append({
            'text': ln['text'],
            'conf': ln['conf'],
            'bbox': bbox,
            'overlap_ratio': (inter / a) if a > 0 else 0.0,
            'flagged': idx in flagged_ids,
        })

    return {
        'file': path,
        'orig_size': (orig_w, orig_h),
        'dpi_inferred': current_dpi,
        'dpi_used': dpi_used,
        'badge_bbox': badge,
        'blur_variance': blur_var,
        'blur_ok': blur_ok,
        'text_lines': results,
        'cover_valid': cover_valid,
        'unauthorized_text_in_award_zone': unauthorized_texts,
        'text_in_safe_margin': text_in_safe_margin,
        'validation_message': validation_message,
        'overlay_path': overlay_path,
        # Mean OCR confidence. The original stored
        # round(1 - overlap_threshold, 2) * 100 here — a constant derived
        # from the CLI threshold, unrelated to OCR output.
        'confidence_score': confidence_score,
    }

# ---------------------- CLI / Bulk runner ----------------------

def process_folder(input_dir, output_dir, target_dpi=300, overlap_threshold=0.01,
                   blur_threshold=100.0):
    """Process every image in *input_dir*: write a per-image JSON report and
    overlay into *output_dir*, plus an index.json listing all processed files.

    Returns the list of report dicts. Files are processed in sorted order so
    the output is deterministic across runs.
    """
    os.makedirs(output_dir, exist_ok=True)
    reports = []
    for fname in sorted(os.listdir(input_dir)):
        if not fname.lower().endswith(('.png', '.jpg', '.jpeg', '.tif', '.tiff')):
            continue
        path = os.path.join(input_dir, fname)
        rpt = process_image(path, reader, target_dpi=target_dpi,
                            overlap_threshold=overlap_threshold,
                            blur_threshold=blur_threshold)
        name = os.path.splitext(os.path.basename(path))[0]
        # Move the overlay (written to the CWD by process_image) into output_dir.
        if rpt['overlay_path']:
            try:
                dest = os.path.join(output_dir, os.path.basename(rpt['overlay_path']))
                os.replace(rpt['overlay_path'], dest)
                rpt['overlay_path'] = dest
            except Exception:
                rpt['overlay_path'] = None
        out_json = os.path.join(output_dir, name + '.json')
        with open(out_json, 'w', encoding='utf-8') as f:
            json.dump(rpt, f, indent=2)
        reports.append(rpt)
    with open(os.path.join(output_dir, 'index.json'), 'w', encoding='utf-8') as f:
        json.dump({'reports': [r['file'] for r in reports]}, f, indent=2)
    return reports


def cli():
    """Command-line entry point; see module docstring for usage."""
    p = argparse.ArgumentParser()
    # NOTE(review): the defaults below are developer-machine paths; pass
    # --input-dir/--output-dir explicitly on any other machine.
    p.add_argument('--input-dir', default="/Users/akki/Desktop/AKKI/Presonal Projects/Text overlay in book covers detection/test images")
    p.add_argument('--output-dir', default="/Users/akki/Desktop/AKKI/Presonal Projects/Text overlay in book covers detection/output(front fix)frfrfr")
    p.add_argument('--target-dpi', type=int, default=300)
    p.add_argument('--overlap-threshold', type=float, default=0.01)
    p.add_argument('--blur-threshold', type=float, default=100.0)
    args = p.parse_args()
    process_folder(args.input_dir, args.output_dir, target_dpi=args.target_dpi,
                   overlap_threshold=args.overlap_threshold,
                   blur_threshold=args.blur_threshold)


if __name__ == '__main__':
    cli()