Spaces:

AKKI-AFK
/

Cover_Overlap_Detection

Sleeping

App Files Files Community

Cover_Overlap_Detection / validator.py

AKKI-AFK

Pushing version 1

84693e0 verified 3 months ago

raw

history blame contribute delete

9.22 kB

	"""
	cover_pipeline_prototype.py
	Updated prototype pipeline using EasyOCR for text detection and OCR.

	Requirements:
	- Python 3.8+
	- Install packages: pip install opencv-python pillow easyocr numpy imutils

	Usage examples:
	python cover_pipeline_prototype.py --input-dir ./covers --output-dir ./out
	"""

	import os
	import json
	import argparse
	from PIL import Image, ImageDraw
	import cv2
	import numpy as np
	from math import floor
	import easyocr

	# ---------------------- Utilities ----------------------
	# Shared EasyOCR reader, constructed once when the module is imported:
	# English language only, GPU disabled. process_image() binds it as the default
	# value of its `reader` parameter and process_folder() passes it explicitly.
	reader = easyocr.Reader(['en'], gpu=False)

	def mm_to_inches(mm):
	    """Convert a length in millimetres to inches (25.4 mm per inch)."""
	    millimetres_per_inch = 25.4
	    return mm / millimetres_per_inch


	def inches_to_pixels(inches, dpi):
	    """Convert a length in inches to a whole number of pixels at ``dpi``.

	    The product is rounded with the built-in ``round`` and returned as ``int``.
	    """
	    exact_pixels = inches * dpi
	    nearest = round(exact_pixels)
	    return int(nearest)


	def mm_to_pixels(mm, dpi):
	    """Convert a length in millimetres to whole pixels at ``dpi``.

	    Goes through inches: ``mm_to_inches`` first, then ``inches_to_pixels``.
	    """
	    length_in_inches = mm_to_inches(mm)
	    return inches_to_pixels(length_in_inches, dpi)


	def read_image(path):
	    """Load the image at ``path`` and return it converted to RGB mode.

	    ``Image.open`` keeps the underlying file open until the image object is
	    closed. ``convert`` returns a new, fully loaded image, so the source is
	    opened in a ``with`` block and its file handle is released before returning.

	    Args:
	        path: Path of the image file to read.

	    Returns:
	        A new ``PIL.Image.Image`` in ``'RGB'`` mode.
	    """
	    with Image.open(path) as source:
	        return source.convert('RGB')


	def get_image_dpi(img: "Image.Image"):
	    """Return the horizontal DPI stored in the image's ``info`` dict, if usable.

	    The first element of ``img.info['dpi']`` is rounded to the nearest integer
	    rather than truncated, so a fractional value such as 299.9994 is reported
	    as 300 instead of 299. Values that are missing, not a tuple, not numeric,
	    or not strictly positive yield ``None`` so callers never receive a DPI
	    they cannot divide by.

	    Args:
	        img: Object exposing an ``info`` mapping (a Pillow image).

	    Returns:
	        The DPI as a positive ``int``, or ``None`` when no usable value exists.
	    """
	    try:
	        dpi = img.info.get('dpi')
	    except AttributeError:
	        # No ``info`` mapping available: nothing to report.
	        return None
	    if not isinstance(dpi, tuple) or not dpi:
	        return None
	    try:
	        value = int(round(float(dpi[0])))
	    except (TypeError, ValueError, OverflowError):
	        # Non-numeric, NaN or infinite metadata.
	        return None
	    return value if value > 0 else None


	def normalize_to_dpi(img: "Image.Image", current_dpi: int, target_dpi: int):
	    """Rescale ``img`` so that it is expressed at ``target_dpi``.

	    Args:
	        img: Image to rescale.
	        current_dpi: DPI the image is currently at, or ``None`` when unknown.
	        target_dpi: DPI to normalise to.

	    Returns:
	        ``(image, dpi_used)``. When ``current_dpi`` is unknown or not
	        positive, the image is returned untouched and is assumed to already
	        be at ``target_dpi``. When it already equals ``target_dpi``, the image
	        is returned untouched. Otherwise a LANCZOS-resampled copy is returned.
	    """
	    # A missing or non-positive DPI cannot be used as a divisor; treat it as unknown.
	    if current_dpi is None or current_dpi <= 0:
	        return img, target_dpi
	    if current_dpi == target_dpi:
	        return img, current_dpi
	    scale = target_dpi / float(current_dpi)
	    # Clamp to one pixel so strong down-scaling of a tiny image stays resizable.
	    new_w = max(1, int(round(img.width * scale)))
	    new_h = max(1, int(round(img.height * scale)))
	    resized = img.resize((new_w, new_h), resample=Image.LANCZOS)
	    return resized, target_dpi

	# ---------------------- Badge zone ----------------------

	def compute_badge_zone(img_w_px, img_h_px, dpi, badge_height_mm=9):
	    """Return the badge rectangle as ``(x1, y1, x2, y2)`` in pixels.

	    The rectangle spans from the horizontal midpoint to the right edge of the
	    image and covers the bottom ``badge_height_mm`` millimetres (converted to
	    pixels at ``dpi``).
	    """
	    strip_height_px = mm_to_pixels(badge_height_mm, dpi)
	    left = img_w_px // 2
	    top = img_h_px - strip_height_px
	    return (left, top, img_w_px, img_h_px)

	# ---------------------- Image quality ----------------------

	def variance_of_laplacian(img_cv_gray):
	    """Return the variance of the Laplacian of ``img_cv_gray``.

	    The Laplacian is computed with 64-bit float output (``cv2.CV_64F``).
	    """
	    laplacian = cv2.Laplacian(img_cv_gray, cv2.CV_64F)
	    return laplacian.var()


	def check_blur_threshold(pil_img: Image.Image, threshold=100.0):
	    """Measure sharpness and compare it to ``threshold``.

	    The image is converted from RGB to grayscale and scored with
	    ``variance_of_laplacian``.

	    Returns:
	        ``(variance, ok)`` where ``variance`` is a ``float`` and ``ok`` is
	        ``True`` when the variance is at least ``threshold``.
	    """
	    rgb_pixels = np.array(pil_img)
	    gray = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
	    sharpness = variance_of_laplacian(gray)
	    passes = sharpness >= threshold
	    return float(sharpness), bool(passes)

	# ---------------------- EasyOCR text detection ----------------------

	def detect_text_easyocr(pil_img: Image.Image, reader):
	    """Run ``reader.readtext`` on the image and return the detected lines.

	    The image is converted from RGB to BGR before being handed to the reader.
	    Each ``(bbox, text, conf)`` result becomes a dict with:

	    - ``'text'``: the recognised text with surrounding whitespace stripped
	    - ``'conf'``: the confidence as a ``float``
	    - ``'bbox'``: ``(x1, y1, x2, y2)`` ints, the axis-aligned extent of the
	      result's points after casting them to int
	    """
	    bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
	    detections = []
	    for quad, raw_text, score in reader.readtext(bgr):
	        corners = np.array(quad).astype(int)
	        xs = corners[:, 0]
	        ys = corners[:, 1]
	        extent = (int(np.min(xs)), int(np.min(ys)), int(np.max(xs)), int(np.max(ys)))
	        detections.append({'text': raw_text.strip(), 'conf': float(score), 'bbox': extent})
	    return detections

	# ---------------------- Overlap math ----------------------

	def rect_intersection_area(a, b):
	    """Return the overlap area of two ``(x1, y1, x2, y2)`` rectangles.

	    Rectangles that merely touch, or do not meet at all, give ``0``.
	    """
	    left = max(a[0], b[0])
	    top = max(a[1], b[1])
	    right = min(a[2], b[2])
	    bottom = min(a[3], b[3])
	    if right > left and bottom > top:
	        return (right - left) * (bottom - top)
	    return 0


	def rect_area(r):
	    """Return the area of an ``(x1, y1, x2, y2)`` rectangle.

	    A negative width or height is clamped to zero, so an inverted or
	    degenerate rectangle has area ``0``.
	    """
	    width = r[2] - r[0]
	    height = r[3] - r[1]
	    clamped_width = width if width > 0 else 0
	    clamped_height = height if height > 0 else 0
	    return clamped_width * clamped_height

	# ---------------------- Overlay & report ----------------------

	def draw_overlay_and_save(pil_img: Image.Image, badge_rect, text_lines, out_path):
	    """Save a copy of the image annotated with the badge and text boxes.

	    The badge rectangle is outlined in red (width 3) and the ``'bbox'`` of
	    every entry in ``text_lines`` in blue (width 2). Drawing happens on an
	    RGBA copy, which is converted back to RGB and written to ``out_path``.
	    """
	    canvas = pil_img.convert('RGBA')
	    pen = ImageDraw.Draw(canvas)
	    pen.rectangle(badge_rect, outline='red', width=3)
	    for text_box in (entry['bbox'] for entry in text_lines):
	        pen.rectangle(text_box, outline='blue', width=2)
	    flattened = canvas.convert('RGB')
	    flattened.save(out_path)

	# ---------------------- Pipeline per image ----------------------

	def process_image(path, reader=reader, target_dpi=300, overlap_threshold=0.01, blur_threshold=100.0):
	    """Validate a single cover image and return a report dict.

	    The image is loaded, normalised to ``target_dpi``, scored for blur and run
	    through OCR. Every detected text line is then checked against the badge
	    zone and against three safe-margin strips: 3 mm on the left edge, 3 mm on
	    the right edge and 6 mm centred on the vertical midline. An annotated
	    overlay named ``<stem>_overlay.jpg`` is written to the current working
	    directory on a best-effort basis.

	    Args:
	        path: Path of the image file.
	        reader: OCR reader handed to ``detect_text_easyocr``; defaults to the
	            module-level reader.
	        target_dpi: DPI the image is normalised to before measuring.
	        overlap_threshold: Minimum fraction of a text box's area that must lie
	            inside the badge zone for the line to count as inside it.
	        blur_threshold: Threshold forwarded to ``check_blur_threshold``.

	    Returns:
	        A dict with the keys ``file``, ``orig_size``, ``dpi_inferred``,
	        ``dpi_used``, ``badge_bbox``, ``blur_variance``, ``blur_ok``,
	        ``text_lines``, ``cover_valid``, ``unauthorized_text_in_award_zone``,
	        ``text_in_safe_margin``, ``validation_message``, ``overlay_path``
	        (``None`` when the overlay could not be written), ``confidence_score``
	        and ``ocr_mean_confidence`` (mean OCR confidence rounded to two
	        decimals, or ``None`` when no text was detected).
	    """
	    img_pil = read_image(path)
	    orig_w, orig_h = img_pil.width, img_pil.height
	    current_dpi = get_image_dpi(img_pil)
	    img_norm, dpi_used = normalize_to_dpi(img_pil, current_dpi, target_dpi)

	    w, h = img_norm.width, img_norm.height
	    badge = compute_badge_zone(w, h, dpi_used)

	    edge_margin_px = mm_to_pixels(3, dpi_used)
	    middle_margin_px = mm_to_pixels(6, dpi_used)
	    safe_margins = {
	        'left': (0, 0, edge_margin_px, h),
	        'right': (w - edge_margin_px, 0, w, h),
	        'middle': ((w // 2) - (middle_margin_px // 2), 0, (w // 2) + (middle_margin_px // 2), h),
	    }

	    blur_var, blur_ok = check_blur_threshold(img_norm, threshold=blur_threshold)

	    text_lines = detect_text_easyocr(img_norm, reader)

	    # An image with no detected text has no confidences to average; report
	    # None instead of dividing by zero.
	    conf_values = [ln["conf"] for ln in text_lines if "conf" in ln]
	    ocr_mean_confidence = round(sum(conf_values) / len(conf_values), 2) if conf_values else None

	    allowed_words = set("winner of the 21st century emily dickinson award".split())
	    unauthorized_texts = []
	    text_in_safe_margin = []
	    results = []

	    for ln in text_lines:
	        bbox = ln['bbox']
	        area = rect_area(bbox)
	        ratio = (rect_intersection_area(bbox, badge) / area) if area > 0 else 0.0

	        # The flag is tracked per detected line, so two lines that share the
	        # same text are judged independently. Zero-area boxes are never flagged.
	        flagged = False
	        if area > 0:
	            text_words = set(ln['text'].lower().split())

	            # Text in the badge zone is allowed only when every word belongs
	            # to the permitted award wording.
	            if ratio >= overlap_threshold and not text_words.issubset(allowed_words):
	                unauthorized_texts.append(ln['text'])
	                flagged = True

	            # Any overlap at all with a safe-margin strip is a violation.
	            if any(rect_intersection_area(bbox, margin) > 0 for margin in safe_margins.values()):
	                text_in_safe_margin.append(ln['text'])
	                flagged = True

	        results.append({'text': ln['text'], 'conf': ln['conf'], 'bbox': bbox, 'overlap_ratio': ratio, 'flagged': flagged})

	    cover_valid = len(unauthorized_texts) == 0 and len(text_in_safe_margin) == 0
	    validation_message = "Cover is valid." if cover_valid else "Cover invalid due to unauthorized text in award zone or safe margins."

	    # The overlay is a debugging aid: failing to write it must not fail the
	    # validation, so any error here only clears the reported path.
	    overlay_path = None
	    try:
	        base = os.path.basename(path)
	        name = os.path.splitext(base)[0]
	        overlay_path = name + '_overlay.jpg'
	        draw_overlay_and_save(img_norm, badge, text_lines, overlay_path)
	    except Exception:
	        overlay_path = None

	    report = {
	        'file': path,
	        'orig_size': (orig_w, orig_h),
	        'dpi_inferred': current_dpi,
	        'dpi_used': dpi_used,
	        'badge_bbox': badge,
	        'blur_variance': blur_var,
	        'blur_ok': blur_ok,
	        'text_lines': results,
	        'cover_valid': cover_valid,
	        'unauthorized_text_in_award_zone': unauthorized_texts,
	        'text_in_safe_margin': text_in_safe_margin,
	        'validation_message': validation_message,
	        'overlay_path': overlay_path,
	        # NOTE(review): this value depends only on overlap_threshold, not on
	        # the OCR output; kept unchanged for existing consumers. Confirm
	        # whether ocr_mean_confidence was the intended figure.
	        'confidence_score': (round((1-overlap_threshold),2)*100),
	        'ocr_mean_confidence': ocr_mean_confidence,
	    }
	    return report

	# ---------------------- CLI / Bulk runner ----------------------

	def process_folder(input_dir, output_dir, target_dpi=300, overlap_threshold=0.01, blur_threshold=100.0):
	    """Validate every image file in ``input_dir`` and write reports to ``output_dir``.

	    Regular files ending in .png, .jpg, .jpeg, .tif or .tiff (case-insensitive)
	    are processed in sorted name order with ``process_image``. For each one the
	    overlay image, if any, is moved into ``output_dir`` and the report is
	    written to ``<stem>.json``. Finally ``index.json`` lists the processed
	    file paths.

	    Args:
	        input_dir: Directory containing the cover images.
	        output_dir: Directory for overlays and JSON reports; created if missing.
	        target_dpi: Forwarded to ``process_image``.
	        overlap_threshold: Forwarded to ``process_image``.
	        blur_threshold: Forwarded to ``process_image``.

	    Returns:
	        The list of report dicts, one per processed image.
	    """
	    # Local import: shutil.move also works when the overlay and output_dir
	    # live on different filesystems, where os.replace raises OSError.
	    import shutil

	    image_suffixes = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')
	    os.makedirs(output_dir, exist_ok=True)
	    reports = []
	    # Sorted so that reports and index.json come out in a reproducible order.
	    for fname in sorted(os.listdir(input_dir)):
	        if not fname.lower().endswith(image_suffixes):
	            continue
	        path = os.path.join(input_dir, fname)
	        # Skip directories (or other non-files) that merely look like images.
	        if not os.path.isfile(path):
	            continue
	        rpt = process_image(path, reader, target_dpi=target_dpi, overlap_threshold=overlap_threshold, blur_threshold=blur_threshold)
	        name = os.path.splitext(fname)[0]
	        if rpt['overlay_path']:
	            destination = os.path.join(output_dir, os.path.basename(rpt['overlay_path']))
	            try:
	                shutil.move(rpt['overlay_path'], destination)
	                rpt['overlay_path'] = destination
	            except OSError:
	                # Best effort: the report is still written, without an overlay.
	                rpt['overlay_path'] = None
	        out_json = os.path.join(output_dir, name + '.json')
	        with open(out_json, 'w', encoding='utf-8') as f:
	            json.dump(rpt, f, indent=2)
	        reports.append(rpt)
	    with open(os.path.join(output_dir, 'index.json'), 'w', encoding='utf-8') as f:
	        json.dump({'reports': [r['file'] for r in reports]}, f, indent=2)
	    return reports


	def cli():
	    """Parse command-line options and run ``process_folder``.

	    Options: ``--input-dir``, ``--output-dir``, ``--target-dpi``,
	    ``--overlap-threshold`` and ``--blur-threshold``. A missing input
	    directory is reported as a usage error before anything is written.
	    """
	    p = argparse.ArgumentParser(description="Validate book-cover images and write overlay images and JSON reports.")
	    # NOTE(review): the directory defaults are machine-specific absolute
	    # paths; they are kept unchanged so existing invocations behave the same.
	    p.add_argument('--input-dir', default="/Users/akki/Desktop/AKKI/Presonal Projects/Text overlay in book covers detection/test images",
	                   help="directory containing the cover images to validate")
	    p.add_argument('--output-dir', default="/Users/akki/Desktop/AKKI/Presonal Projects/Text overlay in book covers detection/output(front fix)frfrfr",
	                   help="directory that receives overlays and JSON reports")
	    p.add_argument('--target-dpi', type=int, default=300,
	                   help="DPI images are normalised to before measuring")
	    p.add_argument('--overlap-threshold', type=float, default=0.01,
	                   help="minimum fraction of a text box inside the badge zone to count as overlap")
	    p.add_argument('--blur-threshold', type=float, default=100.0,
	                   help="minimum variance of the Laplacian for an image to count as sharp")
	    args = p.parse_args()
	    # Fail with a clear usage error instead of a traceback after the output
	    # directory has already been created.
	    if not os.path.isdir(args.input_dir):
	        p.error(f"input directory not found: {args.input_dir}")
	    process_folder(args.input_dir, args.output_dir, target_dpi=args.target_dpi,
	                   overlap_threshold=args.overlap_threshold, blur_threshold=args.blur_threshold)

	# Run the command-line entry point only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == '__main__':
	    cli()