# Spaces: AKKI-AFK / Cover_Overlap_Detection (Sleeping)
# File size: 9,215 Bytes
# 84693e0

"""
cover_pipeline_prototype.py
Updated prototype pipeline using EasyOCR for text detection and OCR.

Requirements:
- Python 3.8+
- Install packages: pip install opencv-python pillow easyocr numpy imutils

Usage examples:
python cover_pipeline_prototype.py --input-dir ./covers --output-dir ./out
"""

import argparse
import json
import os
import shutil
from math import floor

import cv2
import easyocr
import numpy as np
from PIL import Image, ImageDraw

# ---------------------- Utilities ----------------------
# Shared EasyOCR reader for English text, created with gpu=False.
# It is built once when this module is imported; process_image() uses it as
# the default value of its `reader` parameter and process_folder() passes it
# explicitly for every file.
reader = easyocr.Reader(['en'], gpu=False)

def mm_to_inches(mm):
    """Convert a length in millimetres to inches (25.4 mm per inch)."""
    mm_per_inch = 25.4
    return mm / mm_per_inch


def inches_to_pixels(inches, dpi):
    """Convert a length in inches to a whole number of pixels at ``dpi``.

    The exact product is rounded with the built-in ``round`` and returned
    as an ``int``.
    """
    exact_pixels = inches * dpi
    return int(round(exact_pixels))


def mm_to_pixels(mm, dpi):
    """Convert a length in millimetres to whole pixels at ``dpi``.

    Goes through inches: ``mm_to_inches`` first, then ``inches_to_pixels``.
    """
    length_in_inches = mm_to_inches(mm)
    return inches_to_pixels(length_in_inches, dpi)


def read_image(path):
    """Load the image at ``path`` and return it converted to RGB mode.

    Args:
        path: Location of the image file, passed to ``Image.open``.

    Returns:
        A new PIL image in ``'RGB'`` mode.

    Raises:
        Whatever ``Image.open`` / ``convert`` raise for a missing or
        unreadable file.
    """
    # Image.open is lazy and keeps the underlying file open. convert() loads
    # the pixel data and returns an independent image, so the source can be
    # closed as soon as the conversion is done instead of leaking the handle.
    with Image.open(path) as source:
        return source.convert('RGB')


def get_image_dpi(img: Image.Image):
    """Return the horizontal DPI recorded in ``img.info``, or ``None``.

    Only the first element of the ``'dpi'`` entry is used.  The value is
    rounded to the nearest integer rather than truncated, because some
    formats report fractional values (e.g. 299.9994 for a 300 DPI PNG) and
    truncating those would make a correctly sized image look mis-scaled.

    Args:
        img: Object exposing an ``info`` mapping, normally a PIL image.

    Returns:
        A positive ``int`` DPI, or ``None`` when the entry is missing, is not
        a non-empty tuple/list, is not numeric, or is not positive.
    """
    try:
        dpi = img.info.get('dpi')
        if not isinstance(dpi, (tuple, list)) or not dpi:
            return None
        horizontal = int(round(float(dpi[0])))
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Missing/odd `info`, non-numeric entries, NaN or infinity.
        return None
    # A DPI of zero or less is meaningless and would later cause a division
    # by zero when computing the resize factor; report it as unknown.
    return horizontal if horizontal > 0 else None


def normalize_to_dpi(img: Image.Image, current_dpi: int, target_dpi: int):
    """Rescale ``img`` so that its pixel dimensions correspond to ``target_dpi``.

    Args:
        img: Image to rescale.
        current_dpi: DPI the image is currently at, or ``None`` if unknown.
            A value of zero or less is treated the same as unknown.
        target_dpi: DPI the returned image should correspond to.

    Returns:
        A ``(image, dpi)`` tuple.  When the current DPI is unknown the input
        image is returned unchanged and reported as being at ``target_dpi``;
        when it already equals ``target_dpi`` the input is returned as is;
        otherwise a LANCZOS-resampled copy is returned with ``target_dpi``.
    """
    # A non-positive DPI cannot be used as a divisor; handle it like a missing
    # value instead of raising ZeroDivisionError or producing a bogus scale.
    if current_dpi is None or current_dpi <= 0:
        return img, target_dpi
    if current_dpi == target_dpi:
        return img, current_dpi
    scale = target_dpi / float(current_dpi)
    # Keep each side at least one pixel so resize() never gets a zero size.
    new_w = max(1, int(round(img.width * scale)))
    new_h = max(1, int(round(img.height * scale)))
    resized = img.resize((new_w, new_h), resample=Image.LANCZOS)
    return resized, target_dpi

# ---------------------- Badge zone ----------------------

def compute_badge_zone(img_w_px, img_h_px, dpi, badge_height_mm=9):
    """Return the badge rectangle as ``(x1, y1, x2, y2)`` in pixels.

    The rectangle spans from the horizontal midpoint of the image to its
    right edge, and covers the bottom ``badge_height_mm`` millimetres
    (converted to pixels at ``dpi``).
    """
    strip_height_px = mm_to_pixels(badge_height_mm, dpi)
    left = img_w_px // 2
    top = img_h_px - strip_height_px
    return (left, top, img_w_px, img_h_px)

# ---------------------- Image quality ----------------------

def variance_of_laplacian(img_cv_gray):
    """Return the variance of the 64-bit float Laplacian of ``img_cv_gray``."""
    laplacian = cv2.Laplacian(img_cv_gray, cv2.CV_64F)
    return laplacian.var()


def check_blur_threshold(pil_img: Image.Image, threshold=100.0):
    """Measure sharpness and compare it with ``threshold``.

    The image is converted to grayscale and scored with
    ``variance_of_laplacian``.

    Returns:
        ``(variance, ok)`` where ``variance`` is a ``float`` and ``ok`` is
        ``True`` when the variance is at least ``threshold``.
    """
    rgb_pixels = np.array(pil_img)
    gray = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    sharpness = variance_of_laplacian(gray)
    is_sharp = sharpness >= threshold
    return float(sharpness), bool(is_sharp)

# ---------------------- EasyOCR text detection ----------------------

def detect_text_easyocr(pil_img: Image.Image, reader):
    """Run ``reader.readtext`` on the image and return one dict per detection.

    The image is converted from RGB to BGR channel order before being handed
    to the reader.  Each returned dict has the keys ``'text'`` (stripped),
    ``'conf'`` (float) and ``'bbox'``: the axis-aligned bounding box
    ``(x1, y1, x2, y2)`` of the detected corner points, as plain ints.
    """
    bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    def to_line(detection):
        # Each detection unpacks into (corner points, text, confidence).
        quad, text, conf = detection
        corners = np.array(quad).astype(int)
        xs, ys = corners[:, 0], corners[:, 1]
        box = (int(np.min(xs)), int(np.min(ys)), int(np.max(xs)), int(np.max(ys)))
        return {'text': text.strip(), 'conf': float(conf), 'bbox': box}

    return [to_line(detection) for detection in reader.readtext(bgr)]

# ---------------------- Overlap math ----------------------

def rect_intersection_area(a, b):
    """Return the overlap area of two ``(x1, y1, x2, y2)`` rectangles.

    Rectangles that merely touch, or do not meet at all, yield ``0``.
    """
    left, top = max(a[0], b[0]), max(a[1], b[1])
    right, bottom = min(a[2], b[2]), min(a[3], b[3])
    if not (right <= left or bottom <= top):
        return (right - left) * (bottom - top)
    return 0


def rect_area(r):
    """Return the area of an ``(x1, y1, x2, y2)`` rectangle.

    A negative width or height is clamped to zero, so an inverted rectangle
    has area ``0``.
    """
    width = max(0, r[2] - r[0])
    height = max(0, r[3] - r[1])
    return width * height

# ---------------------- Overlay & report ----------------------

def draw_overlay_and_save(pil_img: Image.Image, badge_rect, text_lines, out_path):
    """Save a copy of the image with the badge and text boxes outlined.

    The badge rectangle is outlined in red (3 px) and the ``'bbox'`` of every
    entry in ``text_lines`` in blue (2 px).  Drawing happens on an RGBA copy,
    which is converted back to RGB and written to ``out_path``.
    """
    canvas = pil_img.convert('RGBA')
    pen = ImageDraw.Draw(canvas)
    pen.rectangle(badge_rect, outline='red', width=3)
    for line in text_lines:
        pen.rectangle(line['bbox'], outline='blue', width=2)
    flattened = canvas.convert('RGB')
    flattened.save(out_path)

# ---------------------- Pipeline per image ----------------------

def process_image(path, reader=reader, target_dpi=300, overlap_threshold=0.01, blur_threshold=100.0):
    """Validate one cover image and return a JSON-serialisable report.

    The image is loaded, rescaled towards ``target_dpi`` by
    ``normalize_to_dpi``, scored for blur and run through OCR.  Every
    detected text line with a non-zero area is then checked against:

    * the badge zone from ``compute_badge_zone``: a line whose overlap ratio
      (intersection area / line area) reaches ``overlap_threshold`` and that
      contains any word outside the allowed award phrase is recorded as
      unauthorized;
    * the safe margins: 3 mm strips along the left and right edges and a
      6 mm strip centred on the horizontal midpoint.  A line that intersects
      any of them is recorded.

    The cover is valid only when both lists are empty.  An overlay image
    named ``<basename>_overlay.jpg`` is written to the current working
    directory on a best-effort basis.

    Args:
        path: Image file to analyse.
        reader: Object with an EasyOCR-style ``readtext`` method; defaults to
            the module-level reader.
        target_dpi: DPI the image is normalised to before measuring.
        overlap_threshold: Minimum badge overlap ratio for a line to count as
            being inside the badge zone.
        blur_threshold: Minimum Laplacian variance for ``blur_ok``.

    Returns:
        dict with the keys ``file``, ``orig_size``, ``dpi_inferred``,
        ``dpi_used``, ``badge_bbox``, ``blur_variance``, ``blur_ok``,
        ``text_lines``, ``cover_valid``, ``unauthorized_text_in_award_zone``,
        ``text_in_safe_margin``, ``validation_message``, ``overlay_path``,
        ``confidence_score`` and ``ocr_mean_confidence`` (mean OCR confidence
        rounded to two decimals, or ``None`` when no text was detected).
    """
    img_pil = read_image(path)
    orig_w, orig_h = img_pil.width, img_pil.height
    current_dpi = get_image_dpi(img_pil)
    img_norm, dpi_used = normalize_to_dpi(img_pil, current_dpi, target_dpi)

    w, h = img_norm.width, img_norm.height
    badge = compute_badge_zone(w, h, dpi_used)

    edge_margin_px = mm_to_pixels(3, dpi_used)
    half_middle_px = mm_to_pixels(6, dpi_used) // 2
    center_x = w // 2
    safe_margins = {
        'left': (0, 0, edge_margin_px, h),
        'right': (w - edge_margin_px, 0, w, h),
        'middle': (center_x - half_middle_px, 0, center_x + half_middle_px, h),
    }

    blur_var, blur_ok = check_blur_threshold(img_norm, threshold=blur_threshold)

    text_lines = detect_text_easyocr(img_norm, reader)

    # A cover without any detected text is legitimate; guard the mean so it
    # does not raise ZeroDivisionError.
    conf_values = [ln["conf"] for ln in text_lines if "conf" in ln]
    ocr_mean_confidence = round(sum(conf_values) / len(conf_values), 2) if conf_values else None

    allowed_words = set("winner of the 21st century emily dickinson award".split())
    unauthorized_texts = []
    text_in_safe_margin = []
    results = []

    for ln in text_lines:
        bbox = ln['bbox']
        area = rect_area(bbox)
        ratio = 0.0
        # Track the verdict per detection: deciding it by text membership
        # would also flag an identical string found elsewhere on the cover.
        flagged = False

        # Degenerate (zero-area) boxes are reported but never validated.
        if area > 0:
            ratio = rect_intersection_area(bbox, badge) / area
            text_words = set(ln['text'].lower().split())

            # Unauthorized text inside the award zone.
            if ratio >= overlap_threshold and not text_words.issubset(allowed_words):
                unauthorized_texts.append(ln['text'])
                flagged = True

            # Text reaching into any safe margin.
            if any(rect_intersection_area(bbox, margin) > 0 for margin in safe_margins.values()):
                text_in_safe_margin.append(ln['text'])
                flagged = True

        results.append({'text': ln['text'], 'conf': ln['conf'], 'bbox': bbox, 'overlap_ratio': ratio, 'flagged': flagged})

    cover_valid = len(unauthorized_texts) == 0 and len(text_in_safe_margin) == 0
    validation_message = "Cover is valid." if cover_valid else "Cover invalid due to unauthorized text in award zone or safe margins."

    overlay_path = None
    try:
        base = os.path.basename(path)
        name = os.path.splitext(base)[0]
        overlay_path = name + '_overlay.jpg'
        draw_overlay_and_save(img_norm, badge, text_lines, overlay_path)
    except Exception:
        # Deliberately best-effort: a failed overlay must not fail validation.
        overlay_path = None

    report = {
        'file': path,
        'orig_size': (orig_w, orig_h),
        'dpi_inferred': current_dpi,
        'dpi_used': dpi_used,
        'badge_bbox': badge,
        'blur_variance': blur_var,
        'blur_ok': blur_ok,
        'text_lines': results,
        'cover_valid': cover_valid,
        'unauthorized_text_in_award_zone': unauthorized_texts,
        'text_in_safe_margin': text_in_safe_margin,
        'validation_message': validation_message,
        'overlay_path': overlay_path,
        # NOTE(review): this value depends only on overlap_threshold, not on
        # the OCR output; it is kept unchanged for existing consumers.  The
        # measured OCR confidence is reported under 'ocr_mean_confidence'.
        'confidence_score': (round((1-overlap_threshold),2)*100),
        'ocr_mean_confidence': ocr_mean_confidence,
    }
    return report

# ---------------------- CLI / Bulk runner ----------------------

def process_folder(input_dir, output_dir, target_dpi=300, overlap_threshold=0.01, blur_threshold=100.0):
    """Run ``process_image`` on every image file in ``input_dir``.

    Files are selected by extension (.png, .jpg, .jpeg, .tif, .tiff,
    case-insensitive) and processed in sorted name order.  For each image
    the overlay, if one was produced, is moved into ``output_dir`` and the
    report is written to ``<name>.json`` there.  Finally ``index.json`` is
    written with the list of processed file paths.

    Args:
        input_dir: Directory to scan (not recursive).
        output_dir: Directory for overlays and JSON reports; created if
            missing.
        target_dpi: Forwarded to ``process_image``.
        overlap_threshold: Forwarded to ``process_image``.
        blur_threshold: Forwarded to ``process_image``.

    Returns:
        The list of report dicts, in processing order.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')
    os.makedirs(output_dir, exist_ok=True)
    reports = []
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for fname in sorted(os.listdir(input_dir)):
        path = os.path.join(input_dir, fname)
        # Skip other file types and directories that merely look like images.
        if not fname.lower().endswith(image_exts) or not os.path.isfile(path):
            continue
        rpt = process_image(path, reader, target_dpi=target_dpi, overlap_threshold=overlap_threshold, blur_threshold=blur_threshold)
        name = os.path.splitext(fname)[0]
        if rpt['overlay_path']:
            dest = os.path.join(output_dir, os.path.basename(rpt['overlay_path']))
            try:
                # shutil.move also works when the working directory and
                # output_dir are on different filesystems, where os.replace
                # fails.
                shutil.move(rpt['overlay_path'], dest)
            except OSError:
                rpt['overlay_path'] = None
            else:
                rpt['overlay_path'] = dest
        out_json = os.path.join(output_dir, name + '.json')
        with open(out_json, 'w', encoding='utf-8') as f:
            json.dump(rpt, f, indent=2)
        reports.append(rpt)
    with open(os.path.join(output_dir, 'index.json'), 'w', encoding='utf-8') as f:
        json.dump({'reports': [r['file'] for r in reports]}, f, indent=2)
    return reports


def cli(argv=None):
    """Parse command-line options and process a folder of cover images.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv``.

    Exits with argparse's usage error (status 2) when the input directory
    does not exist, before anything is created on disk.
    """
    p = argparse.ArgumentParser()
    p.add_argument('--input-dir', default="/Users/akki/Desktop/AKKI/Presonal Projects/Text overlay in book covers detection/test images",
                   help='directory containing the cover images to check')
    p.add_argument('--output-dir', default="/Users/akki/Desktop/AKKI/Presonal Projects/Text overlay in book covers detection/output(front fix)frfrfr",
                   help='directory that receives overlays and JSON reports')
    p.add_argument('--target-dpi', type=int, default=300)
    p.add_argument('--overlap-threshold', type=float, default=0.01)
    p.add_argument('--blur-threshold', type=float, default=100.0)
    args = p.parse_args(argv)
    # The defaults are machine-specific paths.  Fail with a clear message
    # instead of creating the output directory and then crashing while
    # listing a missing input directory.
    if not os.path.isdir(args.input_dir):
        p.error(f"input directory not found: {args.input_dir!r}")
    process_folder(args.input_dir, args.output_dir, target_dpi=args.target_dpi,
                   overlap_threshold=args.overlap_threshold, blur_threshold=args.blur_threshold)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    cli()