""" tampering.py - Smart document tampering for the Tamper Forge Studio. Each tamper function returns a dict: { "image": PIL.Image, "src_box": (x0,y0,x1,y1) or None, "dst_box": (x0,y0,x1,y1) or None, "description": str, "intensity": str, } """ import io, os, random, shutil import numpy as np from pathlib import Path from PIL import Image, ImageDraw, ImageFont, ImageFilter import cv2 try: import pytesseract _TESS_OK = False for _c in (shutil.which("tesseract"), r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe", r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe", os.path.expanduser(r"~\\AppData\\Local\\Programs\\Tesseract-OCR\\tesseract.exe")): if _c and os.path.isfile(_c): pytesseract.pytesseract.tesseract_cmd = _c _TESS_OK = True break if not _TESS_OK and shutil.which("tesseract"): _TESS_OK = True except ImportError: _TESS_OK = False def _font(size=18): for path in ("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", r"C:\\Windows\\Fonts\\arial.ttf", "DejaVuSans.ttf", "arial.ttf"): try: return ImageFont.truetype(path, size) except OSError: continue return ImageFont.load_default() _INTENSITY = { "subtle": {"box_scale": 0.7, "jpeg_q": 75, "amount_jump": 1.5}, "moderate": {"box_scale": 1.0, "jpeg_q": 50, "amount_jump": 3.0}, "aggressive": {"box_scale": 1.4, "jpeg_q": 25, "amount_jump": 8.0}, } def _profile(i): return _INTENSITY.get(i, _INTENSITY["moderate"]) def _find_salient_box(img, w_target=180, h_target=80): """High-variance region: likely a seal, signature, or stamp.""" arr = np.array(img.convert("L")) H, W = arr.shape scale = max(1, max(H, W) // 400) small = arr[::scale, ::scale] sh, sw = small.shape kernel = max(15, min(sh, sw) // 8) if kernel >= sh or kernel >= sw: return (W//2 - w_target//2, H//2 - h_target//2, W//2 + w_target//2, H//2 + h_target//2) mean = cv2.boxFilter(small.astype(np.float32), -1, (kernel, kernel)) sq_mean = cv2.boxFilter((small.astype(np.float32) ** 2), -1, (kernel, kernel)) var = sq_mean - mean ** 2 var[:kernel,:] = 0; var[-kernel:,:] = 0 var[:,:kernel] = 0; var[:,-kernel:] = 0 py, px = np.unravel_index(var.argmax(), var.shape) cx, cy = px * scale, py * scale return (max(0, cx - w_target//2), max(0, cy - h_target//2), min(W, cx + w_target//2), min(H, cy + h_target//2)) def _find_text_box_via_ocr(img, keywords=("Rs", "Date", "Amount", "Total", "Principal", "Stamp")): if not _TESS_OK: return None try: import pytesseract data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT) except Exception: return None for i, txt in enumerate(data.get("text", [])): for kw in keywords: if kw.lower() in txt.lower(): x, y, w, h = data["left"][i], data["top"][i], data["width"][i], data["height"][i] return (x, y, min(img.width, x + w * 8), y + h + 4) return None def tamper_copy_move(img, intensity="moderate", rng=None): rng = rng or random.Random() img = img.convert("RGB") arr = np.array(img) H, W = arr.shape[:2] p = _profile(intensity) bw, bh = int(180 * p["box_scale"]), int(90 * p["box_scale"]) sx0, sy0, sx1, sy1 = _find_salient_box(img, bw, bh) patch = arr[sy0:sy1, sx0:sx1].copy() tx0 = 40 if sx0 > W // 2 else max(40, W - (sx1 - sx0) - 40) ty0 = sy0 + (1 if sy0 < H // 2 else -1) * (bh + 20) ty0 = max(40, min(H - bh - 40, ty0)) tx1, ty1 = tx0 + (sx1 - sx0), ty0 + (sy1 - sy0) arr[ty0:ty1, tx0:tx1] = patch[: ty1 - ty0, : tx1 - tx0] return {"image": Image.fromarray(arr), "src_box": (sx0, sy0, sx1, sy1), "dst_box": (tx0, ty0, tx1, ty1), "description": f"Duplicated a {bw}x{bh}px high-variance region (likely a seal/signature) to another quadrant.", "intensity": intensity} def tamper_text_edit(img, intensity="moderate", rng=None): rng = rng or random.Random() img = img.convert("RGB").copy() p = _profile(intensity) W, H = img.size box = _find_text_box_via_ocr(img) used_ocr = box is not None if box is None: strip_y = int(H * 0.35); strip_h = int(36 * p["box_scale"]) box = (int(W * 0.15), strip_y, int(W * 0.60), strip_y + strip_h) x0, y0, x1, y1 = box d = ImageDraw.Draw(img) d.rectangle(box, fill="white") new_amount = int(10_00_000 * p["amount_jump"]) new_text = f"Rs {new_amount:,}" d.text((x0 + 6, y0 + 4), new_text, font=_font(int(20 * p["box_scale"])), fill="black") return {"image": img, "src_box": None, "dst_box": box, "description": f"Located a text/amount field via {'OCR' if used_ocr else 'fallback'} and rewrote it as '{new_text}'.", "intensity": intensity} def tamper_splice(img, donor=None, intensity="moderate", rng=None): rng = rng or random.Random() img = img.convert("RGB").copy() p = _profile(intensity) W, H = img.size if donor is None: sample_dir = Path("sample_data/originals") if sample_dir.exists(): cands = [f for f in sample_dir.glob("*.png") if f.stat().st_size > 5000] if cands: donor = Image.open(rng.choice(cands)).convert("RGB") if donor is None: donor = Image.fromarray(np.full((H, W, 3), [rng.randint(180, 255), rng.randint(120, 200), rng.randint(80, 160)], dtype=np.uint8)) donor = donor.resize((W, H)) arr = np.array(img); darr = np.array(donor) bw, bh = int(220 * p["box_scale"]), int(80 * p["box_scale"]) x = rng.randint(40, max(41, W - bw - 40)) y = rng.randint(int(H * 0.55), max(int(H * 0.55) + 1, H - bh - 40)) arr[y:y+bh, x:x+bw] = darr[y:y+bh, x:x+bw] return {"image": Image.fromarray(arr), "src_box": (x, y, x+bw, y+bh), "dst_box": (x, y, x+bw, y+bh), "description": f"Spliced a {bw}x{bh}px region from a different document into the lower area.", "intensity": intensity} def tamper_compression(img, intensity="moderate", rng=None): img = img.convert("RGB"); p = _profile(intensity) buf = io.BytesIO(); img.save(buf, "JPEG", quality=p["jpeg_q"]); buf.seek(0) return {"image": Image.open(buf).convert("RGB"), "src_box": None, "dst_box": None, "description": f"Re-saved at JPEG quality {p['jpeg_q']} (post-edit hide-the-evidence pattern).", "intensity": intensity} def tamper_metadata_strip(img, intensity="moderate", rng=None): img = img.convert("RGB") buf = io.BytesIO(); img.save(buf, "JPEG", quality=92); buf.seek(0) return {"image": Image.open(buf).convert("RGB"), "src_box": None, "dst_box": None, "description": "Stripped all EXIF metadata (mimics photo-editor export).", "intensity": intensity} def tamper_custom_region(img, box, intensity="moderate", rng=None): img = img.convert("RGB").copy() x0, y0, x1, y1 = [int(v) for v in box] crop = img.crop((x0, y0, x1, y1)) radius = {"subtle": 1.5, "moderate": 3.0, "aggressive": 6.0}.get(intensity, 3.0) crop = crop.filter(ImageFilter.GaussianBlur(radius=radius)) img.paste(crop, (x0, y0)) d = ImageDraw.Draw(img) d.rectangle([x0+4, y0+4, min(x1, x0+60), min(y1, y0+24)], fill="white") d.text((x0+8, y0+6), "EDITED", font=_font(12), fill="black") return {"image": img, "src_box": None, "dst_box": (x0, y0, x1, y1), "description": f"User-drawn region blurred (sigma={radius}) and overpainted - adversarial test.", "intensity": intensity} TAMPER_FUNCTIONS = { "copy_move": ("Copy-move", "Duplicates a salient region (e.g. seal). Classic forgery."), "text_edit": ("Text edit", "Whites out a value and writes a new one. Loan-document fraud."), "splice": ("Splice", "Pastes a region from a different doc. Noise inconsistency."), "compression": ("Re-save", "Low JPEG quality to hide tampering. ELA catches it."), "metadata": ("Strip EXIF", "Removes EXIF metadata. EXIF audit catches it."), "custom": ("Custom (adversarial)", "User-drawn region blurred and overpainted. Adversarial."), } def tamper_dispatch(name, img, intensity="moderate", donor=None, custom_box=None, rng=None): if name == "copy_move": return tamper_copy_move(img, intensity, rng) if name == "text_edit": return tamper_text_edit(img, intensity, rng) if name == "splice": return tamper_splice(img, donor, intensity, rng) if name == "compression": return tamper_compression(img, intensity, rng) if name == "metadata": return tamper_metadata_strip(img, intensity, rng) if name == "custom": if custom_box is None: raise ValueError("custom needs custom_box") return tamper_custom_region(img, custom_box, intensity, rng) raise ValueError(f"unknown tamper: {name}") def tamper_chain(img, names, intensity="moderate", rng=None): rng = rng or random.Random() current = img.convert("RGB") steps, last_src, last_dst = [], None, None for n in names: out = tamper_dispatch(n, current, intensity=intensity, rng=rng) current = out["image"] steps.append({"name": n, "description": out["description"], "dst_box": out["dst_box"]}) if out["dst_box"]: last_dst = out["dst_box"] if out["src_box"]: last_src = out["src_box"] return {"image": current, "src_box": last_src, "dst_box": last_dst, "description": " -> ".join(s["name"] for s in steps), "intensity": intensity, "steps": steps} def annotate_before_after(orig_img, tamper_meta, box_width=4): orig = orig_img.convert("RGB").copy() tampered = tamper_meta["image"].convert("RGB").copy() d_orig, d_tamp = ImageDraw.Draw(orig), ImageDraw.Draw(tampered) if tamper_meta.get("src_box"): x0, y0, x1, y1 = tamper_meta["src_box"] d_orig.rectangle(tamper_meta["src_box"], outline=(0, 200, 0), width=box_width) d_orig.rectangle([x0, max(0, y0-20), x0+90, y0], fill=(0, 200, 0)) d_orig.text((x0+4, max(0, y0-18)), "SOURCE", font=_font(12), fill="white") if tamper_meta.get("dst_box"): x0, y0, x1, y1 = tamper_meta["dst_box"] d_tamp.rectangle(tamper_meta["dst_box"], outline=(220, 30, 30), width=box_width) d_tamp.rectangle([x0, max(0, y0-20), x0+110, y0], fill=(220, 30, 30)) d_tamp.text((x0+4, max(0, y0-18)), "TAMPERED", font=_font(12), fill="white") return orig, tampered def overlay_heatmap_on_image(base_img, heat_2d, alpha=0.55, cmap="hot"): # Use the modern colormap registry (matplotlib >= 3.5), # fall back to the deprecated cm.get_cmap on older versions. try: import matplotlib as mpl cmap_fn = mpl.colormaps[cmap] except (AttributeError, KeyError): import matplotlib.cm as cm cmap_fn = cm.get_cmap(cmap) base = base_img.convert("RGBA") W, H = base.size h = heat_2d.astype(np.float32) if h.max() > 0: h = (h - h.min()) / (h.max() - h.min() + 1e-9) h_resized = cv2.resize(h, (W, H), interpolation=cv2.INTER_CUBIC) rgba = (cmap_fn(h_resized) * 255).astype(np.uint8) rgba[..., 3] = (h_resized * 255 * alpha).astype(np.uint8) overlay = Image.fromarray(rgba, mode="RGBA") return Image.alpha_composite(base, overlay).convert("RGB") def detector_scorecard(image_path): import forensics scores = {} _, ela_score = forensics.error_level_analysis(image_path) scores["ELA"] = {"score": min(ela_score / 25.0, 1.0), "raw": round(ela_score, 2), "what": "JPEG re-save artefacts"} _, cm_count, _ = forensics.copy_move_detect(image_path) scores["Copy-move (ORB)"] = {"score": min(cm_count / 50.0, 1.0), "raw": cm_count, "what": "Duplicated regions"} _, noise_ratio = forensics.noise_inconsistency(image_path) scores["Noise inconsistency"] = {"score": min(noise_ratio * 4, 1.0), "raw": round(noise_ratio, 3), "what": "Splicing / region mismatch"} exif_flags = forensics.exif_sanity(image_path) scores["EXIF metadata"] = {"score": 0.0 if exif_flags == ["exif clean"] else 0.6, "raw": "; ".join(exif_flags), "what": "Edit-tool fingerprints"} try: ml = forensics.predict_with_model(image_path) if ml is not None: scores["Random Forest"] = {"score": ml["tamper_probability"], "raw": ml["verdict"], "what": "Learned forensic-feature blend"} except Exception: pass for v in scores.values(): v["caught"] = v["score"] >= 0.4 return scores