Spaces:
Sleeping
Sleeping
| """ | |
| tampering.py - Smart document tampering for the Tamper Forge Studio. | |
| Each tamper function returns a dict: | |
| { | |
| "image": PIL.Image, | |
| "src_box": (x0,y0,x1,y1) or None, | |
| "dst_box": (x0,y0,x1,y1) or None, | |
| "description": str, | |
| "intensity": str, | |
| } | |
| """ | |
| import io, os, random, shutil | |
| import numpy as np | |
| from pathlib import Path | |
| from PIL import Image, ImageDraw, ImageFont, ImageFilter | |
| import cv2 | |
# Optional OCR support: _TESS_OK ends up True only when pytesseract imports
# and a tesseract executable can be located.
try:
    import pytesseract
except ImportError:
    _TESS_OK = False
else:
    # Probe PATH first, then the usual Windows install locations.
    for _c in (shutil.which("tesseract"),
               r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe",
               r"C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe",
               os.path.expanduser(r"~\\AppData\\Local\\Programs\\Tesseract-OCR\\tesseract.exe")):
        if not _c or not os.path.isfile(_c):
            continue
        pytesseract.pytesseract.tesseract_cmd = _c
        _TESS_OK = True
        break
    else:
        # No candidate was a regular file; still usable if PATH resolves it.
        _TESS_OK = bool(shutil.which("tesseract"))
def _font(size=18):
    """Return a TrueType font at *size*, or Pillow's default font.

    The candidate paths are tried in order; the first one Pillow can load
    wins. If none loads, ``ImageFont.load_default()`` is returned.
    """
    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        r"C:\\Windows\\Fonts\\arial.ttf",
        "DejaVuSans.ttf",
        "arial.ttf",
    )
    for candidate in candidates:
        try:
            face = ImageFont.truetype(candidate, size)
        except OSError:
            # Font not present on this machine; try the next candidate.
            continue
        return face
    return ImageFont.load_default()
# Per-intensity tuning knobs shared by the tamper functions:
#   box_scale   - multiplier applied to region / font sizes
#   jpeg_q      - JPEG quality used by the re-save tamper
#   amount_jump - multiplier applied to the rewritten amount
_INTENSITY = dict(
    subtle=dict(box_scale=0.7, jpeg_q=75, amount_jump=1.5),
    moderate=dict(box_scale=1.0, jpeg_q=50, amount_jump=3.0),
    aggressive=dict(box_scale=1.4, jpeg_q=25, amount_jump=8.0),
)


def _profile(i):
    """Return the settings dict for intensity *i*, or "moderate" if unknown."""
    try:
        return _INTENSITY[i]
    except KeyError:
        return _INTENSITY["moderate"]
def _find_salient_box(img, w_target=180, h_target=80):
    """High-variance region: likely a seal, signature, or stamp.

    Args:
        img: PIL image to analyse (converted to greyscale internally).
        w_target: Desired box width in pixels.
        h_target: Desired box height in pixels.

    Returns:
        ``(x0, y0, x1, y1)`` as plain ints, always clamped to the image
        bounds. The box is centred on the pixel with the highest local
        variance, or on the image centre when the image is too small for
        the variance window.
    """
    arr = np.array(img.convert("L"))
    H, W = arr.shape
    half_w, half_h = w_target // 2, h_target // 2

    def _clamped(cx, cy):
        # Keep every coordinate inside the image so callers can use the box
        # directly as slice bounds (negative values would wrap around).
        return (max(0, cx - half_w), max(0, cy - half_h),
                min(W, cx + half_w), min(H, cy + half_h))

    # Work on a subsampled copy so the longest side is roughly <= 400 px.
    scale = max(1, max(H, W) // 400)
    small = arr[::scale, ::scale].astype(np.float32)
    sh, sw = small.shape
    kernel = max(15, min(sh, sw) // 8)
    if kernel >= sh or kernel >= sw:
        # Image too small for the variance window: fall back to the centre.
        return _clamped(W // 2, H // 2)

    # Local variance via E[x^2] - E[x]^2 using box filters.
    mean = cv2.boxFilter(small, -1, (kernel, kernel))
    sq_mean = cv2.boxFilter(small ** 2, -1, (kernel, kernel))
    var = sq_mean - mean ** 2
    # Ignore a kernel-wide border so the peak is not picked on the page edge.
    var[:kernel, :] = 0
    var[-kernel:, :] = 0
    var[:, :kernel] = 0
    var[:, -kernel:] = 0

    py, px = np.unravel_index(var.argmax(), var.shape)
    return _clamped(int(px) * scale, int(py) * scale)
def _find_text_box_via_ocr(img, keywords=("Rs", "Date", "Amount", "Total", "Principal", "Stamp")):
    """Locate the first OCR word that contains one of *keywords*.

    Matching is a case-insensitive substring test. Returns ``None`` when
    tesseract is unavailable, when OCR raises, or when nothing matches.
    Otherwise returns ``(x0, y0, x1, y1)``: the word's top-left corner,
    stretched to 8x the word width (capped at the image width) and to the
    word height plus 4 px.
    """
    if not _TESS_OK:
        return None
    try:
        import pytesseract
        data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
    except Exception:
        # OCR is best-effort; the caller has its own fallback region.
        return None

    for idx, word in enumerate(data.get("text", [])):
        if not any(kw.lower() in word.lower() for kw in keywords):
            continue
        left = data["left"][idx]
        top = data["top"][idx]
        width = data["width"][idx]
        height = data["height"][idx]
        return (left, top, min(img.width, left + width * 8), top + height + 4)
    return None
def tamper_copy_move(img, intensity="moderate", rng=None):
    """Duplicate the most salient region of *img* elsewhere on the page.

    Args:
        img: Source PIL image.
        intensity: Key into the intensity profiles; scales the region size.
        rng: Accepted for signature parity with the other tamper functions;
            this tamper is deterministic and does not use it.

    Returns:
        The standard tamper dict. ``src_box`` is the copied region and
        ``dst_box`` is where it was pasted; both lie inside the image.
    """
    img = img.convert("RGB")
    arr = np.array(img)
    H, W = arr.shape[:2]
    p = _profile(intensity)
    bw, bh = int(180 * p["box_scale"]), int(90 * p["box_scale"])

    # Sanitise the source box so slicing never wraps around on small images.
    sx0, sy0, sx1, sy1 = (int(v) for v in _find_salient_box(img, bw, bh))
    sx0, sy0 = max(0, sx0), max(0, sy0)
    sx1, sy1 = min(W, sx1), min(H, sy1)
    patch = arr[sy0:sy1, sx0:sx1].copy()
    ph, pw = patch.shape[:2]

    # Paste on the opposite horizontal side, shifted one box-height up/down.
    tx0 = 40 if sx0 > W // 2 else max(40, W - pw - 40)
    ty0 = sy0 + (1 if sy0 < H // 2 else -1) * (bh + 20)
    ty0 = max(40, min(H - bh - 40, ty0))
    # The 40 px margins cannot be honoured on small images; make sure the
    # destination window still fits, otherwise the assignment below would
    # fail with a shape mismatch.
    tx0 = max(0, min(tx0, W - pw))
    ty0 = max(0, min(ty0, H - ph))
    tx1, ty1 = tx0 + pw, ty0 + ph
    arr[ty0:ty1, tx0:tx1] = patch

    return {"image": Image.fromarray(arr),
            "src_box": (sx0, sy0, sx1, sy1),
            "dst_box": (tx0, ty0, tx1, ty1),
            "description": f"Duplicated a {bw}x{bh}px high-variance region (likely a seal/signature) to another quadrant.",
            "intensity": intensity}
def tamper_text_edit(img, intensity="moderate", rng=None):
    """White out a text field and write a new amount over it.

    The field is located via OCR when possible; otherwise a fixed strip at
    35% of the page height is used. Returns the standard tamper dict with
    ``src_box`` set to ``None`` and ``dst_box`` set to the rewritten field.
    """
    rng = rng or random.Random()
    canvas = img.convert("RGB").copy()
    prof = _profile(intensity)
    page_w, page_h = canvas.size

    field = _find_text_box_via_ocr(canvas)
    if field is not None:
        method = "OCR"
    else:
        method = "fallback"
        top = int(page_h * 0.35)
        thickness = int(36 * prof["box_scale"])
        field = (int(page_w * 0.15), top, int(page_w * 0.60), top + thickness)
    left, upper, _right, _lower = field

    pen = ImageDraw.Draw(canvas)
    pen.rectangle(field, fill="white")
    new_amount = int(10_00_000 * prof["amount_jump"])
    new_text = f"Rs {new_amount:,}"
    typeface = _font(int(20 * prof["box_scale"]))
    pen.text((left + 6, upper + 4), new_text, font=typeface, fill="black")

    return {
        "image": canvas,
        "src_box": None,
        "dst_box": field,
        "description": f"Located a text/amount field via {method} and rewrote it as '{new_text}'.",
        "intensity": intensity,
    }
def tamper_splice(img, donor=None, intensity="moderate", rng=None):
    """Paste a region taken from a different document into the lower area.

    Args:
        img: Target PIL image.
        donor: Optional PIL image to take pixels from. When ``None``, a PNG
            larger than 5000 bytes is picked from ``sample_data/originals``;
            if none exists, a flat-colour synthetic donor is generated.
        intensity: Key into the intensity profiles; scales the region size.
        rng: ``random.Random`` used for donor choice and placement.

    Returns:
        The standard tamper dict. ``src_box`` and ``dst_box`` are the same
        rectangle, clamped to the image bounds.
    """
    rng = rng if rng is not None else random.Random()
    img = img.convert("RGB").copy()
    p = _profile(intensity)
    W, H = img.size

    if donor is None:
        sample_dir = Path("sample_data/originals")
        if sample_dir.exists():
            # Sort so a seeded rng picks the same file on every filesystem;
            # glob order is otherwise arbitrary.
            cands = sorted(f for f in sample_dir.glob("*.png")
                           if f.stat().st_size > 5000)
            if cands:
                # Close the file handle as soon as the pixels are copied.
                with Image.open(rng.choice(cands)) as fh:
                    donor = fh.convert("RGB")
    if donor is None:
        donor = Image.fromarray(np.full(
            (H, W, 3),
            [rng.randint(180, 255), rng.randint(120, 200), rng.randint(80, 160)],
            dtype=np.uint8))
    # A caller-supplied donor may be RGBA / L / P; force three channels so
    # the pixel assignment below has matching shapes.
    donor = donor.convert("RGB").resize((W, H))

    arr = np.array(img)
    darr = np.array(donor)
    bw, bh = int(220 * p["box_scale"]), int(80 * p["box_scale"])
    x = rng.randint(40, max(41, W - bw - 40))
    y = rng.randint(int(H * 0.55), max(int(H * 0.55) + 1, H - bh - 40))
    # Clamp to the image so the reported box matches the pixels changed.
    x0, y0 = min(x, W), min(y, H)
    x1, y1 = min(W, x0 + bw), min(H, y0 + bh)
    arr[y0:y1, x0:x1] = darr[y0:y1, x0:x1]

    return {"image": Image.fromarray(arr),
            "src_box": (x0, y0, x1, y1), "dst_box": (x0, y0, x1, y1),
            "description": f"Spliced a {bw}x{bh}px region from a different document into the lower area.",
            "intensity": intensity}
def tamper_compression(img, intensity="moderate", rng=None):
    """Round-trip *img* through an in-memory JPEG at the profile's quality.

    ``rng`` is accepted for signature parity and is not used. Returns the
    standard tamper dict with both boxes set to ``None``.
    """
    rgb = img.convert("RGB")
    quality = _profile(intensity)["jpeg_q"]

    stream = io.BytesIO()
    rgb.save(stream, "JPEG", quality=quality)
    stream.seek(0)
    recompressed = Image.open(stream).convert("RGB")

    return {
        "image": recompressed,
        "src_box": None,
        "dst_box": None,
        "description": f"Re-saved at JPEG quality {quality} (post-edit hide-the-evidence pattern).",
        "intensity": intensity,
    }
def tamper_metadata_strip(img, intensity="moderate", rng=None):
    """Re-encode *img* as a quality-92 JPEG in memory and reload it.

    ``intensity`` is only echoed back in the result and ``rng`` is unused.
    Returns the standard tamper dict with both boxes set to ``None``.
    """
    stream = io.BytesIO()
    img.convert("RGB").save(stream, "JPEG", quality=92)
    stream.seek(0)
    reloaded = Image.open(stream).convert("RGB")

    return dict(
        image=reloaded,
        src_box=None,
        dst_box=None,
        description="Stripped all EXIF metadata (mimics photo-editor export).",
        intensity=intensity,
    )
def tamper_custom_region(img, box, intensity="moderate", rng=None):
    """Blur a user-drawn region and stamp a small "EDITED" label on it.

    Args:
        img: Source PIL image.
        box: ``(x0, y0, x1, y1)``; corners may be given in any order and may
            extend past the image, in which case they are clipped.
        intensity: Selects the blur radius (1.5 / 3.0 / 6.0; default 3.0).
        rng: Accepted for signature parity; not used.

    Returns:
        The standard tamper dict; ``dst_box`` is the normalised, clipped box.

    Raises:
        ValueError: If the box covers no pixels inside the image.
    """
    img = img.convert("RGB").copy()
    W, H = img.size
    bx0, by0, bx1, by1 = [int(v) for v in box]
    # Users may drag right-to-left / bottom-to-top: normalise the corners,
    # then clip to the image so the blur does not pull in black padding.
    x0, x1 = sorted((bx0, bx1))
    y0, y1 = sorted((by0, by1))
    x0, y0 = max(0, x0), max(0, y0)
    x1, y1 = min(W, x1), min(H, y1)
    if x1 <= x0 or y1 <= y0:
        raise ValueError("custom_box has no area inside the image")

    crop = img.crop((x0, y0, x1, y1))
    radius = {"subtle": 1.5, "moderate": 3.0, "aggressive": 6.0}.get(intensity, 3.0)
    crop = crop.filter(ImageFilter.GaussianBlur(radius=radius))
    img.paste(crop, (x0, y0))

    # Only stamp the label when its background patch is a valid rectangle;
    # Pillow rejects rectangles whose far corner precedes the near corner.
    lx1, ly1 = min(x1, x0 + 60), min(y1, y0 + 24)
    if lx1 >= x0 + 4 and ly1 >= y0 + 4:
        d = ImageDraw.Draw(img)
        d.rectangle([x0 + 4, y0 + 4, lx1, ly1], fill="white")
        d.text((x0 + 8, y0 + 6), "EDITED", font=_font(12), fill="black")

    return {"image": img, "src_box": None, "dst_box": (x0, y0, x1, y1),
            "description": f"User-drawn region blurred (sigma={radius}) and overpainted - adversarial test.",
            "intensity": intensity}
# Catalogue of tamper kinds: dispatch key -> (short label, one-line blurb).
TAMPER_FUNCTIONS = dict(
    copy_move=(
        "Copy-move",
        "Duplicates a salient region (e.g. seal). Classic forgery.",
    ),
    text_edit=(
        "Text edit",
        "Whites out a value and writes a new one. Loan-document fraud.",
    ),
    splice=(
        "Splice",
        "Pastes a region from a different doc. Noise inconsistency.",
    ),
    compression=(
        "Re-save",
        "Low JPEG quality to hide tampering. ELA catches it.",
    ),
    metadata=(
        "Strip EXIF",
        "Removes EXIF metadata. EXIF audit catches it.",
    ),
    custom=(
        "Custom (adversarial)",
        "User-drawn region blurred and overpainted. Adversarial.",
    ),
)
def tamper_dispatch(name, img, intensity="moderate", donor=None, custom_box=None, rng=None):
    """Run the tamper function registered under *name* and return its dict.

    Raises:
        ValueError: If *name* is "custom" and ``custom_box`` is ``None``,
            or if *name* is not a known tamper key.
    """
    # Deferred calls keep the table cheap to build; only the match runs.
    routes = (
        ("copy_move", lambda: tamper_copy_move(img, intensity, rng)),
        ("text_edit", lambda: tamper_text_edit(img, intensity, rng)),
        ("splice", lambda: tamper_splice(img, donor, intensity, rng)),
        ("compression", lambda: tamper_compression(img, intensity, rng)),
        ("metadata", lambda: tamper_metadata_strip(img, intensity, rng)),
    )
    for key, run in routes:
        if name == key:
            return run()

    if name == "custom":
        if custom_box is not None:
            return tamper_custom_region(img, custom_box, intensity, rng)
        raise ValueError("custom needs custom_box")

    raise ValueError(f"unknown tamper: {name}")
def tamper_chain(img, names, intensity="moderate", rng=None, donor=None, custom_box=None):
    """Apply several tampers in sequence, feeding each result into the next.

    Args:
        img: Starting PIL image.
        names: Iterable of tamper keys understood by ``tamper_dispatch``.
        intensity: Intensity passed to every step.
        rng: Shared ``random.Random``; one is created when ``None``.
        donor: Optional donor image forwarded to every step (used by
            "splice").
        custom_box: Optional box forwarded to every step; required when
            *names* contains "custom".

    Returns:
        The standard tamper dict plus ``"steps"`` (one entry per tamper with
        its name, description and ``dst_box``). ``src_box`` / ``dst_box`` are
        the last non-empty boxes reported by any step, and ``description``
        is the step names joined with " -> ".
    """
    rng = rng if rng is not None else random.Random()
    current = img.convert("RGB")
    steps, last_src, last_dst = [], None, None
    for n in names:
        # Forward donor/custom_box so "splice" and "custom" work in a chain.
        out = tamper_dispatch(n, current, intensity=intensity, donor=donor,
                              custom_box=custom_box, rng=rng)
        current = out["image"]
        steps.append({"name": n, "description": out["description"], "dst_box": out["dst_box"]})
        if out["dst_box"]:
            last_dst = out["dst_box"]
        if out["src_box"]:
            last_src = out["src_box"]
    return {"image": current, "src_box": last_src, "dst_box": last_dst,
            "description": " -> ".join(s["name"] for s in steps),
            "intensity": intensity, "steps": steps}
def annotate_before_after(orig_img, tamper_meta, box_width=4):
    """Return ``(original, tampered)`` RGB copies with labelled outlines.

    The original gets a green "SOURCE" frame around ``src_box`` and the
    tampered image a red "TAMPERED" frame around ``dst_box``; a missing or
    empty box leaves that image unannotated.
    """
    def _frame(canvas, region, colour, caption, tab_width):
        # Outline the region and put a filled caption tab just above it.
        left, top, _right, _bottom = region
        pen = ImageDraw.Draw(canvas)
        pen.rectangle(region, outline=colour, width=box_width)
        pen.rectangle([left, max(0, top - 20), left + tab_width, top], fill=colour)
        pen.text((left + 4, max(0, top - 18)), caption, font=_font(12), fill="white")

    before = orig_img.convert("RGB").copy()
    after = tamper_meta["image"].convert("RGB").copy()

    source_region = tamper_meta.get("src_box")
    if source_region:
        _frame(before, source_region, (0, 200, 0), "SOURCE", 90)

    target_region = tamper_meta.get("dst_box")
    if target_region:
        _frame(after, target_region, (220, 30, 30), "TAMPERED", 110)

    return before, after
def overlay_heatmap_on_image(base_img, heat_2d, alpha=0.55, cmap="hot"):
    """Blend a colour-mapped heatmap over *base_img*.

    Args:
        base_img: PIL image used as the background.
        heat_2d: 2-D array-like of heat values; min-max normalised to
            [0, 1] before colouring. A constant map yields no overlay.
        alpha: Maximum overlay opacity, clamped to [0, 1]. Per-pixel opacity
            is ``alpha`` times the normalised heat.
        cmap: Matplotlib colormap name.

    Returns:
        An RGB PIL image the same size as *base_img*.
    """
    # Use the modern colormap registry (matplotlib >= 3.5),
    # fall back to the deprecated cm.get_cmap on older versions.
    try:
        import matplotlib as mpl
        cmap_fn = mpl.colormaps[cmap]
    except (AttributeError, KeyError):
        import matplotlib.cm as cm
        cmap_fn = cm.get_cmap(cmap)

    base = base_img.convert("RGBA")
    W, H = base.size

    h = np.asarray(heat_2d, dtype=np.float32)
    lo, hi = float(h.min()), float(h.max())
    # Normalise on the value range rather than on "max > 0", so maps that
    # are entirely negative are scaled too.
    if hi > lo:
        h = (h - lo) / (hi - lo)
    else:
        h = np.zeros_like(h)

    # Cubic interpolation can overshoot outside [0, 1]; a negative value
    # would wrap around in the uint8 cast and become an opaque speck.
    h_resized = np.clip(cv2.resize(h, (W, H), interpolation=cv2.INTER_CUBIC), 0.0, 1.0)
    opacity = float(np.clip(alpha, 0.0, 1.0))

    rgba = (cmap_fn(h_resized) * 255).astype(np.uint8)
    rgba[..., 3] = (h_resized * 255 * opacity).astype(np.uint8)
    # Mode is inferred as RGBA from the (H, W, 4) uint8 array.
    overlay = Image.fromarray(rgba)
    return Image.alpha_composite(base, overlay).convert("RGB")
def detector_scorecard(image_path):
    """Run every detector in ``forensics`` on *image_path* and score them.

    Returns:
        A dict keyed by detector name. Each value holds ``score`` (each
        heuristic score is capped at 1.0), ``raw`` (the detector's own
        output), ``what`` (what the detector looks for) and ``caught``
        (``True`` when ``score >= 0.4``). The "Random Forest" entry is
        present only when the model returns a prediction.
    """
    import logging
    import forensics

    scores = {}

    _, ela_score = forensics.error_level_analysis(image_path)
    scores["ELA"] = {"score": min(ela_score / 25.0, 1.0), "raw": round(ela_score, 2),
                     "what": "JPEG re-save artefacts"}

    _, cm_count, _ = forensics.copy_move_detect(image_path)
    scores["Copy-move (ORB)"] = {"score": min(cm_count / 50.0, 1.0), "raw": cm_count,
                                 "what": "Duplicated regions"}

    _, noise_ratio = forensics.noise_inconsistency(image_path)
    scores["Noise inconsistency"] = {"score": min(noise_ratio * 4, 1.0),
                                     "raw": round(noise_ratio, 3),
                                     "what": "Splicing / region mismatch"}

    exif_flags = forensics.exif_sanity(image_path)
    scores["EXIF metadata"] = {"score": 0.0 if exif_flags == ["exif clean"] else 0.6,
                               "raw": "; ".join(exif_flags),
                               "what": "Edit-tool fingerprints"}

    # The learned model is optional: keep the scorecard usable without it,
    # but record the failure instead of discarding it silently.
    try:
        ml = forensics.predict_with_model(image_path)
        if ml is not None:
            scores["Random Forest"] = {"score": ml["tamper_probability"],
                                       "raw": ml["verdict"],
                                       "what": "Learned forensic-feature blend"}
    except Exception:
        logging.getLogger(__name__).warning(
            "Model prediction failed for %s; omitting it from the scorecard",
            image_path, exc_info=True)

    for v in scores.values():
        v["caught"] = v["score"] >= 0.4
    return scores