"""
DeepGuard — Metadata Forensics Module

Extracts EXIF data and checks for known AI software signatures.
All processing is stateless and in-memory.
"""

import io
import struct
from typing import Optional

from PIL import Image

# Known AI generation software signatures to look for in EXIF/metadata.
# Matching is case-insensitive substring search, and the first entry that
# matches is the one reported.
AI_SIGNATURES = [
    "DALL-E", "dall-e",
    "Midjourney", "midjourney",
    "Stable Diffusion", "stable-diffusion", "StableDiffusion",
    "Adobe Firefly", "firefly",
    "Sora", "sora",
    "Imagen", "imagen",
    "Bing Image Creator",
    "NightCafe",
    "Craiyon",
    "FLUX", "flux",
    "Runway", "runway",
    "Pika", "pika",
    "ComfyUI", "comfyui",
    "Automatic1111",
    "InvokeAI",
    "NovelAI", "novelai",
    "Leonardo", "leonardo.ai",
]

# EXIF tags whose text is searched for signatures, in search order.
_EXIF_TEXT_TAGS = {
    305: "Software",
    315: "Artist",
    270: "ImageDescription",
    37510: "UserComment",
}

# IFD0 tag pointing at the Exif sub-IFD, which is where UserComment lives.
_EXIF_IFD_POINTER = 0x8769

# UserComment values start with an 8-byte character-code identifier.
_USER_COMMENT_UNICODE = b"UNICODE\x00"
_USER_COMMENT_ASCII = b"ASCII\x00\x00\x00"


def _decode_exif_text(value) -> str:
    """Return an EXIF tag value as text suitable for signature matching.

    Non-bytes values are passed through ``str``. Bytes values are decoded
    according to the UserComment character-code prefix when one is present
    (UTF-16 for ``UNICODE``, ASCII for ``ASCII``) and as UTF-8 otherwise.
    Undecodable bytes are dropped rather than raised on.
    """
    if not isinstance(value, bytes):
        return str(value)

    if value.startswith(_USER_COMMENT_UNICODE):
        payload = value[len(_USER_COMMENT_UNICODE):]
        if payload[:2] in (b"\xfe\xff", b"\xff\xfe"):
            codec = "utf-16"  # a BOM settles the byte order
        elif payload[0::2].count(0) >= payload[1::2].count(0):
            # No BOM: mostly-ASCII big-endian text has its NUL bytes first.
            codec = "utf-16-be"
        else:
            codec = "utf-16-le"
        text = payload.decode(codec, errors="ignore")
    elif value.startswith(_USER_COMMENT_ASCII):
        text = value[len(_USER_COMMENT_ASCII):].decode("ascii", errors="ignore")
    else:
        text = value.decode("utf-8", errors="ignore")

    # Drop NUL terminators/padding so they cannot split a signature.
    return text.replace("\x00", "")


def _match_signature(text: str) -> Optional[str]:
    """Return the first AI_SIGNATURES entry found in *text*, ignoring case."""
    haystack = text.lower()
    return next((sig for sig in AI_SIGNATURES if sig.lower() in haystack), None)


def _read_exif_tags(img: "Image.Image") -> dict:
    """Return IFD0 tags merged with the Exif sub-IFD; empty dict if no EXIF."""
    exif = img.getexif()
    tags = dict(exif)
    # `or {}` guards against a sub-IFD that could not be parsed.
    tags.update(exif.get_ifd(_EXIF_IFD_POINTER) or {})
    return tags


def extract_metadata(image_bytes: bytes) -> dict:
    """
    Perform forensic metadata analysis on raw image bytes.

    The EXIF Software, Artist, ImageDescription and UserComment tags are
    searched for a known AI signature first; if none is found there, the
    values of PIL's generic ``info`` dict (PNG tEXt chunks, etc.) are
    searched as well.

    Args:
        image_bytes: Encoded image file contents.

    Returns:
        dict with keys:
            exif_data_present: True when the image carries EXIF tags or any
                entry in PIL's ``info`` dict (i.e. any metadata at all).
            software_signature_found: The matching AI_SIGNATURES entry, or
                the string "None" when nothing matched.
            warning: Human-readable summary of the finding, or a
                "Metadata parsing error: ..." message if parsing failed.

    No exception escapes: failures are reported through ``warning``.
    """
    exif_present = False
    software_found = "None"
    warning = ""

    try:
        # The context manager releases the decoder's resources promptly.
        with Image.open(io.BytesIO(image_bytes)) as img:
            exif_tags = _read_exif_tags(img)
            info = getattr(img, "info", None) or {}

            # Any metadata counts as "present", not just EXIF proper.
            exif_present = bool(exif_tags) or bool(info)

            match = None
            for tag_id in _EXIF_TEXT_TAGS:
                tag_text = _decode_exif_text(exif_tags.get(tag_id, ""))
                match = _match_signature(tag_text)
                if match is not None:
                    break

            # Generators often record themselves in text chunks rather than
            # EXIF, so look there too when EXIF gave no hit.
            if match is None and info:
                match = _match_signature(" ".join(str(v) for v in info.values()))

            if match is not None:
                software_found = match

        # Build warning message
        if not exif_present:
            warning = (
                "EXIF data missing. This is a strong indicator of synthesized media — "
                "AI generators strip or never write camera metadata."
            )
        elif software_found != "None":
            warning = (
                f"AI software signature detected: '{software_found}'. "
                "This image was almost certainly generated by an AI tool."
            )
        else:
            warning = (
                "EXIF data present. Metadata appears consistent with a camera-captured image, "
                "but AI-generated images can be post-processed to include fake EXIF."
            )

    except Exception as e:
        # Deliberate best-effort boundary: report the failure to the caller
        # instead of raising.
        warning = f"Metadata parsing error: {e}"

    return {
        "exif_data_present": exif_present,
        "software_signature_found": software_found,
        "warning": warning,
    }