Spaces:
Sleeping
Sleeping
| """ | |
| DeepGuard — Metadata Forensics Module | |
| Extracts EXIF data and checks for known AI software signatures. | |
| All processing is stateless and in-memory. | |
| """ | |
| import io | |
| import struct | |
| from typing import Optional | |
| from PIL import Image | |
# Substrings that mark an image as produced by an AI generation tool when
# they appear in EXIF fields or other embedded metadata.  Order matters:
# the first entry that matches is the one reported to the caller.
AI_SIGNATURES = [
    "DALL-E",
    "dall-e",
    "Midjourney",
    "midjourney",
    "Stable Diffusion",
    "stable-diffusion",
    "StableDiffusion",
    "Adobe Firefly",
    "firefly",
    "Sora",
    "sora",
    "Imagen",
    "imagen",
    "Bing Image Creator",
    "NightCafe",
    "Craiyon",
    "FLUX",
    "flux",
    "Runway",
    "runway",
    "Pika",
    "pika",
    "ComfyUI",
    "comfyui",
    "Automatic1111",
    "InvokeAI",
    "NovelAI",
    "novelai",
    "Leonardo",
    "leonardo.ai",
]
# EXIF tags whose text is searched for generator signatures, in priority order.
_EXIF_TEXT_TAGS = (
    305,    # Software
    315,    # Artist
    270,    # ImageDescription
    37510,  # UserComment
)


def _metadata_text(value) -> str:
    """Return a metadata value as plain text suitable for substring search."""
    if isinstance(value, bytes):
        # EXIF UserComment is often stored as UTF-16 ("UNICODE" charset), which
        # leaves a NUL between every ASCII character.  Dropping NUL bytes keeps
        # ASCII signatures contiguous so they can still be matched.
        return value.replace(b"\x00", b"").decode("utf-8", errors="ignore")
    return str(value)


def _find_ai_signature(text: str) -> str:
    """Return the first AI_SIGNATURES entry contained in text, else "None"."""
    haystack = text.lower()
    for sig in AI_SIGNATURES:
        if sig.lower() in haystack:
            return sig
    return "None"


def extract_metadata(image_bytes: bytes) -> dict:
    """
    Perform forensic metadata analysis on raw image bytes.

    The image is opened in memory only. EXIF text tags (Software, Artist,
    ImageDescription, UserComment) are searched first; if none of them holds
    a known AI signature, the format-specific ``info`` dict (PNG text chunks,
    etc.) is searched as well. Matching is case-insensitive.

    Args:
        image_bytes: Encoded image file contents.

    Returns:
        dict with keys:
            exif_data_present: True when EXIF data or any other entry in the
                image's ``info`` dict was found.
            software_signature_found: The matching AI_SIGNATURES entry, or
                the string "None" when nothing matched.
            warning: Human-readable summary, or a "Metadata parsing error"
                message if the image could not be analysed.
    """
    exif_present = False
    software_found = "None"
    warning = ""
    try:
        # The context manager releases the decoder/file handle when done.
        with Image.open(io.BytesIO(image_bytes)) as img:
            # --- EXIF analysis ---
            # NOTE(review): `_getexif` is a private Pillow method that only
            # some image plugins provide; others fall through to `info`.
            exif_data = img._getexif() if hasattr(img, "_getexif") else None
            if exif_data:
                exif_present = True
                for tag_id in _EXIF_TEXT_TAGS:
                    software_found = _find_ai_signature(
                        _metadata_text(exif_data.get(tag_id, ""))
                    )
                    if software_found != "None":
                        break

            # --- Generic metadata (PNG tEXt chunks, etc.) ---
            # Checked whenever EXIF produced no match, so a signature stored
            # outside EXIF is not missed just because EXIF also exists.
            if software_found == "None":
                info = getattr(img, "info", {})
                if info:
                    exif_present = True  # Has some metadata
                    info_text = " ".join(
                        _metadata_text(v) for v in info.values()
                    )
                    software_found = _find_ai_signature(info_text)

        # Build warning message
        if not exif_present:
            warning = (
                "EXIF data missing. This is a strong indicator of synthesized media — "
                "AI generators strip or never write camera metadata."
            )
        elif software_found != "None":
            warning = (
                f"AI software signature detected: '{software_found}'. "
                "This image was almost certainly generated by an AI tool."
            )
        else:
            warning = (
                "EXIF data present. Metadata appears consistent with a camera-captured image, "
                "but AI-generated images can be post-processed to include fake EXIF."
            )
    except Exception as e:
        # Best-effort boundary: report the failure instead of raising.
        warning = f"Metadata parsing error: {str(e)}"
    return {
        "exif_data_present": exif_present,
        "software_signature_found": software_found,
        "warning": warning,
    }