Deepguard-api / metadata.py
suyash-77's picture
Upload 9 files
a02f72f verified
"""
DeepGuard — Metadata Forensics Module
Extracts EXIF data and checks for known AI software signatures.
All processing is stateless and in-memory.
"""
import io
import struct
from typing import Optional
from PIL import Image
# Substrings that identify AI image-generation software when they show up in
# image metadata (EXIF fields, PNG text chunks, and similar).
#
# The sequence is significant: a scanner that stops at the first entry found
# in a piece of text will report the earliest matching entry, so keep the
# order stable when adding new signatures.
AI_SIGNATURES = [
    "DALL-E",
    "dall-e",
    "Midjourney",
    "midjourney",
    "Stable Diffusion",
    "stable-diffusion",
    "StableDiffusion",
    "Adobe Firefly",
    "firefly",
    "Sora",
    "sora",
    "Imagen",
    "imagen",
    "Bing Image Creator",
    "NightCafe",
    "Craiyon",
    "FLUX",
    "flux",
    "Runway",
    "runway",
    "Pika",
    "pika",
    "ComfyUI",
    "comfyui",
    "Automatic1111",
    "InvokeAI",
    "NovelAI",
    "novelai",
    "Leonardo",
    "leonardo.ai",
]
# EXIF tag IDs whose text is searched for generator signatures.
_SIGNATURE_TAG_IDS = (
    305,    # Software
    315,    # Artist
    270,    # ImageDescription
    37510,  # UserComment (lives in the Exif sub-IFD, not the base IFD)
)

# Pointer tag of the Exif sub-IFD; needed to reach UserComment through the
# public Pillow API.
_EXIF_IFD_POINTER = 0x8769


def _metadata_text(value) -> str:
    """Return a metadata value as plain text suitable for substring search."""
    if isinstance(value, (bytes, bytearray)):
        # errors="ignore" never raises, so no extra exception handling needed.
        value = bytes(value).decode("utf-8", errors="ignore")
    # UTF-16 payloads (e.g. a UNICODE-encoded UserComment) decode to text with
    # NULs between the characters; drop them so ASCII signatures still match.
    return str(value).replace("\x00", "")


def _collect_exif_tags(img) -> dict:
    """Return the base-IFD and Exif-sub-IFD tags of *img* merged in one dict."""
    exif = img.getexif()
    tags = dict(exif)
    # get_ifd is missing on older Pillow releases; degrade to base-IFD tags.
    get_ifd = getattr(exif, "get_ifd", None)
    if get_ifd is not None:
        tags.update(get_ifd(_EXIF_IFD_POINTER))
    return tags


def _find_signature(texts, signatures) -> str:
    """Return the first signature found in *texts*, or "None" if none match.

    Texts are checked in order; within one text the signatures are tried in
    list order. Matching is case-insensitive.
    """
    lowered = [(sig, sig.lower()) for sig in signatures]
    for text in texts:
        haystack = text.lower()
        for sig, needle in lowered:
            if needle in haystack:
                return sig
    return "None"


def extract_metadata(image_bytes: bytes) -> dict:
    """
    Perform forensic metadata analysis on raw image bytes.

    EXIF is read through Pillow's public ``getexif()`` API, so every format
    Pillow can parse EXIF for is covered. Signatures are searched first in
    the Software / Artist / ImageDescription / UserComment tags and then in
    the remaining entries of the image's ``info`` dict (PNG text chunks,
    comments, XMP, ...), regardless of whether EXIF exists.

    ``exif_data_present`` reflects real EXIF tags only: generic ``info``
    entries (for example JFIF density) do not count as EXIF.

    Args:
        image_bytes: The encoded image file contents.

    Returns:
        dict with keys:
            exif_data_present (bool): True when the image carries EXIF tags.
            software_signature_found (str): The matching entry of
                AI_SIGNATURES, or the string "None".
            warning (str): Human-readable verdict, or a
                "Metadata parsing error: ..." message if the bytes could not
                be analysed. The function never raises for bad input.
    """
    exif_present = False
    software_found = "None"
    warning = ""
    try:
        # Close the image deterministically once the metadata is copied out.
        with Image.open(io.BytesIO(image_bytes)) as img:
            exif_tags = _collect_exif_tags(img)
            info = dict(getattr(img, "info", None) or {})

        exif_present = bool(exif_tags)

        texts = [
            _metadata_text(exif_tags.get(tag_id, ""))
            for tag_id in _SIGNATURE_TAG_IDS
        ]
        # The raw "exif" blob is skipped: its tags were parsed above.
        texts.append(
            " ".join(
                _metadata_text(value)
                for key, value in info.items()
                if key != "exif"
            )
        )
        software_found = _find_signature(texts, AI_SIGNATURES)

        # A detected signature is the strongest evidence, so it takes
        # precedence over the "EXIF missing" heuristic.
        if software_found != "None":
            warning = (
                f"AI software signature detected: '{software_found}'. "
                "This image was almost certainly generated by an AI tool."
            )
        elif not exif_present:
            warning = (
                "EXIF data missing. This is a strong indicator of synthesized media — "
                "AI generators strip or never write camera metadata."
            )
        else:
            warning = (
                "EXIF data present. Metadata appears consistent with a camera-captured image, "
                "but AI-generated images can be post-processed to include fake EXIF."
            )
    except Exception as e:
        # Boundary for untrusted input: report the failure instead of raising.
        warning = f"Metadata parsing error: {str(e)}"
    return {
        "exif_data_present": exif_present,
        "software_signature_found": software_found,
        "warning": warning,
    }