File size: 3,582 Bytes
a02f72f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""
DeepGuard — Metadata Forensics Module
Extracts EXIF data and checks for known AI software signatures.
All processing is stateless and in-memory.
"""

import io
import struct
from typing import Optional
from PIL import Image

# Substrings that identify AI image-generation software when they appear in
# EXIF fields or other embedded image metadata. Matching in this module is
# case-insensitive and reports the first entry that hits, so order matters.
AI_SIGNATURES = [
    "DALL-E",
    "dall-e",
    "Midjourney",
    "midjourney",
    "Stable Diffusion",
    "stable-diffusion",
    "StableDiffusion",
    "Adobe Firefly",
    "firefly",
    "Sora",
    "sora",
    "Imagen",
    "imagen",
    "Bing Image Creator",
    "NightCafe",
    "Craiyon",
    "FLUX",
    "flux",
    "Runway",
    "runway",
    "Pika",
    "pika",
    "ComfyUI",
    "comfyui",
    "Automatic1111",
    "InvokeAI",
    "NovelAI",
    "novelai",
    "Leonardo",
    "leonardo.ai",
]


def _metadata_text(value) -> str:
    """Return one metadata value as text suitable for signature matching."""
    if isinstance(value, bytes):
        # EXIF UserComment may hold UTF-16 text (the "UNICODE\0" character
        # code). Decoding that as UTF-8 leaves a NUL between every character,
        # which defeats substring matching. Every signature is plain ASCII,
        # so dropping NUL bytes recovers the text regardless of byte order.
        return value.replace(b"\x00", b"").decode("utf-8", errors="ignore")
    return str(value)


def _match_ai_signature(text: str) -> Optional[str]:
    """Return the first AI_SIGNATURES entry contained in *text*, ignoring case."""
    haystack = text.lower()
    for sig in AI_SIGNATURES:
        if sig.lower() in haystack:
            return sig
    return None


def extract_metadata(image_bytes: bytes) -> dict:
    """
    Perform forensic metadata analysis on raw image bytes.

    The image is opened in memory. If EXIF data is available, the Software
    (305), Artist (315), ImageDescription (270) and UserComment (37510) tags
    are searched, in that order, for a known AI software signature. If there
    is no EXIF data, the values of PIL's generic ``info`` dict are searched
    instead.

    Args:
        image_bytes: Raw bytes of the encoded image file.

    Returns:
        dict with keys:
            exif_data_present: True when EXIF data was found, or when the
                image carries any non-empty ``info`` metadata.
            software_signature_found: The matching AI_SIGNATURES entry, or
                the string "None" when nothing matched.
            warning: Human-readable summary of the findings.

    Any exception raised while opening or parsing the image is caught; it is
    reported as "Metadata parsing error: ..." in ``warning`` and the other
    fields keep whatever values had been determined up to that point.
    """
    exif_present = False
    software_found = "None"
    warning = ""

    try:
        # The context manager releases the image's resources once the
        # metadata has been read.
        with Image.open(io.BytesIO(image_bytes)) as img:
            # NOTE(review): _getexif() is a private Pillow API that not every
            # image plugin provides; images without it fall through to the
            # generic info-dict scan below.
            exif_data = img._getexif() if hasattr(img, "_getexif") else None

            if exif_data:
                exif_present = True
                # Software, Artist, ImageDescription, UserComment
                for tag_id in (305, 315, 270, 37510):
                    match = _match_ai_signature(
                        _metadata_text(exif_data.get(tag_id, ""))
                    )
                    if match is not None:
                        software_found = match
                        break
            else:
                # Try PIL's generic info dict (PNG tEXt chunks, etc.)
                info = getattr(img, "info", {})
                if info:
                    exif_present = True  # Has some metadata
                    info_text = " ".join(
                        _metadata_text(v) for v in info.values()
                    )
                    match = _match_ai_signature(info_text)
                    if match is not None:
                        software_found = match

        # Build warning message
        if not exif_present:
            warning = (
                "EXIF data missing. This is a strong indicator of synthesized media — "
                "AI generators strip or never write camera metadata."
            )
        elif software_found != "None":
            warning = (
                f"AI software signature detected: '{software_found}'. "
                "This image was almost certainly generated by an AI tool."
            )
        else:
            warning = (
                "EXIF data present. Metadata appears consistent with a camera-captured image, "
                "but AI-generated images can be post-processed to include fake EXIF."
            )

    except Exception as e:
        # Deliberate best-effort boundary: unreadable or corrupt input is
        # reported through the warning field rather than raised to the caller.
        warning = f"Metadata parsing error: {str(e)}"

    return {
        "exif_data_present": exif_present,
        "software_signature_found": software_found,
        "warning": warning,
    }