| """Image/media preprocessing utilities. |
| |
| Used by A2 (MediaAuthenticityAgent) to extract EXIF metadata and basic |
| file-property signals when no Hive API key is configured. |
| """ |
|
|
| from __future__ import annotations |
|
|
import hashlib
import io
import logging
import re
from dataclasses import dataclass, field
from typing import Optional
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
@dataclass
class MediaSnapshot:
    """Lightweight summary of a media file.

    Only ``sha256`` and ``size_bytes`` are required; every other field has a
    default and is filled in afterwards by ``MediaProcessor`` when the
    corresponding extraction step succeeds.
    """

    # Identity of the raw payload: hex SHA-256 digest and length in bytes.
    sha256: str
    size_bytes: int

    # Basic image properties as reported by Pillow; left as None when the
    # image could not be opened.
    width: Optional[int] = None
    height: Optional[int] = None
    format: Optional[str] = None
    mode: Optional[str] = None

    # EXIF summary: whether any tags were found, how many, and a small
    # stringified subset of them keyed by tag name.
    has_exif: bool = False
    exif_count: int = 0
    exif_tags: dict[str, str] = field(default_factory=dict)

    # Heuristic flag strings, appended in the order the checks ran.
    suspicious_flags: list[str] = field(default_factory=list)
|
|
|
|
class MediaProcessor:
    """Synchronous helper; cheap enough to call inside async agents.

    ``analyze`` hashes the raw bytes, reads basic image properties with
    Pillow, extracts a small set of EXIF tags with ``exifread``, and then
    appends heuristic flag strings to ``MediaSnapshot.suspicious_flags``.
    Both third-party libraries are imported lazily inside the methods that
    use them, and every extraction step is best-effort: failures are logged
    at debug level instead of being raised.
    """

    # EXIF tags copied (stringified and truncated) into the snapshot.
    _EXIF_KEEP = (
        "Image Make",
        "Image Model",
        "EXIF DateTimeOriginal",
        "GPS GPSLatitude",
        "GPS GPSLongitude",
        "Image Software",
    )
    _EXIF_VALUE_MAX_CHARS = 200

    # Files smaller than this many bytes get the "very_small_file" flag.
    _SMALL_FILE_BYTES = 8000

    # (flag suffix, pattern) pairs, matched against lower-cased text.
    # "ai" must be a delimited token (optionally written "openai") so that
    # ordinary words containing the letters -- "Paint.NET", "thai_food",
    # "img_airplane" -- are not reported as AI markers.
    _SOFTWARE_MARKERS = (
        ("photoshop", re.compile(r"photoshop")),
        ("midjourney", re.compile(r"midjourney")),
        ("stable", re.compile(r"stable")),
        ("dall-e", re.compile(r"dall-e")),
        ("ai", re.compile(r"(?<![a-z0-9])(?:open)?ai(?![a-z0-9])")),
    )
    _FILENAME_MARKERS = (
        ("ai_", re.compile(r"(?<![a-z0-9])(?:open)?ai_")),
        ("_ai", re.compile(r"_ai(?![a-z0-9])")),
        ("midjourney", re.compile(r"midjourney")),
        ("synthetic", re.compile(r"synthetic")),
        ("fake", re.compile(r"fake")),
    )

    def analyze(self, raw: bytes, filename: str = "") -> MediaSnapshot:
        """Build a ``MediaSnapshot`` for *raw*.

        Args:
            raw: Complete file contents.
            filename: Optional original file name, used only for the
                filename-based heuristics.

        Returns:
            A snapshot with the SHA-256 digest and size always set, and the
            remaining fields filled in as far as extraction succeeded.
        """
        snap = MediaSnapshot(
            sha256=hashlib.sha256(raw).hexdigest(),
            size_bytes=len(raw),
        )
        self._extract_pillow(raw, snap)
        self._extract_exif(raw, snap)
        self._heuristic_flags(snap, filename)
        return snap

    def _extract_pillow(self, raw: bytes, snap: MediaSnapshot) -> None:
        """Record width, height, format and mode; flag the file on failure."""
        try:
            from PIL import Image

            # Pillow requires reopening an image after verify(), so the
            # integrity check and the property read use separate handles.
            with Image.open(io.BytesIO(raw)) as probe:
                probe.verify()
            with Image.open(io.BytesIO(raw)) as img:
                snap.width, snap.height = img.size
                snap.format = img.format
                snap.mode = img.mode
        except Exception as e:
            # Deliberately broad: Pillow raises several unrelated exception
            # types for malformed input.
            # NOTE(review): a missing Pillow install (ImportError) also lands
            # here and flags the file -- confirm that fail-closed behaviour
            # is intended.
            logger.debug("Pillow open failed: %s", e)
            snap.suspicious_flags.append("pillow_open_failed")

    def _extract_exif(self, raw: bytes, snap: MediaSnapshot) -> None:
        """Copy the EXIF tag count and a few selected tags into *snap*."""
        try:
            import exifread

            tags = exifread.process_file(io.BytesIO(raw), details=False)
            if tags:
                snap.has_exif = True
                snap.exif_count = len(tags)
                for key in self._EXIF_KEEP:
                    if key in tags:
                        value = str(tags[key])
                        snap.exif_tags[key] = value[: self._EXIF_VALUE_MAX_CHARS]
        except Exception as e:
            # Best-effort: the snapshot simply keeps has_exif=False.
            logger.debug("EXIF extraction failed: %s", e)

    def _heuristic_flags(self, snap: MediaSnapshot, filename: str) -> None:
        """Append heuristic flag strings to ``snap.suspicious_flags``.

        Flags are appended in a fixed order: missing EXIF, editor markers
        from the "Image Software" tag, very small file, WEBP format, then
        filename markers.
        """
        flags = snap.suspicious_flags

        if not snap.has_exif:
            flags.append("no_exif_metadata")

        software = snap.exif_tags.get("Image Software", "").lower()
        flags.extend(
            f"editor:{label}"
            for label, pattern in self._SOFTWARE_MARKERS
            if pattern.search(software)
        )

        if snap.size_bytes < self._SMALL_FILE_BYTES:
            flags.append("very_small_file")
        if snap.format and snap.format.upper() == "WEBP":
            flags.append("webp_repackaged")

        # Tolerate a missing filename rather than crashing on None.
        lowered = (filename or "").lower()
        flags.extend(
            f"filename:{label}"
            for label, pattern in self._FILENAME_MARKERS
            if pattern.search(lowered)
        )
|
|