Spaces:
Running
Running
| """Image/media preprocessing utilities. | |
| Used by A2 (MediaAuthenticityAgent) to extract EXIF metadata and basic | |
| file-property signals when no Hive API key is configured. | |
| """ | |
| from __future__ import annotations | |
| import hashlib | |
| import io | |
| import logging | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| logger = logging.getLogger(__name__) | |
| class MediaSnapshot: | |
| """Lightweight summary of a media file.""" | |
| sha256: str | |
| size_bytes: int | |
| width: Optional[int] = None | |
| height: Optional[int] = None | |
| format: Optional[str] = None | |
| mode: Optional[str] = None | |
| has_exif: bool = False | |
| exif_count: int = 0 | |
| exif_tags: dict[str, str] = field(default_factory=dict) | |
| suspicious_flags: list[str] = field(default_factory=list) | |
| class MediaProcessor: | |
| """Synchronous helper; cheap enough to call inside async agents.""" | |
| def analyze(self, raw: bytes, filename: str = "") -> MediaSnapshot: | |
| snap = MediaSnapshot( | |
| sha256=hashlib.sha256(raw).hexdigest(), | |
| size_bytes=len(raw), | |
| ) | |
| self._extract_pillow(raw, snap) | |
| self._extract_exif(raw, snap) | |
| self._heuristic_flags(snap, filename) | |
| return snap | |
| def _extract_pillow(self, raw: bytes, snap: MediaSnapshot) -> None: | |
| try: | |
| from PIL import Image | |
| img = Image.open(io.BytesIO(raw)) | |
| img.verify() | |
| img2 = Image.open(io.BytesIO(raw)) | |
| snap.width, snap.height = img2.size | |
| snap.format = img2.format | |
| snap.mode = img2.mode | |
| except Exception as e: | |
| logger.debug("Pillow open failed: %s", e) | |
| snap.suspicious_flags.append("pillow_open_failed") | |
| def _extract_exif(self, raw: bytes, snap: MediaSnapshot) -> None: | |
| try: | |
| import exifread | |
| tags = exifread.process_file(io.BytesIO(raw), details=False) | |
| if tags: | |
| snap.has_exif = True | |
| snap.exif_count = len(tags) | |
| # keep a small selection of high-signal tags | |
| keep = [ | |
| "Image Make", | |
| "Image Model", | |
| "EXIF DateTimeOriginal", | |
| "GPS GPSLatitude", | |
| "GPS GPSLongitude", | |
| "Image Software", | |
| ] | |
| for k in keep: | |
| if k in tags: | |
| snap.exif_tags[k] = str(tags[k])[:200] | |
| except Exception as e: | |
| logger.debug("EXIF extraction failed: %s", e) | |
| def _heuristic_flags(self, snap: MediaSnapshot, filename: str) -> None: | |
| if not snap.has_exif: | |
| snap.suspicious_flags.append("no_exif_metadata") | |
| software = snap.exif_tags.get("Image Software", "").lower() | |
| for marker in ("photoshop", "midjourney", "stable", "dall-e", "ai"): | |
| if marker in software: | |
| snap.suspicious_flags.append(f"editor:{marker}") | |
| if snap.size_bytes < 8000: | |
| snap.suspicious_flags.append("very_small_file") | |
| if snap.format and snap.format.upper() == "WEBP": | |
| snap.suspicious_flags.append("webp_repackaged") | |
| lowered = filename.lower() | |
| for marker in ("ai_", "_ai", "midjourney", "synthetic", "fake"): | |
| if marker in lowered: | |
| snap.suspicious_flags.append(f"filename:{marker}") | |