feat: enhance detector agent with fast screening
Browse files- README.md +2 -5
- app.py +4 -4
- detector_agent.py +510 -12
- requirements.txt +2 -0
README.md
CHANGED
|
@@ -20,9 +20,10 @@ This Space now provides a LangGraph-based AI-image detection agent:
|
|
| 20 |
3. The tool layer combines:
|
| 21 |
- common-sense consistency probe (agent-callable tool)
|
| 22 |
- image metadata inspection (via `exiftool`)
|
|
|
|
| 23 |
- low-level forensic heuristics
|
| 24 |
- external vision specialist probe with auto multi-region discovery and zoomed crop analysis
|
| 25 |
-
|
| 26 |
- `classification`: `Real` / `Fake`
|
| 27 |
- `confidence`: `0-100`
|
| 28 |
|
|
@@ -34,10 +35,6 @@ Set the following in your Space settings:
|
|
| 34 |
- `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
|
| 35 |
- `OPENAI_BASE_URL` (optional, for OpenAI-compatible third-party services)
|
| 36 |
- `APP_API_TOKEN` (optional, used to protect API endpoint calls)
|
| 37 |
-
- `DETECTOR_CACHE_ENABLED` (optional, default: `1`)
|
| 38 |
-
- `DETECTOR_CACHE_TTL_SECONDS` (optional, default: `21600`)
|
| 39 |
-
- `DETECTOR_CACHE_DIR` (optional, default: `/tmp/aifo_detector_cache`)
|
| 40 |
-
- `DETECTOR_GRAPH_RECURSION_LIMIT` (optional, default: `24`)
|
| 41 |
|
| 42 |
## UI Features
|
| 43 |
|
|
|
|
| 20 |
3. The tool layer combines:
|
| 21 |
- common-sense consistency probe (agent-callable tool)
|
| 22 |
- image metadata inspection (via `exiftool`)
|
| 23 |
+
- two pretrained Hugging Face AI-image detectors
|
| 24 |
- low-level forensic heuristics
|
| 25 |
- external vision specialist probe with auto multi-region discovery and zoomed crop analysis
|
| 26 |
+
5. The agent synthesizes the evidence and returns:
|
| 27 |
- `classification`: `Real` / `Fake`
|
| 28 |
- `confidence`: `0-100`
|
| 29 |
|
|
|
|
| 35 |
- `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
|
| 36 |
- `OPENAI_BASE_URL` (optional, for OpenAI-compatible third-party services)
|
| 37 |
- `APP_API_TOKEN` (optional, used to protect API endpoint calls)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
## UI Features
|
| 40 |
|
app.py
CHANGED
|
@@ -62,9 +62,6 @@ Before running, configure these Hugging Face Space Secrets:
|
|
| 62 |
- `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
|
| 63 |
- `OPENAI_BASE_URL` (optional, for compatible third-party endpoints)
|
| 64 |
- `APP_API_TOKEN` (optional, protect API endpoint calls)
|
| 65 |
-
- `DETECTOR_CACHE_ENABLED` (optional, default: `1`)
|
| 66 |
-
- `DETECTOR_CACHE_TTL_SECONDS` (optional, default: `21600`)
|
| 67 |
-
- `DETECTOR_CACHE_DIR` (optional, default: `/tmp/aifo_detector_cache`)
|
| 68 |
"""
|
| 69 |
)
|
| 70 |
|
|
@@ -80,7 +77,10 @@ Before running, configure these Hugging Face Space Secrets:
|
|
| 80 |
run_btn.click(
|
| 81 |
fn=analyze_image_ui,
|
| 82 |
inputs=[image_input],
|
| 83 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
| 84 |
api_name="analyze_ui",
|
| 85 |
)
|
| 86 |
|
|
|
|
| 62 |
- `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
|
| 63 |
- `OPENAI_BASE_URL` (optional, for compatible third-party endpoints)
|
| 64 |
- `APP_API_TOKEN` (optional, protect API endpoint calls)
|
|
|
|
|
|
|
|
|
|
| 65 |
"""
|
| 66 |
)
|
| 67 |
|
|
|
|
| 77 |
run_btn.click(
|
| 78 |
fn=analyze_image_ui,
|
| 79 |
inputs=[image_input],
|
| 80 |
+
outputs=[
|
| 81 |
+
classification_out,
|
| 82 |
+
confidence_out,
|
| 83 |
+
],
|
| 84 |
api_name="analyze_ui",
|
| 85 |
)
|
| 86 |
|
detector_agent.py
CHANGED
|
@@ -7,6 +7,7 @@ import shutil
|
|
| 7 |
import subprocess
|
| 8 |
import tempfile
|
| 9 |
import time
|
|
|
|
| 10 |
from pathlib import Path
|
| 11 |
from typing import Any, Literal
|
| 12 |
|
|
@@ -20,6 +21,17 @@ from openai import OpenAI
|
|
| 20 |
from PIL import ExifTags, Image, ImageChops, ImageStat
|
| 21 |
from pydantic import BaseModel, Field, field_validator
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
AGENT_SYSTEM_PROMPT = """
|
| 25 |
You are an image-forensics coordinator.
|
|
@@ -28,7 +40,7 @@ You do not see the image directly. Your job is to decide which tools to call.
|
|
| 28 |
Rules:
|
| 29 |
- Prefer gathering evidence before reaching a conclusion.
|
| 30 |
- Prefer calling the common-sense consistency tool early when image logic/plausibility matters.
|
| 31 |
-
- Usually call inspect_image_metadata
|
| 32 |
- For vision probe calls, prefer multi-region inspection to cover diverse clues (faces, text, boundaries, limbs, animals, reflections).
|
| 33 |
- Stop calling tools once you have enough evidence. Do not provide the final verdict yourself.
|
| 34 |
- Keep the process efficient. Usually 2-5 tool calls are enough.
|
|
@@ -39,13 +51,47 @@ SYNTHESIS_SYSTEM_PROMPT = """
|
|
| 39 |
You are a senior image-forensics judge.
|
| 40 |
Review the collected tool evidence and return a final verdict.
|
| 41 |
|
| 42 |
-
Output requirements:
|
| 43 |
- classification must be Real or Fake
|
| 44 |
- confidence must be an integer between 0 and 100
|
| 45 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
""".strip()
|
| 50 |
|
| 51 |
|
|
@@ -185,9 +231,21 @@ AI_METADATA_KEYWORDS = [
|
|
| 185 |
]
|
| 186 |
|
| 187 |
|
| 188 |
-
CACHE_SCHEMA_VERSION = "
|
| 189 |
CACHE_DEFAULT_TTL_SECONDS = 6 * 60 * 60
|
| 190 |
GRAPH_RECURSION_LIMIT_DEFAULT = 24
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
|
| 193 |
def _normalize_final_classification(value: Any) -> str:
|
|
@@ -209,9 +267,22 @@ def _normalize_final_confidence(value: Any) -> int:
|
|
| 209 |
return max(0, min(100, numeric))
|
| 210 |
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
class DetectorVerdict(BaseModel):
|
| 213 |
classification: Literal["Real", "Fake"]
|
| 214 |
confidence: int = Field(ge=0, le=100)
|
|
|
|
| 215 |
|
| 216 |
@field_validator("classification", mode="before")
|
| 217 |
@classmethod
|
|
@@ -223,6 +294,14 @@ class DetectorVerdict(BaseModel):
|
|
| 223 |
def normalize_confidence(cls, value: Any) -> int:
|
| 224 |
return _normalize_final_confidence(value)
|
| 225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
def _build_openai_client() -> OpenAI:
|
| 228 |
api_key = os.getenv("OPENAI_API_KEY")
|
|
@@ -344,6 +423,7 @@ def _build_detector_cache_key(image: Image.Image, image_path: str | None) -> str
|
|
| 344 |
[
|
| 345 |
AGENT_SYSTEM_PROMPT,
|
| 346 |
SYNTHESIS_SYSTEM_PROMPT,
|
|
|
|
| 347 |
VISION_TOOL_PROMPT,
|
| 348 |
VISION_REGION_DISCOVERY_PROMPT,
|
| 349 |
COMMON_SENSE_SYSTEM_PROMPT,
|
|
@@ -356,6 +436,9 @@ def _build_detector_cache_key(image: Image.Image, image_path: str | None) -> str
|
|
| 356 |
"image_digest": image_digest,
|
| 357 |
"model": model,
|
| 358 |
"base_url": base_url,
|
|
|
|
|
|
|
|
|
|
| 359 |
"prompt_fingerprint": prompt_fingerprint,
|
| 360 |
}
|
| 361 |
raw = json.dumps(payload, ensure_ascii=False, sort_keys=True)
|
|
@@ -386,9 +469,26 @@ def _load_cached_result(cache_key: str, ttl_seconds: int) -> dict[str, Any] | No
|
|
| 386 |
except Exception:
|
| 387 |
pass
|
| 388 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
return {
|
| 390 |
-
"classification":
|
| 391 |
-
"confidence":
|
|
|
|
| 392 |
}
|
| 393 |
|
| 394 |
|
|
@@ -836,6 +936,333 @@ def _to_float(value: Any, default: float) -> float:
|
|
| 836 |
return default
|
| 837 |
|
| 838 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 839 |
def _normalize_discovered_regions(payload: dict[str, Any], max_regions: int) -> list[dict[str, Any]]:
|
| 840 |
raw_regions = payload.get("regions", [])
|
| 841 |
if not isinstance(raw_regions, list):
|
|
@@ -1095,7 +1522,11 @@ def _messages_to_text(messages: list[Any]) -> str:
|
|
| 1095 |
return "\n\n".join(lines)
|
| 1096 |
|
| 1097 |
|
| 1098 |
-
def _build_tools(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1099 |
@tool
|
| 1100 |
def common_sense_consistency_probe() -> str:
|
| 1101 |
"""Analyze full-image common-sense consistency (anatomy, text, physics, geometry, lighting, reflections)."""
|
|
@@ -1106,6 +1537,13 @@ def _build_tools(image: Image.Image, image_path: str | None = None) -> list[Any]
|
|
| 1106 |
"""Use exiftool to extract selected metadata tags and summarize forensic clues."""
|
| 1107 |
return _json_dumps(_metadata_report(image, image_path=image_path))
|
| 1108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1109 |
@tool
|
| 1110 |
def run_low_level_forensics() -> str:
|
| 1111 |
"""Run low-level image heuristics such as texture smoothness, saturation, clipping, and ELA artifacts."""
|
|
@@ -1126,13 +1564,22 @@ def _build_tools(image: Image.Image, image_path: str | None = None) -> list[Any]
|
|
| 1126 |
return [
|
| 1127 |
common_sense_consistency_probe,
|
| 1128 |
inspect_image_metadata,
|
|
|
|
| 1129 |
run_low_level_forensics,
|
| 1130 |
vision_specialist_probe,
|
| 1131 |
]
|
| 1132 |
|
| 1133 |
|
| 1134 |
-
def _build_graph(
|
| 1135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1136 |
llm = _build_langchain_model()
|
| 1137 |
llm_with_tools = llm.bind_tools(tools)
|
| 1138 |
tool_node = ToolNode(tools, handle_tool_errors=True)
|
|
@@ -1230,7 +1677,54 @@ def run_detector_agent(
|
|
| 1230 |
if cached is not None:
|
| 1231 |
return dict(cached)
|
| 1232 |
|
| 1233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1234 |
initial_message = HumanMessage(
|
| 1235 |
content=(
|
| 1236 |
"Analyze the uploaded image with the available tools. "
|
|
@@ -1244,6 +1738,10 @@ def run_detector_agent(
|
|
| 1244 |
result = {
|
| 1245 |
"classification": _normalize_final_classification(verdict.classification),
|
| 1246 |
"confidence": _normalize_final_confidence(verdict.confidence),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1247 |
}
|
| 1248 |
if cache_enabled and cache_key:
|
| 1249 |
try:
|
|
|
|
| 7 |
import subprocess
|
| 8 |
import tempfile
|
| 9 |
import time
|
| 10 |
+
from functools import lru_cache
|
| 11 |
from pathlib import Path
|
| 12 |
from typing import Any, Literal
|
| 13 |
|
|
|
|
| 21 |
from PIL import ExifTags, Image, ImageChops, ImageStat
|
| 22 |
from pydantic import BaseModel, Field, field_validator
|
| 23 |
|
| 24 |
+
try:
|
| 25 |
+
import torch
|
| 26 |
+
from transformers import AutoImageProcessor, AutoModelForImageClassification
|
| 27 |
+
|
| 28 |
+
HF_DETECTOR_RUNTIME_AVAILABLE = True
|
| 29 |
+
except Exception:
|
| 30 |
+
torch = None
|
| 31 |
+
AutoImageProcessor = None
|
| 32 |
+
AutoModelForImageClassification = None
|
| 33 |
+
HF_DETECTOR_RUNTIME_AVAILABLE = False
|
| 34 |
+
|
| 35 |
|
| 36 |
AGENT_SYSTEM_PROMPT = """
|
| 37 |
You are an image-forensics coordinator.
|
|
|
|
| 40 |
Rules:
|
| 41 |
- Prefer gathering evidence before reaching a conclusion.
|
| 42 |
- Prefer calling the common-sense consistency tool early when image logic/plausibility matters.
|
| 43 |
+
- Usually call inspect_image_metadata and run_pretrained_hf_detectors early, then at least one deterministic local forensic tool and one vision probe.
|
| 44 |
- For vision probe calls, prefer multi-region inspection to cover diverse clues (faces, text, boundaries, limbs, animals, reflections).
|
| 45 |
- Stop calling tools once you have enough evidence. Do not provide the final verdict yourself.
|
| 46 |
- Keep the process efficient. Usually 2-5 tool calls are enough.
|
|
|
|
| 51 |
You are a senior image-forensics judge.
|
| 52 |
Review the collected tool evidence and return a final verdict.
|
| 53 |
|
| 54 |
+
Output requirements (JSON only):
|
| 55 |
- classification must be Real or Fake
|
| 56 |
- confidence must be an integer between 0 and 100
|
| 57 |
+
- summary must be one concise paragraph that combines:
|
| 58 |
+
1) the final analysis,
|
| 59 |
+
2) evidence basis for the verdict,
|
| 60 |
+
3) image-specific forensic reasons
|
| 61 |
+
|
| 62 |
+
Evidence weighting guidance:
|
| 63 |
+
- Treat pretrained detector outputs as auxiliary signals.
|
| 64 |
+
- If detector signals conflict with visual/metadata evidence, lower confidence and reflect uncertainty.
|
| 65 |
+
|
| 66 |
+
Fake means likely AI-generated or not a real photo.
|
| 67 |
+
Real means likely a real photo.
|
| 68 |
+
""".strip()
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
FAST_SCREENING_SYSTEM_PROMPT = """
|
| 72 |
+
You are a fast image-authenticity screener.
|
| 73 |
+
Task:
|
| 74 |
+
- Determine whether the image is likely a real photo or likely AI-generated synthetic imagery.
|
| 75 |
+
- Use only visible evidence in the image.
|
| 76 |
+
|
| 77 |
+
Return strict JSON only:
|
| 78 |
+
{
|
| 79 |
+
"assessment": "REAL|AI_GENERATED|UNSURE",
|
| 80 |
+
"confidence": 0-100,
|
| 81 |
+
"summary": "one concise paragraph"
|
| 82 |
+
}
|
| 83 |
|
| 84 |
+
Definitions:
|
| 85 |
+
- REAL: likely captured by a real camera from the physical world (normal edits/compression are allowed).
|
| 86 |
+
- AI_GENERATED: likely primarily synthesized by a generative model.
|
| 87 |
+
|
| 88 |
+
Decision policy:
|
| 89 |
+
- Prefer speed over exhaustive analysis.
|
| 90 |
+
- Use strong visual cues only: anatomy coherence, text fidelity, geometry/perspective, reflections/shadows,
|
| 91 |
+
object boundaries, repeated texture patterns, and local artifacts.
|
| 92 |
+
- Output REAL or AI_GENERATED only if evidence is clearly one-sided.
|
| 93 |
+
- Output UNSURE when evidence is mixed/weak/insufficient.
|
| 94 |
+
- summary must include main cues and uncertainty in one short paragraph.
|
| 95 |
""".strip()
|
| 96 |
|
| 97 |
|
|
|
|
| 231 |
]
|
| 232 |
|
| 233 |
|
| 234 |
+
CACHE_SCHEMA_VERSION = "detector_cache_v5"
|
| 235 |
CACHE_DEFAULT_TTL_SECONDS = 6 * 60 * 60
|
| 236 |
GRAPH_RECURSION_LIMIT_DEFAULT = 24
|
| 237 |
+
FAST_SCREENING_ENABLED = True
|
| 238 |
+
FAST_SCREENING_CONFIDENCE_THRESHOLD = 80
|
| 239 |
+
HF_PRETRAINED_DETECTOR_MODELS = [
|
| 240 |
+
{
|
| 241 |
+
"name": "organika_sdxl_detector",
|
| 242 |
+
"repo_id": "Organika/sdxl-detector",
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"name": "haywoodsloan_ai_image_detector_dev_deploy",
|
| 246 |
+
"repo_id": "haywoodsloan/ai-image-detector-dev-deploy",
|
| 247 |
+
},
|
| 248 |
+
]
|
| 249 |
|
| 250 |
|
| 251 |
def _normalize_final_classification(value: Any) -> str:
|
|
|
|
| 267 |
return max(0, min(100, numeric))
|
| 268 |
|
| 269 |
|
| 270 |
+
def _normalize_non_empty_text(value: Any, fallback: str) -> str:
|
| 271 |
+
text = str(value or "").strip()
|
| 272 |
+
if not text:
|
| 273 |
+
return fallback
|
| 274 |
+
return text
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def _is_real_like_label(label: str) -> bool:
|
| 278 |
+
normalized = label.strip().lower()
|
| 279 |
+
return normalized in {"real", "authentic", "natural", "photo", "photograph", "camera"}
|
| 280 |
+
|
| 281 |
+
|
| 282 |
class DetectorVerdict(BaseModel):
|
| 283 |
classification: Literal["Real", "Fake"]
|
| 284 |
confidence: int = Field(ge=0, le=100)
|
| 285 |
+
summary: str = ""
|
| 286 |
|
| 287 |
@field_validator("classification", mode="before")
|
| 288 |
@classmethod
|
|
|
|
| 294 |
def normalize_confidence(cls, value: Any) -> int:
|
| 295 |
return _normalize_final_confidence(value)
|
| 296 |
|
| 297 |
+
@field_validator("summary", mode="before")
|
| 298 |
+
@classmethod
|
| 299 |
+
def normalize_summary(cls, value: Any) -> str:
|
| 300 |
+
return _normalize_non_empty_text(
|
| 301 |
+
value,
|
| 302 |
+
"Insufficient evidence for a detailed summary.",
|
| 303 |
+
)
|
| 304 |
+
|
| 305 |
|
| 306 |
def _build_openai_client() -> OpenAI:
|
| 307 |
api_key = os.getenv("OPENAI_API_KEY")
|
|
|
|
| 423 |
[
|
| 424 |
AGENT_SYSTEM_PROMPT,
|
| 425 |
SYNTHESIS_SYSTEM_PROMPT,
|
| 426 |
+
FAST_SCREENING_SYSTEM_PROMPT,
|
| 427 |
VISION_TOOL_PROMPT,
|
| 428 |
VISION_REGION_DISCOVERY_PROMPT,
|
| 429 |
COMMON_SENSE_SYSTEM_PROMPT,
|
|
|
|
| 436 |
"image_digest": image_digest,
|
| 437 |
"model": model,
|
| 438 |
"base_url": base_url,
|
| 439 |
+
"fast_screening_enabled": FAST_SCREENING_ENABLED,
|
| 440 |
+
"fast_screening_confidence_threshold": FAST_SCREENING_CONFIDENCE_THRESHOLD,
|
| 441 |
+
"hf_pretrained_detector_repos": [item["repo_id"] for item in HF_PRETRAINED_DETECTOR_MODELS],
|
| 442 |
"prompt_fingerprint": prompt_fingerprint,
|
| 443 |
}
|
| 444 |
raw = json.dumps(payload, ensure_ascii=False, sort_keys=True)
|
|
|
|
| 469 |
except Exception:
|
| 470 |
pass
|
| 471 |
return None
|
| 472 |
+
classification = _normalize_final_classification(result.get("classification"))
|
| 473 |
+
confidence = _normalize_final_confidence(result.get("confidence"))
|
| 474 |
+
summary_value = result.get("summary")
|
| 475 |
+
if not str(summary_value or "").strip():
|
| 476 |
+
legacy_parts = [
|
| 477 |
+
str(result.get("final_analysis") or "").strip(),
|
| 478 |
+
str(result.get("judgment_basis") or "").strip(),
|
| 479 |
+
str(result.get("image_analysis_reason") or "").strip(),
|
| 480 |
+
]
|
| 481 |
+
legacy_parts = [part for part in legacy_parts if part]
|
| 482 |
+
if legacy_parts:
|
| 483 |
+
summary_value = " ".join(legacy_parts)
|
| 484 |
+
summary = _normalize_non_empty_text(
|
| 485 |
+
summary_value,
|
| 486 |
+
f"Cached verdict: {classification} (confidence={confidence}).",
|
| 487 |
+
)
|
| 488 |
return {
|
| 489 |
+
"classification": classification,
|
| 490 |
+
"confidence": confidence,
|
| 491 |
+
"summary": summary,
|
| 492 |
}
|
| 493 |
|
| 494 |
|
|
|
|
| 936 |
return default
|
| 937 |
|
| 938 |
|
| 939 |
+
def _normalize_fast_screening_result(data: dict[str, Any]) -> dict[str, Any]:
|
| 940 |
+
assessment = str(data.get("assessment", "UNSURE")).upper()
|
| 941 |
+
if assessment not in {"REAL", "AI_GENERATED", "UNSURE"}:
|
| 942 |
+
assessment = "UNSURE"
|
| 943 |
+
|
| 944 |
+
confidence = _normalize_final_confidence(data.get("confidence"))
|
| 945 |
+
summary = _normalize_non_empty_text(
|
| 946 |
+
data.get("summary"),
|
| 947 |
+
"Fast screening was inconclusive.",
|
| 948 |
+
)
|
| 949 |
+
return {
|
| 950 |
+
"assessment": assessment,
|
| 951 |
+
"confidence": confidence,
|
| 952 |
+
"summary": summary,
|
| 953 |
+
}
|
| 954 |
+
|
| 955 |
+
|
| 956 |
+
def _fast_screening_detector_signal_text(pretrained_signal: dict[str, Any] | None) -> str:
|
| 957 |
+
if not isinstance(pretrained_signal, dict):
|
| 958 |
+
return "No pretrained detector signal available."
|
| 959 |
+
|
| 960 |
+
aggregate = pretrained_signal.get("aggregate", {})
|
| 961 |
+
if not isinstance(aggregate, dict):
|
| 962 |
+
aggregate = {}
|
| 963 |
+
|
| 964 |
+
lines: list[str] = [
|
| 965 |
+
"Pretrained detector auxiliary signals (label 'artificial' => AI-generated):",
|
| 966 |
+
(
|
| 967 |
+
f"- aggregate.overall_hint={aggregate.get('overall_hint', 'UNCERTAIN')}, "
|
| 968 |
+
f"aggregate.confidence={aggregate.get('confidence', 0)}, "
|
| 969 |
+
f"aggregate.mean_artificial_probability={aggregate.get('mean_artificial_probability', None)}"
|
| 970 |
+
),
|
| 971 |
+
]
|
| 972 |
+
|
| 973 |
+
detectors = pretrained_signal.get("detectors", [])
|
| 974 |
+
if isinstance(detectors, list):
|
| 975 |
+
for item in detectors[:4]:
|
| 976 |
+
if not isinstance(item, dict):
|
| 977 |
+
continue
|
| 978 |
+
lines.append(
|
| 979 |
+
(
|
| 980 |
+
f"- {item.get('name', 'unknown')}: "
|
| 981 |
+
f"label={item.get('predicted_label', 'N/A')}, "
|
| 982 |
+
f"confidence={item.get('predicted_confidence', 'N/A')}, "
|
| 983 |
+
f"artificial_probability={item.get('artificial_probability', 'N/A')}, "
|
| 984 |
+
f"signal={item.get('signal', 'UNCERTAIN')}"
|
| 985 |
+
)
|
| 986 |
+
)
|
| 987 |
+
|
| 988 |
+
failures = pretrained_signal.get("load_failures", [])
|
| 989 |
+
if isinstance(failures, list) and failures:
|
| 990 |
+
lines.append(f"- load_failures={len(failures)}")
|
| 991 |
+
return "\n".join(lines)
|
| 992 |
+
|
| 993 |
+
|
| 994 |
+
def _run_fast_screening(
|
| 995 |
+
image: Image.Image,
|
| 996 |
+
pretrained_signal: dict[str, Any] | None = None,
|
| 997 |
+
) -> dict[str, Any]:
|
| 998 |
+
client = _build_openai_client()
|
| 999 |
+
model = os.getenv("OPENAI_MODEL", "gpt-5-mini")
|
| 1000 |
+
data_url = _image_to_data_url(image)
|
| 1001 |
+
detector_signal_text = _fast_screening_detector_signal_text(pretrained_signal)
|
| 1002 |
+
|
| 1003 |
+
response = client.responses.create(
|
| 1004 |
+
model=model,
|
| 1005 |
+
input=[
|
| 1006 |
+
{"role": "system", "content": [{"type": "input_text", "text": FAST_SCREENING_SYSTEM_PROMPT}]},
|
| 1007 |
+
{
|
| 1008 |
+
"role": "user",
|
| 1009 |
+
"content": [
|
| 1010 |
+
{
|
| 1011 |
+
"type": "input_text",
|
| 1012 |
+
"text": (
|
| 1013 |
+
"Classify image authenticity in one quick pass: REAL vs AI_GENERATED vs UNSURE. "
|
| 1014 |
+
"This is a real-photo-vs-AI-generated decision task.\n"
|
| 1015 |
+
"Use pretrained detector signals below as auxiliary evidence (not as absolute truth):\n"
|
| 1016 |
+
f"{detector_signal_text}\n"
|
| 1017 |
+
"Return strict JSON only."
|
| 1018 |
+
),
|
| 1019 |
+
},
|
| 1020 |
+
{"type": "input_image", "image_url": data_url},
|
| 1021 |
+
],
|
| 1022 |
+
},
|
| 1023 |
+
],
|
| 1024 |
+
)
|
| 1025 |
+
|
| 1026 |
+
raw_text = _extract_output_text(response)
|
| 1027 |
+
parsed = _safe_parse_json(raw_text)
|
| 1028 |
+
if parsed is None:
|
| 1029 |
+
return {
|
| 1030 |
+
"assessment": "UNSURE",
|
| 1031 |
+
"confidence": 35,
|
| 1032 |
+
"summary": "Fast screening returned non-JSON output; escalate to full forensic workflow.",
|
| 1033 |
+
"error": raw_text or "(empty response)",
|
| 1034 |
+
}
|
| 1035 |
+
return _normalize_fast_screening_result(parsed)
|
| 1036 |
+
|
| 1037 |
+
|
| 1038 |
+
@lru_cache(maxsize=1)
|
| 1039 |
+
def _load_hf_pretrained_detectors() -> dict[str, Any]:
|
| 1040 |
+
if not HF_DETECTOR_RUNTIME_AVAILABLE or torch is None:
|
| 1041 |
+
raise RuntimeError("transformers/torch runtime is unavailable")
|
| 1042 |
+
|
| 1043 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 1044 |
+
loaded: list[dict[str, Any]] = []
|
| 1045 |
+
failures: list[dict[str, str]] = []
|
| 1046 |
+
for item in HF_PRETRAINED_DETECTOR_MODELS:
|
| 1047 |
+
repo_id = item["repo_id"]
|
| 1048 |
+
name = item["name"]
|
| 1049 |
+
try:
|
| 1050 |
+
processor = AutoImageProcessor.from_pretrained(repo_id) # type: ignore[union-attr]
|
| 1051 |
+
model = AutoModelForImageClassification.from_pretrained(repo_id) # type: ignore[union-attr]
|
| 1052 |
+
model.to(device)
|
| 1053 |
+
model.eval()
|
| 1054 |
+
|
| 1055 |
+
id2label_raw = getattr(model.config, "id2label", {}) or {}
|
| 1056 |
+
id2label: dict[int, str] = {}
|
| 1057 |
+
for key, value in id2label_raw.items():
|
| 1058 |
+
try:
|
| 1059 |
+
index = int(key)
|
| 1060 |
+
except Exception:
|
| 1061 |
+
index = int(_to_float(key, -1))
|
| 1062 |
+
if index < 0:
|
| 1063 |
+
continue
|
| 1064 |
+
id2label[index] = str(value)
|
| 1065 |
+
loaded.append(
|
| 1066 |
+
{
|
| 1067 |
+
"name": name,
|
| 1068 |
+
"repo_id": repo_id,
|
| 1069 |
+
"processor": processor,
|
| 1070 |
+
"model": model,
|
| 1071 |
+
"id2label": id2label,
|
| 1072 |
+
}
|
| 1073 |
+
)
|
| 1074 |
+
except Exception as exc:
|
| 1075 |
+
failures.append({"name": name, "repo_id": repo_id, "error": f"{type(exc).__name__}: {exc}"})
|
| 1076 |
+
|
| 1077 |
+
return {"device": device, "detectors": loaded, "load_failures": failures}
|
| 1078 |
+
|
| 1079 |
+
|
| 1080 |
+
def _predict_single_hf_detector(runtime: dict[str, Any], image: Image.Image) -> dict[str, Any]:
|
| 1081 |
+
if not HF_DETECTOR_RUNTIME_AVAILABLE or torch is None:
|
| 1082 |
+
return {
|
| 1083 |
+
"name": runtime.get("name", "unknown"),
|
| 1084 |
+
"repo_id": runtime.get("repo_id", ""),
|
| 1085 |
+
"available": False,
|
| 1086 |
+
"error": "transformers/torch runtime is unavailable",
|
| 1087 |
+
}
|
| 1088 |
+
|
| 1089 |
+
name = runtime["name"]
|
| 1090 |
+
repo_id = runtime["repo_id"]
|
| 1091 |
+
processor = runtime["processor"]
|
| 1092 |
+
model = runtime["model"]
|
| 1093 |
+
id2label = runtime["id2label"]
|
| 1094 |
+
device = next(model.parameters()).device
|
| 1095 |
+
|
| 1096 |
+
inputs = processor(images=image.convert("RGB"), return_tensors="pt")
|
| 1097 |
+
inputs = {key: value.to(device) for key, value in inputs.items()}
|
| 1098 |
+
|
| 1099 |
+
with torch.no_grad():
|
| 1100 |
+
logits = model(**inputs).logits
|
| 1101 |
+
probs = torch.softmax(logits, dim=-1)[0]
|
| 1102 |
+
|
| 1103 |
+
top_index = int(torch.argmax(probs).item())
|
| 1104 |
+
top_confidence = float(probs[top_index].item())
|
| 1105 |
+
top_label = str(id2label.get(top_index, str(top_index)))
|
| 1106 |
+
top_label_normalized = top_label.strip().lower()
|
| 1107 |
+
|
| 1108 |
+
artificial_index: int | None = None
|
| 1109 |
+
for idx, label in id2label.items():
|
| 1110 |
+
if str(label).strip().lower() == "artificial":
|
| 1111 |
+
artificial_index = idx
|
| 1112 |
+
break
|
| 1113 |
+
|
| 1114 |
+
artificial_probability: float | None = None
|
| 1115 |
+
real_probability: float | None = None
|
| 1116 |
+
if artificial_index is not None:
|
| 1117 |
+
artificial_probability = float(probs[artificial_index].item())
|
| 1118 |
+
real_probability = max(0.0, min(1.0, 1.0 - artificial_probability))
|
| 1119 |
+
elif top_label_normalized == "artificial":
|
| 1120 |
+
artificial_probability = top_confidence
|
| 1121 |
+
real_probability = max(0.0, min(1.0, 1.0 - artificial_probability))
|
| 1122 |
+
elif _is_real_like_label(top_label_normalized):
|
| 1123 |
+
real_probability = top_confidence
|
| 1124 |
+
artificial_probability = max(0.0, min(1.0, 1.0 - real_probability))
|
| 1125 |
+
|
| 1126 |
+
signal = "UNCERTAIN"
|
| 1127 |
+
if artificial_probability is not None:
|
| 1128 |
+
if artificial_probability >= 0.75:
|
| 1129 |
+
signal = "AI_HINT"
|
| 1130 |
+
elif artificial_probability <= 0.25:
|
| 1131 |
+
signal = "REAL_HINT"
|
| 1132 |
+
elif top_label_normalized == "artificial" and top_confidence >= 0.75:
|
| 1133 |
+
signal = "AI_HINT"
|
| 1134 |
+
elif _is_real_like_label(top_label_normalized) and top_confidence >= 0.75:
|
| 1135 |
+
signal = "REAL_HINT"
|
| 1136 |
+
|
| 1137 |
+
class_probabilities: list[dict[str, Any]] = []
|
| 1138 |
+
for idx in range(int(probs.shape[-1])):
|
| 1139 |
+
label = str(id2label.get(idx, str(idx)))
|
| 1140 |
+
class_probabilities.append(
|
| 1141 |
+
{
|
| 1142 |
+
"label": label,
|
| 1143 |
+
"probability": round(float(probs[idx].item()) * 100, 2),
|
| 1144 |
+
}
|
| 1145 |
+
)
|
| 1146 |
+
class_probabilities.sort(key=lambda item: item["probability"], reverse=True)
|
| 1147 |
+
|
| 1148 |
+
return {
|
| 1149 |
+
"name": name,
|
| 1150 |
+
"repo_id": repo_id,
|
| 1151 |
+
"available": True,
|
| 1152 |
+
"predicted_label": top_label,
|
| 1153 |
+
"predicted_confidence": round(top_confidence * 100, 2),
|
| 1154 |
+
"artificial_probability": None if artificial_probability is None else round(artificial_probability * 100, 2),
|
| 1155 |
+
"real_probability": None if real_probability is None else round(real_probability * 100, 2),
|
| 1156 |
+
"signal": signal,
|
| 1157 |
+
"class_probabilities": class_probabilities[:5],
|
| 1158 |
+
"label_interpretation": "Label 'artificial' is interpreted as AI-generated imagery.",
|
| 1159 |
+
}
|
| 1160 |
+
|
| 1161 |
+
|
| 1162 |
+
def _aggregate_hf_detector_predictions(predictions: list[dict[str, Any]]) -> dict[str, Any]:
|
| 1163 |
+
valid = [item for item in predictions if item.get("available") is True]
|
| 1164 |
+
if not valid:
|
| 1165 |
+
return {
|
| 1166 |
+
"available_detector_count": 0,
|
| 1167 |
+
"overall_hint": "UNCERTAIN",
|
| 1168 |
+
"confidence": 0,
|
| 1169 |
+
"mean_artificial_probability": None,
|
| 1170 |
+
"observation": "No pretrained detector predictions are available.",
|
| 1171 |
+
}
|
| 1172 |
+
|
| 1173 |
+
artificial_probs: list[float] = []
|
| 1174 |
+
ai_hint_count = 0
|
| 1175 |
+
real_hint_count = 0
|
| 1176 |
+
|
| 1177 |
+
for item in valid:
|
| 1178 |
+
ap_raw = item.get("artificial_probability")
|
| 1179 |
+
if ap_raw is not None:
|
| 1180 |
+
artificial_probs.append(max(0.0, min(100.0, float(ap_raw))) / 100.0)
|
| 1181 |
+
signal = str(item.get("signal", "UNCERTAIN"))
|
| 1182 |
+
if signal == "AI_HINT":
|
| 1183 |
+
ai_hint_count += 1
|
| 1184 |
+
elif signal == "REAL_HINT":
|
| 1185 |
+
real_hint_count += 1
|
| 1186 |
+
|
| 1187 |
+
mean_ap: float | None = None
|
| 1188 |
+
if artificial_probs:
|
| 1189 |
+
mean_ap = float(sum(artificial_probs) / len(artificial_probs))
|
| 1190 |
+
|
| 1191 |
+
if mean_ap is not None:
|
| 1192 |
+
if mean_ap >= 0.65:
|
| 1193 |
+
overall_hint = "AI_GENERATED"
|
| 1194 |
+
elif mean_ap <= 0.35:
|
| 1195 |
+
overall_hint = "REAL"
|
| 1196 |
+
else:
|
| 1197 |
+
overall_hint = "UNCERTAIN"
|
| 1198 |
+
confidence = int(round(min(100.0, max(0.0, abs(mean_ap - 0.5) * 200.0))))
|
| 1199 |
+
else:
|
| 1200 |
+
if ai_hint_count > real_hint_count:
|
| 1201 |
+
overall_hint = "AI_GENERATED"
|
| 1202 |
+
confidence = 60
|
| 1203 |
+
elif real_hint_count > ai_hint_count:
|
| 1204 |
+
overall_hint = "REAL"
|
| 1205 |
+
confidence = 60
|
| 1206 |
+
else:
|
| 1207 |
+
overall_hint = "UNCERTAIN"
|
| 1208 |
+
confidence = 40
|
| 1209 |
+
|
| 1210 |
+
return {
|
| 1211 |
+
"available_detector_count": len(valid),
|
| 1212 |
+
"overall_hint": overall_hint,
|
| 1213 |
+
"confidence": confidence,
|
| 1214 |
+
"mean_artificial_probability": None if mean_ap is None else round(mean_ap * 100, 2),
|
| 1215 |
+
"ai_hint_count": ai_hint_count,
|
| 1216 |
+
"real_hint_count": real_hint_count,
|
| 1217 |
+
"observation": (
|
| 1218 |
+
"Both pretrained detectors treat label 'artificial' as the AI-image signal. "
|
| 1219 |
+
"Use this output as auxiliary evidence, not standalone proof."
|
| 1220 |
+
),
|
| 1221 |
+
}
|
| 1222 |
+
|
| 1223 |
+
|
| 1224 |
+
def _run_pretrained_hf_detectors(image: Image.Image) -> dict[str, Any]:
    """Run the loaded pretrained Hugging Face AI-image detectors on *image*.

    Returns a report dict with keys:
        runtime_available: whether the transformers/torch runtime is usable.
        device: inference device reported by the detector runtime (present
            only when the runtime is available).
        detectors: one prediction dict per detector, or an error entry when a
            detector raised during inference.
        aggregate: combined hint from ``_aggregate_hf_detector_predictions``.
        load_failures: detectors (or the runtime itself) that failed to load.
    """
    if not HF_DETECTOR_RUNTIME_AVAILABLE:
        # Degrade gracefully when torch/transformers cannot be imported:
        # return an empty report whose "aggregate" carries the same keys as
        # the real aggregate (including the hint counts) so downstream
        # consumers never hit a KeyError on the degraded path.
        return {
            "runtime_available": False,
            "detectors": [],
            "aggregate": {
                "available_detector_count": 0,
                "overall_hint": "UNCERTAIN",
                "confidence": 0,
                "mean_artificial_probability": None,
                "ai_hint_count": 0,
                "real_hint_count": 0,
                "observation": "transformers/torch is not available in runtime.",
            },
            "load_failures": [
                {"name": "runtime", "repo_id": "", "error": "transformers/torch is not installed or import failed"}
            ],
        }

    runtime = _load_hf_pretrained_detectors()
    detectors = runtime.get("detectors", [])
    predictions: list[dict[str, Any]] = []
    for detector in detectors:
        try:
            predictions.append(_predict_single_hf_detector(detector, image=image))
        except Exception as exc:
            # Best effort: a single failing detector must not abort the whole
            # run; record the failure so the agent can weigh it as evidence.
            predictions.append(
                {
                    "name": detector.get("name", "unknown"),
                    "repo_id": detector.get("repo_id", ""),
                    "available": False,
                    "error": f"{type(exc).__name__}: {exc}",
                }
            )
    aggregate = _aggregate_hf_detector_predictions(predictions)
    return {
        "runtime_available": True,
        "device": runtime.get("device"),
        "detectors": predictions,
        "aggregate": aggregate,
        "load_failures": runtime.get("load_failures", []),
    }
|
| 1264 |
+
|
| 1265 |
+
|
| 1266 |
def _normalize_discovered_regions(payload: dict[str, Any], max_regions: int) -> list[dict[str, Any]]:
|
| 1267 |
raw_regions = payload.get("regions", [])
|
| 1268 |
if not isinstance(raw_regions, list):
|
|
|
|
| 1522 |
return "\n\n".join(lines)
|
| 1523 |
|
| 1524 |
|
| 1525 |
+
def _build_tools(
|
| 1526 |
+
image: Image.Image,
|
| 1527 |
+
image_path: str | None = None,
|
| 1528 |
+
pretrained_hf_signal: dict[str, Any] | None = None,
|
| 1529 |
+
) -> list[Any]:
|
| 1530 |
@tool
|
| 1531 |
def common_sense_consistency_probe() -> str:
|
| 1532 |
"""Analyze full-image common-sense consistency (anatomy, text, physics, geometry, lighting, reflections)."""
|
|
|
|
| 1537 |
"""Use exiftool to extract selected metadata tags and summarize forensic clues."""
|
| 1538 |
return _json_dumps(_metadata_report(image, image_path=image_path))
|
| 1539 |
|
| 1540 |
+
@tool
|
| 1541 |
+
def run_pretrained_hf_detectors() -> str:
|
| 1542 |
+
"""Run two pretrained Hugging Face AI-image detectors and return their predictions as auxiliary forensic signals."""
|
| 1543 |
+
if isinstance(pretrained_hf_signal, dict):
|
| 1544 |
+
return _json_dumps(pretrained_hf_signal)
|
| 1545 |
+
return _json_dumps(_run_pretrained_hf_detectors(image=image))
|
| 1546 |
+
|
| 1547 |
@tool
|
| 1548 |
def run_low_level_forensics() -> str:
|
| 1549 |
"""Run low-level image heuristics such as texture smoothness, saturation, clipping, and ELA artifacts."""
|
|
|
|
| 1564 |
return [
|
| 1565 |
common_sense_consistency_probe,
|
| 1566 |
inspect_image_metadata,
|
| 1567 |
+
run_pretrained_hf_detectors,
|
| 1568 |
run_low_level_forensics,
|
| 1569 |
vision_specialist_probe,
|
| 1570 |
]
|
| 1571 |
|
| 1572 |
|
| 1573 |
+
def _build_graph(
|
| 1574 |
+
image: Image.Image,
|
| 1575 |
+
image_path: str | None = None,
|
| 1576 |
+
pretrained_hf_signal: dict[str, Any] | None = None,
|
| 1577 |
+
):
|
| 1578 |
+
tools = _build_tools(
|
| 1579 |
+
image,
|
| 1580 |
+
image_path=image_path,
|
| 1581 |
+
pretrained_hf_signal=pretrained_hf_signal,
|
| 1582 |
+
)
|
| 1583 |
llm = _build_langchain_model()
|
| 1584 |
llm_with_tools = llm.bind_tools(tools)
|
| 1585 |
tool_node = ToolNode(tools, handle_tool_errors=True)
|
|
|
|
| 1677 |
if cached is not None:
|
| 1678 |
return dict(cached)
|
| 1679 |
|
| 1680 |
+
pretrained_hf_signal: dict[str, Any] | None = None
|
| 1681 |
+
|
| 1682 |
+
if FAST_SCREENING_ENABLED:
|
| 1683 |
+
try:
|
| 1684 |
+
pretrained_hf_signal = _run_pretrained_hf_detectors(image=image)
|
| 1685 |
+
except Exception:
|
| 1686 |
+
pretrained_hf_signal = None
|
| 1687 |
+
|
| 1688 |
+
try:
|
| 1689 |
+
fast = _run_fast_screening(
|
| 1690 |
+
image=image,
|
| 1691 |
+
pretrained_signal=pretrained_hf_signal,
|
| 1692 |
+
)
|
| 1693 |
+
threshold = FAST_SCREENING_CONFIDENCE_THRESHOLD
|
| 1694 |
+
fast_assessment = str(fast.get("assessment", "UNSURE")).upper()
|
| 1695 |
+
fast_confidence = _normalize_final_confidence(fast.get("confidence"))
|
| 1696 |
+
if fast_assessment in {"REAL", "AI_GENERATED"} and fast_confidence >= threshold:
|
| 1697 |
+
detector_hint_text = ""
|
| 1698 |
+
if isinstance(pretrained_hf_signal, dict):
|
| 1699 |
+
aggregate = pretrained_hf_signal.get("aggregate", {})
|
| 1700 |
+
if isinstance(aggregate, dict):
|
| 1701 |
+
detector_hint_text = (
|
| 1702 |
+
f" Auxiliary pretrained-detector hint="
|
| 1703 |
+
f"{aggregate.get('overall_hint', 'UNCERTAIN')} "
|
| 1704 |
+
f"(confidence={aggregate.get('confidence', 0)})."
|
| 1705 |
+
)
|
| 1706 |
+
result = {
|
| 1707 |
+
"classification": "Real" if fast_assessment == "REAL" else "Fake",
|
| 1708 |
+
"confidence": fast_confidence,
|
| 1709 |
+
"summary": _normalize_non_empty_text(
|
| 1710 |
+
f"{fast.get('summary', '')}{detector_hint_text}".strip(),
|
| 1711 |
+
"Fast screening reached a decisive result.",
|
| 1712 |
+
),
|
| 1713 |
+
}
|
| 1714 |
+
if cache_enabled and cache_key:
|
| 1715 |
+
try:
|
| 1716 |
+
_save_cached_result(cache_key=cache_key, result=result)
|
| 1717 |
+
except Exception:
|
| 1718 |
+
pass
|
| 1719 |
+
return result
|
| 1720 |
+
except Exception:
|
| 1721 |
+
pass
|
| 1722 |
+
|
| 1723 |
+
graph = _build_graph(
|
| 1724 |
+
image,
|
| 1725 |
+
image_path=image_path,
|
| 1726 |
+
pretrained_hf_signal=pretrained_hf_signal,
|
| 1727 |
+
)
|
| 1728 |
initial_message = HumanMessage(
|
| 1729 |
content=(
|
| 1730 |
"Analyze the uploaded image with the available tools. "
|
|
|
|
| 1738 |
result = {
|
| 1739 |
"classification": _normalize_final_classification(verdict.classification),
|
| 1740 |
"confidence": _normalize_final_confidence(verdict.confidence),
|
| 1741 |
+
"summary": _normalize_non_empty_text(
|
| 1742 |
+
verdict.summary,
|
| 1743 |
+
"Insufficient evidence for a detailed summary.",
|
| 1744 |
+
),
|
| 1745 |
}
|
| 1746 |
if cache_enabled and cache_key:
|
| 1747 |
try:
|
requirements.txt
CHANGED
|
@@ -6,3 +6,5 @@ numpy>=1.26.0
|
|
| 6 |
openai>=1.40.0
|
| 7 |
Pillow>=10.0.0
|
| 8 |
pydantic>=2.7.0
|
|
|
|
|
|
|
|
|
| 6 |
openai>=1.40.0
|
| 7 |
Pillow>=10.0.0
|
| 8 |
pydantic>=2.7.0
|
| 9 |
+
torch>=2.3.0
|
| 10 |
+
transformers>=4.44.0
|