Crison11 committed on
Commit
5de4db9
·
1 Parent(s): dba94fc

feat: enhance detector agent with fast screening

Browse files
Files changed (4) hide show
  1. README.md +2 -5
  2. app.py +4 -4
  3. detector_agent.py +510 -12
  4. requirements.txt +2 -0
README.md CHANGED
@@ -20,9 +20,10 @@ This Space now provides a LangGraph-based AI-image detection agent:
20
  3. The tool layer combines:
21
  - common-sense consistency probe (agent-callable tool)
22
  - image metadata inspection (via `exiftool`)
 
23
  - low-level forensic heuristics
24
  - external vision specialist probe with auto multi-region discovery and zoomed crop analysis
25
- 4. The agent synthesizes the evidence and returns:
26
  - `classification`: `Real` / `Fake`
27
  - `confidence`: `0-100`
28
 
@@ -34,10 +35,6 @@ Set the following in your Space settings:
34
  - `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
35
  - `OPENAI_BASE_URL` (optional, for OpenAI-compatible third-party services)
36
  - `APP_API_TOKEN` (optional, used to protect API endpoint calls)
37
- - `DETECTOR_CACHE_ENABLED` (optional, default: `1`)
38
- - `DETECTOR_CACHE_TTL_SECONDS` (optional, default: `21600`)
39
- - `DETECTOR_CACHE_DIR` (optional, default: `/tmp/aifo_detector_cache`)
40
- - `DETECTOR_GRAPH_RECURSION_LIMIT` (optional, default: `24`)
41
 
42
  ## UI Features
43
 
 
20
  3. The tool layer combines:
21
  - common-sense consistency probe (agent-callable tool)
22
  - image metadata inspection (via `exiftool`)
23
+ - two pretrained Hugging Face AI-image detectors
24
  - low-level forensic heuristics
25
  - external vision specialist probe with auto multi-region discovery and zoomed crop analysis
26
+ 4. The agent synthesizes the evidence and returns:
27
  - `classification`: `Real` / `Fake`
28
  - `confidence`: `0-100`
29
 
 
35
  - `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
36
  - `OPENAI_BASE_URL` (optional, for OpenAI-compatible third-party services)
37
  - `APP_API_TOKEN` (optional, used to protect API endpoint calls)
 
 
 
 
38
 
39
  ## UI Features
40
 
app.py CHANGED
@@ -62,9 +62,6 @@ Before running, configure these Hugging Face Space Secrets:
62
  - `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
63
  - `OPENAI_BASE_URL` (optional, for compatible third-party endpoints)
64
  - `APP_API_TOKEN` (optional, protect API endpoint calls)
65
- - `DETECTOR_CACHE_ENABLED` (optional, default: `1`)
66
- - `DETECTOR_CACHE_TTL_SECONDS` (optional, default: `21600`)
67
- - `DETECTOR_CACHE_DIR` (optional, default: `/tmp/aifo_detector_cache`)
68
  """
69
  )
70
 
@@ -80,7 +77,10 @@ Before running, configure these Hugging Face Space Secrets:
80
  run_btn.click(
81
  fn=analyze_image_ui,
82
  inputs=[image_input],
83
- outputs=[classification_out, confidence_out],
 
 
 
84
  api_name="analyze_ui",
85
  )
86
 
 
62
  - `OPENAI_MODEL` (optional, default: `gpt-5-mini`)
63
  - `OPENAI_BASE_URL` (optional, for compatible third-party endpoints)
64
  - `APP_API_TOKEN` (optional, protect API endpoint calls)
 
 
 
65
  """
66
  )
67
 
 
77
  run_btn.click(
78
  fn=analyze_image_ui,
79
  inputs=[image_input],
80
+ outputs=[
81
+ classification_out,
82
+ confidence_out,
83
+ ],
84
  api_name="analyze_ui",
85
  )
86
 
detector_agent.py CHANGED
@@ -7,6 +7,7 @@ import shutil
7
  import subprocess
8
  import tempfile
9
  import time
 
10
  from pathlib import Path
11
  from typing import Any, Literal
12
 
@@ -20,6 +21,17 @@ from openai import OpenAI
20
  from PIL import ExifTags, Image, ImageChops, ImageStat
21
  from pydantic import BaseModel, Field, field_validator
22
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  AGENT_SYSTEM_PROMPT = """
25
  You are an image-forensics coordinator.
@@ -28,7 +40,7 @@ You do not see the image directly. Your job is to decide which tools to call.
28
  Rules:
29
  - Prefer gathering evidence before reaching a conclusion.
30
  - Prefer calling the common-sense consistency tool early when image logic/plausibility matters.
31
- - Usually call inspect_image_metadata first, then at least one deterministic local forensic tool and one vision probe.
32
  - For vision probe calls, prefer multi-region inspection to cover diverse clues (faces, text, boundaries, limbs, animals, reflections).
33
  - Stop calling tools once you have enough evidence. Do not provide the final verdict yourself.
34
  - Keep the process efficient. Usually 2-5 tool calls are enough.
@@ -39,13 +51,47 @@ SYNTHESIS_SYSTEM_PROMPT = """
39
  You are a senior image-forensics judge.
40
  Review the collected tool evidence and return a final verdict.
41
 
42
- Output requirements:
43
  - classification must be Real or Fake
44
  - confidence must be an integer between 0 and 100
45
- - output only these two fields
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- Fake means likely AI-generated or not a real camera photo.
48
- Real means likely a real camera photo.
 
 
 
 
 
 
 
 
 
49
  """.strip()
50
 
51
 
@@ -185,9 +231,21 @@ AI_METADATA_KEYWORDS = [
185
  ]
186
 
187
 
188
- CACHE_SCHEMA_VERSION = "detector_cache_v1"
189
  CACHE_DEFAULT_TTL_SECONDS = 6 * 60 * 60
190
  GRAPH_RECURSION_LIMIT_DEFAULT = 24
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
 
193
  def _normalize_final_classification(value: Any) -> str:
@@ -209,9 +267,22 @@ def _normalize_final_confidence(value: Any) -> int:
209
  return max(0, min(100, numeric))
210
 
211
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  class DetectorVerdict(BaseModel):
213
  classification: Literal["Real", "Fake"]
214
  confidence: int = Field(ge=0, le=100)
 
215
 
216
  @field_validator("classification", mode="before")
217
  @classmethod
@@ -223,6 +294,14 @@ class DetectorVerdict(BaseModel):
223
  def normalize_confidence(cls, value: Any) -> int:
224
  return _normalize_final_confidence(value)
225
 
 
 
 
 
 
 
 
 
226
 
227
  def _build_openai_client() -> OpenAI:
228
  api_key = os.getenv("OPENAI_API_KEY")
@@ -344,6 +423,7 @@ def _build_detector_cache_key(image: Image.Image, image_path: str | None) -> str
344
  [
345
  AGENT_SYSTEM_PROMPT,
346
  SYNTHESIS_SYSTEM_PROMPT,
 
347
  VISION_TOOL_PROMPT,
348
  VISION_REGION_DISCOVERY_PROMPT,
349
  COMMON_SENSE_SYSTEM_PROMPT,
@@ -356,6 +436,9 @@ def _build_detector_cache_key(image: Image.Image, image_path: str | None) -> str
356
  "image_digest": image_digest,
357
  "model": model,
358
  "base_url": base_url,
 
 
 
359
  "prompt_fingerprint": prompt_fingerprint,
360
  }
361
  raw = json.dumps(payload, ensure_ascii=False, sort_keys=True)
@@ -386,9 +469,26 @@ def _load_cached_result(cache_key: str, ttl_seconds: int) -> dict[str, Any] | No
386
  except Exception:
387
  pass
388
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
  return {
390
- "classification": _normalize_final_classification(result.get("classification")),
391
- "confidence": _normalize_final_confidence(result.get("confidence")),
 
392
  }
393
 
394
 
@@ -836,6 +936,333 @@ def _to_float(value: Any, default: float) -> float:
836
  return default
837
 
838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
839
  def _normalize_discovered_regions(payload: dict[str, Any], max_regions: int) -> list[dict[str, Any]]:
840
  raw_regions = payload.get("regions", [])
841
  if not isinstance(raw_regions, list):
@@ -1095,7 +1522,11 @@ def _messages_to_text(messages: list[Any]) -> str:
1095
  return "\n\n".join(lines)
1096
 
1097
 
1098
- def _build_tools(image: Image.Image, image_path: str | None = None) -> list[Any]:
 
 
 
 
1099
  @tool
1100
  def common_sense_consistency_probe() -> str:
1101
  """Analyze full-image common-sense consistency (anatomy, text, physics, geometry, lighting, reflections)."""
@@ -1106,6 +1537,13 @@ def _build_tools(image: Image.Image, image_path: str | None = None) -> list[Any]
1106
  """Use exiftool to extract selected metadata tags and summarize forensic clues."""
1107
  return _json_dumps(_metadata_report(image, image_path=image_path))
1108
 
 
 
 
 
 
 
 
1109
  @tool
1110
  def run_low_level_forensics() -> str:
1111
  """Run low-level image heuristics such as texture smoothness, saturation, clipping, and ELA artifacts."""
@@ -1126,13 +1564,22 @@ def _build_tools(image: Image.Image, image_path: str | None = None) -> list[Any]
1126
  return [
1127
  common_sense_consistency_probe,
1128
  inspect_image_metadata,
 
1129
  run_low_level_forensics,
1130
  vision_specialist_probe,
1131
  ]
1132
 
1133
 
1134
- def _build_graph(image: Image.Image, image_path: str | None = None):
1135
- tools = _build_tools(image, image_path=image_path)
 
 
 
 
 
 
 
 
1136
  llm = _build_langchain_model()
1137
  llm_with_tools = llm.bind_tools(tools)
1138
  tool_node = ToolNode(tools, handle_tool_errors=True)
@@ -1230,7 +1677,54 @@ def run_detector_agent(
1230
  if cached is not None:
1231
  return dict(cached)
1232
 
1233
- graph = _build_graph(image, image_path=image_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1234
  initial_message = HumanMessage(
1235
  content=(
1236
  "Analyze the uploaded image with the available tools. "
@@ -1244,6 +1738,10 @@ def run_detector_agent(
1244
  result = {
1245
  "classification": _normalize_final_classification(verdict.classification),
1246
  "confidence": _normalize_final_confidence(verdict.confidence),
 
 
 
 
1247
  }
1248
  if cache_enabled and cache_key:
1249
  try:
 
7
  import subprocess
8
  import tempfile
9
  import time
10
+ from functools import lru_cache
11
  from pathlib import Path
12
  from typing import Any, Literal
13
 
 
21
  from PIL import ExifTags, Image, ImageChops, ImageStat
22
  from pydantic import BaseModel, Field, field_validator
23
 
24
+ try:
25
+ import torch
26
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
27
+
28
+ HF_DETECTOR_RUNTIME_AVAILABLE = True
29
+ except Exception:
30
+ torch = None
31
+ AutoImageProcessor = None
32
+ AutoModelForImageClassification = None
33
+ HF_DETECTOR_RUNTIME_AVAILABLE = False
34
+
35
 
36
  AGENT_SYSTEM_PROMPT = """
37
  You are an image-forensics coordinator.
 
40
  Rules:
41
  - Prefer gathering evidence before reaching a conclusion.
42
  - Prefer calling the common-sense consistency tool early when image logic/plausibility matters.
43
+ - Usually call inspect_image_metadata and run_pretrained_hf_detectors early, then at least one deterministic local forensic tool and one vision probe.
44
  - For vision probe calls, prefer multi-region inspection to cover diverse clues (faces, text, boundaries, limbs, animals, reflections).
45
  - Stop calling tools once you have enough evidence. Do not provide the final verdict yourself.
46
  - Keep the process efficient. Usually 2-5 tool calls are enough.
 
51
  You are a senior image-forensics judge.
52
  Review the collected tool evidence and return a final verdict.
53
 
54
+ Output requirements (JSON only):
55
  - classification must be Real or Fake
56
  - confidence must be an integer between 0 and 100
57
+ - summary must be one concise paragraph that combines:
58
+ 1) the final analysis,
59
+ 2) evidence basis for the verdict,
60
+ 3) image-specific forensic reasons
61
+
62
+ Evidence weighting guidance:
63
+ - Treat pretrained detector outputs as auxiliary signals.
64
+ - If detector signals conflict with visual/metadata evidence, lower confidence and reflect uncertainty.
65
+
66
+ Fake means likely AI-generated or not a real photo.
67
+ Real means likely a real photo.
68
+ """.strip()
69
+
70
+
71
+ FAST_SCREENING_SYSTEM_PROMPT = """
72
+ You are a fast image-authenticity screener.
73
+ Task:
74
+ - Determine whether the image is likely a real photo or likely AI-generated synthetic imagery.
75
+ - Use only visible evidence in the image.
76
+
77
+ Return strict JSON only:
78
+ {
79
+ "assessment": "REAL|AI_GENERATED|UNSURE",
80
+ "confidence": 0-100,
81
+ "summary": "one concise paragraph"
82
+ }
83
 
84
+ Definitions:
85
+ - REAL: likely captured by a real camera from the physical world (normal edits/compression are allowed).
86
+ - AI_GENERATED: likely primarily synthesized by a generative model.
87
+
88
+ Decision policy:
89
+ - Prefer speed over exhaustive analysis.
90
+ - Use strong visual cues only: anatomy coherence, text fidelity, geometry/perspective, reflections/shadows,
91
+ object boundaries, repeated texture patterns, and local artifacts.
92
+ - Output REAL or AI_GENERATED only if evidence is clearly one-sided.
93
+ - Output UNSURE when evidence is mixed/weak/insufficient.
94
+ - summary must include main cues and uncertainty in one short paragraph.
95
  """.strip()
96
 
97
 
 
231
  ]
232
 
233
 
234
+ CACHE_SCHEMA_VERSION = "detector_cache_v5"
235
  CACHE_DEFAULT_TTL_SECONDS = 6 * 60 * 60
236
  GRAPH_RECURSION_LIMIT_DEFAULT = 24
237
+ FAST_SCREENING_ENABLED = True
238
+ FAST_SCREENING_CONFIDENCE_THRESHOLD = 80
239
+ HF_PRETRAINED_DETECTOR_MODELS = [
240
+ {
241
+ "name": "organika_sdxl_detector",
242
+ "repo_id": "Organika/sdxl-detector",
243
+ },
244
+ {
245
+ "name": "haywoodsloan_ai_image_detector_dev_deploy",
246
+ "repo_id": "haywoodsloan/ai-image-detector-dev-deploy",
247
+ },
248
+ ]
249
 
250
 
251
  def _normalize_final_classification(value: Any) -> str:
 
267
  return max(0, min(100, numeric))
268
 
269
 
270
+ def _normalize_non_empty_text(value: Any, fallback: str) -> str:
271
+ text = str(value or "").strip()
272
+ if not text:
273
+ return fallback
274
+ return text
275
+
276
+
277
+ def _is_real_like_label(label: str) -> bool:
278
+ normalized = label.strip().lower()
279
+ return normalized in {"real", "authentic", "natural", "photo", "photograph", "camera"}
280
+
281
+
282
  class DetectorVerdict(BaseModel):
283
  classification: Literal["Real", "Fake"]
284
  confidence: int = Field(ge=0, le=100)
285
+ summary: str = ""
286
 
287
  @field_validator("classification", mode="before")
288
  @classmethod
 
294
  def normalize_confidence(cls, value: Any) -> int:
295
  return _normalize_final_confidence(value)
296
 
297
+ @field_validator("summary", mode="before")
298
+ @classmethod
299
+ def normalize_summary(cls, value: Any) -> str:
300
+ return _normalize_non_empty_text(
301
+ value,
302
+ "Insufficient evidence for a detailed summary.",
303
+ )
304
+
305
 
306
  def _build_openai_client() -> OpenAI:
307
  api_key = os.getenv("OPENAI_API_KEY")
 
423
  [
424
  AGENT_SYSTEM_PROMPT,
425
  SYNTHESIS_SYSTEM_PROMPT,
426
+ FAST_SCREENING_SYSTEM_PROMPT,
427
  VISION_TOOL_PROMPT,
428
  VISION_REGION_DISCOVERY_PROMPT,
429
  COMMON_SENSE_SYSTEM_PROMPT,
 
436
  "image_digest": image_digest,
437
  "model": model,
438
  "base_url": base_url,
439
+ "fast_screening_enabled": FAST_SCREENING_ENABLED,
440
+ "fast_screening_confidence_threshold": FAST_SCREENING_CONFIDENCE_THRESHOLD,
441
+ "hf_pretrained_detector_repos": [item["repo_id"] for item in HF_PRETRAINED_DETECTOR_MODELS],
442
  "prompt_fingerprint": prompt_fingerprint,
443
  }
444
  raw = json.dumps(payload, ensure_ascii=False, sort_keys=True)
 
469
  except Exception:
470
  pass
471
  return None
472
+ classification = _normalize_final_classification(result.get("classification"))
473
+ confidence = _normalize_final_confidence(result.get("confidence"))
474
+ summary_value = result.get("summary")
475
+ if not str(summary_value or "").strip():
476
+ legacy_parts = [
477
+ str(result.get("final_analysis") or "").strip(),
478
+ str(result.get("judgment_basis") or "").strip(),
479
+ str(result.get("image_analysis_reason") or "").strip(),
480
+ ]
481
+ legacy_parts = [part for part in legacy_parts if part]
482
+ if legacy_parts:
483
+ summary_value = " ".join(legacy_parts)
484
+ summary = _normalize_non_empty_text(
485
+ summary_value,
486
+ f"Cached verdict: {classification} (confidence={confidence}).",
487
+ )
488
  return {
489
+ "classification": classification,
490
+ "confidence": confidence,
491
+ "summary": summary,
492
  }
493
 
494
 
 
936
  return default
937
 
938
 
939
+ def _normalize_fast_screening_result(data: dict[str, Any]) -> dict[str, Any]:
940
+ assessment = str(data.get("assessment", "UNSURE")).upper()
941
+ if assessment not in {"REAL", "AI_GENERATED", "UNSURE"}:
942
+ assessment = "UNSURE"
943
+
944
+ confidence = _normalize_final_confidence(data.get("confidence"))
945
+ summary = _normalize_non_empty_text(
946
+ data.get("summary"),
947
+ "Fast screening was inconclusive.",
948
+ )
949
+ return {
950
+ "assessment": assessment,
951
+ "confidence": confidence,
952
+ "summary": summary,
953
+ }
954
+
955
+
956
+ def _fast_screening_detector_signal_text(pretrained_signal: dict[str, Any] | None) -> str:
957
+ if not isinstance(pretrained_signal, dict):
958
+ return "No pretrained detector signal available."
959
+
960
+ aggregate = pretrained_signal.get("aggregate", {})
961
+ if not isinstance(aggregate, dict):
962
+ aggregate = {}
963
+
964
+ lines: list[str] = [
965
+ "Pretrained detector auxiliary signals (label 'artificial' => AI-generated):",
966
+ (
967
+ f"- aggregate.overall_hint={aggregate.get('overall_hint', 'UNCERTAIN')}, "
968
+ f"aggregate.confidence={aggregate.get('confidence', 0)}, "
969
+ f"aggregate.mean_artificial_probability={aggregate.get('mean_artificial_probability', None)}"
970
+ ),
971
+ ]
972
+
973
+ detectors = pretrained_signal.get("detectors", [])
974
+ if isinstance(detectors, list):
975
+ for item in detectors[:4]:
976
+ if not isinstance(item, dict):
977
+ continue
978
+ lines.append(
979
+ (
980
+ f"- {item.get('name', 'unknown')}: "
981
+ f"label={item.get('predicted_label', 'N/A')}, "
982
+ f"confidence={item.get('predicted_confidence', 'N/A')}, "
983
+ f"artificial_probability={item.get('artificial_probability', 'N/A')}, "
984
+ f"signal={item.get('signal', 'UNCERTAIN')}"
985
+ )
986
+ )
987
+
988
+ failures = pretrained_signal.get("load_failures", [])
989
+ if isinstance(failures, list) and failures:
990
+ lines.append(f"- load_failures={len(failures)}")
991
+ return "\n".join(lines)
992
+
993
+
994
+ def _run_fast_screening(
995
+ image: Image.Image,
996
+ pretrained_signal: dict[str, Any] | None = None,
997
+ ) -> dict[str, Any]:
998
+ client = _build_openai_client()
999
+ model = os.getenv("OPENAI_MODEL", "gpt-5-mini")
1000
+ data_url = _image_to_data_url(image)
1001
+ detector_signal_text = _fast_screening_detector_signal_text(pretrained_signal)
1002
+
1003
+ response = client.responses.create(
1004
+ model=model,
1005
+ input=[
1006
+ {"role": "system", "content": [{"type": "input_text", "text": FAST_SCREENING_SYSTEM_PROMPT}]},
1007
+ {
1008
+ "role": "user",
1009
+ "content": [
1010
+ {
1011
+ "type": "input_text",
1012
+ "text": (
1013
+ "Classify image authenticity in one quick pass: REAL vs AI_GENERATED vs UNSURE. "
1014
+ "This is a real-photo-vs-AI-generated decision task.\n"
1015
+ "Use pretrained detector signals below as auxiliary evidence (not as absolute truth):\n"
1016
+ f"{detector_signal_text}\n"
1017
+ "Return strict JSON only."
1018
+ ),
1019
+ },
1020
+ {"type": "input_image", "image_url": data_url},
1021
+ ],
1022
+ },
1023
+ ],
1024
+ )
1025
+
1026
+ raw_text = _extract_output_text(response)
1027
+ parsed = _safe_parse_json(raw_text)
1028
+ if parsed is None:
1029
+ return {
1030
+ "assessment": "UNSURE",
1031
+ "confidence": 35,
1032
+ "summary": "Fast screening returned non-JSON output; escalate to full forensic workflow.",
1033
+ "error": raw_text or "(empty response)",
1034
+ }
1035
+ return _normalize_fast_screening_result(parsed)
1036
+
1037
+
1038
+ @lru_cache(maxsize=1)
1039
+ def _load_hf_pretrained_detectors() -> dict[str, Any]:
1040
+ if not HF_DETECTOR_RUNTIME_AVAILABLE or torch is None:
1041
+ raise RuntimeError("transformers/torch runtime is unavailable")
1042
+
1043
+ device = "cuda" if torch.cuda.is_available() else "cpu"
1044
+ loaded: list[dict[str, Any]] = []
1045
+ failures: list[dict[str, str]] = []
1046
+ for item in HF_PRETRAINED_DETECTOR_MODELS:
1047
+ repo_id = item["repo_id"]
1048
+ name = item["name"]
1049
+ try:
1050
+ processor = AutoImageProcessor.from_pretrained(repo_id) # type: ignore[union-attr]
1051
+ model = AutoModelForImageClassification.from_pretrained(repo_id) # type: ignore[union-attr]
1052
+ model.to(device)
1053
+ model.eval()
1054
+
1055
+ id2label_raw = getattr(model.config, "id2label", {}) or {}
1056
+ id2label: dict[int, str] = {}
1057
+ for key, value in id2label_raw.items():
1058
+ try:
1059
+ index = int(key)
1060
+ except Exception:
1061
+ index = int(_to_float(key, -1))
1062
+ if index < 0:
1063
+ continue
1064
+ id2label[index] = str(value)
1065
+ loaded.append(
1066
+ {
1067
+ "name": name,
1068
+ "repo_id": repo_id,
1069
+ "processor": processor,
1070
+ "model": model,
1071
+ "id2label": id2label,
1072
+ }
1073
+ )
1074
+ except Exception as exc:
1075
+ failures.append({"name": name, "repo_id": repo_id, "error": f"{type(exc).__name__}: {exc}"})
1076
+
1077
+ return {"device": device, "detectors": loaded, "load_failures": failures}
1078
+
1079
+
1080
+ def _predict_single_hf_detector(runtime: dict[str, Any], image: Image.Image) -> dict[str, Any]:
1081
+ if not HF_DETECTOR_RUNTIME_AVAILABLE or torch is None:
1082
+ return {
1083
+ "name": runtime.get("name", "unknown"),
1084
+ "repo_id": runtime.get("repo_id", ""),
1085
+ "available": False,
1086
+ "error": "transformers/torch runtime is unavailable",
1087
+ }
1088
+
1089
+ name = runtime["name"]
1090
+ repo_id = runtime["repo_id"]
1091
+ processor = runtime["processor"]
1092
+ model = runtime["model"]
1093
+ id2label = runtime["id2label"]
1094
+ device = next(model.parameters()).device
1095
+
1096
+ inputs = processor(images=image.convert("RGB"), return_tensors="pt")
1097
+ inputs = {key: value.to(device) for key, value in inputs.items()}
1098
+
1099
+ with torch.no_grad():
1100
+ logits = model(**inputs).logits
1101
+ probs = torch.softmax(logits, dim=-1)[0]
1102
+
1103
+ top_index = int(torch.argmax(probs).item())
1104
+ top_confidence = float(probs[top_index].item())
1105
+ top_label = str(id2label.get(top_index, str(top_index)))
1106
+ top_label_normalized = top_label.strip().lower()
1107
+
1108
+ artificial_index: int | None = None
1109
+ for idx, label in id2label.items():
1110
+ if str(label).strip().lower() == "artificial":
1111
+ artificial_index = idx
1112
+ break
1113
+
1114
+ artificial_probability: float | None = None
1115
+ real_probability: float | None = None
1116
+ if artificial_index is not None:
1117
+ artificial_probability = float(probs[artificial_index].item())
1118
+ real_probability = max(0.0, min(1.0, 1.0 - artificial_probability))
1119
+ elif top_label_normalized == "artificial":
1120
+ artificial_probability = top_confidence
1121
+ real_probability = max(0.0, min(1.0, 1.0 - artificial_probability))
1122
+ elif _is_real_like_label(top_label_normalized):
1123
+ real_probability = top_confidence
1124
+ artificial_probability = max(0.0, min(1.0, 1.0 - real_probability))
1125
+
1126
+ signal = "UNCERTAIN"
1127
+ if artificial_probability is not None:
1128
+ if artificial_probability >= 0.75:
1129
+ signal = "AI_HINT"
1130
+ elif artificial_probability <= 0.25:
1131
+ signal = "REAL_HINT"
1132
+ elif top_label_normalized == "artificial" and top_confidence >= 0.75:
1133
+ signal = "AI_HINT"
1134
+ elif _is_real_like_label(top_label_normalized) and top_confidence >= 0.75:
1135
+ signal = "REAL_HINT"
1136
+
1137
+ class_probabilities: list[dict[str, Any]] = []
1138
+ for idx in range(int(probs.shape[-1])):
1139
+ label = str(id2label.get(idx, str(idx)))
1140
+ class_probabilities.append(
1141
+ {
1142
+ "label": label,
1143
+ "probability": round(float(probs[idx].item()) * 100, 2),
1144
+ }
1145
+ )
1146
+ class_probabilities.sort(key=lambda item: item["probability"], reverse=True)
1147
+
1148
+ return {
1149
+ "name": name,
1150
+ "repo_id": repo_id,
1151
+ "available": True,
1152
+ "predicted_label": top_label,
1153
+ "predicted_confidence": round(top_confidence * 100, 2),
1154
+ "artificial_probability": None if artificial_probability is None else round(artificial_probability * 100, 2),
1155
+ "real_probability": None if real_probability is None else round(real_probability * 100, 2),
1156
+ "signal": signal,
1157
+ "class_probabilities": class_probabilities[:5],
1158
+ "label_interpretation": "Label 'artificial' is interpreted as AI-generated imagery.",
1159
+ }
1160
+
1161
+
1162
+ def _aggregate_hf_detector_predictions(predictions: list[dict[str, Any]]) -> dict[str, Any]:
1163
+ valid = [item for item in predictions if item.get("available") is True]
1164
+ if not valid:
1165
+ return {
1166
+ "available_detector_count": 0,
1167
+ "overall_hint": "UNCERTAIN",
1168
+ "confidence": 0,
1169
+ "mean_artificial_probability": None,
1170
+ "observation": "No pretrained detector predictions are available.",
1171
+ }
1172
+
1173
+ artificial_probs: list[float] = []
1174
+ ai_hint_count = 0
1175
+ real_hint_count = 0
1176
+
1177
+ for item in valid:
1178
+ ap_raw = item.get("artificial_probability")
1179
+ if ap_raw is not None:
1180
+ artificial_probs.append(max(0.0, min(100.0, float(ap_raw))) / 100.0)
1181
+ signal = str(item.get("signal", "UNCERTAIN"))
1182
+ if signal == "AI_HINT":
1183
+ ai_hint_count += 1
1184
+ elif signal == "REAL_HINT":
1185
+ real_hint_count += 1
1186
+
1187
+ mean_ap: float | None = None
1188
+ if artificial_probs:
1189
+ mean_ap = float(sum(artificial_probs) / len(artificial_probs))
1190
+
1191
+ if mean_ap is not None:
1192
+ if mean_ap >= 0.65:
1193
+ overall_hint = "AI_GENERATED"
1194
+ elif mean_ap <= 0.35:
1195
+ overall_hint = "REAL"
1196
+ else:
1197
+ overall_hint = "UNCERTAIN"
1198
+ confidence = int(round(min(100.0, max(0.0, abs(mean_ap - 0.5) * 200.0))))
1199
+ else:
1200
+ if ai_hint_count > real_hint_count:
1201
+ overall_hint = "AI_GENERATED"
1202
+ confidence = 60
1203
+ elif real_hint_count > ai_hint_count:
1204
+ overall_hint = "REAL"
1205
+ confidence = 60
1206
+ else:
1207
+ overall_hint = "UNCERTAIN"
1208
+ confidence = 40
1209
+
1210
+ return {
1211
+ "available_detector_count": len(valid),
1212
+ "overall_hint": overall_hint,
1213
+ "confidence": confidence,
1214
+ "mean_artificial_probability": None if mean_ap is None else round(mean_ap * 100, 2),
1215
+ "ai_hint_count": ai_hint_count,
1216
+ "real_hint_count": real_hint_count,
1217
+ "observation": (
1218
+ "Both pretrained detectors treat label 'artificial' as the AI-image signal. "
1219
+ "Use this output as auxiliary evidence, not standalone proof."
1220
+ ),
1221
+ }
1222
+
1223
+
1224
+ def _run_pretrained_hf_detectors(image: Image.Image) -> dict[str, Any]:
1225
+ if not HF_DETECTOR_RUNTIME_AVAILABLE:
1226
+ return {
1227
+ "runtime_available": False,
1228
+ "detectors": [],
1229
+ "aggregate": {
1230
+ "available_detector_count": 0,
1231
+ "overall_hint": "UNCERTAIN",
1232
+ "confidence": 0,
1233
+ "mean_artificial_probability": None,
1234
+ "observation": "transformers/torch is not available in runtime.",
1235
+ },
1236
+ "load_failures": [
1237
+ {"name": "runtime", "repo_id": "", "error": "transformers/torch is not installed or import failed"}
1238
+ ],
1239
+ }
1240
+
1241
+ runtime = _load_hf_pretrained_detectors()
1242
+ detectors = runtime.get("detectors", [])
1243
+ predictions: list[dict[str, Any]] = []
1244
+ for detector in detectors:
1245
+ try:
1246
+ predictions.append(_predict_single_hf_detector(detector, image=image))
1247
+ except Exception as exc:
1248
+ predictions.append(
1249
+ {
1250
+ "name": detector.get("name", "unknown"),
1251
+ "repo_id": detector.get("repo_id", ""),
1252
+ "available": False,
1253
+ "error": f"{type(exc).__name__}: {exc}",
1254
+ }
1255
+ )
1256
+ aggregate = _aggregate_hf_detector_predictions(predictions)
1257
+ return {
1258
+ "runtime_available": True,
1259
+ "device": runtime.get("device"),
1260
+ "detectors": predictions,
1261
+ "aggregate": aggregate,
1262
+ "load_failures": runtime.get("load_failures", []),
1263
+ }
1264
+
1265
+
1266
  def _normalize_discovered_regions(payload: dict[str, Any], max_regions: int) -> list[dict[str, Any]]:
1267
  raw_regions = payload.get("regions", [])
1268
  if not isinstance(raw_regions, list):
 
1522
  return "\n\n".join(lines)
1523
 
1524
 
1525
+ def _build_tools(
1526
+ image: Image.Image,
1527
+ image_path: str | None = None,
1528
+ pretrained_hf_signal: dict[str, Any] | None = None,
1529
+ ) -> list[Any]:
1530
  @tool
1531
  def common_sense_consistency_probe() -> str:
1532
  """Analyze full-image common-sense consistency (anatomy, text, physics, geometry, lighting, reflections)."""
 
1537
  """Use exiftool to extract selected metadata tags and summarize forensic clues."""
1538
  return _json_dumps(_metadata_report(image, image_path=image_path))
1539
 
1540
+ @tool
1541
+ def run_pretrained_hf_detectors() -> str:
1542
+ """Run two pretrained Hugging Face AI-image detectors and return their predictions as auxiliary forensic signals."""
1543
+ if isinstance(pretrained_hf_signal, dict):
1544
+ return _json_dumps(pretrained_hf_signal)
1545
+ return _json_dumps(_run_pretrained_hf_detectors(image=image))
1546
+
1547
  @tool
1548
  def run_low_level_forensics() -> str:
1549
  """Run low-level image heuristics such as texture smoothness, saturation, clipping, and ELA artifacts."""
 
1564
  return [
1565
  common_sense_consistency_probe,
1566
  inspect_image_metadata,
1567
+ run_pretrained_hf_detectors,
1568
  run_low_level_forensics,
1569
  vision_specialist_probe,
1570
  ]
1571
 
1572
 
1573
+ def _build_graph(
1574
+ image: Image.Image,
1575
+ image_path: str | None = None,
1576
+ pretrained_hf_signal: dict[str, Any] | None = None,
1577
+ ):
1578
+ tools = _build_tools(
1579
+ image,
1580
+ image_path=image_path,
1581
+ pretrained_hf_signal=pretrained_hf_signal,
1582
+ )
1583
  llm = _build_langchain_model()
1584
  llm_with_tools = llm.bind_tools(tools)
1585
  tool_node = ToolNode(tools, handle_tool_errors=True)
 
1677
  if cached is not None:
1678
  return dict(cached)
1679
 
1680
+ pretrained_hf_signal: dict[str, Any] | None = None
1681
+
1682
+ if FAST_SCREENING_ENABLED:
1683
+ try:
1684
+ pretrained_hf_signal = _run_pretrained_hf_detectors(image=image)
1685
+ except Exception:
1686
+ pretrained_hf_signal = None
1687
+
1688
+ try:
1689
+ fast = _run_fast_screening(
1690
+ image=image,
1691
+ pretrained_signal=pretrained_hf_signal,
1692
+ )
1693
+ threshold = FAST_SCREENING_CONFIDENCE_THRESHOLD
1694
+ fast_assessment = str(fast.get("assessment", "UNSURE")).upper()
1695
+ fast_confidence = _normalize_final_confidence(fast.get("confidence"))
1696
+ if fast_assessment in {"REAL", "AI_GENERATED"} and fast_confidence >= threshold:
1697
+ detector_hint_text = ""
1698
+ if isinstance(pretrained_hf_signal, dict):
1699
+ aggregate = pretrained_hf_signal.get("aggregate", {})
1700
+ if isinstance(aggregate, dict):
1701
+ detector_hint_text = (
1702
+ f" Auxiliary pretrained-detector hint="
1703
+ f"{aggregate.get('overall_hint', 'UNCERTAIN')} "
1704
+ f"(confidence={aggregate.get('confidence', 0)})."
1705
+ )
1706
+ result = {
1707
+ "classification": "Real" if fast_assessment == "REAL" else "Fake",
1708
+ "confidence": fast_confidence,
1709
+ "summary": _normalize_non_empty_text(
1710
+ f"{fast.get('summary', '')}{detector_hint_text}".strip(),
1711
+ "Fast screening reached a decisive result.",
1712
+ ),
1713
+ }
1714
+ if cache_enabled and cache_key:
1715
+ try:
1716
+ _save_cached_result(cache_key=cache_key, result=result)
1717
+ except Exception:
1718
+ pass
1719
+ return result
1720
+ except Exception:
1721
+ pass
1722
+
1723
+ graph = _build_graph(
1724
+ image,
1725
+ image_path=image_path,
1726
+ pretrained_hf_signal=pretrained_hf_signal,
1727
+ )
1728
  initial_message = HumanMessage(
1729
  content=(
1730
  "Analyze the uploaded image with the available tools. "
 
1738
  result = {
1739
  "classification": _normalize_final_classification(verdict.classification),
1740
  "confidence": _normalize_final_confidence(verdict.confidence),
1741
+ "summary": _normalize_non_empty_text(
1742
+ verdict.summary,
1743
+ "Insufficient evidence for a detailed summary.",
1744
+ ),
1745
  }
1746
  if cache_enabled and cache_key:
1747
  try:
requirements.txt CHANGED
@@ -6,3 +6,5 @@ numpy>=1.26.0
6
  openai>=1.40.0
7
  Pillow>=10.0.0
8
  pydantic>=2.7.0
 
 
 
6
  openai>=1.40.0
7
  Pillow>=10.0.0
8
  pydantic>=2.7.0
9
+ torch>=2.3.0
10
+ transformers>=4.44.0