Spaces:
Runtime error
Runtime error
Sync from GitHub via hub-sync
Browse files
- Dockerfile +3 -0
- api/v1/analyze.py +2 -1
- api/v1/history.py +2 -0
- api/v1/report.py +2 -0
- schemas/analyze.py +1 -0
- schemas/common.py +2 -0
- services/exif_service.py +11 -1
- services/llm_explainer.py +11 -5
Dockerfile
CHANGED
|
@@ -42,6 +42,9 @@ RUN pip install --no-cache-dir \
|
|
| 42 |
# Install everything else
|
| 43 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 44 |
|
|
|
|
|
|
|
|
|
|
| 45 |
# ── App code ──────────────────────────────────────────────────────────────────
|
| 46 |
COPY . .
|
| 47 |
|
|
|
|
| 42 |
# Install everything else
|
| 43 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 44 |
|
| 45 |
+
# Download spaCy English model
|
| 46 |
+
RUN python -m spacy download en_core_web_sm
|
| 47 |
+
|
| 48 |
# ── App code ──────────────────────────────────────────────────────────────────
|
| 49 |
COPY . .
|
| 50 |
|
api/v1/analyze.py
CHANGED
|
@@ -92,7 +92,7 @@ def _compute_llm_summary(resp, *, record_id: int, user, media_kind: str, exclude
|
|
| 92 |
"""Generate the LLM summary for `resp`. Swallows provider errors gracefully."""
|
| 93 |
try:
|
| 94 |
payload = resp.model_dump(exclude=exclude) if exclude else resp.model_dump()
|
| 95 |
-
return generate_llm_summary(payload=payload, record_id=str(record_id))
|
| 96 |
except Exception as e: # noqa: BLE001
|
| 97 |
logger.warning(f"LLM explainer failed for {media_kind}: {e}")
|
| 98 |
return None
|
|
@@ -572,6 +572,7 @@ async def analyze_text_endpoint(
|
|
| 572 |
model_label=clf.label,
|
| 573 |
),
|
| 574 |
explainability=TextExplainability(
|
|
|
|
| 575 |
fake_probability=effective_fake_prob,
|
| 576 |
top_label=clf.label,
|
| 577 |
all_scores=clf.all_scores,
|
|
|
|
| 92 |
"""Generate the LLM summary for `resp`. Swallows provider errors gracefully."""
|
| 93 |
try:
|
| 94 |
payload = resp.model_dump(exclude=exclude) if exclude else resp.model_dump()
|
| 95 |
+
return generate_llm_summary(payload=payload, record_id=str(record_id), media_kind=media_kind)
|
| 96 |
except Exception as e: # noqa: BLE001
|
| 97 |
logger.warning(f"LLM explainer failed for {media_kind}: {e}")
|
| 98 |
return None
|
|
|
|
| 572 |
model_label=clf.label,
|
| 573 |
),
|
| 574 |
explainability=TextExplainability(
|
| 575 |
+
original_text=body.text,
|
| 576 |
fake_probability=effective_fake_prob,
|
| 577 |
top_label=clf.label,
|
| 578 |
all_scores=clf.all_scores,
|
api/v1/history.py
CHANGED
|
@@ -21,6 +21,7 @@ class HistoryItem(BaseModel):
|
|
| 21 |
authenticity_score: float
|
| 22 |
created_at: datetime
|
| 23 |
thumbnail_url: str | None = None
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
class HistoryListResponse(BaseModel):
|
|
@@ -46,6 +47,7 @@ def list_history(
|
|
| 46 |
authenticity_score=r.authenticity_score,
|
| 47 |
created_at=r.created_at,
|
| 48 |
thumbnail_url=r.thumbnail_url,
|
|
|
|
| 49 |
)
|
| 50 |
for r in rows
|
| 51 |
]
|
|
|
|
| 21 |
authenticity_score: float
|
| 22 |
created_at: datetime
|
| 23 |
thumbnail_url: str | None = None
|
| 24 |
+
media_path: str | None = None
|
| 25 |
|
| 26 |
|
| 27 |
class HistoryListResponse(BaseModel):
|
|
|
|
| 47 |
authenticity_score=r.authenticity_score,
|
| 48 |
created_at=r.created_at,
|
| 49 |
thumbnail_url=r.thumbnail_url,
|
| 50 |
+
media_path=r.media_path,
|
| 51 |
)
|
| 52 |
for r in rows
|
| 53 |
]
|
api/v1/report.py
CHANGED
|
@@ -40,6 +40,7 @@ def _assert_record_access(record: AnalysisRecord, user: User | None, token: str
|
|
| 40 |
def generate(
|
| 41 |
request: Request,
|
| 42 |
analysis_id: int,
|
|
|
|
| 43 |
token: str | None = Query(None),
|
| 44 |
db: Session = Depends(get_db),
|
| 45 |
user: User | None = Depends(optional_current_user),
|
|
@@ -79,6 +80,7 @@ def generate(
|
|
| 79 |
def download(
|
| 80 |
request: Request,
|
| 81 |
analysis_id: int,
|
|
|
|
| 82 |
token: str | None = Query(None),
|
| 83 |
db: Session = Depends(get_db),
|
| 84 |
user: User | None = Depends(optional_current_user),
|
|
|
|
| 40 |
def generate(
|
| 41 |
request: Request,
|
| 42 |
analysis_id: int,
|
| 43 |
+
response: Response,
|
| 44 |
token: str | None = Query(None),
|
| 45 |
db: Session = Depends(get_db),
|
| 46 |
user: User | None = Depends(optional_current_user),
|
|
|
|
| 80 |
def download(
|
| 81 |
request: Request,
|
| 82 |
analysis_id: int,
|
| 83 |
+
response: Response,
|
| 84 |
token: str | None = Query(None),
|
| 85 |
db: Session = Depends(get_db),
|
| 86 |
user: User | None = Depends(optional_current_user),
|
schemas/analyze.py
CHANGED
|
@@ -37,6 +37,7 @@ class ManipulationIndicatorOut(BaseModel):
|
|
| 37 |
|
| 38 |
|
| 39 |
class TextExplainability(BaseModel):
|
|
|
|
| 40 |
fake_probability: float
|
| 41 |
top_label: str
|
| 42 |
all_scores: dict = {}
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
class TextExplainability(BaseModel):
|
| 40 |
+
original_text: str = ""
|
| 41 |
fake_probability: float
|
| 42 |
top_label: str
|
| 43 |
all_scores: dict = {}
|
schemas/common.py
CHANGED
|
@@ -56,6 +56,8 @@ class ExifSummary(BaseModel):
|
|
| 56 |
gps_info: Optional[str] = None
|
| 57 |
software: Optional[str] = None
|
| 58 |
lens_model: Optional[str] = None
|
|
|
|
|
|
|
| 59 |
trust_adjustment: int = 0 # negative = more real, positive = more fake
|
| 60 |
trust_reason: str = ""
|
| 61 |
|
|
|
|
| 56 |
gps_info: Optional[str] = None
|
| 57 |
software: Optional[str] = None
|
| 58 |
lens_model: Optional[str] = None
|
| 59 |
+
icc_profile: Optional[bool] = False
|
| 60 |
+
maker_note: Optional[bool] = False
|
| 61 |
trust_adjustment: int = 0 # negative = more real, positive = more fake
|
| 62 |
trust_reason: str = ""
|
| 63 |
|
services/exif_service.py
CHANGED
|
@@ -76,6 +76,9 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
|
|
| 76 |
summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
|
| 77 |
summary.software = str(tags.get("Image Software", "")).strip() or None
|
| 78 |
summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
|
|
|
|
|
|
|
|
|
|
| 79 |
except ImportError:
|
| 80 |
logger.debug("exifread not installed, skipping fallback EXIF extraction")
|
| 81 |
except Exception as e:
|
|
@@ -93,6 +96,9 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
|
|
| 93 |
summary.software = str(decoded.get("Software", "")).strip() or None
|
| 94 |
summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
|
| 95 |
|
|
|
|
|
|
|
|
|
|
| 96 |
# GPS
|
| 97 |
gps_raw = decoded.get("GPSInfo")
|
| 98 |
if gps_raw and isinstance(gps_raw, dict):
|
|
@@ -108,7 +114,11 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
|
|
| 108 |
has_camera_meta = summary.make and summary.model and summary.datetime_original
|
| 109 |
if has_camera_meta:
|
| 110 |
adjustment -= 8
|
| 111 |
-
reasons.append("valid camera metadata
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
if summary.gps_info:
|
| 114 |
adjustment -= 2
|
|
|
|
| 76 |
summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
|
| 77 |
summary.software = str(tags.get("Image Software", "")).strip() or None
|
| 78 |
summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
|
| 79 |
+
|
| 80 |
+
summary.icc_profile = bool(pil_img.info.get("icc_profile"))
|
| 81 |
+
summary.maker_note = bool(tags.get("EXIF MakerNote"))
|
| 82 |
except ImportError:
|
| 83 |
logger.debug("exifread not installed, skipping fallback EXIF extraction")
|
| 84 |
except Exception as e:
|
|
|
|
| 96 |
summary.software = str(decoded.get("Software", "")).strip() or None
|
| 97 |
summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
|
| 98 |
|
| 99 |
+
summary.icc_profile = bool(pil_img.info.get("icc_profile"))
|
| 100 |
+
summary.maker_note = bool(decoded.get("MakerNote"))
|
| 101 |
+
|
| 102 |
# GPS
|
| 103 |
gps_raw = decoded.get("GPSInfo")
|
| 104 |
if gps_raw and isinstance(gps_raw, dict):
|
|
|
|
| 114 |
has_camera_meta = summary.make and summary.model and summary.datetime_original
|
| 115 |
if has_camera_meta:
|
| 116 |
adjustment -= 8
|
| 117 |
+
reasons.append("valid camera metadata")
|
| 118 |
+
|
| 119 |
+
if summary.maker_note:
|
| 120 |
+
adjustment -= 10
|
| 121 |
+
reasons.append("proprietary MakerNote present")
|
| 122 |
|
| 123 |
if summary.gps_info:
|
| 124 |
adjustment -= 2
|
services/llm_explainer.py
CHANGED
|
@@ -52,6 +52,9 @@ _PROMPT_TEMPLATE = """\
|
|
| 52 |
You are DeepShield's explainability engine. Given the JSON analysis payload below,
|
| 53 |
write a concise, accessible summary for a non-technical user.
|
| 54 |
|
|
|
|
|
|
|
|
|
|
| 55 |
**Output format (strict JSON only — no markdown fences):**
|
| 56 |
{{
|
| 57 |
"paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
|
|
@@ -63,10 +66,12 @@ write a concise, accessible summary for a non-technical user.
|
|
| 63 |
}}
|
| 64 |
|
| 65 |
Rules:
|
| 66 |
-
- Be factual.
|
| 67 |
-
-
|
| 68 |
-
-
|
| 69 |
-
-
|
|
|
|
|
|
|
| 70 |
- Keep the paragraph under 60 words. Each bullet under 20 words.
|
| 71 |
|
| 72 |
**Analysis payload:**
|
|
@@ -211,6 +216,7 @@ def _parse_llm_response(raw: str) -> tuple[str, list[str]]:
|
|
| 211 |
def generate_llm_summary(
|
| 212 |
payload: dict[str, Any],
|
| 213 |
record_id: str | None = None,
|
|
|
|
| 214 |
) -> LLMExplainabilitySummary:
|
| 215 |
"""Generate an LLM-powered plain-English explanation for an analysis result.
|
| 216 |
|
|
@@ -248,7 +254,7 @@ def generate_llm_summary(
|
|
| 248 |
slim_payload["explainability"] = expl
|
| 249 |
|
| 250 |
prompt_body = json.dumps(slim_payload, indent=2, default=str, sort_keys=True)
|
| 251 |
-
prompt = _PROMPT_TEMPLATE.format(payload_json=prompt_body)
|
| 252 |
|
| 253 |
# Content-hash cache — dedups "same analysis re-run" across users / record_ids
|
| 254 |
content_hash = hashlib.sha256(
|
|
|
|
| 52 |
You are DeepShield's explainability engine. Given the JSON analysis payload below,
|
| 53 |
write a concise, accessible summary for a non-technical user.
|
| 54 |
|
| 55 |
+
This analysis is for a {media_kind}. Please customize the summary terminology to fit this domain
|
| 56 |
+
(e.g., mention wording/tone/heuristics for text, visuals/pixels/metadata for images, frames/motion for video, audio anomalies/frequencies for audio).
|
| 57 |
+
|
| 58 |
**Output format (strict JSON only — no markdown fences):**
|
| 59 |
{{
|
| 60 |
"paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
|
|
|
|
| 66 |
}}
|
| 67 |
|
| 68 |
Rules:
|
| 69 |
+
- Be strictly factual. Do NOT hallucinate content or describe the image based on assumptions. Only state what the analysis payload found.
|
| 70 |
+
- If the image contains text (e.g. from OCR), quote it accurately but do NOT assume it applies to the entire image unless relevant.
|
| 71 |
+
- Reference specific technical indicators from the payload (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
|
| 72 |
+
- Avoid generic phrases like "The image itself explicitly labels...". Instead, point out specific visual anomalies or text anomalies detected by the models.
|
| 73 |
+
- If the verdict is "Likely Authentic", reassure the user based on the lack of artifacts and strong metadata.
|
| 74 |
+
- If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence (e.g., specific artifacts, low metadata trust, high model confidence).
|
| 75 |
- Keep the paragraph under 60 words. Each bullet under 20 words.
|
| 76 |
|
| 77 |
**Analysis payload:**
|
|
|
|
| 216 |
def generate_llm_summary(
|
| 217 |
payload: dict[str, Any],
|
| 218 |
record_id: str | None = None,
|
| 219 |
+
media_kind: str = "media",
|
| 220 |
) -> LLMExplainabilitySummary:
|
| 221 |
"""Generate an LLM-powered plain-English explanation for an analysis result.
|
| 222 |
|
|
|
|
| 254 |
slim_payload["explainability"] = expl
|
| 255 |
|
| 256 |
prompt_body = json.dumps(slim_payload, indent=2, default=str, sort_keys=True)
|
| 257 |
+
prompt = _PROMPT_TEMPLATE.format(media_kind=media_kind, payload_json=prompt_body)
|
| 258 |
|
| 259 |
# Content-hash cache — dedups "same analysis re-run" across users / record_ids
|
| 260 |
content_hash = hashlib.sha256(
|