Spaces:

ar07xd
/

deepshield

Runtime error

App Files Community

ar07xd committed on Apr 29

Commit

59dd371

verified ·

1 Parent(s): ca6ba6b

Sync from GitHub via hub-sync

Browse files

Files changed (8) hide show

Dockerfile +3 -0
api/v1/analyze.py +2 -1
api/v1/history.py +2 -0
api/v1/report.py +2 -0
schemas/analyze.py +1 -0
schemas/common.py +2 -0
services/exif_service.py +11 -1
services/llm_explainer.py +11 -5

Dockerfile CHANGED Viewed

@@ -42,6 +42,9 @@ RUN pip install --no-cache-dir \
 # Install everything else
 RUN pip install --no-cache-dir -r requirements.txt
 # ── App code ──────────────────────────────────────────────────────────────────
 COPY . .

 # Install everything else
 RUN pip install --no-cache-dir -r requirements.txt
+# Download spaCy English model
+RUN python -m spacy download en_core_web_sm
 # ── App code ──────────────────────────────────────────────────────────────────
 COPY . .

api/v1/analyze.py CHANGED Viewed

@@ -92,7 +92,7 @@ def _compute_llm_summary(resp, *, record_id: int, user, media_kind: str, exclude
     """Generate the LLM summary for `resp`. Swallows provider errors gracefully."""
     try:
         payload = resp.model_dump(exclude=exclude) if exclude else resp.model_dump()
-        return generate_llm_summary(payload=payload, record_id=str(record_id))
     except Exception as e:  # noqa: BLE001
         logger.warning(f"LLM explainer failed for {media_kind}: {e}")
         return None
@@ -572,6 +572,7 @@ async def analyze_text_endpoint(
             model_label=clf.label,
         ),
         explainability=TextExplainability(
             fake_probability=effective_fake_prob,
             top_label=clf.label,
             all_scores=clf.all_scores,

     """Generate the LLM summary for `resp`. Swallows provider errors gracefully."""
     try:
         payload = resp.model_dump(exclude=exclude) if exclude else resp.model_dump()
+        return generate_llm_summary(payload=payload, record_id=str(record_id), media_kind=media_kind)
     except Exception as e:  # noqa: BLE001
         logger.warning(f"LLM explainer failed for {media_kind}: {e}")
         return None
             model_label=clf.label,
         ),
         explainability=TextExplainability(
+            original_text=body.text,
             fake_probability=effective_fake_prob,
             top_label=clf.label,
             all_scores=clf.all_scores,

api/v1/history.py CHANGED Viewed

@@ -21,6 +21,7 @@ class HistoryItem(BaseModel):
     authenticity_score: float
     created_at: datetime
     thumbnail_url: str | None = None
 class HistoryListResponse(BaseModel):
@@ -46,6 +47,7 @@ def list_history(
             authenticity_score=r.authenticity_score,
             created_at=r.created_at,
             thumbnail_url=r.thumbnail_url,
         )
         for r in rows
     ]

     authenticity_score: float
     created_at: datetime
     thumbnail_url: str | None = None
+    media_path: str | None = None
 class HistoryListResponse(BaseModel):
             authenticity_score=r.authenticity_score,
             created_at=r.created_at,
             thumbnail_url=r.thumbnail_url,
+            media_path=r.media_path,
         )
         for r in rows
     ]

api/v1/report.py CHANGED Viewed

@@ -40,6 +40,7 @@ def _assert_record_access(record: AnalysisRecord, user: User | None, token: str
 def generate(
     request: Request,
     analysis_id: int,
     token: str | None = Query(None),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
@@ -79,6 +80,7 @@ def generate(
 def download(
     request: Request,
     analysis_id: int,
     token: str | None = Query(None),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),

 def generate(
     request: Request,
     analysis_id: int,
+    response: Response,
     token: str | None = Query(None),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
 def download(
     request: Request,
     analysis_id: int,
+    response: Response,
     token: str | None = Query(None),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),

schemas/analyze.py CHANGED Viewed

@@ -37,6 +37,7 @@ class ManipulationIndicatorOut(BaseModel):
 class TextExplainability(BaseModel):
     fake_probability: float
     top_label: str
     all_scores: dict = {}

 class TextExplainability(BaseModel):
+    original_text: str = ""
     fake_probability: float
     top_label: str
     all_scores: dict = {}

schemas/common.py CHANGED Viewed

@@ -56,6 +56,8 @@ class ExifSummary(BaseModel):
     gps_info: Optional[str] = None
     software: Optional[str] = None
     lens_model: Optional[str] = None
     trust_adjustment: int = 0  # negative = more real, positive = more fake
     trust_reason: str = ""

     gps_info: Optional[str] = None
     software: Optional[str] = None
     lens_model: Optional[str] = None
+    icc_profile: Optional[bool] = False
+    maker_note: Optional[bool] = False
     trust_adjustment: int = 0  # negative = more real, positive = more fake
     trust_reason: str = ""

services/exif_service.py CHANGED Viewed

@@ -76,6 +76,9 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
                 summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
                 summary.software = str(tags.get("Image Software", "")).strip() or None
                 summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
         except ImportError:
             logger.debug("exifread not installed, skipping fallback EXIF extraction")
         except Exception as e:
@@ -93,6 +96,9 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
         summary.software = str(decoded.get("Software", "")).strip() or None
         summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
         # GPS
         gps_raw = decoded.get("GPSInfo")
         if gps_raw and isinstance(gps_raw, dict):
@@ -108,7 +114,11 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
     has_camera_meta = summary.make and summary.model and summary.datetime_original
     if has_camera_meta:
         adjustment -= 8
-        reasons.append("valid camera metadata (Make/Model/DateTime)")
     if summary.gps_info:
         adjustment -= 2

                 summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
                 summary.software = str(tags.get("Image Software", "")).strip() or None
                 summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
+                summary.icc_profile = bool(pil_img.info.get("icc_profile"))
+                summary.maker_note = bool(tags.get("EXIF MakerNote"))
         except ImportError:
             logger.debug("exifread not installed, skipping fallback EXIF extraction")
         except Exception as e:
         summary.software = str(decoded.get("Software", "")).strip() or None
         summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
+        summary.icc_profile = bool(pil_img.info.get("icc_profile"))
+        summary.maker_note = bool(decoded.get("MakerNote"))
         # GPS
         gps_raw = decoded.get("GPSInfo")
         if gps_raw and isinstance(gps_raw, dict):
     has_camera_meta = summary.make and summary.model and summary.datetime_original
     if has_camera_meta:
         adjustment -= 8
+        reasons.append("valid camera metadata")
+    if summary.maker_note:
+        adjustment -= 10
+        reasons.append("proprietary MakerNote present")
     if summary.gps_info:
         adjustment -= 2

services/llm_explainer.py CHANGED Viewed

@@ -52,6 +52,9 @@ _PROMPT_TEMPLATE = """\
 You are DeepShield's explainability engine. Given the JSON analysis payload below,
 write a concise, accessible summary for a non-technical user.
 **Output format (strict JSON only — no markdown fences):**
 {{
   "paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
@@ -63,10 +66,12 @@ write a concise, accessible summary for a non-technical user.
 }}
 Rules:
-- Be factual. State what the analysis found, not what you speculate.
-- Reference specific indicators (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
-- If the verdict is "Likely Authentic", reassure the user and explain why.
-- If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence.
 - Keep the paragraph under 60 words. Each bullet under 20 words.
 **Analysis payload:**
@@ -211,6 +216,7 @@ def _parse_llm_response(raw: str) -> tuple[str, list[str]]:
 def generate_llm_summary(
     payload: dict[str, Any],
     record_id: str | None = None,
 ) -> LLMExplainabilitySummary:
     """Generate an LLM-powered plain-English explanation for an analysis result.
@@ -248,7 +254,7 @@ def generate_llm_summary(
         slim_payload["explainability"] = expl
     prompt_body = json.dumps(slim_payload, indent=2, default=str, sort_keys=True)
-    prompt = _PROMPT_TEMPLATE.format(payload_json=prompt_body)
     # Content-hash cache — dedups "same analysis re-run" across users / record_ids
     content_hash = hashlib.sha256(

 You are DeepShield's explainability engine. Given the JSON analysis payload below,
 write a concise, accessible summary for a non-technical user.
+This analysis is for a {media_kind}. Please customize the summary terminology to fit this domain
+(e.g., mention wording/tone/heuristics for text, visuals/pixels/metadata for images, frames/motion for video, audio anomalies/frequencies for audio).
 **Output format (strict JSON only — no markdown fences):**
 {{
   "paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
 }}
 Rules:
+- Be strictly factual. Do NOT hallucinate content or describe the image based on assumptions. Only state what the analysis payload found.
+- If the image contains text (e.g. from OCR), quote it accurately but do NOT assume it applies to the entire image unless relevant.
+- Reference specific technical indicators from the payload (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
+- Avoid generic phrases like "The image itself explicitly labels...". Instead, point out specific visual anomalies or text anomalies detected by the models.
+- If the verdict is "Likely Authentic", reassure the user based on the lack of artifacts and strong metadata.
+- If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence (e.g., specific artifacts, low metadata trust, high model confidence).
 - Keep the paragraph under 60 words. Each bullet under 20 words.
 **Analysis payload:**
 def generate_llm_summary(
     payload: dict[str, Any],
     record_id: str | None = None,
+    media_kind: str = "media",
 ) -> LLMExplainabilitySummary:
     """Generate an LLM-powered plain-English explanation for an analysis result.
         slim_payload["explainability"] = expl
     prompt_body = json.dumps(slim_payload, indent=2, default=str, sort_keys=True)
+    prompt = _PROMPT_TEMPLATE.format(media_kind=media_kind, payload_json=prompt_body)
     # Content-hash cache — dedups "same analysis re-run" across users / record_ids
     content_hash = hashlib.sha256(