ar07xd committed on
Commit
59dd371
·
verified ·
1 Parent(s): ca6ba6b

Sync from GitHub via hub-sync

Browse files
Dockerfile CHANGED
@@ -42,6 +42,9 @@ RUN pip install --no-cache-dir \
42
  # Install everything else
43
  RUN pip install --no-cache-dir -r requirements.txt
44
 
 
 
 
45
  # ── App code ──────────────────────────────────────────────────────────────────
46
  COPY . .
47
 
 
42
  # Install everything else
43
  RUN pip install --no-cache-dir -r requirements.txt
44
 
45
+ # Download spaCy English model
46
+ RUN python -m spacy download en_core_web_sm
47
+
48
  # ── App code ──────────────────────────────────────────────────────────────────
49
  COPY . .
50
 
api/v1/analyze.py CHANGED
@@ -92,7 +92,7 @@ def _compute_llm_summary(resp, *, record_id: int, user, media_kind: str, exclude
92
  """Generate the LLM summary for `resp`. Swallows provider errors gracefully."""
93
  try:
94
  payload = resp.model_dump(exclude=exclude) if exclude else resp.model_dump()
95
- return generate_llm_summary(payload=payload, record_id=str(record_id))
96
  except Exception as e: # noqa: BLE001
97
  logger.warning(f"LLM explainer failed for {media_kind}: {e}")
98
  return None
@@ -572,6 +572,7 @@ async def analyze_text_endpoint(
572
  model_label=clf.label,
573
  ),
574
  explainability=TextExplainability(
 
575
  fake_probability=effective_fake_prob,
576
  top_label=clf.label,
577
  all_scores=clf.all_scores,
 
92
  """Generate the LLM summary for `resp`. Swallows provider errors gracefully."""
93
  try:
94
  payload = resp.model_dump(exclude=exclude) if exclude else resp.model_dump()
95
+ return generate_llm_summary(payload=payload, record_id=str(record_id), media_kind=media_kind)
96
  except Exception as e: # noqa: BLE001
97
  logger.warning(f"LLM explainer failed for {media_kind}: {e}")
98
  return None
 
572
  model_label=clf.label,
573
  ),
574
  explainability=TextExplainability(
575
+ original_text=body.text,
576
  fake_probability=effective_fake_prob,
577
  top_label=clf.label,
578
  all_scores=clf.all_scores,
api/v1/history.py CHANGED
@@ -21,6 +21,7 @@ class HistoryItem(BaseModel):
21
  authenticity_score: float
22
  created_at: datetime
23
  thumbnail_url: str | None = None
 
24
 
25
 
26
  class HistoryListResponse(BaseModel):
@@ -46,6 +47,7 @@ def list_history(
46
  authenticity_score=r.authenticity_score,
47
  created_at=r.created_at,
48
  thumbnail_url=r.thumbnail_url,
 
49
  )
50
  for r in rows
51
  ]
 
21
  authenticity_score: float
22
  created_at: datetime
23
  thumbnail_url: str | None = None
24
+ media_path: str | None = None
25
 
26
 
27
  class HistoryListResponse(BaseModel):
 
47
  authenticity_score=r.authenticity_score,
48
  created_at=r.created_at,
49
  thumbnail_url=r.thumbnail_url,
50
+ media_path=r.media_path,
51
  )
52
  for r in rows
53
  ]
api/v1/report.py CHANGED
@@ -40,6 +40,7 @@ def _assert_record_access(record: AnalysisRecord, user: User | None, token: str
40
  def generate(
41
  request: Request,
42
  analysis_id: int,
 
43
  token: str | None = Query(None),
44
  db: Session = Depends(get_db),
45
  user: User | None = Depends(optional_current_user),
@@ -79,6 +80,7 @@ def generate(
79
  def download(
80
  request: Request,
81
  analysis_id: int,
 
82
  token: str | None = Query(None),
83
  db: Session = Depends(get_db),
84
  user: User | None = Depends(optional_current_user),
 
40
  def generate(
41
  request: Request,
42
  analysis_id: int,
43
+ response: Response,
44
  token: str | None = Query(None),
45
  db: Session = Depends(get_db),
46
  user: User | None = Depends(optional_current_user),
 
80
  def download(
81
  request: Request,
82
  analysis_id: int,
83
+ response: Response,
84
  token: str | None = Query(None),
85
  db: Session = Depends(get_db),
86
  user: User | None = Depends(optional_current_user),
schemas/analyze.py CHANGED
@@ -37,6 +37,7 @@ class ManipulationIndicatorOut(BaseModel):
37
 
38
 
39
  class TextExplainability(BaseModel):
 
40
  fake_probability: float
41
  top_label: str
42
  all_scores: dict = {}
 
37
 
38
 
39
  class TextExplainability(BaseModel):
40
+ original_text: str = ""
41
  fake_probability: float
42
  top_label: str
43
  all_scores: dict = {}
schemas/common.py CHANGED
@@ -56,6 +56,8 @@ class ExifSummary(BaseModel):
56
  gps_info: Optional[str] = None
57
  software: Optional[str] = None
58
  lens_model: Optional[str] = None
 
 
59
  trust_adjustment: int = 0 # negative = more real, positive = more fake
60
  trust_reason: str = ""
61
 
 
56
  gps_info: Optional[str] = None
57
  software: Optional[str] = None
58
  lens_model: Optional[str] = None
59
+ icc_profile: Optional[bool] = False
60
+ maker_note: Optional[bool] = False
61
  trust_adjustment: int = 0 # negative = more real, positive = more fake
62
  trust_reason: str = ""
63
 
services/exif_service.py CHANGED
@@ -76,6 +76,9 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
76
  summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
77
  summary.software = str(tags.get("Image Software", "")).strip() or None
78
  summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
 
 
 
79
  except ImportError:
80
  logger.debug("exifread not installed, skipping fallback EXIF extraction")
81
  except Exception as e:
@@ -93,6 +96,9 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
93
  summary.software = str(decoded.get("Software", "")).strip() or None
94
  summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
95
 
 
 
 
96
  # GPS
97
  gps_raw = decoded.get("GPSInfo")
98
  if gps_raw and isinstance(gps_raw, dict):
@@ -108,7 +114,11 @@ def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
108
  has_camera_meta = summary.make and summary.model and summary.datetime_original
109
  if has_camera_meta:
110
  adjustment -= 8
111
- reasons.append("valid camera metadata (Make/Model/DateTime)")
 
 
 
 
112
 
113
  if summary.gps_info:
114
  adjustment -= 2
 
76
  summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
77
  summary.software = str(tags.get("Image Software", "")).strip() or None
78
  summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
79
+
80
+ summary.icc_profile = bool(pil_img.info.get("icc_profile"))
81
+ summary.maker_note = bool(tags.get("EXIF MakerNote"))
82
  except ImportError:
83
  logger.debug("exifread not installed, skipping fallback EXIF extraction")
84
  except Exception as e:
 
96
  summary.software = str(decoded.get("Software", "")).strip() or None
97
  summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
98
 
99
+ summary.icc_profile = bool(pil_img.info.get("icc_profile"))
100
+ summary.maker_note = bool(decoded.get("MakerNote"))
101
+
102
  # GPS
103
  gps_raw = decoded.get("GPSInfo")
104
  if gps_raw and isinstance(gps_raw, dict):
 
114
  has_camera_meta = summary.make and summary.model and summary.datetime_original
115
  if has_camera_meta:
116
  adjustment -= 8
117
+ reasons.append("valid camera metadata")
118
+
119
+ if summary.maker_note:
120
+ adjustment -= 10
121
+ reasons.append("proprietary MakerNote present")
122
 
123
  if summary.gps_info:
124
  adjustment -= 2
services/llm_explainer.py CHANGED
@@ -52,6 +52,9 @@ _PROMPT_TEMPLATE = """\
52
  You are DeepShield's explainability engine. Given the JSON analysis payload below,
53
  write a concise, accessible summary for a non-technical user.
54
 
 
 
 
55
  **Output format (strict JSON only — no markdown fences):**
56
  {{
57
  "paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
@@ -63,10 +66,12 @@ write a concise, accessible summary for a non-technical user.
63
  }}
64
 
65
  Rules:
66
- - Be factual. State what the analysis found, not what you speculate.
67
- - Reference specific indicators (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
68
- - If the verdict is "Likely Authentic", reassure the user and explain why.
69
- - If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence.
 
 
70
  - Keep the paragraph under 60 words. Each bullet under 20 words.
71
 
72
  **Analysis payload:**
@@ -211,6 +216,7 @@ def _parse_llm_response(raw: str) -> tuple[str, list[str]]:
211
  def generate_llm_summary(
212
  payload: dict[str, Any],
213
  record_id: str | None = None,
 
214
  ) -> LLMExplainabilitySummary:
215
  """Generate an LLM-powered plain-English explanation for an analysis result.
216
 
@@ -248,7 +254,7 @@ def generate_llm_summary(
248
  slim_payload["explainability"] = expl
249
 
250
  prompt_body = json.dumps(slim_payload, indent=2, default=str, sort_keys=True)
251
- prompt = _PROMPT_TEMPLATE.format(payload_json=prompt_body)
252
 
253
  # Content-hash cache — dedups "same analysis re-run" across users / record_ids
254
  content_hash = hashlib.sha256(
 
52
  You are DeepShield's explainability engine. Given the JSON analysis payload below,
53
  write a concise, accessible summary for a non-technical user.
54
 
55
+ This analysis is for a {media_kind}. Please customize the summary terminology to fit this domain
56
+ (e.g., mention wording/tone/heuristics for text, visuals/pixels/metadata for images, frames/motion for video, audio anomalies/frequencies for audio).
57
+
58
  **Output format (strict JSON only — no markdown fences):**
59
  {{
60
  "paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
 
66
  }}
67
 
68
  Rules:
69
+ - Be strictly factual. Do NOT hallucinate content or describe the image based on assumptions. Only state what the analysis payload found.
70
+ - If the image contains text (e.g. from OCR), quote it accurately but do NOT assume it applies to the entire image unless relevant.
71
+ - Reference specific technical indicators from the payload (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
72
+ - Avoid generic phrases like "The image itself explicitly labels...". Instead, point out specific visual anomalies or text anomalies detected by the models.
73
+ - If the verdict is "Likely Authentic", reassure the user based on the lack of artifacts and strong metadata.
74
+ - If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence (e.g., specific artifacts, low metadata trust, high model confidence).
75
  - Keep the paragraph under 60 words. Each bullet under 20 words.
76
 
77
  **Analysis payload:**
 
216
  def generate_llm_summary(
217
  payload: dict[str, Any],
218
  record_id: str | None = None,
219
+ media_kind: str = "media",
220
  ) -> LLMExplainabilitySummary:
221
  """Generate an LLM-powered plain-English explanation for an analysis result.
222
 
 
254
  slim_payload["explainability"] = expl
255
 
256
  prompt_body = json.dumps(slim_payload, indent=2, default=str, sort_keys=True)
257
+ prompt = _PROMPT_TEMPLATE.format(media_kind=media_kind, payload_json=prompt_body)
258
 
259
  # Content-hash cache — dedups "same analysis re-run" across users / record_ids
260
  content_hash = hashlib.sha256(