MaximusCorp committed on
Commit
e09e1c1
·
verified ·
1 Parent(s): 13491ff

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -30
app.py CHANGED
@@ -4,8 +4,10 @@ import asyncio
4
  import gc
5
  import io
6
  import os
 
7
  import threading
8
  import time
 
9
  from collections import defaultdict, deque
10
  from pathlib import Path
11
  from typing import Any, Callable
@@ -38,10 +40,74 @@ NSFW_THRESHOLD = float(os.getenv("NSFW_THRESHOLD", "0.82"))
38
  VIOLENCE_THRESHOLD = float(os.getenv("VIOLENCE_THRESHOLD", "0.80"))
39
  HATE_THRESHOLD = float(os.getenv("HATE_SYMBOL_THRESHOLD", "0.76"))
40
  TOXIC_THRESHOLD = float(os.getenv("TOXIC_TEXT_THRESHOLD", "0.78"))
 
41
  MAX_UPLOAD_BYTES = int(float(os.getenv("MAX_UPLOAD_MB", "8")) * 1024 * 1024)
42
  MAX_IMAGE_PIXELS = 24_000_000
43
  MAX_ANALYSIS_EDGE = 768
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
46
  ImageFile.LOAD_TRUNCATED_IMAGES = False
47
 
@@ -138,6 +204,75 @@ def normalized_label(label: str) -> str:
138
  return "".join(ch for ch in label.lower() if ch.isalnum() or ch == "_")
139
 
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  def run_nsfw(image: Image.Image) -> dict[str, Any]:
142
  classifier = pipeline("image-classification", model=NSFW_MODEL, device=-1)
143
  try:
@@ -178,16 +313,25 @@ def run_violence(image: Image.Image) -> dict[str, Any]:
178
  def run_hateful_symbols(image: Image.Image) -> dict[str, Any]:
179
  processor = CLIPProcessor.from_pretrained(HATE_MODEL)
180
  model = CLIPModel.from_pretrained(HATE_MODEL)
181
- prompts = [
182
- "a harmless ordinary image without extremist symbols",
183
- "an image prominently displaying a hateful extremist or supremacist symbol",
184
- ]
185
  try:
186
- inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)
 
 
 
187
  with torch.inference_mode():
188
- risky = float(model(**inputs).logits_per_image.softmax(dim=-1)[0][1])
 
 
 
 
 
 
 
 
189
  result = test_result("Hateful-symbol heuristic", risky, HATE_THRESHOLD, HATE_MODEL)
190
- result["warning"] = "Experimental CLIP heuristic; uncertain cases require human review."
 
191
  return result
192
  finally:
193
  release_model(model, processor)
@@ -195,6 +339,12 @@ def run_hateful_symbols(image: Image.Image) -> dict[str, Any]:
195
 
196
  def run_offensive_text(image: Image.Image) -> dict[str, Any]:
197
  text = extract_ocr_text(image)
 
 
 
 
 
 
198
  if not text:
199
  result = test_result("Offensive text", 0.0, TOXIC_THRESHOLD, TOXIC_MODEL)
200
  result["detail"] = "No readable English or French text was found."
@@ -232,24 +382,18 @@ def run_offensive_text(image: Image.Image) -> dict[str, Any]:
232
 
233
  def extract_ocr_text(image: Image.Image) -> str:
234
  candidates: list[Image.Image] = []
235
- base = image.convert("RGB")
236
- candidates.append(base)
237
-
238
- gray = ImageOps.grayscale(base)
239
- candidates.append(gray)
240
-
241
- wide = gray.copy()
242
- wide.thumbnail((1400, 1400), Image.Resampling.LANCZOS)
243
- if wide.width < gray.width:
244
- wide = gray
245
- else:
246
- wide = wide.resize((max(wide.width, gray.width * 2), max(wide.height, gray.height * 2)), Image.Resampling.LANCZOS)
247
- candidates.append(ImageEnhance.Contrast(wide).enhance(2.0))
248
-
249
- thresholded = ImageEnhance.Contrast(wide).enhance(2.8).point(lambda px: 255 if px > 170 else 0)
250
- candidates.append(thresholded)
251
-
252
- best = ""
253
  configs = ("--oem 3 --psm 6", "--oem 3 --psm 11")
254
  for candidate in candidates:
255
  for config in configs:
@@ -257,11 +401,20 @@ def extract_ocr_text(image: Image.Image) -> str:
257
  text = " ".join(pytesseract.image_to_string(candidate, lang="eng+fra", config=config).split())
258
  except Exception:
259
  text = ""
260
- if len(text) > len(best):
261
- best = text
262
- if len(best) >= 12:
263
- return best[:2000]
264
- return best[:2000]
 
 
 
 
 
 
 
 
 
265
 
266
 
267
  def test_result(name: str, score: float, threshold: float, model: str) -> dict[str, Any]:
 
4
  import gc
5
  import io
6
  import os
7
+ import re
8
  import threading
9
  import time
10
+ import unicodedata
11
  from collections import defaultdict, deque
12
  from pathlib import Path
13
  from typing import Any, Callable
 
40
  VIOLENCE_THRESHOLD = float(os.getenv("VIOLENCE_THRESHOLD", "0.80"))
41
  HATE_THRESHOLD = float(os.getenv("HATE_SYMBOL_THRESHOLD", "0.76"))
42
  TOXIC_THRESHOLD = float(os.getenv("TOXIC_TEXT_THRESHOLD", "0.78"))
43
+ TEXT_BLOCKLIST_THRESHOLD = float(os.getenv("TEXT_BLOCKLIST_THRESHOLD", "0.96"))
44
  MAX_UPLOAD_BYTES = int(float(os.getenv("MAX_UPLOAD_MB", "8")) * 1024 * 1024)
45
  MAX_IMAGE_PIXELS = 24_000_000
46
  MAX_ANALYSIS_EDGE = 768
47
 
48
+ TEXT_BLOCKLIST = {
49
+ # French insults / sexual slurs
50
+ "pute",
51
+ "putain",
52
+ "salope",
53
+ "connard",
54
+ "connasse",
55
+ "encule",
56
+ "enculer",
57
+ "nique",
58
+ "fdp",
59
+ "ntm",
60
+ "ta mere",
61
+ "ta mere la pute",
62
+ # English profanity / sexual slurs
63
+ "fuck",
64
+ "fucker",
65
+ "fucking",
66
+ "shit",
67
+ "bitch",
68
+ "whore",
69
+ "slut",
70
+ "cunt",
71
+ "dick",
72
+ "pussy",
73
+ "nigger",
74
+ "nigga",
75
+ "faggot",
76
+ "retard",
77
+ # Extremist / hate text
78
+ "nazi",
79
+ "hitler",
80
+ "heil hitler",
81
+ "sieg heil",
82
+ "swastika",
83
+ "white power",
84
+ "whitepower",
85
+ "1488",
86
+ "88",
87
+ }
88
+
89
# Blocklist terms reduced to their alphanumeric characters only (spaces and
# punctuation dropped). Terms whose reduced form is shorter than three
# characters are left out of this set.
TEXT_COMPACT_BLOCKLIST = {
    squeezed
    for squeezed in ("".join(filter(str.isalnum, term)) for term in TEXT_BLOCKLIST)
    if len(squeezed) >= 3
}
94
+
95
+ HATE_SAFE_PROMPTS = [
96
+ "a harmless ordinary image without hate symbols",
97
+ "a normal avatar item or game asset with no extremist content",
98
+ "a safe logo or clothing texture",
99
+ ]
100
+
101
+ HATE_RISK_PROMPTS = [
102
+ "a nazi swastika symbol",
103
+ "a nazi flag",
104
+ "an image displaying a swastika",
105
+ "an image displaying hate symbols",
106
+ "an extremist supremacist logo",
107
+ "a white supremacist symbol",
108
+ "a hateful propaganda symbol",
109
+ ]
110
+
111
  Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
112
  ImageFile.LOAD_TRUNCATED_IMAGES = False
113
 
 
204
  return "".join(ch for ch in label.lower() if ch.isalnum() or ch == "_")
205
 
206
 
207
# Look-alike characters mapped back to the letter they usually stand in for.
# Built once at import time rather than on every call.
_LOOKALIKE_TRANSLATION = str.maketrans(
    {
        "@": "a",
        "4": "a",
        "0": "o",
        "1": "i",
        "!": "i",
        "|": "i",
        "3": "e",
        "5": "s",
        "$": "s",
        "7": "t",
        "+": "t",
    }
)
# A run of three or more identical characters is shortened to exactly two.
_REPEATED_CHAR_RE = re.compile(r"(.)\1{2,}")
_WHITESPACE_RUN_RE = re.compile(r"\s+")


def normalized_text(text: str) -> str:
    """Return a canonical, lower-case form of *text* for blocklist matching.

    Steps, in order:

    1. Unicode NFKD decomposition, then removal of combining marks
       (so accented letters lose their accents).
    2. Lower-casing.
    3. Look-alike substitution (``@``/``4`` -> ``a``, ``0`` -> ``o``, ...).
       Note that this also rewrites ordinary digits and punctuation, e.g.
       ``"1488"`` becomes ``"ia88"`` and a trailing ``"!"`` becomes ``"i"``.
    4. Runs of three or more identical characters are cut down to two.
    5. Whitespace runs collapse to a single space and the ends are stripped.

    Args:
        text: Arbitrary input text (for example raw OCR output).

    Returns:
        The normalised string; empty when *text* is empty or only whitespace.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    folded = folded.lower().translate(_LOOKALIKE_TRANSLATION)
    folded = _REPEATED_CHAR_RE.sub(r"\1\1", folded)
    return _WHITESPACE_RUN_RE.sub(" ", folded).strip()
230
+
231
+
232
def compact_text(text: str) -> str:
    """Return *text* normalised via ``normalized_text`` with everything except
    ``a-z`` and ``0-9`` removed, so separators between letters disappear."""
    normalized = normalized_text(text)
    return "".join(re.findall(r"[a-z0-9]+", normalized))
234
+
235
+
236
def find_blocked_text(text: str) -> str | None:
    """Return the first blocklist term found in *text*, or ``None``.

    Terms are tried longest first so that a full phrase is reported in
    preference to a shorter term it contains. Terms of equal length are tried
    in alphabetical order, which keeps the reported term stable between runs
    (plain set iteration order is not).

    A term matches when either:

    * its normalised form appears in the normalised text with no letter or
      digit directly before or after it, or
    * its normalised form is longer than three characters, its compact form is
      in ``TEXT_COMPACT_BLOCKLIST``, and that compact form occurs anywhere in
      the compact text (this is what catches spaced-out or punctuated
      spellings).

    Args:
        text: Text to scan, typically OCR output. May be empty.

    Returns:
        The matching entry exactly as written in ``TEXT_BLOCKLIST``, or
        ``None`` when nothing matches.
    """
    spaced = f" {normalized_text(text)} "
    compact = compact_text(text)
    for term in sorted(TEXT_BLOCKLIST, key=lambda item: (-len(item), item)):
        norm = normalized_text(term)
        # Boundary-delimited match. This also covers multi-word phrases, since
        # a space is neither a letter nor a digit.
        if re.search(rf"(?<![a-z0-9]){re.escape(norm)}(?![a-z0-9])", spaced):
            return term
        # Short terms are only ever matched with boundaries.
        if len(norm) <= 3:
            continue
        compact_term = re.sub(r"[^a-z0-9]+", "", norm)
        # NOTE(review): this is a plain substring test, so a harmless word that
        # merely contains a term is flagged too (e.g. "unique" contains
        # "nique"). Confirm that trade-off is intended.
        # NOTE(review): TEXT_COMPACT_BLOCKLIST is built from the raw terms, so a
        # term whose normalised form differs from its raw form (e.g. "1488" ->
        # "ia88") never passes the membership test below.
        if compact_term in TEXT_COMPACT_BLOCKLIST and compact_term in compact:
            return term
    return None
253
+
254
+
255
def image_variants(image: Image.Image, *, include_crops: bool = False) -> list[Image.Image]:
    """Build alternative views of *image* for analysis.

    The result always starts with the RGB-converted image followed by its
    90, 180 and 270 degree rotations (canvas expanded to fit). When
    *include_crops* is true and the shorter side is at least 96 pixels, the
    four quadrants and a centred crop covering the middle three fifths of each
    axis are appended, in that order.

    Args:
        image: Source image.
        include_crops: Whether to append the five crops described above.

    Returns:
        A list of 4 images, or 9 when crops are included.
    """
    base = image.convert("RGB")
    variants = [base]
    variants += [base.rotate(angle, expand=True) for angle in (90, 180, 270)]
    if not include_crops or min(base.size) < 96:
        return variants

    width, height = base.size
    half_w, half_h = width // 2, height // 2
    quadrants = (
        (0, 0, half_w, half_h),
        (half_w, 0, width, half_h),
        (0, half_h, half_w, height),
        (half_w, half_h, width, height),
    )
    centre = (width // 5, height // 5, width * 4 // 5, height * 4 // 5)
    for box in (*quadrants, centre):
        variants.append(base.crop(box))
    return variants
274
+
275
+
276
  def run_nsfw(image: Image.Image) -> dict[str, Any]:
277
  classifier = pipeline("image-classification", model=NSFW_MODEL, device=-1)
278
  try:
 
def run_hateful_symbols(image: Image.Image) -> dict[str, Any]:
    """Score *image* for hateful symbols using CLIP text prompts.

    Every variant from ``image_variants`` is compared against the safe and
    risky prompts. For each variant, the highest safe-prompt logit is paired
    with each risky-prompt logit in a two-way softmax; the largest risky
    probability over all variants and prompts becomes the score.

    Args:
        image: Image to analyse.

    Returns:
        The dict produced by ``test_result`` with ``detail`` (the best-matching
        risky prompt) and ``warning`` entries added.
    """
    processor = CLIPProcessor.from_pretrained(HATE_MODEL)
    model = CLIPModel.from_pretrained(HATE_MODEL)
    safe_count = len(HATE_SAFE_PROMPTS)
    prompts = HATE_SAFE_PROMPTS + HATE_RISK_PROMPTS
    try:
        risky, best_prompt = 0.0, ""
        variants = image_variants(image)
        inputs = processor(text=prompts, images=variants, return_tensors="pt", padding=True)
        with torch.inference_mode():
            logits = model(**inputs).logits_per_image
            # Strongest "safe" evidence per variant.
            safe_logits = logits[:, :safe_count].max(dim=1).values
            for row, safe_logit in zip(logits, safe_logits):
                for prompt, risk_logit in zip(HATE_RISK_PROMPTS, row[safe_count:]):
                    pair = torch.stack((safe_logit, risk_logit)).softmax(dim=0)
                    score = float(pair[1])
                    # Strict comparison: the first best-scoring prompt wins ties.
                    if score > risky:
                        risky, best_prompt = score, prompt
        result = test_result("Hateful-symbol heuristic", risky, HATE_THRESHOLD, HATE_MODEL)
        result["detail"] = f"Closest risky label: {best_prompt or 'none'}."
        result["warning"] = "Experimental CLIP heuristic; uncertain cases still require human review."
        return result
    finally:
        # Always drop the model and processor, even when scoring fails.
        release_model(model, processor)
 
339
 
340
  def run_offensive_text(image: Image.Image) -> dict[str, Any]:
341
  text = extract_ocr_text(image)
342
+ blocked = find_blocked_text(text)
343
+ if blocked:
344
+ result = test_result("Offensive text", TEXT_BLOCKLIST_THRESHOLD, TOXIC_THRESHOLD, "Tesseract OCR + Axium blocklist")
345
+ result["detail"] = f'OCR detected blocked text "{blocked}" in: "{text[:180]}{"..." if len(text) > 180 else ""}"'
346
+ return result
347
+
348
  if not text:
349
  result = test_result("Offensive text", 0.0, TOXIC_THRESHOLD, TOXIC_MODEL)
350
  result["detail"] = "No readable English or French text was found."
 
382
 
383
  def extract_ocr_text(image: Image.Image) -> str:
384
  candidates: list[Image.Image] = []
385
+ for variant in image_variants(image, include_crops=True):
386
+ base = variant.convert("RGB")
387
+ gray = ImageOps.grayscale(base)
388
+ wide = gray.resize((gray.width * 2, gray.height * 2), Image.Resampling.LANCZOS)
389
+ contrast = ImageEnhance.Contrast(wide).enhance(2.4)
390
+ sharp = ImageEnhance.Sharpness(contrast).enhance(2.0)
391
+ thresholded = contrast.point(lambda px: 255 if px > 165 else 0)
392
+ inverted = ImageOps.invert(contrast)
393
+ candidates.extend([base, gray, contrast, sharp, thresholded, inverted])
394
+
395
+ seen: set[str] = set()
396
+ collected: list[str] = []
 
 
 
 
 
 
397
  configs = ("--oem 3 --psm 6", "--oem 3 --psm 11")
398
  for candidate in candidates:
399
  for config in configs:
 
401
  text = " ".join(pytesseract.image_to_string(candidate, lang="eng+fra", config=config).split())
402
  except Exception:
403
  text = ""
404
+ if not text:
405
+ continue
406
+ key = normalized_text(text)
407
+ if key in seen:
408
+ continue
409
+ seen.add(key)
410
+ collected.append(text)
411
+ if find_blocked_text(text):
412
+ return text[:2000]
413
+
414
+ if not collected:
415
+ return ""
416
+ collected.sort(key=len, reverse=True)
417
+ return " | ".join(collected[:6])[:2000]
418
 
419
 
420
  def test_result(name: str, score: float, threshold: float, model: str) -> dict[str, Any]: