Spaces:

MaximusCorp
/

image-verification

Sleeping

App Files Files Community

MaximusCorp committed 7 days ago

Commit

e09e1c1

verified ·

1 Parent(s): 13491ff

Upload app.py

Browse files

Files changed (1) hide show

app.py +183 -30

app.py CHANGED Viewed

@@ -4,8 +4,10 @@ import asyncio
 import gc
 import io
 import os
 import threading
 import time
 from collections import defaultdict, deque
 from pathlib import Path
 from typing import Any, Callable
@@ -38,10 +40,74 @@ NSFW_THRESHOLD = float(os.getenv("NSFW_THRESHOLD", "0.82"))
 VIOLENCE_THRESHOLD = float(os.getenv("VIOLENCE_THRESHOLD", "0.80"))
 HATE_THRESHOLD = float(os.getenv("HATE_SYMBOL_THRESHOLD", "0.76"))
 TOXIC_THRESHOLD = float(os.getenv("TOXIC_TEXT_THRESHOLD", "0.78"))
 MAX_UPLOAD_BYTES = int(float(os.getenv("MAX_UPLOAD_MB", "8")) * 1024 * 1024)
 MAX_IMAGE_PIXELS = 24_000_000
 MAX_ANALYSIS_EDGE = 768
 Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
 ImageFile.LOAD_TRUNCATED_IMAGES = False
@@ -138,6 +204,75 @@ def normalized_label(label: str) -> str:
     return "".join(ch for ch in label.lower() if ch.isalnum() or ch == "_")
 def run_nsfw(image: Image.Image) -> dict[str, Any]:
     classifier = pipeline("image-classification", model=NSFW_MODEL, device=-1)
     try:
@@ -178,16 +313,25 @@ def run_violence(image: Image.Image) -> dict[str, Any]:
 def run_hateful_symbols(image: Image.Image) -> dict[str, Any]:
     processor = CLIPProcessor.from_pretrained(HATE_MODEL)
     model = CLIPModel.from_pretrained(HATE_MODEL)
-    prompts = [
-        "a harmless ordinary image without extremist symbols",
-        "an image prominently displaying a hateful extremist or supremacist symbol",
-    ]
     try:
-        inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)
         with torch.inference_mode():
-            risky = float(model(**inputs).logits_per_image.softmax(dim=-1)[0][1])
         result = test_result("Hateful-symbol heuristic", risky, HATE_THRESHOLD, HATE_MODEL)
-        result["warning"] = "Experimental CLIP heuristic; uncertain cases require human review."
         return result
     finally:
         release_model(model, processor)
@@ -195,6 +339,12 @@ def run_hateful_symbols(image: Image.Image) -> dict[str, Any]:
 def run_offensive_text(image: Image.Image) -> dict[str, Any]:
     text = extract_ocr_text(image)
     if not text:
         result = test_result("Offensive text", 0.0, TOXIC_THRESHOLD, TOXIC_MODEL)
         result["detail"] = "No readable English or French text was found."
@@ -232,24 +382,18 @@ def run_offensive_text(image: Image.Image) -> dict[str, Any]:
 def extract_ocr_text(image: Image.Image) -> str:
     candidates: list[Image.Image] = []
-    base = image.convert("RGB")
-    candidates.append(base)
-    gray = ImageOps.grayscale(base)
-    candidates.append(gray)
-    wide = gray.copy()
-    wide.thumbnail((1400, 1400), Image.Resampling.LANCZOS)
-    if wide.width < gray.width:
-        wide = gray
-    else:
-        wide = wide.resize((max(wide.width, gray.width * 2), max(wide.height, gray.height * 2)), Image.Resampling.LANCZOS)
-    candidates.append(ImageEnhance.Contrast(wide).enhance(2.0))
-    thresholded = ImageEnhance.Contrast(wide).enhance(2.8).point(lambda px: 255 if px > 170 else 0)
-    candidates.append(thresholded)
-    best = ""
     configs = ("--oem 3 --psm 6", "--oem 3 --psm 11")
     for candidate in candidates:
         for config in configs:
@@ -257,11 +401,20 @@ def extract_ocr_text(image: Image.Image) -> str:
                 text = " ".join(pytesseract.image_to_string(candidate, lang="eng+fra", config=config).split())
             except Exception:
                 text = ""
-            if len(text) > len(best):
-                best = text
-            if len(best) >= 12:
-                return best[:2000]
-    return best[:2000]
 def test_result(name: str, score: float, threshold: float, model: str) -> dict[str, Any]:

 import gc
 import io
 import os
+import re
 import threading
 import time
+import unicodedata
 from collections import defaultdict, deque
 from pathlib import Path
 from typing import Any, Callable
 VIOLENCE_THRESHOLD = float(os.getenv("VIOLENCE_THRESHOLD", "0.80"))
 HATE_THRESHOLD = float(os.getenv("HATE_SYMBOL_THRESHOLD", "0.76"))
 TOXIC_THRESHOLD = float(os.getenv("TOXIC_TEXT_THRESHOLD", "0.78"))
+TEXT_BLOCKLIST_THRESHOLD = float(os.getenv("TEXT_BLOCKLIST_THRESHOLD", "0.96"))
 MAX_UPLOAD_BYTES = int(float(os.getenv("MAX_UPLOAD_MB", "8")) * 1024 * 1024)
 MAX_IMAGE_PIXELS = 24_000_000
 MAX_ANALYSIS_EDGE = 768
+TEXT_BLOCKLIST = {
+    # French insults / sexual slurs
+    "pute",
+    "putain",
+    "salope",
+    "connard",
+    "connasse",
+    "encule",
+    "enculer",
+    "nique",
+    "fdp",
+    "ntm",
+    "ta mere",
+    "ta mere la pute",
+    # English profanity / sexual slurs
+    "fuck",
+    "fucker",
+    "fucking",
+    "shit",
+    "bitch",
+    "whore",
+    "slut",
+    "cunt",
+    "dick",
+    "pussy",
+    "nigger",
+    "nigga",
+    "faggot",
+    "retard",
+    # Extremist / hate text
+    "nazi",
+    "hitler",
+    "heil hitler",
+    "sieg heil",
+    "swastika",
+    "white power",
+    "whitepower",
+    "1488",
+    "88",
+}
+# Separator-free forms of the blocklist entries (only alphanumeric characters
+# kept); entries with fewer than three alphanumeric characters are left out.
+# NOTE(review): this set is built from the raw terms, while find_blocked_text
+# looks up the normalized_text() form of each term. For a digit term such as
+# "1488" the normalized form is "ia88", which is never a member of this set,
+# so the compact substring check is skipped for it -- confirm this is intended.
+TEXT_COMPACT_BLOCKLIST = {
+    "".join(ch for ch in term if ch.isalnum())
+    for term in TEXT_BLOCKLIST
+    if len("".join(ch for ch in term if ch.isalnum())) >= 3
+}
+# Text prompts used by run_hateful_symbols. The safe prompts come first in the
+# combined prompt list and act as the baseline each risky prompt is compared to.
+HATE_SAFE_PROMPTS = [
+    "a harmless ordinary image without hate symbols",
+    "a normal avatar item or game asset with no extremist content",
+    "a safe logo or clothing texture",
+]
+# Risky labels; the one with the highest score is reported in the result detail.
+HATE_RISK_PROMPTS = [
+    "a nazi swastika symbol",
+    "a nazi flag",
+    "an image displaying a swastika",
+    "an image displaying hate symbols",
+    "an extremist supremacist logo",
+    "a white supremacist symbol",
+    "a hateful propaganda symbol",
+]
 Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
 ImageFile.LOAD_TRUNCATED_IMAGES = False
     return "".join(ch for ch in label.lower() if ch.isalnum() or ch == "_")
+def normalized_text(text: str) -> str:
+    """Fold *text* into a canonical form for blocklist comparison.
+
+    Steps, in order: NFKD-decompose and drop combining marks (removes accents),
+    lowercase, replace common leetspeak characters with letters, shorten any
+    run of three or more identical characters to two, and collapse whitespace
+    runs to a single space with the ends stripped.
+
+    NOTE(review): "!", "|" and "+" are mapped to letters as well, so ordinary
+    trailing punctuation turns into a letter (e.g. "ntm!" becomes "ntmi");
+    whole-token matching in find_blocked_text then no longer sees the bare
+    term -- confirm this trade-off is acceptable.
+    """
+    folded = unicodedata.normalize("NFKD", text)
+    folded = "".join(ch for ch in folded if not unicodedata.combining(ch))
+    folded = folded.lower()
+    # Leetspeak substitutions; digits and symbols not listed here pass through.
+    folded = folded.translate(
+        str.maketrans(
+            {
+                "@": "a",
+                "4": "a",
+                "0": "o",
+                "1": "i",
+                "!": "i",
+                "|": "i",
+                "3": "e",
+                "5": "s",
+                "$": "s",
+                "7": "t",
+                "+": "t",
+            }
+        )
+    )
+    # Runs are shortened to two characters, not one, so terms that contain a
+    # doubled letter stay intact.
+    # NOTE(review): an elongated single letter therefore stays doubled (e.g.
+    # "shiiit" becomes "shiit") and will not equal a term spelled with one.
+    folded = re.sub(r"(.)\1{2,}", r"\1\1", folded)
+    return re.sub(r"\s+", " ", folded).strip()
+def compact_text(text: str) -> str:
+    """Return normalized_text(text) with every character outside a-z and 0-9 removed."""
+    return re.sub(r"[^a-z0-9]+", "", normalized_text(text))
+def find_blocked_text(text: str) -> str | None:
+    """Return the first TEXT_BLOCKLIST entry found in *text*, or None.
+
+    Entries are tried longest first. Both the input and each entry are passed
+    through normalized_text before comparison; the value returned is the
+    original blocklist entry, not the matched substring.
+    """
+    # Pad with spaces so a multi-word phrase can be matched as " phrase ".
+    spaced = f" {normalized_text(text)} "
+    compact = compact_text(text)
+    for term in sorted(TEXT_BLOCKLIST, key=len, reverse=True):
+        norm = normalized_text(term)
+        compact_term = re.sub(r"[^a-z0-9]+", "", norm)
+        # Multi-word phrase: exact match on the space-delimited text.
+        if " " in norm and f" {norm} " in spaced:
+            return term
+        # Short terms (three characters or fewer once normalized) only count
+        # as standalone tokens and never reach the substring check below.
+        if len(norm) <= 3:
+            if re.search(rf"(?<![a-z0-9]){re.escape(norm)}(?![a-z0-9])", spaced):
+                return term
+            continue
+        # Longer terms: standalone token first ...
+        if re.search(rf"(?<![a-z0-9]){re.escape(norm)}(?![a-z0-9])", spaced):
+            return term
+        # ... then anywhere in the separator-free text, which also catches
+        # spellings broken up by spaces or punctuation.
+        # NOTE(review): this is a plain substring test, so it also fires inside
+        # ordinary words (e.g. "computer" and "unique" each contain a listed
+        # term) -- confirm that false-positive rate is acceptable.
+        if compact_term in TEXT_COMPACT_BLOCKLIST and compact_term in compact:
+            return term
+    return None
+def image_variants(image: Image.Image, *, include_crops: bool = False) -> list[Image.Image]:
+    """Return RGB copies of *image* in several orientations, optionally with crops.
+
+    The list always starts with the RGB-converted image followed by its 90, 180
+    and 270 degree rotations. When *include_crops* is true and the shorter side
+    is at least 96 pixels, five unrotated crops are appended: the four
+    quadrants and a central region spanning one fifth to four fifths of each
+    dimension.
+    """
+    base = image.convert("RGB")
+    variants = [
+        base,
+        base.rotate(90, expand=True),
+        base.rotate(180, expand=True),
+        base.rotate(270, expand=True),
+    ]
+    if include_crops and min(base.size) >= 96:
+        width, height = base.size
+        # Boxes are (left, upper, right, lower) and are taken from the
+        # unrotated image only.
+        crop_boxes = [
+            (0, 0, width // 2, height // 2),
+            (width // 2, 0, width, height // 2),
+            (0, height // 2, width // 2, height),
+            (width // 2, height // 2, width, height),
+            (width // 5, height // 5, width * 4 // 5, height * 4 // 5),
+        ]
+        variants.extend(base.crop(box) for box in crop_boxes)
+    return variants
 def run_nsfw(image: Image.Image) -> dict[str, Any]:
     classifier = pipeline("image-classification", model=NSFW_MODEL, device=-1)
     try:
 def run_hateful_symbols(image: Image.Image) -> dict[str, Any]:
+    # Scores the image and its rotations against the safe and risky prompt
+    # lists and reports the highest risky score found. Returns the dict built
+    # by test_result, with "detail" and "warning" entries added. The model and
+    # processor are loaded on every call and handed to release_model afterwards.
     processor = CLIPProcessor.from_pretrained(HATE_MODEL)
     model = CLIPModel.from_pretrained(HATE_MODEL)
+    # Safe prompts occupy the first len(HATE_SAFE_PROMPTS) columns of the logits.
+    prompts = HATE_SAFE_PROMPTS + HATE_RISK_PROMPTS
     try:
+        risky = 0.0
+        best_prompt = ""
+        variants = image_variants(image)
+        inputs = processor(text=prompts, images=variants, return_tensors="pt", padding=True)
         with torch.inference_mode():
+            # presumably one row per image variant and one column per prompt --
+            # TODO confirm against the CLIP model output documentation
+            logits = model(**inputs).logits_per_image
+        # Strongest safe-prompt logit in each row, used as that row's baseline.
+        safe_logits = logits[:, : len(HATE_SAFE_PROMPTS)].max(dim=1).values
+        for row_index in range(logits.shape[0]):
+            for index, prompt in enumerate(prompts[len(HATE_SAFE_PROMPTS) :], start=len(HATE_SAFE_PROMPTS)):
+                # Two-way softmax: this risky prompt against the row's best
+                # safe prompt, so the other risky prompts do not dilute it.
+                binary = torch.stack((safe_logits[row_index], logits[row_index, index])).softmax(dim=0)
+                score = float(binary[1])
+                if score > risky:
+                    risky = score
+                    best_prompt = prompt
         result = test_result("Hateful-symbol heuristic", risky, HATE_THRESHOLD, HATE_MODEL)
+        result["detail"] = f"Closest risky label: {best_prompt or 'none'}."
+        result["warning"] = "Experimental CLIP heuristic; uncertain cases still require human review."
         return result
     finally:
         release_model(model, processor)
 def run_offensive_text(image: Image.Image) -> dict[str, Any]:
     text = extract_ocr_text(image)
+    blocked = find_blocked_text(text)
+    if blocked:
+        result = test_result("Offensive text", TEXT_BLOCKLIST_THRESHOLD, TOXIC_THRESHOLD, "Tesseract OCR + Axium blocklist")
+        result["detail"] = f'OCR detected blocked text "{blocked}" in: "{text[:180]}{"..." if len(text) > 180 else ""}"'
+        return result
     if not text:
         result = test_result("Offensive text", 0.0, TOXIC_THRESHOLD, TOXIC_MODEL)
         result["detail"] = "No readable English or French text was found."
 def extract_ocr_text(image: Image.Image) -> str:
     candidates: list[Image.Image] = []
+    for variant in image_variants(image, include_crops=True):
+        base = variant.convert("RGB")
+        gray = ImageOps.grayscale(base)
+        wide = gray.resize((gray.width * 2, gray.height * 2), Image.Resampling.LANCZOS)
+        contrast = ImageEnhance.Contrast(wide).enhance(2.4)
+        sharp = ImageEnhance.Sharpness(contrast).enhance(2.0)
+        thresholded = contrast.point(lambda px: 255 if px > 165 else 0)
+        inverted = ImageOps.invert(contrast)
+        candidates.extend([base, gray, contrast, sharp, thresholded, inverted])
+    seen: set[str] = set()
+    collected: list[str] = []
     configs = ("--oem 3 --psm 6", "--oem 3 --psm 11")
     for candidate in candidates:
         for config in configs:
                 text = " ".join(pytesseract.image_to_string(candidate, lang="eng+fra", config=config).split())
             except Exception:
                 text = ""
+            if not text:
+                continue
+            key = normalized_text(text)
+            if key in seen:
+                continue
+            seen.add(key)
+            collected.append(text)
+            if find_blocked_text(text):
+                return text[:2000]
+    if not collected:
+        return ""
+    collected.sort(key=len, reverse=True)
+    return " | ".join(collected[:6])[:2000]
 def test_result(name: str, score: float, threshold: float, model: str) -> dict[str, Any]: