Spaces:

LikoKIko
/

OpenCensor-H1-Mini

Runtime error

App Files Files Community

LikoKiko committed on Dec 7, 2025

Commit

3ffc83b

1 Parent(s): 2b8e905

OpenCensor-H1-Mini

Browse files

Files changed (4) hide show

Dockerfile +14 -0
README.md +11 -6
app.py +63 -0
requirements.txt +5 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+FROM python:3.10-slim
+ENV PYTHONUNBUFFERED=1
+ENV HF_HOME=/app/.cache
+ENV TRANSFORMERS_CACHE=/app/.cache
+WORKDIR /app
+RUN mkdir -p /app/.cache && chmod 777 /app/.cache
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl libgomp1 git \
+ && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+RUN python -m pip install --upgrade pip && pip install -r requirements.txt
+COPY app.py .
+EXPOSE 7860
+CMD ["python","app.py"]

README.md CHANGED Viewed

@@ -1,14 +1,19 @@
 ---
 title: OpenCensor H1 Mini
-emoji: 🐠
 colorFrom: green
-colorTo: yellow
-sdk: gradio
-sdk_version: 6.0.2
-app_file: app.py
 pinned: false
-license: cc-by-sa-4.0
 short_description: OpenCensor-H1-Mini
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: OpenCensor H1 Mini
+emoji: 🦀
 colorFrom: green
+colorTo: indigo
+sdk: docker
 pinned: false
 short_description: OpenCensor-H1-Mini
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# OpenCensor-H1-Mini Space
+Test the **OpenCensor-H1-Mini** model for Hebrew profanity and toxicity detection.
+Type a sentence in Hebrew to see the toxicity score.
+**Model:** [LikoKIko/OpenCensor-H1-Mini](https://huggingface.co/LikoKIko/OpenCensor-H1-Mini)
+**Threshold:** 0.17 (Scores >= 0.17 are considered toxic)

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import os
+import re
+import torch
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# constants shared by the warmup pass and check()
+KModelId = "LikoKIko/OpenCensor-H1-Mini"  # repo id handed to both from_pretrained() calls below
+KMaxLen = 256  # max_length given to the tokenizer for padding and truncation
+KThreshold = 0.17  # check() reports label 1 when prob >= this value
+KDevice = "cuda" if torch.cuda.is_available() else "cpu"  # GPU when torch reports one, else CPU
+torch.set_num_threads(max(1, os.cpu_count() or 1))  # `or 1` covers os.cpu_count() returning None
+# load tokenizer and model once, at import time
+tok = AutoTokenizer.from_pretrained(KModelId)
+model = AutoModelForSequenceClassification.from_pretrained(
+    KModelId, num_labels=1  # one logit; it is passed through sigmoid below and in check()
+).to(KDevice).eval()  # move to KDevice and switch to eval mode
+# warmup: run one forward pass on a short Hebrew sample at import time, before the UI is built
+with torch.inference_mode():
+    _warm = tok("שלום", return_tensors="pt", padding="max_length",
+                truncation=True, max_length=KMaxLen).to(KDevice)  # same tokenizer settings as check()
+    _ = torch.sigmoid(model(**_warm).logits).item()  # value is discarded; only the pass itself matters
+# helpers: clean() collapses every whitespace run to a single space and strips both ends
+clean = lambda s: re.sub(r"\s+", " ", str(s)).strip()  # str(s) coerces non-string input first
+@torch.inference_mode()  # run the whole function under torch inference mode
+def check(txt: str) -> str:  # score one text; returns a display string with probability and 0/1 label
+    txt = clean(txt)  # normalise whitespace before tokenising
+    if not txt:  # nothing left after cleaning
+        return "Type something first."
+    batch = tok(
+        txt,
+        return_tensors="pt",
+        truncation=True,  # inputs longer than KMaxLen are cut
+        padding="max_length",  # NOTE(review): pads a single input up to KMaxLen; may be unnecessary — confirm
+        max_length=KMaxLen,
+    ).to(KDevice)  # tensors must sit on the same device as the model
+    prob = torch.sigmoid(model(**batch).logits).item()  # sigmoid of the logit as a Python float
+    label = 1 if prob >= KThreshold else 0  # cutoff is inclusive
+    return f"Prob: {prob:.4f} | Label: {label} (cutoff={KThreshold})"
+# ui: one input box, one read-only result box, a button and two clickable examples
+with gr.Blocks(title="Hebrew Profanity Detector") as demo:
+    gr.Markdown("## Hebrew Profanity Detector\nEnter Hebrew text. Output: probability and label.")
+    inp = gr.Textbox(lines=4, label="Hebrew text")
+    out = gr.Textbox(label="Result", interactive=False)  # display only; filled by check()
+    btn = gr.Button("Check")
+    btn.click(check, inputs=inp, outputs=out, api_name="/predict")  # NOTE(review): Gradio docs show api_name without a leading slash — confirm endpoint name
+    gr.Examples(
+        examples=[["זה דבר מצוין"], ["!יש לי חרא חבר"]],
+        inputs=inp,
+        outputs=out,
+        fn=check,
+        cache_examples=False,  # example outputs are not pre-computed
+    )
+if __name__ == "__main__":  # start the server only when run as a script
+    port = int(os.getenv("PORT", "7860"))  # PORT env var, else 7860 (the port the Dockerfile EXPOSEs)
+    demo.launch(server_name="0.0.0.0", server_port=port, show_error=True)  # bind to all interfaces

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+numpy<2
+torch==2.2.2
+transformers>=4.43,<4.47
+gradio>=4.37,<5.0
+huggingface-hub>=0.24