LikoKiko commited on
Commit
3ffc83b
1 Parent(s): 2b8e905

OpenCensor-H1-Mini

Browse files
Files changed (4) hide show
  1. Dockerfile +14 -0
  2. README.md +11 -6
  3. app.py +63 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal runtime image for the OpenCensor-H1-Mini Gradio Space.
FROM python:3.10-slim

# Unbuffered stdout/stderr so container logs appear immediately; point the
# Hugging Face libraries at a writable in-app cache directory.
ENV PYTHONUNBUFFERED=1
ENV HF_HOME=/app/.cache
ENV TRANSFORMERS_CACHE=/app/.cache

WORKDIR /app

# Spaces may run the container as a non-root user, so the model cache must be
# writable by anyone.
RUN mkdir -p /app/.cache && chmod 777 /app/.cache

# Runtime dependencies: libgomp1 for torch, ca-certificates/curl/git for
# fetching models over HTTPS. Clean the apt lists to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates curl libgomp1 git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the app code so this (slow,
# large) layer is cached across app-only edits. --no-cache-dir keeps pip's
# download cache out of the image.
COPY requirements.txt .
RUN python -m pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt

COPY app.py .

# Hugging Face Spaces expects the app on port 7860.
EXPOSE 7860
CMD ["python","app.py"]
README.md CHANGED
@@ -1,14 +1,19 @@
1
  ---
2
  title: OpenCensor H1 Mini
3
- emoji: 🐹
4
  colorFrom: green
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 6.0.2
8
- app_file: app.py
9
  pinned: false
10
- license: cc-by-sa-4.0
11
  short_description: OpenCensor-H1-Mini
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: OpenCensor H1 Mini
3
+ emoji: 馃
4
  colorFrom: green
5
+ colorTo: indigo
6
+ sdk: docker
 
 
7
  pinned: false
 
8
  short_description: OpenCensor-H1-Mini
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+
13
+ # OpenCensor-H1-Mini Space
14
+
15
+ Test the **OpenCensor-H1-Mini** model for Hebrew profanity and toxicity detection.
16
+ Type a sentence in Hebrew to see the toxicity score.
17
+
18
+ **Model:** [LikoKIko/OpenCensor-H1-Mini](https://huggingface.co/LikoKIko/OpenCensor-H1-Mini)
19
+ **Threshold:** 0.17 (Scores >= 0.17 are considered toxic)
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio Space entry point for the OpenCensor-H1-Mini Hebrew toxicity model."""

import os
import re
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# configuration constants
KModelId = "LikoKIko/OpenCensor-H1-Mini"
KMaxLen = 256
KThreshold = 0.17
KDevice = "cuda" if torch.cuda.is_available() else "cpu"

# let torch use every CPU core available for intra-op parallelism
torch.set_num_threads(max(1, os.cpu_count() or 1))

# load the tokenizer and classifier a single time, at import
tok = AutoTokenizer.from_pretrained(KModelId)
model = AutoModelForSequenceClassification.from_pretrained(KModelId, num_labels=1)
model = model.to(KDevice).eval()

# one dummy forward pass so the weights are fully loaded (and any lazy CUDA
# init done) before the first real request arrives
with torch.inference_mode():
    _warm_batch = tok(
        "砖诇讜诐",
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=KMaxLen,
    ).to(KDevice)
    _ = torch.sigmoid(model(**_warm_batch).logits).item()


def clean(s):
    """Collapse every whitespace run to a single space and trim the ends."""
    return re.sub(r"\s+", " ", str(s)).strip()
28
+
29
@torch.inference_mode()
def check(txt: str) -> str:
    """Score one piece of text with the toxicity classifier.

    Returns a human-readable string carrying the sigmoid probability and the
    binary label obtained by thresholding that probability at KThreshold.
    """
    normalized = clean(txt)
    if not normalized:
        return "Type something first."

    # tokenize to a fixed-length, device-resident batch of size one
    encoded = tok(
        normalized,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=KMaxLen,
    ).to(KDevice)

    prob = torch.sigmoid(model(**encoded).logits).item()
    label = int(prob >= KThreshold)
    return f"Prob: {prob:.4f} | Label: {label} (cutoff={KThreshold})"
45
+
46
# ---- user interface ----
with gr.Blocks(title="Hebrew Profanity Detector") as demo:
    gr.Markdown("## Hebrew Profanity Detector\nEnter Hebrew text. Output: probability and label.")
    text_in = gr.Textbox(lines=4, label="Hebrew text")
    result_box = gr.Textbox(label="Result", interactive=False)
    run_button = gr.Button("Check")
    # wire the button to the classifier; also exposed as the /predict API route
    run_button.click(check, inputs=text_in, outputs=result_box, api_name="/predict")
    gr.Examples(
        examples=[["讝讛 讚讘专 诪爪讜讬谉"], ["!讬砖 诇讬 讞专讗 讞讘专"]],
        inputs=text_in,
        outputs=result_box,
        fn=check,
        cache_examples=False,
    )

if __name__ == "__main__":
    # honor a platform-provided PORT, defaulting to the HF Spaces standard 7860
    server_port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=server_port, show_error=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy<2
2
+ torch==2.2.2
3
+ transformers>=4.43,<4.47
4
+ gradio>=4.37,<5.0
5
+ huggingface-hub>=0.24