fallinluv8 commited on
Commit
f4a3d53
·
0 Parent(s):

Deploy final

Browse files
Files changed (6) hide show
  1. .dockerignore +3 -0
  2. .gitattributes +35 -0
  3. Dockerfile +23 -0
  4. README.md +11 -0
  5. main.py +94 -0
  6. requirements.txt +6 -0
.dockerignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv
2
+ __pycache__
3
+ .git
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.10 as the base image
FROM python:3.10

# Create the working directory inside the container
WORKDIR /code

# Copy requirements.txt into the container first so dependency install is cached
COPY ./requirements.txt /code/requirements.txt

# Install the required libraries
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the whole codebase (including the model directory and main.py) into the container
COPY . /code

# Set up a non-root user (required by Hugging Face Spaces)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH


CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Nexore Toxic Api
3
+ emoji: 📉
4
+ colorFrom: purple
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
main.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
6
+
7
# FastAPI application serving the toxicity-detection endpoints.
app = FastAPI()

# Hugging Face Hub repo id of the fine-tuned token-classification model.
MODEL_PATH = "fallinluv8/nexore-toxic-model"

# Hard blacklist of Vietnamese profanity, checked before the model runs.
# (Duplicate "đ**" entry removed — it appeared twice in the original list.)
HARD_BLACKLIST = [
    "đéo", "đ**", "dm", "dcm", "đm", "vcl", "vl",
    "cc", "cmn", "cmm", "lồn", "l**", "đĩ",
    "cặc", "c**", "buồi", "bu**", "đụ", "đụ mẹ",
    "đụ bố", "đụ con mẹ mày", "đụ con", "đụ mày",
]

print(" Đang tải model NER (Token Classification)...")

# Initialize to None so the name is always defined: if the model fails to
# load, /predict fails with a clear TypeError on a None classifier instead
# of a NameError on an unbound global.
classifier = None
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    model = AutoModelForTokenClassification.from_pretrained(MODEL_PATH)

    classifier = pipeline(
        "token-classification",
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy="simple",  # merge sub-tokens into whole words
        device=0 if torch.cuda.is_available() else -1,  # GPU when available
    )
    print(" Model NER đã tải thành công!")
except Exception as e:
    print(f" Lỗi tải model: {e}")
36
class ToxicRequest(BaseModel):
    """Request body for POST /predict: the raw text to moderate."""
    text: str
39
@app.get("/")
def home():
    """Liveness endpoint: reports that the service process is up."""
    status_payload = {"status": "AI NER Service is running"}
    return status_payload
42
+
43
@app.post("/predict")
async def predict_toxicity(request: ToxicRequest):
    """Flag toxic Vietnamese text.

    Two detection passes: an exact blacklist match (score forced to 1.0)
    and the NER token-classification model (entity group 'BAD', counted
    as toxic above a 0.9 confidence cutoff).

    Returns a dict with is_toxic, the maximum score, the offending words,
    and the raw pipeline output under "details".
    Raises HTTPException 400 when the text field is empty.
    """
    if not request.text:
        raise HTTPException(status_code=400, detail="Thiếu nội dung text")
    text_lower = request.text.lower()

    is_toxic = False
    max_score = 0.0
    toxic_words = []

    # Blacklist pass. The original condition ended in a bare
    # `bad_word in text_lower`, which made its word-boundary checks dead
    # code and flagged innocent substrings (e.g. "cc" inside "access").
    # Match only at word boundaries instead; multi-word entries such as
    # "đụ mẹ" still match because whitespace is not a word character, and
    # Python's \w is Unicode-aware so Vietnamese letters count as word chars.
    for bad_word in HARD_BLACKLIST:
        if re.search(rf"(?<!\w){re.escape(bad_word)}(?!\w)", text_lower):
            is_toxic = True
            max_score = 1.0
            if not any(t['word'] == bad_word for t in toxic_words):
                toxic_words.append({
                    "word": bad_word,
                    "score": 1.0
                })

    results = classifier(request.text)

    for entity in results:
        print(f" AI thấy: '{entity['word']}' ({entity['score']:.4f})")

        if entity['entity_group'] == 'BAD':
            score = float(entity['score'])
            word = entity['word']

            # Only add if the blacklist pass has not already reported this
            # word (avoid listing it twice).
            if not any(t['word'].lower() == word.lower() for t in toxic_words):
                toxic_words.append({
                    "word": word,
                    "score": score
                })

            if score > max_score:
                max_score = score

    # Model-only detections count as toxic above a high-confidence cutoff.
    if max_score > 0.9:
        is_toxic = True

    return {
        "is_toxic": is_toxic,
        "score": float(max_score),
        "toxic_words": toxic_words,
        "details": str(results)
    }
92
+
93
if __name__ == "__main__":
    # Local development entry point.
    # NOTE(review): serves on port 8000 here, while the Dockerfile CMD uses
    # 7860 (Hugging Face Spaces convention) — confirm which is intended.
    uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+ pydantic
6
+ safetensors