amba370447 committed on
Commit
918a1af
·
verified ·
1 Parent(s): e4df84e

Upload preprocessing_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. preprocessing_config.json +90 -0
preprocessing_config.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "char_map": {
3
+ "0": "o",
4
+ "1": "i",
5
+ "3": "e",
6
+ "4": "a",
7
+ "5": "s",
8
+ "6": "g",
9
+ "7": "t",
10
+ "8": "b",
11
+ "9": "g",
12
+ "@": "a",
13
+ "$": "s",
14
+ "!": "i",
15
+ "#": "h",
16
+ "đ": "d",
17
+ "Đ": "d"
18
+ },
19
+ "obfuscation_patterns": {
20
+ "d[i1!][t7]": "dit",
21
+ "l[o0]n": "lon",
22
+ "c[a@4]c": "cac",
23
+ "c[u\\*]": "cu",
24
+ "d[e3]o": "deo",
25
+ "v[c\\*]l": "vcl",
26
+ "d[m\\*]": "dm",
27
+ "đ[m\\*]": "dm",
28
+ "đ[c\\*]m": "dcm",
29
+ "ch[o0]": "cho"
30
+ },
31
+ "nsfw_keywords": {
32
+ "địt": 100,
33
+ "đụ": 100,
34
+ "đéo": 100,
35
+ "lồn": 100,
36
+ "buồi": 100,
37
+ "cặc": 100,
38
+ "cu": 100,
39
+ "bú": 90,
40
+ "bím": 100,
41
+ "cứt": 80,
42
+ "chịch": 100,
43
+ "doggy": 90,
44
+ "blowjob": 100,
45
+ "địt mẹ": 100,
46
+ "đụ mẹ": 100,
47
+ "lồn mẹ": 100,
48
+ "dmm": 100,
49
+ "đmm": 100,
50
+ "dm": 90,
51
+ "đm": 90,
52
+ "vcl": 90,
53
+ "vkl": 90,
54
+ "dcm": 100,
55
+ "sex": 100,
56
+ "porn": 100,
57
+ "xxx": 100,
58
+ "nude": 100,
59
+ "đĩ": 100,
60
+ "điếm": 100,
61
+ "fuck": 100,
62
+ "shit": 80
63
+ },
64
+ "nsfw_patterns": [
65
+ [
66
+ "đ[i1!][t7]",
67
+ 100
68
+ ],
69
+ [
70
+ "l[o0]n",
71
+ 100
72
+ ],
73
+ [
74
+ "c[ặa@][c]",
75
+ 100
76
+ ],
77
+ [
78
+ "[cđ][u\\*]",
79
+ 90
80
+ ],
81
+ [
82
+ "v[c\\*]l",
83
+ 90
84
+ ],
85
+ [
86
+ "đ[mM\\*]+",
87
+ 90
88
+ ]
89
+ ]
90
+ }