HarethahMo committed on
Commit
22c590f
·
verified ·
1 Parent(s): f4de760

Upload tokenizer

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. chat_template.jinja +30 -0
  3. tokenizer.json +3 -0
  4. tokenizer_config.json +13 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% if messages|length % 2 == 0 %}{% set role = 'Agent' %}{% else %}{% set role = 'User' %}{% endif %}{{ " <|begin_of_text|><|start_header_id|>user<|end_header_id|>
2
+
3
+ Task: Check if there is unsafe content in '" + role + "' messages in conversations according our safety policy with the below categories.
4
+
5
+ <BEGIN UNSAFE CONTENT CATEGORIES>
6
+ S1: Violent Crimes.
7
+ S2: Non-Violent Crimes.
8
+ S3: Sex Crimes.
9
+ S4: Child Exploitation.
10
+ S5: Defamation.
11
+ S6: Specialized Advice.
12
+ S7: Privacy.
13
+ S8: Intellectual Property.
14
+ S9: Indiscriminate Weapons.
15
+ S10: Hate.
16
+ S11: Self-Harm.
17
+ S12: Sexual Content.
18
+ S13: Elections.
19
+ S14: Code Interpreter Abuse.
20
+ <END UNSAFE CONTENT CATEGORIES>
21
+
22
+ <BEGIN CONVERSATION>
23
+
24
+ " }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{% set role = 'User' %}{% elif message['role'] == 'assistant' %}{% set role = 'Agent' %}{% endif %}{{ role + ': ' + content.strip() + '
25
+
26
+ ' }}{% endfor %}{{ "<END CONVERSATION>
27
+
28
+ Provide your safety assessment for ONLY THE LAST " + role + " message in the above conversation:
29
+ - First line must read 'safe' or 'unsafe'.
30
+ - If unsafe, a second line must include a comma-separated list of violated categories.<|eot_id|><|start_header_id|>assistant<|end_header_id|>" }}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ade1dac458f86f9bea8bf35b713f14e1bbed24228429534038e9f7e54ea3e8b6
3
+ size 17208712
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "tokenizer_class": "TokenizersBackend"
13
+ }