dliebson committed on
Commit
00ad462
·
verified ·
1 Parent(s): 4a84ed7

Upload tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +48 -0
tokenizer.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "type": "BPE",
4
+ "vocab": {
5
+ "!": 0,
6
+ "\"": 1,
7
+ "#": 2
8
+ }
9
+ },
10
+ "description": "Benign sample data: looks like phishing or payloads in places, but is safe when treated as JSON only.",
11
+ "clickable_url": {
12
+ "label": "Example documentation link",
13
+ "href": "https://example.com/",
14
+ "note": "IANA-reserved domain; safe placeholder for link tests."
15
+ },
16
+ "embedded_image": {
17
+ "alt": "Small SVG inline as data URL (green square only)",
18
+ "src": "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16'%3E%3Crect fill='%23008000' width='16' height='16'/%3E%3C/svg%3E",
19
+ "remote_fallback": "https://www.w3.org/Assets/w3c_logo.svg"
20
+ },
21
+ "looks_like_credentials_but_is_fake": {
22
+ "username": "demo_user_not_real",
23
+ "password": "hunter2_is_a_meme_not_a_secret"
24
+ },
25
+ "escaped_html_as_data_not_dom": {
26
+ "html_fragment": "<a href=\"https://example.org/\">Click me (string only)</a><img src=\"https://www.w3.org/Assets/w3c_logo.svg\" alt=\"W3C\" />"
27
+ },
28
+ "javascript_url_as_literal_string": {
29
+ "do_not_use_as_href": "javascript:alert('This is only text inside JSON; it does not execute here.')"
30
+ },
31
+ "shell_like_strings": {
32
+ "command_looking": "curl -s https://example.com/ | head -n 1",
33
+ "note": "Plain string; not executed by JSON parsers."
34
+ },
35
+ "base64_looking_but_harmless": {
36
+ "payload": "SGVsbG8sIHRoaXMgaXMganVzdCBiYXNlNjQgZW5jb2RlZCB0ZXh0Lg==",
37
+ "decoded_hint": "Decodes to a simple English sentence, not binary malware."
38
+ },
39
+ "ipv4_that_looks_suspicious": {
40
+ "address": "127.0.0.1",
41
+ "context": "Loopback; common in examples, not an attack by itself."
42
+ },
43
+ "unicode_homoglyph_example": {
44
+ "display": "exаmple.com",
45
+ "warning": "Contains Cyrillic 'а' (U+0430) instead of Latin 'a' — safe to store, but teaches URL review.",
46
+ "safe_ascii_equivalent": "example.com"
47
+ }
48
+ }