manuylo committed on
Commit
a22a49d
·
verified ·
1 Parent(s): 119f524

Upload dataset, tokenizer, categorical_mappings

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ categorical_mappings.json filter=lfs diff=lfs merge=lfs -text
37
+ data.csv filter=lfs diff=lfs merge=lfs -text
categorical_mappings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:388ee9e9b7aba820df3cef46aba8e0c5c327e3a32698a19de13e19c430068361
3
+ size 62638684
data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d4a90496ce7239c9bc22afe88c2728b74981f37e1f93a66ee56ac9f0853f036
3
+ size 48104596
tokenizer_smiles_diffusion.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "properties": {
3
+ "regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\\\\\|\\/|:|~|@|\\?|>|\\*|\\$|\\%[0-9]{2}|[0-9]|.)",
4
+ "special_tokens": {
5
+ "start": "^",
6
+ "end": "&",
7
+ "pad": "<PAD>",
8
+ "unknown": "?",
9
+ "mask": "<MASK>",
10
+ "sep": "<SEP>"
11
+ },
12
+ "chem_start_idx": 6
13
+ },
14
+ "vocabulary": [
15
+ "<PAD>",
16
+ "?",
17
+ "^",
18
+ "&",
19
+ "<MASK>",
20
+ "<SEP>",
21
+ "O",
22
+ "=",
23
+ "C",
24
+ "1",
25
+ "N",
26
+ "(",
27
+ "/",
28
+ "c",
29
+ "2",
30
+ "n",
31
+ "[nH]",
32
+ ")",
33
+ "[C@H]",
34
+ "[C@@H]",
35
+ "3",
36
+ "Br",
37
+ "F",
38
+ "S",
39
+ "Cl",
40
+ "\\",
41
+ "[N+]",
42
+ "[O-]",
43
+ "#",
44
+ "4",
45
+ "s",
46
+ "-",
47
+ "o",
48
+ "5",
49
+ "[C@@]",
50
+ "[C@]",
51
+ "6",
52
+ "P",
53
+ "7"
54
+ ]
55
+ }