Upload dataset, tokenizer, categorical_mappings
Browse files
- .gitattributes +2 -0
- categorical_mappings.json +3 -0
- data.csv +3 -0
- tokenizer_smiles_diffusion.json +55 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
categorical_mappings.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
data.csv filter=lfs diff=lfs merge=lfs -text
|
categorical_mappings.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:388ee9e9b7aba820df3cef46aba8e0c5c327e3a32698a19de13e19c430068361
|
| 3 |
+
size 62638684
|
data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d4a90496ce7239c9bc22afe88c2728b74981f37e1f93a66ee56ac9f0853f036
|
| 3 |
+
size 48104596
|
tokenizer_smiles_diffusion.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"properties": {
|
| 3 |
+
"regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\\\\\|\\/|:|~|@|\\?|>|\\*|\\$|\\%[0-9]{2}|[0-9]|.)",
|
| 4 |
+
"special_tokens": {
|
| 5 |
+
"start": "^",
|
| 6 |
+
"end": "&",
|
| 7 |
+
"pad": "<PAD>",
|
| 8 |
+
"unknown": "?",
|
| 9 |
+
"mask": "<MASK>",
|
| 10 |
+
"sep": "<SEP>"
|
| 11 |
+
},
|
| 12 |
+
"chem_start_idx": 6
|
| 13 |
+
},
|
| 14 |
+
"vocabulary": [
|
| 15 |
+
"<PAD>",
|
| 16 |
+
"?",
|
| 17 |
+
"^",
|
| 18 |
+
"&",
|
| 19 |
+
"<MASK>",
|
| 20 |
+
"<SEP>",
|
| 21 |
+
"O",
|
| 22 |
+
"=",
|
| 23 |
+
"C",
|
| 24 |
+
"1",
|
| 25 |
+
"N",
|
| 26 |
+
"(",
|
| 27 |
+
"/",
|
| 28 |
+
"c",
|
| 29 |
+
"2",
|
| 30 |
+
"n",
|
| 31 |
+
"[nH]",
|
| 32 |
+
")",
|
| 33 |
+
"[C@H]",
|
| 34 |
+
"[C@@H]",
|
| 35 |
+
"3",
|
| 36 |
+
"Br",
|
| 37 |
+
"F",
|
| 38 |
+
"S",
|
| 39 |
+
"Cl",
|
| 40 |
+
"\\",
|
| 41 |
+
"[N+]",
|
| 42 |
+
"[O-]",
|
| 43 |
+
"#",
|
| 44 |
+
"4",
|
| 45 |
+
"s",
|
| 46 |
+
"-",
|
| 47 |
+
"o",
|
| 48 |
+
"5",
|
| 49 |
+
"[C@@]",
|
| 50 |
+
"[C@]",
|
| 51 |
+
"6",
|
| 52 |
+
"P",
|
| 53 |
+
"7"
|
| 54 |
+
]
|
| 55 |
+
}
|