Update README.md
Browse files
README.md
CHANGED
|
@@ -102,6 +102,8 @@ tokenizer.decode_with_trace(encoded)
|
|
| 102 |
```
|
| 103 |
|
| 104 |
**for SELFIES**
|
|
|
|
|
|
|
| 105 |
```python
|
| 106 |
from FastChemTokenizerHF import FastChemTokenizerSelfies
|
| 107 |
|
|
@@ -125,6 +127,29 @@ tokenizer.decode_with_trace(encoded)
|
|
| 125 |
# [005] ID= 1 → '</s>'
|
| 126 |
```
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
## 📦 Installation & Usage
|
| 129 |
|
| 130 |
0. Make sure you have all the required packages installed; the code may also run with different versions
|
|
|
|
| 102 |
```
|
| 103 |
|
| 104 |
**for SELFIES**
|
| 105 |
+
Please don't use the old `FastChemTokenizer` for SELFIES; use the HF one (`FastChemTokenizerSelfies` from `FastChemTokenizerHF`) instead.
|
| 106 |
+
|
| 107 |
```python
|
| 108 |
from FastChemTokenizerHF import FastChemTokenizerSelfies
|
| 109 |
|
|
|
|
| 127 |
# [005] ID= 1 → '</s>'
|
| 128 |
```
|
| 129 |
|
| 130 |
+
#### BigSMILES (experimental)
|
| 131 |
+
```python
|
| 132 |
+
from FastChemTokenizer import FastChemTokenizer
|
| 133 |
+
|
| 134 |
+
tokenizer = FastChemTokenizer.from_pretrained("./bigsmiles-proto")
|
| 135 |
+
testentry = "*CC(*)c1ccccc1C(=O)OCCCCCC"
|
| 136 |
+
encoded = tokenizer.encode(testentry)
|
| 137 |
+
print("✅ Encoded:", encoded)
|
| 138 |
+
decoded = tokenizer.decode(encoded)
|
| 139 |
+
print("✅ Decoded:", decoded)
|
| 140 |
+
tokenizer.decode_with_trace(encoded)
|
| 141 |
+
|
| 142 |
+
# ✅ Encoded: [186, 185, 723, 31, 439]
|
| 143 |
+
# ✅ Decoded: *CC(*)c1ccccc1C(=O)OCCCCCC
|
| 144 |
+
#
|
| 145 |
+
# 🔍 Decoding 5 tokens:
|
| 146 |
+
# [000] ID= 186 → '*CC(*)'
|
| 147 |
+
# [001] ID= 185 → 'c1cccc'
|
| 148 |
+
# [002] ID= 723 → 'c1'
|
| 149 |
+
# [003] ID= 31 → 'C(=O)OCC'
|
| 150 |
+
# [004] ID= 439 → 'CCCC'
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
## 📦 Installation & Usage
|
| 154 |
|
| 155 |
0. Make sure you have all the required packages installed; the code may also run with different versions
|