Commit
·
e2537a6
1
Parent(s):
9389567
update readme and tokenizer
Browse files
- README.md +2 -2
- classification/tokenizer_bbb.py +5 -2
- regression/tokenizer_bbb.py +5 -2
- tokenizer_bbb.py +4 -1
README.md
CHANGED
|
@@ -45,7 +45,7 @@ tokenizer = AutoTokenizer.from_pretrained('SaeedLab/TITAN-BBB', subfolder='class
|
|
| 45 |
model.eval()
|
| 46 |
|
| 47 |
smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
|
| 48 |
-
inputs = tokenizer(smiles)
|
| 49 |
|
| 50 |
with torch.no_grad():
|
| 51 |
outputs = model(**inputs)
|
|
@@ -67,7 +67,7 @@ tokenizer = AutoTokenizer.from_pretrained('SaeedLab/TITAN-BBB', subfolder='regre
|
|
| 67 |
model.eval()
|
| 68 |
|
| 69 |
smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
|
| 70 |
-
inputs = tokenizer(smiles)
|
| 71 |
|
| 72 |
with torch.no_grad():
|
| 73 |
outputs = model(**inputs)
|
|
|
|
| 45 |
model.eval()
|
| 46 |
|
| 47 |
smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
|
| 48 |
+
inputs = tokenizer(smiles, task='classification')
|
| 49 |
|
| 50 |
with torch.no_grad():
|
| 51 |
outputs = model(**inputs)
|
|
|
|
| 67 |
model.eval()
|
| 68 |
|
| 69 |
smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
|
| 70 |
+
inputs = tokenizer(smiles, task='regression')
|
| 71 |
|
| 72 |
with torch.no_grad():
|
| 73 |
outputs = model(**inputs)
|
classification/tokenizer_bbb.py
CHANGED
|
@@ -42,6 +42,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 42 |
self.feature_transformer_tab = None
|
| 43 |
self.feature_transformer_img = None
|
| 44 |
self.feature_transformer_txt = None
|
|
|
|
| 45 |
|
| 46 |
def generate_tab_features(self, smiles):
|
| 47 |
mol = Chem.MolFromSmiles(smiles)
|
|
@@ -55,7 +56,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 55 |
maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
|
| 56 |
tab_input = np.concatenate([rdkit_2d, maccs])
|
| 57 |
tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
|
| 58 |
-
tab_input = np.clip(tab_input, -
|
| 59 |
return torch.tensor(tab_input, dtype=torch.float32)
|
| 60 |
|
| 61 |
def generate_img_features(self, smiles):
|
|
@@ -85,7 +86,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 85 |
return_tensors: str = "pt",
|
| 86 |
**kwargs
|
| 87 |
):
|
| 88 |
-
if self.
|
| 89 |
if task == 'classification':
|
| 90 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
|
| 91 |
transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
|
|
@@ -93,6 +94,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 93 |
transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
|
| 94 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
|
| 95 |
transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
|
|
|
|
| 96 |
|
| 97 |
elif task == 'regression':
|
| 98 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
|
|
@@ -101,6 +103,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 101 |
transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
|
| 102 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
|
| 103 |
transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
|
|
|
|
| 104 |
|
| 105 |
else:
|
| 106 |
raise ValueError('task not defined')
|
|
|
|
| 42 |
self.feature_transformer_tab = None
|
| 43 |
self.feature_transformer_img = None
|
| 44 |
self.feature_transformer_txt = None
|
| 45 |
+
self.task = None
|
| 46 |
|
| 47 |
def generate_tab_features(self, smiles):
|
| 48 |
mol = Chem.MolFromSmiles(smiles)
|
|
|
|
| 56 |
maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
|
| 57 |
tab_input = np.concatenate([rdkit_2d, maccs])
|
| 58 |
tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
|
| 59 |
+
tab_input = np.clip(tab_input, -1e5, 1e5)
|
| 60 |
return torch.tensor(tab_input, dtype=torch.float32)
|
| 61 |
|
| 62 |
def generate_img_features(self, smiles):
|
|
|
|
| 86 |
return_tensors: str = "pt",
|
| 87 |
**kwargs
|
| 88 |
):
|
| 89 |
+
if self.task is None or self.task != task:
|
| 90 |
if task == 'classification':
|
| 91 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
|
| 92 |
transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
|
|
|
|
| 94 |
transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
|
| 95 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
|
| 96 |
transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
|
| 97 |
+
self.task = task
|
| 98 |
|
| 99 |
elif task == 'regression':
|
| 100 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
|
|
|
|
| 103 |
transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
|
| 104 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
|
| 105 |
transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
|
| 106 |
+
self.task = task
|
| 107 |
|
| 108 |
else:
|
| 109 |
raise ValueError('task not defined')
|
regression/tokenizer_bbb.py
CHANGED
|
@@ -42,6 +42,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 42 |
self.feature_transformer_tab = None
|
| 43 |
self.feature_transformer_img = None
|
| 44 |
self.feature_transformer_txt = None
|
|
|
|
| 45 |
|
| 46 |
def generate_tab_features(self, smiles):
|
| 47 |
mol = Chem.MolFromSmiles(smiles)
|
|
@@ -55,7 +56,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 55 |
maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
|
| 56 |
tab_input = np.concatenate([rdkit_2d, maccs])
|
| 57 |
tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
|
| 58 |
-
tab_input = np.clip(tab_input, -
|
| 59 |
return torch.tensor(tab_input, dtype=torch.float32)
|
| 60 |
|
| 61 |
def generate_img_features(self, smiles):
|
|
@@ -85,7 +86,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 85 |
return_tensors: str = "pt",
|
| 86 |
**kwargs
|
| 87 |
):
|
| 88 |
-
if self.
|
| 89 |
if task == 'classification':
|
| 90 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
|
| 91 |
transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
|
|
@@ -93,6 +94,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 93 |
transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
|
| 94 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
|
| 95 |
transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
|
|
|
|
| 96 |
|
| 97 |
elif task == 'regression':
|
| 98 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
|
|
@@ -101,6 +103,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 101 |
transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
|
| 102 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
|
| 103 |
transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
|
|
|
|
| 104 |
|
| 105 |
else:
|
| 106 |
raise ValueError('task not defined')
|
|
|
|
| 42 |
self.feature_transformer_tab = None
|
| 43 |
self.feature_transformer_img = None
|
| 44 |
self.feature_transformer_txt = None
|
| 45 |
+
self.task = None
|
| 46 |
|
| 47 |
def generate_tab_features(self, smiles):
|
| 48 |
mol = Chem.MolFromSmiles(smiles)
|
|
|
|
| 56 |
maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
|
| 57 |
tab_input = np.concatenate([rdkit_2d, maccs])
|
| 58 |
tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
|
| 59 |
+
tab_input = np.clip(tab_input, -1e5, 1e5)
|
| 60 |
return torch.tensor(tab_input, dtype=torch.float32)
|
| 61 |
|
| 62 |
def generate_img_features(self, smiles):
|
|
|
|
| 86 |
return_tensors: str = "pt",
|
| 87 |
**kwargs
|
| 88 |
):
|
| 89 |
+
if self.task is None or self.task != task:
|
| 90 |
if task == 'classification':
|
| 91 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
|
| 92 |
transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
|
|
|
|
| 94 |
transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
|
| 95 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
|
| 96 |
transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
|
| 97 |
+
self.task = task
|
| 98 |
|
| 99 |
elif task == 'regression':
|
| 100 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
|
|
|
|
| 103 |
transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
|
| 104 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
|
| 105 |
transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
|
| 106 |
+
self.task = task
|
| 107 |
|
| 108 |
else:
|
| 109 |
raise ValueError('task not defined')
|
tokenizer_bbb.py
CHANGED
|
@@ -42,6 +42,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 42 |
self.feature_transformer_tab = None
|
| 43 |
self.feature_transformer_img = None
|
| 44 |
self.feature_transformer_txt = None
|
|
|
|
| 45 |
|
| 46 |
def generate_tab_features(self, smiles):
|
| 47 |
mol = Chem.MolFromSmiles(smiles)
|
|
@@ -85,7 +86,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 85 |
return_tensors: str = "pt",
|
| 86 |
**kwargs
|
| 87 |
):
|
| 88 |
-
if self.
|
| 89 |
if task == 'classification':
|
| 90 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
|
| 91 |
transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
|
|
@@ -93,6 +94,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 93 |
transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
|
| 94 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
|
| 95 |
transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
|
|
|
|
| 96 |
|
| 97 |
elif task == 'regression':
|
| 98 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
|
|
@@ -101,6 +103,7 @@ class BBBTokenizer(PreTrainedTokenizer):
|
|
| 101 |
transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
|
| 102 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
|
| 103 |
transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
|
|
|
|
| 104 |
|
| 105 |
else:
|
| 106 |
raise ValueError('task not defined')
|
|
|
|
| 42 |
self.feature_transformer_tab = None
|
| 43 |
self.feature_transformer_img = None
|
| 44 |
self.feature_transformer_txt = None
|
| 45 |
+
self.task = None
|
| 46 |
|
| 47 |
def generate_tab_features(self, smiles):
|
| 48 |
mol = Chem.MolFromSmiles(smiles)
|
|
|
|
| 86 |
return_tensors: str = "pt",
|
| 87 |
**kwargs
|
| 88 |
):
|
| 89 |
+
if self.task is None or self.task != task:
|
| 90 |
if task == 'classification':
|
| 91 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
|
| 92 |
transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
|
|
|
|
| 94 |
transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
|
| 95 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
|
| 96 |
transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
|
| 97 |
+
self.task = task
|
| 98 |
|
| 99 |
elif task == 'regression':
|
| 100 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
|
|
|
|
| 103 |
transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
|
| 104 |
model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
|
| 105 |
transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
|
| 106 |
+
self.task = task
|
| 107 |
|
| 108 |
else:
|
| 109 |
raise ValueError('task not defined')
|