gabrielbianchin committed on
Commit
e2537a6
·
1 Parent(s): 9389567

update readme and tokenizer

Browse files
README.md CHANGED
@@ -45,7 +45,7 @@ tokenizer = AutoTokenizer.from_pretrained('SaeedLab/TITAN-BBB', subfolder='class
45
  model.eval()
46
 
47
  smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
48
- inputs = tokenizer(smiles)
49
 
50
  with torch.no_grad():
51
  outputs = model(**inputs)
@@ -67,7 +67,7 @@ tokenizer = AutoTokenizer.from_pretrained('SaeedLab/TITAN-BBB', subfolder='regre
67
  model.eval()
68
 
69
  smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
70
- inputs = tokenizer(smiles)
71
 
72
  with torch.no_grad():
73
  outputs = model(**inputs)
 
45
  model.eval()
46
 
47
  smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
48
+ inputs = tokenizer(smiles, task='classification')
49
 
50
  with torch.no_grad():
51
  outputs = model(**inputs)
 
67
  model.eval()
68
 
69
  smiles = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
70
+ inputs = tokenizer(smiles, task='regression')
71
 
72
  with torch.no_grad():
73
  outputs = model(**inputs)
classification/tokenizer_bbb.py CHANGED
@@ -42,6 +42,7 @@ class BBBTokenizer(PreTrainedTokenizer):
42
  self.feature_transformer_tab = None
43
  self.feature_transformer_img = None
44
  self.feature_transformer_txt = None
 
45
 
46
  def generate_tab_features(self, smiles):
47
  mol = Chem.MolFromSmiles(smiles)
@@ -55,7 +56,7 @@ class BBBTokenizer(PreTrainedTokenizer):
55
  maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
56
  tab_input = np.concatenate([rdkit_2d, maccs])
57
  tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
58
- tab_input = np.clip(tab_input, -1e3, 1e3)
59
  return torch.tensor(tab_input, dtype=torch.float32)
60
 
61
  def generate_img_features(self, smiles):
@@ -85,7 +86,7 @@ class BBBTokenizer(PreTrainedTokenizer):
85
  return_tensors: str = "pt",
86
  **kwargs
87
  ):
88
- if self.feature_transformer_tab is None and self.feature_transformer_img is None and self.feature_transformer_txt is None:
89
  if task == 'classification':
90
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
91
  transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
@@ -93,6 +94,7 @@ class BBBTokenizer(PreTrainedTokenizer):
93
  transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
94
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
95
  transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
 
96
 
97
  elif task == 'regression':
98
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
@@ -101,6 +103,7 @@ class BBBTokenizer(PreTrainedTokenizer):
101
  transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
102
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
103
  transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
 
104
 
105
  else:
106
  raise ValueError('task not defined')
 
42
  self.feature_transformer_tab = None
43
  self.feature_transformer_img = None
44
  self.feature_transformer_txt = None
45
+ self.task = None
46
 
47
  def generate_tab_features(self, smiles):
48
  mol = Chem.MolFromSmiles(smiles)
 
56
  maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
57
  tab_input = np.concatenate([rdkit_2d, maccs])
58
  tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
59
+ tab_input = np.clip(tab_input, -1e5, 1e5)
60
  return torch.tensor(tab_input, dtype=torch.float32)
61
 
62
  def generate_img_features(self, smiles):
 
86
  return_tensors: str = "pt",
87
  **kwargs
88
  ):
89
+ if self.task is None or self.task != task:
90
  if task == 'classification':
91
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
92
  transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
 
94
  transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
95
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
96
  transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
97
+ self.task = task
98
 
99
  elif task == 'regression':
100
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
 
103
  transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
104
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
105
  transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
106
+ self.task = task
107
 
108
  else:
109
  raise ValueError('task not defined')
regression/tokenizer_bbb.py CHANGED
@@ -42,6 +42,7 @@ class BBBTokenizer(PreTrainedTokenizer):
42
  self.feature_transformer_tab = None
43
  self.feature_transformer_img = None
44
  self.feature_transformer_txt = None
 
45
 
46
  def generate_tab_features(self, smiles):
47
  mol = Chem.MolFromSmiles(smiles)
@@ -55,7 +56,7 @@ class BBBTokenizer(PreTrainedTokenizer):
55
  maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
56
  tab_input = np.concatenate([rdkit_2d, maccs])
57
  tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
58
- tab_input = np.clip(tab_input, -1e3, 1e3)
59
  return torch.tensor(tab_input, dtype=torch.float32)
60
 
61
  def generate_img_features(self, smiles):
@@ -85,7 +86,7 @@ class BBBTokenizer(PreTrainedTokenizer):
85
  return_tensors: str = "pt",
86
  **kwargs
87
  ):
88
- if self.feature_transformer_tab is None and self.feature_transformer_img is None and self.feature_transformer_txt is None:
89
  if task == 'classification':
90
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
91
  transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
@@ -93,6 +94,7 @@ class BBBTokenizer(PreTrainedTokenizer):
93
  transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
94
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
95
  transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
 
96
 
97
  elif task == 'regression':
98
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
@@ -101,6 +103,7 @@ class BBBTokenizer(PreTrainedTokenizer):
101
  transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
102
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
103
  transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
 
104
 
105
  else:
106
  raise ValueError('task not defined')
 
42
  self.feature_transformer_tab = None
43
  self.feature_transformer_img = None
44
  self.feature_transformer_txt = None
45
+ self.task = None
46
 
47
  def generate_tab_features(self, smiles):
48
  mol = Chem.MolFromSmiles(smiles)
 
56
  maccs = np.array(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()), dtype=int)
57
  tab_input = np.concatenate([rdkit_2d, maccs])
58
  tab_input = self.feature_transformer_tab.transform(tab_input.reshape(1, -1))[0]
59
+ tab_input = np.clip(tab_input, -1e5, 1e5)
60
  return torch.tensor(tab_input, dtype=torch.float32)
61
 
62
  def generate_img_features(self, smiles):
 
86
  return_tensors: str = "pt",
87
  **kwargs
88
  ):
89
+ if self.task is None or self.task != task:
90
  if task == 'classification':
91
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
92
  transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
 
94
  transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
95
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
96
  transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
97
+ self.task = task
98
 
99
  elif task == 'regression':
100
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
 
103
  transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
104
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
105
  transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
106
+ self.task = task
107
 
108
  else:
109
  raise ValueError('task not defined')
tokenizer_bbb.py CHANGED
@@ -42,6 +42,7 @@ class BBBTokenizer(PreTrainedTokenizer):
42
  self.feature_transformer_tab = None
43
  self.feature_transformer_img = None
44
  self.feature_transformer_txt = None
 
45
 
46
  def generate_tab_features(self, smiles):
47
  mol = Chem.MolFromSmiles(smiles)
@@ -85,7 +86,7 @@ class BBBTokenizer(PreTrainedTokenizer):
85
  return_tensors: str = "pt",
86
  **kwargs
87
  ):
88
- if self.feature_transformer_tab is None and self.feature_transformer_img is None and self.feature_transformer_txt is None:
89
  if task == 'classification':
90
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
91
  transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
@@ -93,6 +94,7 @@ class BBBTokenizer(PreTrainedTokenizer):
93
  transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
94
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
95
  transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
 
96
 
97
  elif task == 'regression':
98
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
@@ -101,6 +103,7 @@ class BBBTokenizer(PreTrainedTokenizer):
101
  transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
102
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
103
  transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
 
104
 
105
  else:
106
  raise ValueError('task not defined')
 
42
  self.feature_transformer_tab = None
43
  self.feature_transformer_img = None
44
  self.feature_transformer_txt = None
45
+ self.task = None
46
 
47
  def generate_tab_features(self, smiles):
48
  mol = Chem.MolFromSmiles(smiles)
 
86
  return_tensors: str = "pt",
87
  **kwargs
88
  ):
89
+ if self.task is None or self.task != task:
90
  if task == 'classification':
91
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
92
  transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
 
94
  transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
95
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
96
  transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
97
+ self.task = task
98
 
99
  elif task == 'regression':
100
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
 
103
  transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
104
  model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
105
  transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
106
+ self.task = task
107
 
108
  else:
109
  raise ValueError('task not defined')