gabrielbianchin committed on
Commit
b24b48a
·
1 Parent(s): a39c0d4
classification/tokenizer_bbb.py CHANGED
@@ -39,17 +39,6 @@ class BBBTokenizer(PreTrainedTokenizer):
39
  )
40
  ])
41
 
42
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
43
- transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
44
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_image.joblib"])
45
- transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
46
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
47
- transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
48
-
49
- self.feature_transformer_tab = joblib.load(transformer_tab_path)
50
- self.feature_transformer_img = joblib.load(transformer_img_path)
51
- self.feature_transformer_txt = joblib.load(transformer_txt_path)
52
-
53
  def generate_tab_features(self, smiles):
54
  mol = Chem.MolFromSmiles(smiles)
55
 
@@ -87,9 +76,34 @@ class BBBTokenizer(PreTrainedTokenizer):
87
  def _batch_encode_plus(
88
  self,
89
  batch_smiles: list[str],
 
90
  return_tensors: str = "pt",
91
  **kwargs
92
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  data_list = []
94
  tab, img, txt = [], [], []
95
 
@@ -111,15 +125,17 @@ class BBBTokenizer(PreTrainedTokenizer):
111
 
112
  def encode(self,
113
  batch_smiles: list[str],
 
114
  return_tensors: str = "pt",
115
  **kwargs):
116
- return self._batch_encode_plus(batch_smiles, return_tensors, **kwargs)
117
 
118
  def __call__(self,
119
  batch_smiles: list[str],
 
120
  return_tensors: str = "pt",
121
  **kwargs):
122
- return self._batch_encode_plus(batch_smiles, return_tensors, **kwargs)
123
 
124
  def _tokenize(self, text, **kwargs):
125
  return []
 
39
  )
40
  ])
41
 
 
 
 
 
 
 
 
 
 
 
 
42
  def generate_tab_features(self, smiles):
43
  mol = Chem.MolFromSmiles(smiles)
44
 
 
76
  def _batch_encode_plus(
77
  self,
78
  batch_smiles: list[str],
79
+ task: str = 'classification',
80
  return_tensors: str = "pt",
81
  **kwargs
82
  ):
83
+ if task == 'classification':
84
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
85
+ transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
86
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_image.joblib"])
87
+ transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
88
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
89
+ transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
90
+
91
+ elif task == 'regression':
92
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
93
+ transformer_tab_path = os.path.join(model_dir, "normalize_reg_tabular.joblib")
94
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_image.joblib"])
95
+ transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
96
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
97
+ transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
98
+
99
+ else:
100
+ raise ValueError('task not defined')
101
+ return
102
+
103
+ self.feature_transformer_tab = joblib.load(transformer_tab_path)
104
+ self.feature_transformer_img = joblib.load(transformer_img_path)
105
+ self.feature_transformer_txt = joblib.load(transformer_txt_path)
106
+
107
  data_list = []
108
  tab, img, txt = [], [], []
109
 
 
125
 
126
  def encode(self,
127
  batch_smiles: list[str],
128
+ task: str = 'classification',
129
  return_tensors: str = "pt",
130
  **kwargs):
131
+ return self._batch_encode_plus(batch_smiles, task, return_tensors, **kwargs)
132
 
133
  def __call__(self,
134
  batch_smiles: list[str],
135
+ task: str = 'classification',
136
  return_tensors: str = "pt",
137
  **kwargs):
138
+ return self._batch_encode_plus(batch_smiles, task, return_tensors, **kwargs)
139
 
140
  def _tokenize(self, text, **kwargs):
141
  return []
regression/tokenizer_bbb.py CHANGED
@@ -39,17 +39,6 @@ class BBBTokenizer(PreTrainedTokenizer):
39
  )
40
  ])
41
 
42
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
43
- transformer_tab_path = os.path.join(model_dir, "normalize_reg_tabular.joblib")
44
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_image.joblib"])
45
- transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
46
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
47
- transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
48
-
49
- self.feature_transformer_tab = joblib.load(transformer_tab_path)
50
- self.feature_transformer_img = joblib.load(transformer_img_path)
51
- self.feature_transformer_txt = joblib.load(transformer_txt_path)
52
-
53
  def generate_tab_features(self, smiles):
54
  mol = Chem.MolFromSmiles(smiles)
55
 
@@ -87,9 +76,34 @@ class BBBTokenizer(PreTrainedTokenizer):
87
  def _batch_encode_plus(
88
  self,
89
  batch_smiles: list[str],
 
90
  return_tensors: str = "pt",
91
  **kwargs
92
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  data_list = []
94
  tab, img, txt = [], [], []
95
 
@@ -111,15 +125,17 @@ class BBBTokenizer(PreTrainedTokenizer):
111
 
112
  def encode(self,
113
  batch_smiles: list[str],
 
114
  return_tensors: str = "pt",
115
  **kwargs):
116
- return self._batch_encode_plus(batch_smiles, return_tensors, **kwargs)
117
 
118
  def __call__(self,
119
  batch_smiles: list[str],
 
120
  return_tensors: str = "pt",
121
  **kwargs):
122
- return self._batch_encode_plus(batch_smiles, return_tensors, **kwargs)
123
 
124
  def _tokenize(self, text, **kwargs):
125
  return []
 
39
  )
40
  ])
41
 
 
 
 
 
 
 
 
 
 
 
 
42
  def generate_tab_features(self, smiles):
43
  mol = Chem.MolFromSmiles(smiles)
44
 
 
76
  def _batch_encode_plus(
77
  self,
78
  batch_smiles: list[str],
79
+ task: str = 'classification',
80
  return_tensors: str = "pt",
81
  **kwargs
82
  ):
83
+ if task == 'classification':
84
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
85
+ transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
86
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_image.joblib"])
87
+ transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
88
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
89
+ transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
90
+
91
+ elif task == 'regression':
92
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
93
+ transformer_tab_path = os.path.join(model_dir, "normalize_reg_tabular.joblib")
94
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_image.joblib"])
95
+ transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
96
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
97
+ transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
98
+
99
+ else:
100
+ raise ValueError('task not defined')
101
+ return
102
+
103
+ self.feature_transformer_tab = joblib.load(transformer_tab_path)
104
+ self.feature_transformer_img = joblib.load(transformer_img_path)
105
+ self.feature_transformer_txt = joblib.load(transformer_txt_path)
106
+
107
  data_list = []
108
  tab, img, txt = [], [], []
109
 
 
125
 
126
  def encode(self,
127
  batch_smiles: list[str],
128
+ task: str = 'classification',
129
  return_tensors: str = "pt",
130
  **kwargs):
131
+ return self._batch_encode_plus(batch_smiles, task, return_tensors, **kwargs)
132
 
133
  def __call__(self,
134
  batch_smiles: list[str],
135
+ task: str = 'classification',
136
  return_tensors: str = "pt",
137
  **kwargs):
138
+ return self._batch_encode_plus(batch_smiles, task, return_tensors, **kwargs)
139
 
140
  def _tokenize(self, text, **kwargs):
141
  return []
tokenizer_bbb.py CHANGED
@@ -39,17 +39,6 @@ class BBBTokenizer(PreTrainedTokenizer):
39
  )
40
  ])
41
 
42
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
43
- transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
44
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_image.joblib"])
45
- transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
46
- model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
47
- transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
48
-
49
- self.feature_transformer_tab = joblib.load(transformer_tab_path)
50
- self.feature_transformer_img = joblib.load(transformer_img_path)
51
- self.feature_transformer_txt = joblib.load(transformer_txt_path)
52
-
53
  def generate_tab_features(self, smiles):
54
  mol = Chem.MolFromSmiles(smiles)
55
 
@@ -87,9 +76,34 @@ class BBBTokenizer(PreTrainedTokenizer):
87
  def _batch_encode_plus(
88
  self,
89
  batch_smiles: list[str],
 
90
  return_tensors: str = "pt",
91
  **kwargs
92
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  data_list = []
94
  tab, img, txt = [], [], []
95
 
@@ -111,15 +125,17 @@ class BBBTokenizer(PreTrainedTokenizer):
111
 
112
  def encode(self,
113
  batch_smiles: list[str],
 
114
  return_tensors: str = "pt",
115
  **kwargs):
116
- return self._batch_encode_plus(batch_smiles, return_tensors, **kwargs)
117
 
118
  def __call__(self,
119
  batch_smiles: list[str],
 
120
  return_tensors: str = "pt",
121
  **kwargs):
122
- return self._batch_encode_plus(batch_smiles, return_tensors, **kwargs)
123
 
124
  def _tokenize(self, text, **kwargs):
125
  return []
 
39
  )
40
  ])
41
 
 
 
 
 
 
 
 
 
 
 
 
42
  def generate_tab_features(self, smiles):
43
  mol = Chem.MolFromSmiles(smiles)
44
 
 
76
  def _batch_encode_plus(
77
  self,
78
  batch_smiles: list[str],
79
+ task: str = 'classification',
80
  return_tensors: str = "pt",
81
  **kwargs
82
  ):
83
+ if task == 'classification':
84
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_tabular.joblib"])
85
+ transformer_tab_path = os.path.join(model_dir, "normalize_cls_tabular.joblib")
86
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_image.joblib"])
87
+ transformer_img_path = os.path.join(model_dir, "normalize_cls_image.joblib")
88
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_cls_text.joblib"])
89
+ transformer_txt_path = os.path.join(model_dir, "normalize_cls_text.joblib")
90
+
91
+ elif task == 'regression':
92
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_tabular.joblib"])
93
+ transformer_tab_path = os.path.join(model_dir, "normalize_reg_tabular.joblib")
94
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_image.joblib"])
95
+ transformer_img_path = os.path.join(model_dir, "normalize_reg_image.joblib")
96
+ model_dir = snapshot_download("SaeedLab/TITAN-BBB", allow_patterns=["normalize_reg_text.joblib"])
97
+ transformer_txt_path = os.path.join(model_dir, "normalize_reg_text.joblib")
98
+
99
+ else:
100
+ raise ValueError('task not defined')
101
+ return
102
+
103
+ self.feature_transformer_tab = joblib.load(transformer_tab_path)
104
+ self.feature_transformer_img = joblib.load(transformer_img_path)
105
+ self.feature_transformer_txt = joblib.load(transformer_txt_path)
106
+
107
  data_list = []
108
  tab, img, txt = [], [], []
109
 
 
125
 
126
  def encode(self,
127
  batch_smiles: list[str],
128
+ task: str = 'classification',
129
  return_tensors: str = "pt",
130
  **kwargs):
131
+ return self._batch_encode_plus(batch_smiles, task, return_tensors, **kwargs)
132
 
133
  def __call__(self,
134
  batch_smiles: list[str],
135
+ task: str = 'classification',
136
  return_tensors: str = "pt",
137
  **kwargs):
138
+ return self._batch_encode_plus(batch_smiles, task, return_tensors, **kwargs)
139
 
140
  def _tokenize(self, text, **kwargs):
141
  return []