Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
e98af12
1
Parent(s):
c70f35b
load the adapters first
Browse files
app.py
CHANGED
|
@@ -19,7 +19,7 @@ def get_models():
|
|
| 19 |
|
| 20 |
candidate_models = get_models()
|
| 21 |
properties = list(candidate_models.keys())
|
| 22 |
-
model = MolecularPropertyPredictionModel()
|
| 23 |
|
| 24 |
def get_description(property_name):
|
| 25 |
return dataset_descriptions[property_name]
|
|
|
|
| 19 |
|
| 20 |
candidate_models = get_models()
|
| 21 |
properties = list(candidate_models.keys())
|
| 22 |
+
model = MolecularPropertyPredictionModel(candidate_models)
|
| 23 |
|
| 24 |
def get_description(property_name):
|
| 25 |
return dataset_descriptions[property_name]
|
utils.py
CHANGED
|
@@ -14,7 +14,7 @@ import os
|
|
| 14 |
import pickle
|
| 15 |
from sklearn import preprocessing
|
| 16 |
import json
|
| 17 |
-
import spaces
|
| 18 |
|
| 19 |
from rdkit import RDLogger, Chem
|
| 20 |
# Suppress RDKit INFO messages
|
|
@@ -145,7 +145,7 @@ class DataCollator(object):
|
|
| 145 |
return data_dict
|
| 146 |
|
| 147 |
class MolecularPropertyPredictionModel():
|
| 148 |
-
def __init__(self):
|
| 149 |
self.adapter_name = None
|
| 150 |
|
| 151 |
# we need to keep track of the paths of adapter scalers
|
|
@@ -166,10 +166,17 @@ class MolecularPropertyPredictionModel():
|
|
| 166 |
self.base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 167 |
"ChemFM/ChemFM-3B",
|
| 168 |
config=config,
|
| 169 |
-
device_map="cuda",
|
| 170 |
trust_remote_code=True,
|
| 171 |
token = os.environ.get("TOKEN")
|
| 172 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
# load the tokenizer
|
| 175 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
@@ -203,30 +210,35 @@ class MolecularPropertyPredictionModel():
|
|
| 203 |
return "keep"
|
| 204 |
# switch adapter
|
| 205 |
try:
|
| 206 |
-
self.adapter_name = adapter_name
|
| 207 |
-
print(self.adapter_name, adapter_id)
|
| 208 |
-
self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
|
| 209 |
-
self.lora_model.to("cuda")
|
| 210 |
-
print(self.lora_model)
|
| 211 |
-
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
| 213 |
if os.path.exists(self.apapter_scaler_path[adapter_name]):
|
| 214 |
self.scaler = pickle.load(open(self.apapter_scaler_path[adapter_name], "rb"))
|
| 215 |
else:
|
| 216 |
self.scaler = None
|
|
|
|
|
|
|
| 217 |
|
| 218 |
return "switched"
|
| 219 |
except Exception as e:
|
| 220 |
# handle error
|
| 221 |
return "error"
|
| 222 |
|
| 223 |
-
|
| 224 |
def predict(self, valid_df, task_type):
|
| 225 |
test_dataset = Dataset.from_pandas(valid_df)
|
| 226 |
# construct the dataloader
|
| 227 |
test_loader = torch.utils.data.DataLoader(
|
| 228 |
test_dataset,
|
| 229 |
-
batch_size=
|
| 230 |
collate_fn=self.data_collator,
|
| 231 |
)
|
| 232 |
# predict
|
|
@@ -234,8 +246,8 @@ class MolecularPropertyPredictionModel():
|
|
| 234 |
y_pred = []
|
| 235 |
for i, batch in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
|
| 236 |
with torch.no_grad():
|
| 237 |
-
batch = {k: v.to(self.lora_model.device) for k, v in batch.items()}
|
| 238 |
-
outputs = self.lora_model(**batch)
|
| 239 |
if task_type == "regression": # TODO: check if the model is regression or classification
|
| 240 |
y_pred.append(outputs.logits.cpu().detach().numpy())
|
| 241 |
else:
|
|
|
|
| 14 |
import pickle
|
| 15 |
from sklearn import preprocessing
|
| 16 |
import json
|
| 17 |
+
#import spaces
|
| 18 |
|
| 19 |
from rdkit import RDLogger, Chem
|
| 20 |
# Suppress RDKit INFO messages
|
|
|
|
| 145 |
return data_dict
|
| 146 |
|
| 147 |
class MolecularPropertyPredictionModel():
|
| 148 |
+
def __init__(self, candidate_models):
|
| 149 |
self.adapter_name = None
|
| 150 |
|
| 151 |
# we need to keep track of the paths of adapter scalers
|
|
|
|
| 166 |
self.base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 167 |
"ChemFM/ChemFM-3B",
|
| 168 |
config=config,
|
| 169 |
+
device_map="cpu",
|
| 170 |
trust_remote_code=True,
|
| 171 |
token = os.environ.get("TOKEN")
|
| 172 |
)
|
| 173 |
+
#self.base_model.to("cuda")
|
| 174 |
+
# load the adapters firstly
|
| 175 |
+
for adapter_name in candidate_models:
|
| 176 |
+
adapter_id = candidate_models[adapter_name]
|
| 177 |
+
self.base_model.load_adapter(adapter_id, adapter_name=adapter_name)
|
| 178 |
+
self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
|
| 179 |
+
|
| 180 |
|
| 181 |
# load the tokenizer
|
| 182 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
|
|
| 210 |
return "keep"
|
| 211 |
# switch adapter
|
| 212 |
try:
|
| 213 |
+
#self.adapter_name = adapter_name
|
| 214 |
+
#print(self.adapter_name, adapter_id)
|
| 215 |
+
#self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
|
| 216 |
+
#self.lora_model.to("cuda")
|
| 217 |
+
#print(self.lora_model)
|
| 218 |
+
|
| 219 |
+
self.base_model.set_adapter(adapter_name)
|
| 220 |
+
|
| 221 |
+
#if adapter_name not in self.apapter_scaler_path:
|
| 222 |
+
# self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
|
| 223 |
if os.path.exists(self.apapter_scaler_path[adapter_name]):
|
| 224 |
self.scaler = pickle.load(open(self.apapter_scaler_path[adapter_name], "rb"))
|
| 225 |
else:
|
| 226 |
self.scaler = None
|
| 227 |
+
|
| 228 |
+
self.adapter_name = adapter_name
|
| 229 |
|
| 230 |
return "switched"
|
| 231 |
except Exception as e:
|
| 232 |
# handle error
|
| 233 |
return "error"
|
| 234 |
|
| 235 |
+
#@spaces.GPU
|
| 236 |
def predict(self, valid_df, task_type):
|
| 237 |
test_dataset = Dataset.from_pandas(valid_df)
|
| 238 |
# construct the dataloader
|
| 239 |
test_loader = torch.utils.data.DataLoader(
|
| 240 |
test_dataset,
|
| 241 |
+
batch_size=32,
|
| 242 |
collate_fn=self.data_collator,
|
| 243 |
)
|
| 244 |
# predict
|
|
|
|
| 246 |
y_pred = []
|
| 247 |
for i, batch in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
|
| 248 |
with torch.no_grad():
|
| 249 |
+
batch = {k: v.to(self.base_model.device) for k, v in batch.items()}
|
| 250 |
+
outputs = self.base_model(**batch)
|
| 251 |
if task_type == "regression": # TODO: check if the model is regression or classification
|
| 252 |
y_pred.append(outputs.logits.cpu().detach().numpy())
|
| 253 |
else:
|