import os
import sys
import urllib.request
from io import BytesIO

import joblib
import numpy as np
import onnxruntime as rt
import requests

# Number of SMILES entries featurized and encoded per batch in `transform`.
CHUNKSIZE = 1024

# Seconds to wait on a stalled connection before a download is abandoned.
_DOWNLOAD_TIMEOUT = 60

# Remote artifacts this module downloads at runtime.
_MORGAN_DESCRIPTOR_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/morgan_descriptor.joblib'
_PHYSCHEM_DESCRIPTOR_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/physchem_descriptor.joblib'
_ENCODER_MODEL_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/encoder_model.onnx'


class FragmentEmbedder(object):
    """Embed SMILES inputs with two remote descriptors and an ONNX encoder.

    The two descriptor objects are downloaded and deserialized when the
    instance is constructed. The ONNX encoder model is downloaded on the
    first call to `encoder_inference` and the resulting inference session
    is reused for every later call on the same instance.

    Attributes are set in `__init__`:
        morgan_desc: object loaded from ``morgan_descriptor.joblib``.
        physchem_desc: object loaded from ``physchem_descriptor.joblib``.
    Both are only required to expose a ``transform(smiles_chunk)`` method;
    their concrete types are defined by the downloaded files.
    """

    def __init__(self):
        """Download and deserialize both descriptor objects.

        Raises:
            urllib.error.URLError: if a descriptor file cannot be fetched
                (including when the connection times out).
        """
        self.morgan_desc = self._load_remote_joblib(_MORGAN_DESCRIPTOR_URL)
        self.physchem_desc = self._load_remote_joblib(_PHYSCHEM_DESCRIPTOR_URL)
        # Created lazily by `_get_encoder_session` on first inference.
        self._encoder_session = None

    @staticmethod
    def _load_remote_joblib(url):
        """Fetch `url` and return the object stored in that joblib file."""
        # NOTE(security): joblib.load unpickles the payload, which can run
        # arbitrary code. Only point this at a source you trust.
        with urllib.request.urlopen(url, timeout=_DOWNLOAD_TIMEOUT) as response:
            return joblib.load(BytesIO(response.read()))

    def _get_encoder_session(self):
        """Return the cached ONNX session, downloading the model if needed."""
        # getattr keeps this working on instances built without __init__.
        session = getattr(self, "_encoder_session", None)
        if session is None:
            response = requests.get(_ENCODER_MODEL_URL, timeout=_DOWNLOAD_TIMEOUT)
            # Fail loudly on an HTTP error instead of handing an error page
            # to onnxruntime as if it were model bytes.
            response.raise_for_status()
            session = rt.InferenceSession(response.content)
            self._encoder_session = session
        return session

    def _chunker(self, l, n):
        """Yield consecutive slices of `l`, each holding at most `n` items."""
        for i in range(0, len(l), n):
            yield l[i : i + n]

    def encoder_inference(self, X):
        """Run the ONNX encoder on `X` and return its first output.

        Args:
            X: array-like feature data; it is converted to ``float32``
                before being fed to the model's first input.

        Returns:
            numpy.ndarray built from the model's first output.

        Raises:
            requests.RequestException: if the model has to be downloaded
                and the download fails or returns an HTTP error status.
        """
        sess = self._get_encoder_session()
        input_name = sess.get_inputs()[0].name
        output_name = sess.get_outputs()[0].name
        output_data = sess.run(
            [output_name], {input_name: np.asarray(X, dtype=np.float32)}
        )
        return np.array(output_data[0])

    def transform(self, smiles):
        """Featurize and encode `smiles` in batches of `CHUNKSIZE`.

        Each batch is passed to both descriptors, the two feature blocks
        are concatenated column-wise, and the result is encoded with
        `encoder_inference`. Batch outputs are stacked row-wise in input
        order.

        Args:
            smiles: sliceable sequence supporting ``len()``.

        Returns:
            The stacked encoder outputs, or ``None`` when `smiles` is empty.
        """
        encoded_chunks = []
        for smiles_chunk in self._chunker(smiles, CHUNKSIZE):
            X_0 = self.morgan_desc.transform(smiles_chunk)
            X_1 = self.physchem_desc.transform(smiles_chunk)
            X_i = np.hstack([X_0, X_1])
            encoded_chunks.append(self.encoder_inference(X_i))

        if not encoded_chunks:
            # Nothing to encode; callers have always received None here.
            return None
        if len(encoded_chunks) == 1:
            return encoded_chunks[0]
        # Stack once at the end rather than re-copying the growing result
        # on every batch.
        return np.vstack(encoded_chunks)