| | import os |
| | import joblib |
| | import numpy as np |
| | import onnxruntime as rt |
| | import sys |
| | import requests |
| | from io import BytesIO |
| | import urllib.request |
| |
|
# Default number of inputs handed to the descriptors/encoder per batch in
# FragmentEmbedder.transform.
CHUNKSIZE = 1024


class FragmentEmbedder(object):
    """Embed inputs by chaining two joblib descriptors and an ONNX encoder.

    On construction the two descriptor objects are downloaded and
    deserialised with joblib. The ONNX encoder is downloaded lazily on the
    first inference call and the resulting session is reused afterwards.
    """

    _MORGAN_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/morgan_descriptor.joblib'
    _PHYSCHEM_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/physchem_descriptor.joblib'
    _ENCODER_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/encoder_model.onnx'

    # Socket timeout (seconds) so a stalled connection cannot hang forever.
    _TIMEOUT = 60

    # Cached onnxruntime session; populated on first use by _get_session().
    # Declared at class level so the attribute exists even if __init__ is
    # bypassed.
    _session = None

    def __init__(self):
        """Download and deserialise both descriptor objects."""
        self.morgan_desc = self._load_joblib(self._MORGAN_URL)
        self.physchem_desc = self._load_joblib(self._PHYSCHEM_URL)
        self._session = None

    def _load_joblib(self, url):
        """Fetch ``url`` and return the object deserialised by ``joblib.load``."""
        # SECURITY NOTE(review): joblib.load unpickles the payload, which can
        # execute arbitrary code. Only safe while the remote host is trusted;
        # consider pinning a revision / verifying a checksum.
        with urllib.request.urlopen(url, timeout=self._TIMEOUT) as response:
            return joblib.load(BytesIO(response.read()))

    def _get_session(self):
        """Return the ONNX inference session, downloading the model once.

        Raises:
            requests.HTTPError: if the model download returns an error status.
        """
        if self._session is None:
            response = requests.get(self._ENCODER_URL, timeout=self._TIMEOUT)
            # Fail loudly instead of feeding an HTML error page to onnxruntime.
            response.raise_for_status()
            self._session = rt.InferenceSession(response.content)
        return self._session

    def _chunker(self, l, n):
        """Yield consecutive slices of ``l`` containing at most ``n`` items."""
        for i in range(0, len(l), n):
            yield l[i : i + n]

    def encoder_inference(self, X):
        """Run the encoder on ``X`` and return its first output as an array.

        ``X`` is converted to a float32 NumPy array and fed to the model's
        first input; the model's first output is returned.
        """
        sess = self._get_session()
        input_name = sess.get_inputs()[0].name
        output_name = sess.get_outputs()[0].name
        output_data = sess.run(
            [output_name], {input_name: np.asarray(X, dtype=np.float32)}
        )
        return np.array(output_data[0])

    def transform(self, smiles, chunksize=None):
        """Embed ``smiles`` chunk by chunk and stack the results row-wise.

        Args:
            smiles: sized, sliceable sequence of inputs accepted by both
                descriptors' ``transform``. A single ``str`` is treated as a
                one-element sequence.
            chunksize: items per batch; defaults to the module-level
                ``CHUNKSIZE`` when ``None``.

        Returns:
            The encoder outputs for all chunks stacked vertically, or ``None``
            when ``smiles`` is empty.
        """
        if chunksize is None:
            chunksize = CHUNKSIZE
        # A bare string would otherwise be sliced into substrings.
        if isinstance(smiles, str):
            smiles = [smiles]

        outputs = []
        for smiles_chunk in self._chunker(smiles, chunksize):
            X_0 = self.morgan_desc.transform(smiles_chunk)
            X_1 = self.physchem_desc.transform(smiles_chunk)
            outputs.append(self.encoder_inference(np.hstack([X_0, X_1])))

        if not outputs:
            return None
        if len(outputs) == 1:
            # Return the single chunk untouched, as before.
            return outputs[0]
        # Stack once instead of re-copying the accumulated array per chunk.
        return np.vstack(outputs)
| |
|
| |
|