from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn as nn
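
# Lookup table mapping polymer abbreviations to PSMILES repeat units;
# [*] marks the two attachment points of each repeat unit.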
polymer2psmiles = {
    'PHB': '[*]OC(C)CC(=O)[*]',
    'PCL': '[*]OCCCCCC(=O)[*]',
    'PVA': '[*]C(O)C[*]',
    'PPL': '[*]CCC(=O)O[*]',
    'P3HP': '[*]OCCC(=O)[*]',
    'P4HB': '[*]C(=O)CCCO[*]',
    'PEA': '[*]OCCOC(=O)CCCCC(=O)[*]',
    'PES': '[*]OCCOC(=O)CCC(=O)[*]',
    'O-PVA': '[*]C(=O)C[*]',
    'PBS': '[*]C(=O)CCC(=O)OCCCCO[*]',
    'PLA': '[*]C(C)C(=O)O[*]',
    'PEG': '[*]CCO[*]',
    'PBSA': '[*]C(=O)CCC(=O)OCCCCOC(=O)CCC(=O)[*]',
    'PET': '[*]CCOC(=O)c1ccc(C(=O)O[*])cc1',
    'PE': '[*]CC[*]',
    'PMCL': '[*]C(=O)CCC(C)CCO[*]',
    'PEF': '[*]OC(=O)c1oc(C(=O)OCC[*])cc1',
    'PS': '[*]C(c1ccccc1)C[*]',
    'NR': '[*]CC(C)=CC[*]',
    'PHV': '[*]OC(CC)CC(=O)[*]',
}


class PolyEncoder(nn.Module):
    """Encodes a single PSMILES string into token-level embeddings using the
    pretrained polyBERT model (kuelumbus/polyBERT on the Hugging Face Hub)."""

    def __init__(self):
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained('kuelumbus/polyBERT')
        self.polyBERT = AutoModel.from_pretrained('kuelumbus/polyBERT')

    def forward(self, psmiles_strings):
        assert len(psmiles_strings) == 1, "Batch size must be 1 for PolyEncoder"
        encoded_input = self.tokenizer(
            psmiles_strings, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():  # polyBERT is used as a frozen feature extractor
            model_output = self.polyBERT(**encoded_input)
        return model_output[0]  # last_hidden_state: (1, seq_len, hidden_dim)
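

# Usage sketch (an illustrative assumption, not part of the original file):
# encode one polymer from the lookup table. PolyEncoder asserts a batch size
# of 1, so pass a single-element list. Loading 'kuelumbus/polyBERT' downloads
# the model weights on first use.
if __name__ == '__main__':
    encoder = PolyEncoder()
    token_embeddings = encoder([polymer2psmiles['PLA']])
    print(token_embeddings.shape)  # (1, seq_len, hidden_dim)
    # A fixed-size polymer fingerprint could then be obtained by pooling over
    # tokens, e.g. token_embeddings.mean(dim=1) (mean pooling; assumed here,
    # not prescribed by the original code).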