import torch
import torch.nn as nn

from transformers import AutoTokenizer, AutoModel
# Mapping from polymer abbreviations to their pSMILES (polymer SMILES)
# repeat-unit strings. Each value contains exactly two `[*]` wildcard
# atoms marking the repeat unit's connection points — the input format
# expected by the polyBERT tokenizer below.
polymer2psmiles = {
    'PHB': '[*]OC(C)CC(=O)[*]',
    'PCL': '[*]OCCCCCC(=O)[*]',
    'PVA': '[*]C(O)C[*]',
    'PPL': '[*]CCC(=O)O[*]',
    'P3HP': '[*]OCCC(=O)[*]',
    'P4HB': '[*]C(=O)CCCO[*]',
    'PEA': '[*]OCCOC(=O)CCCCC(=O)[*]',
    'PES': '[*]OCCOC(=O)CCC(=O)[*]',
    'O-PVA': '[*]C(=O)C[*]',
    'PBS': '[*]C(=O)CCC(=O)OCCCCO[*]',
    'PLA': '[*]C(C)C(=O)O[*]',
    'PEG': '[*]CCO[*]',
    'PBSA': '[*]C(=O)CCC(=O)OCCCCOC(=O)CCC(=O)[*]',
    'PET': '[*]CCOC(=O)c1ccc(C(=O)O[*])cc1',
    'PE': '[*]CC[*]',
    'PMCL': '[*]C(=O)CCC(C)CCO[*]',
    'PEF': '[*]OC(=O)c1oc(C(=O)OCC[*])cc1',
    'PS': '[*]C(c1ccccc1)C[*]',
    'NR': '[*]CC(C)=CC[*]',
    'PHV': '[*]OC(CC)CC(=O)[*]',
}
class PolyEncoder(nn.Module):
    """Encode a pSMILES string into polyBERT hidden states.

    Wraps the pretrained ``kuelumbus/polyBERT`` tokenizer and model from
    the Hugging Face hub. Only single-string "batches" are supported.
    """

    def __init__(self):
        super().__init__()
        # Downloads (or loads from local cache) the pretrained polyBERT
        # tokenizer and encoder from the Hugging Face hub.
        self.tokenizer = AutoTokenizer.from_pretrained('kuelumbus/polyBERT')
        self.polyBERT = AutoModel.from_pretrained('kuelumbus/polyBERT')

    def forward(self, psmiles_strings):
        """Tokenize and encode a single pSMILES string.

        Args:
            psmiles_strings: A list containing exactly one pSMILES string.

        Returns:
            The first element of the polyBERT output — presumably the
            token-level ``last_hidden_state`` tensor; TODO confirm
            against the polyBERT model card.

        Raises:
            ValueError: If ``psmiles_strings`` does not contain exactly
                one string.
        """
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently drop this validation.
        if len(psmiles_strings) != 1:
            raise ValueError("Batch size must be 1 for PolyEncoder")
        encoded_input = self.tokenizer(
            psmiles_strings, padding=True, truncation=True, return_tensors='pt')
        # Inference only — no gradients are tracked through the encoder.
        with torch.no_grad():
            model_output = self.polyBERT(**encoded_input)
        return model_output[0]