gabrielbianchin committed on
Commit
887e87e
·
1 Parent(s): 13ec48b

update readme

Browse files
Files changed (1) hide show
  1. README.md +30 -7
README.md CHANGED
@@ -43,12 +43,21 @@ from transformers import AutoTokenizer, AutoModel
43
  import torch
44
 
45
  # proteins
46
- tokenizer_prot = AutoTokenizer.from_pretrained('facebook/esm2_t36_3B_UR50D')
47
- encoder_prot = AutoModel.from_pretrained('facebook/esm2_t36_3B_UR50D').eval()
 
 
 
 
 
48
 
49
  proteins = ["MKTFFVLLL", "ABCDE"]
50
  proteins = [" ".join(i) for i in proteins]
51
- inputs_prot = tokenizer_prot(proteins, return_tensors="pt", padding=True)
 
 
 
 
52
 
53
  with torch.no_grad():
54
      outputs = encoder_prot(**inputs_prot)
@@ -56,12 +65,22 @@ with torch.no_grad():
56
  mask = inputs_prot['attention_mask'].unsqueeze(-1).float()
57
  prot_rep = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-8)
58
 
 
59
  # molecules
60
- tokenizer_mol = AutoTokenizer.from_pretrained('SaeedLab/MolDeBERTa-base-123M-mlc')
61
- encoder_mol = AutoModel.from_pretrained('SaeedLab/MolDeBERTa-base-123M-mlc').eval()
 
 
 
 
 
62
 
63
  molecules = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
64
- inputs_mol = tokenizer_mol(molecules, return_tensors="pt", padding=True)
 
 
 
 
65
 
66
  with torch.no_grad():
67
      outputs = encoder_mol(**inputs_mol)
@@ -69,8 +88,12 @@ with torch.no_grad():
69
  mask = inputs_mol['attention_mask'].unsqueeze(-1).float()
70
  mol_rep = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-8)
71
 
 
72
  # seqscreen
73
- seqscreen = AutoModel.from_pretrained('SaeedLab/SeqScreen-Frozen', trust_remote_code=True).eval()
 
 
 
74
 
75
  with torch.no_grad():
76
      outputs = seqscreen(prot=prot_rep, mol=mol_rep)
 
43
  import torch
44
 
45
  # proteins
46
+ tokenizer_prot = AutoTokenizer.from_pretrained(
47
+ 'facebook/esm2_t36_3B_UR50D'
48
+ )
49
+ encoder_prot = AutoModel.from_pretrained(
50
+ 'facebook/esm2_t36_3B_UR50D'
51
+ ).eval()
52
+
53
 
54
  proteins = ["MKTFFVLLL", "ABCDE"]
55
  proteins = [" ".join(i) for i in proteins]
56
+ inputs_prot = tokenizer_prot(
57
+ proteins,
58
+ return_tensors="pt",
59
+ padding=True
60
+ )
61
 
62
  with torch.no_grad():
63
      outputs = encoder_prot(**inputs_prot)
 
65
  mask = inputs_prot['attention_mask'].unsqueeze(-1).float()
66
  prot_rep = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-8)
67
 
68
+
69
  # molecules
70
+ tokenizer_mol = AutoTokenizer.from_pretrained(
71
+ 'SaeedLab/MolDeBERTa-base-123M-mlc'
72
+ )
73
+ encoder_mol = AutoModel.from_pretrained(
74
+ 'SaeedLab/MolDeBERTa-base-123M-mlc'
75
+ ).eval()
76
+
77
 
78
  molecules = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
79
+ inputs_mol = tokenizer_mol(
80
+ molecules,
81
+ return_tensors="pt",
82
+ padding=True
83
+ )
84
 
85
  with torch.no_grad():
86
      outputs = encoder_mol(**inputs_mol)
 
88
  mask = inputs_mol['attention_mask'].unsqueeze(-1).float()
89
  mol_rep = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-8)
90
 
91
+
92
  # seqscreen
93
+ seqscreen = AutoModel.from_pretrained(
94
+ 'SaeedLab/SeqScreen-Frozen',
95
+ trust_remote_code=True
96
+ ).eval()
97
 
98
  with torch.no_grad():
99
      outputs = seqscreen(prot=prot_rep, mol=mol_rep)