gabrielbianchin committed on
Commit
a319e89
·
1 Parent(s): d0de36b

update readme

Browse files
Files changed (1) hide show
  1. README.md +33 -9
README.md CHANGED
@@ -50,35 +50,59 @@ from peft import PeftModel
50
  import torch
51
 
52
  # proteins: ESM2 + LoRA adapter
53
- tokenizer_prot = AutoTokenizer.from_pretrained('facebook/esm2_t36_3B_UR50D')
 
 
54
  backbone = AutoModel.from_pretrained(
55
- 'facebook/esm2_t36_3B_UR50D',
56
- torch_dtype=torch.bfloat16
57
  )
58
- backbone = PeftModel.from_pretrained(backbone, 'SaeedLab/SeqScreen-lora').eval()
 
 
 
 
59
 
60
  proteins = ["MKTFFVLLL", "ACDEFGHIKLM"]
61
- inputs_prot = tokenizer_prot(proteins, return_tensors="pt", padding=True)
 
 
 
 
62
 
63
  with torch.no_grad():
64
  hidden = backbone(**inputs_prot).last_hidden_state
65
  mask = inputs_prot['attention_mask'].unsqueeze(-1).float()
66
  prot_emb = (hidden * mask).sum(1) / mask.sum(1).clamp(min=1e-8)
67
 
 
68
  # molecules
69
- tokenizer_mol = AutoTokenizer.from_pretrained('SaeedLab/MolDeBERTa-base-123M-mlc')
70
- encoder_mol = AutoModel.from_pretrained('SaeedLab/MolDeBERTa-base-123M-mlc').eval()
 
 
 
 
 
71
 
72
  molecules = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
73
- inputs_mol = tokenizer_mol(molecules, return_tensors="pt", padding=True)
 
 
 
 
74
 
75
  with torch.no_grad():
76
  hidden = encoder_mol(**inputs_mol).last_hidden_state
77
  mask = inputs_mol['attention_mask'].unsqueeze(-1).float()
78
  mol_emb = (hidden * mask).sum(1) / mask.sum(1).clamp(min=1e-8)
79
 
 
80
  # seqscreen
81
- seqscreen = AutoModel.from_pretrained('SaeedLab/SeqScreen-Finetuning', trust_remote_code=True).eval()
 
 
 
82
 
83
  with torch.no_grad():
84
  outputs = seqscreen(prot=prot_emb, mol=mol_emb)
 
50
  import torch
51
 
52
  # proteins: ESM2 + LoRA adapter
53
+ tokenizer_prot = AutoTokenizer.from_pretrained(
54
+ 'facebook/esm2_t36_3B_UR50D'
55
+ )
56
  backbone = AutoModel.from_pretrained(
57
+ 'facebook/esm2_t36_3B_UR50D',
58
+ torch_dtype=torch.bfloat16
59
  )
60
+ backbone = PeftModel.from_pretrained(
61
+ backbone,
62
+ 'SaeedLab/SeqScreen-lora'
63
+ ).eval()
64
+
65
 
66
  proteins = ["MKTFFVLLL", "ACDEFGHIKLM"]
67
+ inputs_prot = tokenizer_prot(
68
+ proteins,
69
+ return_tensors="pt",
70
+ padding=True
71
+ )
72
 
73
  with torch.no_grad():
74
  hidden = backbone(**inputs_prot).last_hidden_state
75
  mask = inputs_prot['attention_mask'].unsqueeze(-1).float()
76
  prot_emb = (hidden * mask).sum(1) / mask.sum(1).clamp(min=1e-8)
77
 
78
+
79
  # molecules
80
+ tokenizer_mol = AutoTokenizer.from_pretrained(
81
+ 'SaeedLab/MolDeBERTa-base-123M-mlc'
82
+ )
83
+ encoder_mol = AutoModel.from_pretrained(
84
+ 'SaeedLab/MolDeBERTa-base-123M-mlc'
85
+ ).eval()
86
+
87
 
88
  molecules = ["NCCc1nc(-c2ccccc2)cs1", "CC(=O)OCC(C)C"]
89
+ inputs_mol = tokenizer_mol(
90
+ molecules,
91
+ return_tensors="pt",
92
+ padding=True
93
+ )
94
 
95
  with torch.no_grad():
96
  hidden = encoder_mol(**inputs_mol).last_hidden_state
97
  mask = inputs_mol['attention_mask'].unsqueeze(-1).float()
98
  mol_emb = (hidden * mask).sum(1) / mask.sum(1).clamp(min=1e-8)
99
 
100
+
101
  # seqscreen
102
+ seqscreen = AutoModel.from_pretrained(
103
+ 'SaeedLab/SeqScreen-Finetuning',
104
+ trust_remote_code=True
105
+ ).eval()
106
 
107
  with torch.no_grad():
108
  outputs = seqscreen(prot=prot_emb, mol=mol_emb)