ebetica committed on
Commit
e8d47c8
·
verified ·
1 Parent(s): da29290

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +73 -63
README.md CHANGED
@@ -52,105 +52,115 @@ Please install `esm` via PyPi:
52
  pip install esm
53
  ```
54
 
55
- You can fold your first protein with
56
 
57
  ```py
58
- from esm.models.esmfold2 import (
59
- ESMFold2InputBuilder,
60
- ProteinInput,
61
- StructurePredictionInput,
62
- )
63
  from transformers.models.esmfold2.modeling_esmfold2 import ESMFold2Model
64
 
65
  # Ubiquitin (PDB 1UBQ)
66
- sequence = (
67
- "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
68
- )
69
 
70
  model = ESMFold2Model.from_pretrained("biohub/ESMFold2").cuda().eval()
71
- processor = ESMFold2InputBuilder()
72
- spi = StructurePredictionInput(sequences=[ProteinInput(id="A", sequence=sequence)])
73
-
74
- result = processor.fold(
75
- model, spi, num_loops=3, num_sampling_steps=50, num_diffusion_samples=1, seed=0
76
- )
77
 
78
- print(f"pLDDT mean: {float(result.plddt.mean()):.3f}")
79
- print(f"pTM: {float(result.ptm):.3f}")
80
  ```
81
 
82
- You may directly use the model through huggingface/transformers
83
 
84
  ```py
 
 
 
 
 
 
 
 
85
  from transformers.models.esmfold2.modeling_esmfold2 import ESMFold2Model
86
 
87
- # Ubiquitin (PDB 1UBQ)
88
- sequence = (
89
- "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
 
 
 
90
  )
91
 
92
  model = ESMFold2Model.from_pretrained("biohub/ESMFold2").cuda().eval()
93
- output = model.infer_protein(sequence, num_loops=3, num_sampling_steps=50)
94
-
95
- print(f"pLDDT mean: {float(output['plddt'].mean()):.3f}")
96
- print(f"pTM: {float(output['ptm'].mean()):.3f}")
97
- ```
98
-
99
- And the Biohub API:
100
 
101
- ```py
102
- TODO minimal biohub API code snippet
103
- ```
104
-
105
- First install the `esm` Python package.
106
-
107
- ```
108
- pip install esm
109
- ```
110
-
111
- Import the necessary libraries.
 
 
 
 
 
112
 
113
- ```py
114
- from esm.sdk.forge import SequenceStructureForgeInferenceClient
115
- from esm.sdk import client
116
- from esm.sdk.api import ESMProtein, ESMProteinError, LogitsConfig, LogitsOutput
117
- ```
118
 
119
- Generate an [API key](https://biohub.ai/developer-console/api-keys) and add it to your Biohub account. This API key manages your access to credits and tokens, and the term API key/token is often used interchangeably within documentation. Call the inference client with the selected model of choice and replace with your token name.
120
 
121
- ```py
122
- client = SequenceStructureForgeInferenceClient(model="esmfold2-fast-2026-05", url="https://biohub.ai", token="<your API token>")
123
  ```
124
 
125
- ####
126
-
127
- The Hugging Face implementation directly supports proteins only. For complex biomolecules, we recommend using the internal API. Here's an example of folding a Ubiquitin with ESMFold2:
128
 
129
  ```py
130
  import os
131
 
132
- from esm.models.esmfold2 import LigandInput, ProteinInput, StructurePredictionInput
 
 
 
 
 
 
133
  from esm.sdk import esmfold2_client
134
  from esm.sdk.api import FoldingConfig
135
 
136
- # Ubiquitin (PDB 1UBQ) + ATP cofactor (illustrative pairing).
137
- protein = ProteinInput(
138
- id="A",
139
- sequence=(
140
- "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
141
- ),
142
  )
143
- ligand = LigandInput(id="L", ccd=["ATP"])
144
 
145
  client = esmfold2_client(model="esmfold2-fast-2026-05", token=os.environ["ESM_API_KEY"])
146
 
147
- spi = StructurePredictionInput(sequences=[protein, ligand])
148
- result = client.fold_all_atom(
149
- spi, config=FoldingConfig(num_loops=3, num_sampling_steps=50)
 
 
 
 
 
 
 
 
 
 
 
 
150
  )
151
 
152
- print(f"pLDDT mean: {float(result.plddt.mean()):.3f}")
153
- print(f"pTM: {float(result.ptm):.3f}")
 
154
  ```
155
 
156
  ## Training Data
 
52
  pip install esm
53
  ```
54
 
55
+ You can fold your first protein with:
56
 
57
  ```py
 
 
 
 
 
58
  from transformers.models.esmfold2.modeling_esmfold2 import ESMFold2Model
59
 
60
  # Ubiquitin (PDB 1UBQ)
61
+ sequence = "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
 
 
62
 
63
  model = ESMFold2Model.from_pretrained("biohub/ESMFold2").cuda().eval()
64
+ output = model.infer_protein(sequence, num_loops=3, num_sampling_steps=50)
 
 
 
 
 
65
 
66
+ print(f"pLDDT mean: {float(output['plddt'].mean()):.3f}, pTM: {float(output['ptm'].mean()):.3f}")
 
67
  ```
68
 
69
+ You can also fold complex biomolecules — proteins, DNA/RNA (with modified residues), and small-molecule ligands all at once. Here's an example folding the HhaI DNA methyltransferase + its cognate DNA (with a trapped 5-fluoro-2′-deoxycytidine, CCD `C36`) + the SAH cofactor (PDB [1MHT](https://www.rcsb.org/structure/1MHT)):
70
 
71
  ```py
72
+ from esm.models.esmfold2 import (
73
+ DNAInput,
74
+ ESMFold2InputBuilder,
75
+ LigandInput,
76
+ Modification,
77
+ ProteinInput,
78
+ StructurePredictionInput,
79
+ )
80
  from transformers.models.esmfold2.modeling_esmfold2 import ESMFold2Model
81
 
82
+ HHAI_SEQ = (
83
+ "MIEIKDKQLTGLRFIDLFAGLGGFRLALESCGAECVYSNEWDKYAQEVYEMNFGEKPEGDITQVNEKTIPDH"
84
+ "DILCAGFPCQAFSISGKQKGFEDSRGTLFFDIARIVREKKPKVVFMENVKNFASHDNGNTLEVVKNTMNELD"
85
+ "YSFHAKVLNALDYGIPQKRERIYMICFRNDLNIQNFQFPKPFELNTFVKDLLLPDSEVEHLVIDRKDLVMTN"
86
+ "QEIEQTTPKTVRLGIVGKGGQGERIYSTRGIAITLSAYGGGIFAKTGGYLVNGKTRKLHPRECARVMGYPDS"
87
+ "YKVHPSTSQAYKQFGNSVVINVLQYIAYNIGSSLNFKPY"
88
  )
89
 
90
  model = ESMFold2Model.from_pretrained("biohub/ESMFold2").cuda().eval()
 
 
 
 
 
 
 
91
 
92
+ spi = StructurePredictionInput(
93
+ sequences=[
94
+ ProteinInput(id="A", sequence=HHAI_SEQ),
95
+ DNAInput(
96
+ id="B",
97
+ sequence="GATAGCGCTATC",
98
+ modifications=[Modification(position=5, ccd="C36")],
99
+ ),
100
+ DNAInput(
101
+ id="C",
102
+ sequence="TGATAGCGCTATC",
103
+ modifications=[Modification(position=6, ccd="C36")],
104
+ ),
105
+ LigandInput(id="L", ccd=["SAH"]),
106
+ ]
107
+ )
108
 
109
+ result = ESMFold2InputBuilder().fold(
110
+ model, spi, num_loops=3, num_sampling_steps=50, num_diffusion_samples=1, seed=0
111
+ )
 
 
112
 
113
+ print(f"pLDDT mean: {float(result.plddt.mean()):.3f}, pTM: {float(result.ptm):.3f}, ipTM: {float(result.iptm):.3f}")
114
 
115
+ with open("1mht_pred.cif", "w") as f:
116
+ f.write(result.complex.to_mmcif())
117
  ```
118
 
119
+ For the Biohub API, first generate an [API key](https://biohub.ai/developer-console/api-keys) and add it to your Biohub account. The code below assumes the key is available in the `ESM_API_KEY` environment variable (for example, `export ESM_API_KEY=<your API key>`).
 
 
120
 
121
  ```py
122
  import os
123
 
124
+ from esm.models.esmfold2 import (
125
+ DNAInput,
126
+ LigandInput,
127
+ Modification,
128
+ ProteinInput,
129
+ StructurePredictionInput,
130
+ )
131
  from esm.sdk import esmfold2_client
132
  from esm.sdk.api import FoldingConfig
133
 
134
+ HHAI_SEQ = (
135
+ "MIEIKDKQLTGLRFIDLFAGLGGFRLALESCGAECVYSNEWDKYAQEVYEMNFGEKPEGDITQVNEKTIPDH"
136
+ "DILCAGFPCQAFSISGKQKGFEDSRGTLFFDIARIVREKKPKVVFMENVKNFASHDNGNTLEVVKNTMNELD"
137
+ "YSFHAKVLNALDYGIPQKRERIYMICFRNDLNIQNFQFPKPFELNTFVKDLLLPDSEVEHLVIDRKDLVMTN"
138
+ "QEIEQTTPKTVRLGIVGKGGQGERIYSTRGIAITLSAYGGGIFAKTGGYLVNGKTRKLHPRECARVMGYPDS"
139
+ "YKVHPSTSQAYKQFGNSVVINVLQYIAYNIGSSLNFKPY"
140
  )
 
141
 
142
  client = esmfold2_client(model="esmfold2-fast-2026-05", token=os.environ["ESM_API_KEY"])
143
 
144
+ spi = StructurePredictionInput(
145
+ sequences=[
146
+ ProteinInput(id="A", sequence=HHAI_SEQ),
147
+ DNAInput(
148
+ id="B",
149
+ sequence="GATAGCGCTATC",
150
+ modifications=[Modification(position=5, ccd="C36")],
151
+ ),
152
+ DNAInput(
153
+ id="C",
154
+ sequence="TGATAGCGCTATC",
155
+ modifications=[Modification(position=6, ccd="C36")],
156
+ ),
157
+ LigandInput(id="L", ccd=["SAH"]),
158
+ ]
159
  )
160
 
161
+ result = client.fold_all_atom(spi, config=FoldingConfig(num_loops=3, num_sampling_steps=50))
162
+
163
+ print(f"pLDDT mean: {float(result.plddt.mean()):.3f}, pTM: {float(result.ptm):.3f}, ipTM: {float(result.iptm):.3f}")
164
  ```
165
 
166
  ## Training Data