| |
|
|
| |
| |
|
|
| from pathlib import Path |
|
|
| import torch |
|
|
| from esm.esmfold.v1.esmfold import ESMFold |
|
|
|
|
def _load_model(model_name):
    """Load an ESMFold checkpoint by released name or local ``.pt`` path.

    Args:
        model_name: Either a filesystem path ending in ``.pt``, or the bare
            name of a released checkpoint, which is fetched from the FAIR
            ESM download server (and cached by ``torch.hub``).

    Returns:
        An ``ESMFold`` model with the checkpoint's weights loaded.

    Raises:
        RuntimeError: If the checkpoint lacks keys the model expects,
            other than keys under the ``esm.`` prefix (those are tolerated
            — see below).
    """
    if model_name.endswith(".pt"):
        # Local checkpoint file.
        model_path = Path(model_name)
        model_data = torch.load(str(model_path), map_location="cpu")
    else:
        # Released checkpoint: download, or reuse the torch.hub cache.
        url = f"https://dl.fbaipublicfiles.com/fair-esm/models/{model_name}.pt"
        model_data = torch.hub.load_state_dict_from_url(url, progress=False, map_location="cpu")

    cfg = model_data["cfg"]["model"]
    model_state = model_data["model"]
    model = ESMFold(esmfold_config=cfg)

    expected_keys = set(model.state_dict().keys())
    found_keys = set(model_state.keys())

    # Keys under "esm." are allowed to be absent from the checkpoint
    # (presumably the language-model weights come from elsewhere — the
    # strict=False load below tolerates them); anything else missing is
    # a hard error.
    missing_essential_keys = [
        key for key in expected_keys - found_keys if not key.startswith("esm.")
    ]
    if missing_essential_keys:
        raise RuntimeError(f"Keys '{', '.join(missing_essential_keys)}' are missing.")

    model.load_state_dict(model_state, strict=False)

    return model
|
|
|
|
def esmfold_v0():
    """
    ESMFold v0: 3B-parameter ESM-2 language model plus 48 folding blocks.

    This is the checkpoint used for the paper (Lin et al, 2022). It was
    trained on all PDB chains released up to 2020-05, ensuring a temporal
    holdout with respect to CASP14 and the CAMEO validation and test sets
    reported there.
    """
    checkpoint_name = "esmfold_3B_v0"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_v1():
    """
    ESMFold v1: 3B-parameter ESM-2 language model plus 48 folding blocks.

    ESMFold predicts atomic-level protein structure with high accuracy and
    speed, directly from a single protein sequence. Under the hood it relies
    on the ESM2 protein language model to derive meaningful representations
    of the input sequence.
    """
    checkpoint_name = "esmfold_3B_v1"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_8M():
    """
    ESMFold ablation baseline: 8M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 500K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_8M"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_8M_270K():
    """
    ESMFold ablation baseline: 8M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 270K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_8M_270K"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_35M():
    """
    ESMFold ablation baseline: 35M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 500K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_35M"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_35M_270K():
    """
    ESMFold ablation baseline: 35M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 270K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_35M_270K"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_150M():
    """
    ESMFold ablation baseline: 150M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 500K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_150M"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_150M_270K():
    """
    ESMFold ablation baseline: 150M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 270K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_150M_270K"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_650M():
    """
    ESMFold ablation baseline: 650M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 500K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_650M"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_650M_270K():
    """
    ESMFold ablation baseline: 650M ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 270K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_650M_270K"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_3B():
    """
    ESMFold ablation baseline: 3B ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 500K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_3B"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_3B_270K():
    """
    ESMFold ablation baseline: 3B ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 270K updates. This checkpoint
    exists to probe how the language model behaves when its parameter count
    is ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_3B_270K"
    return _load_model(checkpoint_name)
|
|
|
|
def esmfold_structure_module_only_15B():
    """
    ESMFold ablation baseline: 15B ESM-2, zero folding blocks.

    The underlying ESM-2 was trained out to 270K updates; the 15B-parameter
    ESM-2 was not trained out to 500K updates. This checkpoint exists to
    probe how the language model behaves when its parameter count is
    ablated. See table S1 in (Lin et al, 2022).
    """
    checkpoint_name = "esmfold_structure_module_only_15B"
    return _load_model(checkpoint_name)
|
|