Spaces:
Running
Running
ynuozhang
committed on
Commit
·
063d2f7
1
Parent(s):
728610a
update link
Browse files- best_models.txt +1 -1
- description.md +13 -3
- inference.py +14 -0
best_models.txt
CHANGED
|
@@ -7,4 +7,4 @@ Toxicity, -, Transformer, Classifier, -, 0.3401,
|
|
| 7 |
Binding_affinity, unpooled, unpooled, Regression, -, -,
|
| 8 |
Permeability_PAMPA, -, CNN, Regression, -, -,
|
| 9 |
Permeability_CACO2, -, SVR, Regression, -, -,
|
| 10 |
-
Halflife,
|
|
|
|
| 7 |
Binding_affinity, unpooled, unpooled, Regression, -, -,
|
| 8 |
Permeability_PAMPA, -, CNN, Regression, -, -,
|
| 9 |
Permeability_CACO2, -, SVR, Regression, -, -,
|
| 10 |
+
Halflife, transformer_wt_log, xgb_smiles, Regression, -, -,
|
description.md
CHANGED
|
@@ -30,7 +30,7 @@
|
|
| 30 |
| Binding Affinity | 1436 | 1597 |
|
| 31 |
|
| 32 |
|
| 33 |
-
Our models are trained on curated datasets from multiple sources. For detailed cleanup procedures please refer to our [paper]().
|
| 34 |
|
| 35 |
#### 🩸 Hemolysis Dataset
|
| 36 |
- **Primary Source:** [the Database of Antimicrobial Activity and Structure of Peptides (DBAASPv3)](https://academic.oup.com/nar/article-abstract/49/D1/D288/5957160)
|
|
@@ -86,7 +86,7 @@ Higher scores indicate stronger non-fouling behavior, desirable for circulation
|
|
| 86 |
- **CNN/Transformer Model:** One-dimensional convolutional/self-attention transformer networks operating on unpooled embeddings to capture local sequence patterns.
|
| 87 |
- **Binding Model:** Transformer-based architecture with cross-attention between protein and peptide representations.
|
| 88 |
- **SVR Model:** Support Vector Regression applied to pooled embeddings, providing a kernel-based, nonparametric regression baseline that is robust on smaller or noisy datasets.
|
| 89 |
-
- **Others:** SVM and Elastic Nets were trained with [
|
| 90 |
|
| 91 |
### Model Training and Weight Hosting
|
| 92 |
- More instructions can be found here at [PeptiVerse](https://huggingface.co/ChatterjeeLab/PeptiVerse)
|
|
@@ -111,7 +111,17 @@ Higher scores indicate stronger non-fouling behavior, desirable for circulation
|
|
| 111 |
|
| 112 |
If you use this tool, please cite:
|
| 113 |
```
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
```
|
| 116 |
|
| 117 |
### Contact
|
|
|
|
| 30 |
| Binding Affinity | 1436 | 1597 |
|
| 31 |
|
| 32 |
|
| 33 |
+
Our models are trained on curated datasets from multiple sources. For detailed cleanup procedures please refer to our [paper](https://www.biorxiv.org/content/10.64898/2025.12.31.697180v1).
|
| 34 |
|
| 35 |
#### 🩸 Hemolysis Dataset
|
| 36 |
- **Primary Source:** [the Database of Antimicrobial Activity and Structure of Peptides (DBAASPv3)](https://academic.oup.com/nar/article-abstract/49/D1/D288/5957160)
|
|
|
|
| 86 |
- **CNN/Transformer Model:** One-dimensional convolutional/self-attention transformer networks operating on unpooled embeddings to capture local sequence patterns.
|
| 87 |
- **Binding Model:** Transformer-based architecture with cross-attention between protein and peptide representations.
|
| 88 |
- **SVR Model:** Support Vector Regression applied to pooled embeddings, providing a kernel-based, nonparametric regression baseline that is robust on smaller or noisy datasets.
|
| 89 |
+
- **Others:** SVM and Elastic Nets were trained with [RAPIDS cuML](https://github.com/rapidsai/cuml), which requires a CUDA environment and is therefore not supported in the web app. Model checkpoints remain available in the Hugging Face repository.
|
| 90 |
|
| 91 |
### Model Training and Weight Hosting
|
| 92 |
- More instructions can be found here at [PeptiVerse](https://huggingface.co/ChatterjeeLab/PeptiVerse)
|
|
|
|
| 111 |
|
| 112 |
If you use this tool, please cite:
|
| 113 |
```
|
| 114 |
+
@article {Zhang2025.12.31.697180,
|
| 115 |
+
author = {Zhang, Yinuo and Tang, Sophia and Chen, Tong and Mahood, Elizabeth and Vincoff, Sophia and Chatterjee, Pranam},
|
| 116 |
+
title = {PeptiVerse: A Unified Platform for Therapeutic Peptide Property Prediction},
|
| 117 |
+
elocation-id = {2025.12.31.697180},
|
| 118 |
+
year = {2026},
|
| 119 |
+
doi = {10.64898/2025.12.31.697180},
|
| 120 |
+
publisher = {Cold Spring Harbor Laboratory},
|
| 121 |
+
URL = {https://www.biorxiv.org/content/early/2026/01/03/2025.12.31.697180},
|
| 122 |
+
eprint = {https://www.biorxiv.org/content/early/2026/01/03/2025.12.31.697180.full.pdf},
|
| 123 |
+
journal = {bioRxiv}
|
| 124 |
+
}
|
| 125 |
```
|
| 126 |
|
| 127 |
### Contact
|
inference.py
CHANGED
|
@@ -630,6 +630,16 @@ class WTEmbedder:
|
|
| 630 |
self._cache_unpooled[s] = (X, M)
|
| 631 |
return X, M
|
| 632 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
|
| 634 |
|
| 635 |
# -----------------------------
|
|
@@ -744,6 +754,10 @@ class PeptiVersePredictor:
|
|
| 744 |
arch = "mlp"
|
| 745 |
elif arch.startswith("cnn"):
|
| 746 |
arch = "cnn"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 747 |
|
| 748 |
self.models[(prop_key, mode)] = build_torch_model_from_ckpt(arch, obj, self.device)
|
| 749 |
|
|
|
|
| 630 |
self._cache_unpooled[s] = (X, M)
|
| 631 |
return X, M
|
| 632 |
|
| 633 |
+
def _clean_state_dict(sd: dict) -> dict:
|
| 634 |
+
# just for wt halflife transformer predictor
|
| 635 |
+
out = {}
|
| 636 |
+
for k, v in sd.items():
|
| 637 |
+
if k.startswith("module."):
|
| 638 |
+
k = k[len("module."):]
|
| 639 |
+
if k.startswith("model."):
|
| 640 |
+
k = k[len("model."):]
|
| 641 |
+
out[k] = v
|
| 642 |
+
return out
|
| 643 |
|
| 644 |
|
| 645 |
# -----------------------------
|
|
|
|
| 754 |
arch = "mlp"
|
| 755 |
elif arch.startswith("cnn"):
|
| 756 |
arch = "cnn"
|
| 757 |
+
if prop_key == "halflife" and mode == "wt" and m == "transformer_wt_log":
|
| 758 |
+
if isinstance(obj, dict) and "state_dict" in obj:
|
| 759 |
+
obj = dict(obj)
|
| 760 |
+
obj["state_dict"] = _clean_state_dict(obj["state_dict"])
|
| 761 |
|
| 762 |
self.models[(prop_key, mode)] = build_torch_model_from_ckpt(arch, obj, self.device)
|
| 763 |
|