Spaces:
Running
Running
ynuozhang
committed on
Commit
·
a164d37
1
Parent(s):
453bb69
update models
Browse files- app.py +2 -2
- best_models.txt +3 -3
- description.md +22 -80
- peptiverse_styles.css +0 -26
app.py
CHANGED
|
@@ -72,8 +72,8 @@ for k, v in {
|
|
| 72 |
ASSETS_MODELS = ASSETS / "models"; ASSETS_MODELS.mkdir(parents=True, exist_ok=True)
|
| 73 |
ASSETS_DATA = ASSETS / "training_data_cleaned"; ASSETS_DATA.mkdir(parents=True, exist_ok=True)
|
| 74 |
|
| 75 |
-
MODEL_REPO = "ChatterjeeLab/
|
| 76 |
-
DATASET_REPO = "ChatterjeeLab/
|
| 77 |
|
| 78 |
def fetch_models_and_data():
|
| 79 |
snapshot_download(
|
|
|
|
| 72 |
ASSETS_MODELS = ASSETS / "models"; ASSETS_MODELS.mkdir(parents=True, exist_ok=True)
|
| 73 |
ASSETS_DATA = ASSETS / "training_data_cleaned"; ASSETS_DATA.mkdir(parents=True, exist_ok=True)
|
| 74 |
|
| 75 |
+
MODEL_REPO = "ChatterjeeLab/PeptiVerse" # model repo
|
| 76 |
+
DATASET_REPO = "ChatterjeeLab/PeptiVerse" # dataset repo
|
| 77 |
|
| 78 |
def fetch_models_and_data():
|
| 79 |
snapshot_download(
|
best_models.txt
CHANGED
|
@@ -5,6 +5,6 @@ Solubility, CNN, -, Classifier, 0.377, -,
|
|
| 5 |
Permeability (Penetrance), XGB, -, Classifier, 0.4301, -,
|
| 6 |
Toxicity, -, Transformer, Classifier, -, 0.3401,
|
| 7 |
Binding_affinity, unpooled, unpooled, Regression, -, -,
|
| 8 |
-
Permeability_PAMPA, -,
|
| 9 |
-
Permeability_CACO2, -,
|
| 10 |
-
Halflife,
|
|
|
|
| 5 |
Permeability (Penetrance), XGB, -, Classifier, 0.4301, -,
|
| 6 |
Toxicity, -, Transformer, Classifier, -, 0.3401,
|
| 7 |
Binding_affinity, unpooled, unpooled, Regression, -, -,
|
| 8 |
+
Permeability_PAMPA, -, CNN, Regression, -, -,
|
| 9 |
+
Permeability_CACO2, -, SVR, Regression, -, -,
|
| 10 |
+
Halflife, transformer_wt_log, xgb_smiles, Regression, -, -,
|
description.md
CHANGED
|
@@ -8,85 +8,27 @@
|
|
| 8 |
|
| 9 |
### Training Data Collection
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
<td>4765</td>
|
| 33 |
-
<td>1311</td>
|
| 34 |
-
<td>4765</td>
|
| 35 |
-
<td>1311</td>
|
| 36 |
-
</tr>
|
| 37 |
-
<tr>
|
| 38 |
-
<td>Non-Fouling</td>
|
| 39 |
-
<td>13580</td>
|
| 40 |
-
<td>3600</td>
|
| 41 |
-
<td>13580</td>
|
| 42 |
-
<td>3600</td>
|
| 43 |
-
</tr>
|
| 44 |
-
<tr>
|
| 45 |
-
<td>Solubility</td>
|
| 46 |
-
<td>9668</td>
|
| 47 |
-
<td>8785</td>
|
| 48 |
-
<td>-</td>
|
| 49 |
-
<td>-</td>
|
| 50 |
-
</tr>
|
| 51 |
-
<tr>
|
| 52 |
-
<td>Permeability (Penetrance)</td>
|
| 53 |
-
<td>1162</td>
|
| 54 |
-
<td>1162</td>
|
| 55 |
-
<td>-</td>
|
| 56 |
-
<td>-</td>
|
| 57 |
-
</tr>
|
| 58 |
-
<tr>
|
| 59 |
-
<td>Toxicity</td>
|
| 60 |
-
<td>-</td>
|
| 61 |
-
<td>-</td>
|
| 62 |
-
<td>5518</td>
|
| 63 |
-
<td>5518</td>
|
| 64 |
-
</tr>
|
| 65 |
-
<tr>
|
| 66 |
-
<td colspan="5"><strong>Regression (N)</strong></td>
|
| 67 |
-
</tr>
|
| 68 |
-
<tr>
|
| 69 |
-
<td>Permeability (PAMPA)</td>
|
| 70 |
-
<td colspan="2" align="center">-</td>
|
| 71 |
-
<td colspan="2" align="center">6869</td>
|
| 72 |
-
</tr>
|
| 73 |
-
<tr>
|
| 74 |
-
<td>Permeability (CACO2)</td>
|
| 75 |
-
<td colspan="2" align="center">-</td>
|
| 76 |
-
<td colspan="2" align="center">606</td>
|
| 77 |
-
</tr>
|
| 78 |
-
<tr>
|
| 79 |
-
<td>Half-Life</td>
|
| 80 |
-
<td colspan="2" align="center">130</td>
|
| 81 |
-
<td colspan="2" align="center">245</td>
|
| 82 |
-
</tr>
|
| 83 |
-
<tr>
|
| 84 |
-
<td>Binding Affinity</td>
|
| 85 |
-
<td colspan="2" align="center">1436</td>
|
| 86 |
-
<td colspan="2" align="center">1597</td>
|
| 87 |
-
</tr>
|
| 88 |
-
</tbody>
|
| 89 |
-
</table>
|
| 90 |
|
| 91 |
Our models are trained on curated datasets from multiple sources. For detailed data-cleaning procedures, please refer to our [paper]().
|
| 92 |
|
|
@@ -147,7 +89,7 @@ Higher scores indicate stronger non-fouling behavior, desirable for circulation
|
|
| 147 |
- **Others:** SVM and Elastic Nets were trained with [RAPIDS cuML](https://github.com/rapidsai/cuml), which requires a CUDA environment and is therefore not supported in the web app. Model checkpoints remain available in the Hugging Face repository.
|
| 148 |
|
| 149 |
### Model Training and Weight Hosting
|
| 150 |
-
- More instructions can be found here at [
|
| 151 |
|
| 152 |
### 🧪 Physicochemical Properties
|
| 153 |
|
|
|
|
| 8 |
|
| 9 |
### Training Data Collection
|
| 10 |
|
| 11 |
+
**Data distribution.** Classification tasks report counts for class 0/1; regression tasks report total sample size (N). AA stands for amino acid-based sequences.
|
| 12 |
+
|
| 13 |
+
#### Classification (counts for class 0 / 1)
|
| 14 |
+
|
| 15 |
+
| Property | AA (0) | AA (1) | SMILES (0) | SMILES (1) |
|
| 16 |
+
|---|---:|---:|---:|---:|
|
| 17 |
+
| Hemolysis | 4765 | 1311 | 4765 | 1311 |
|
| 18 |
+
| Non-Fouling | 13580 | 3600 | 13580 | 3600 |
|
| 19 |
+
| Solubility | 9668 | 8785 | – | – |
|
| 20 |
+
| Permeability (Penetrance) | 1162 | 1162 | – | – |
|
| 21 |
+
| Toxicity | – | – | 5518 | 5518 |
|
| 22 |
+
|
| 23 |
+
#### Regression (total N)
|
| 24 |
+
|
| 25 |
+
| Property | AA (N) | SMILES (N) |
|
| 26 |
+
|---|---:|---:|
|
| 27 |
+
| Permeability (PAMPA) | – | 6869 |
|
| 28 |
+
| Permeability (CACO2) | – | 606 |
|
| 29 |
+
| Half-Life | 130 | 245 |
|
| 30 |
+
| Binding Affinity | 1436 | 1597 |
|
| 31 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
Our models are trained on curated datasets from multiple sources. For detailed data-cleaning procedures, please refer to our [paper]().
|
| 34 |
|
|
|
|
| 89 |
- **Others:** SVM and Elastic Nets were trained with [RAPIDS cuML](https://github.com/rapidsai/cuml), which requires a CUDA environment and is therefore not supported in the web app. Model checkpoints remain available in the Hugging Face repository.
|
| 90 |
|
| 91 |
### Model Training and Weight Hosting
|
| 92 |
+
- More instructions can be found here at [PeptiVerse](https://huggingface.co/ChatterjeeLab/PeptiVerse)
|
| 93 |
|
| 94 |
### 🧪 Physicochemical Properties
|
| 95 |
|
peptiverse_styles.css
CHANGED
|
@@ -339,29 +339,3 @@ h1 {
|
|
| 339 |
#acc_pred > button * {
|
| 340 |
font-size: 18px !important;
|
| 341 |
}
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
/* =========================
|
| 345 |
-
FOOTER STYLING
|
| 346 |
-
========================= */
|
| 347 |
-
|
| 348 |
-
/* Footer container - properly centered */
|
| 349 |
-
.footer-container {
|
| 350 |
-
display: flex !important;
|
| 351 |
-
flex-direction: column !important;
|
| 352 |
-
align-items: center !important;
|
| 353 |
-
justify-content: center !important;
|
| 354 |
-
text-align: center !important;
|
| 355 |
-
width: 100% !important;
|
| 356 |
-
margin-top: 2rem !important;
|
| 357 |
-
padding: 1.5rem 0 !important;
|
| 358 |
-
border-top: 1px solid #e5e7eb !important;
|
| 359 |
-
}
|
| 360 |
-
|
| 361 |
-
.footer-container p {
|
| 362 |
-
text-align: center !important;
|
| 363 |
-
color: #6b7280 !important;
|
| 364 |
-
margin: 0.25rem 0 !important;
|
| 365 |
-
font-size: 14px !important;
|
| 366 |
-
line-height: 1.5 !important;
|
| 367 |
-
}
|
|
|
|
| 339 |
#acc_pred > button * {
|
| 340 |
font-size: 18px !important;
|
| 341 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|