knowmili
committed on
Commit
·
23bf461
1
Parent(s):
017a13e
Added fine-tuned ESM2 Model
Browse files
- README.md +44 -0
- config.json +31 -0
- model.safetensors +3 -0
README.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ANTICP3: Anticancer Protein Prediction using ESM2
|
| 2 |
+
|
| 3 |
+
This repository hosts the fine-tuned **ESM2-based classifier** for **anticancer protein (ACP) prediction**, named **ANTICP3**. The model is built on top of [facebook/esm2_t33_650M_UR50D](https://huggingface.co/facebook/esm2_t33_650M_UR50D), and it performs binary classification to predict whether a given protein or peptide sequence has anticancer properties.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Model Details
|
| 8 |
+
|
| 9 |
+
- **Base Model:** `facebook/esm2_t33_650M_UR50D`
|
| 10 |
+
- **Task:** Binary Sequence Classification
|
| 11 |
+
- **Labels:**
|
| 12 |
+
- `0`: Non-Anticancer
|
| 13 |
+
- `1`: Anticancer
|
| 14 |
+
- **Framework:** [Transformers](https://huggingface.co/docs/transformers/index)
|
| 15 |
+
- **Format:** `Safetensors`
|
| 16 |
+
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
## 🚀 Usage
|
| 20 |
+
|
| 21 |
+
You can load and use this model directly with the `transformers` library:
|
| 22 |
+
|
| 23 |
+
```python
|
| 24 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 25 |
+
import torch
|
| 26 |
+
|
| 27 |
+
# Load tokenizer and model
|
| 28 |
+
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")
|
| 29 |
+
model = AutoModelForSequenceClassification.from_pretrained("AmishaG/anticp3")
|
| 30 |
+
|
| 31 |
+
# Example input sequence
|
| 32 |
+
sequence = "MANCVVGYIGERCQYRDLKWWELRGGGGSGGGGSAPAFSVSPASGLSDGQSVSVSVSGAAAGETYYIAQCAPVGGQDACNPATATSFTTDASGAASFSFVVRKSYTGSTPEGTPVGSVDCATAACNLGAGNSGLDLGHVALTFGGGGGSGGGGSDHYNCVSSGGQCLYSACPIFTKIQGTCYRGKAKCCKLEHHHHHH"
|
| 33 |
+
|
| 34 |
+
# Tokenize
|
| 35 |
+
inputs = tokenizer(sequence, return_tensors="pt", truncation=True)
|
| 36 |
+
|
| 37 |
+
# Run inference
|
| 38 |
+
with torch.no_grad():
|
| 39 |
+
logits = model(**inputs).logits
|
| 40 |
+
probs = torch.nn.functional.softmax(logits, dim=-1)
|
| 41 |
+
prediction = torch.argmax(probs, dim=1).item()
|
| 42 |
+
|
| 43 |
+
labels = {0: "Non-Anticancer", 1: "Anticancer"}
|
| 44 |
+
print("Prediction:", labels[prediction])
```
|
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "facebook/esm2_t33_650M_UR50D",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"EsmForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"emb_layer_norm_before": false,
|
| 9 |
+
"esmfold_config": null,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.0,
|
| 12 |
+
"hidden_size": 1280,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 5120,
|
| 15 |
+
"is_folding_model": false,
|
| 16 |
+
"layer_norm_eps": 1e-05,
|
| 17 |
+
"mask_token_id": 32,
|
| 18 |
+
"max_position_embeddings": 1026,
|
| 19 |
+
"model_type": "esm",
|
| 20 |
+
"num_attention_heads": 20,
|
| 21 |
+
"num_hidden_layers": 33,
|
| 22 |
+
"pad_token_id": 1,
|
| 23 |
+
"position_embedding_type": "rotary",
|
| 24 |
+
"problem_type": "single_label_classification",
|
| 25 |
+
"token_dropout": true,
|
| 26 |
+
"torch_dtype": "float32",
|
| 27 |
+
"transformers_version": "4.47.0",
|
| 28 |
+
"use_cache": true,
|
| 29 |
+
"vocab_list": null,
|
| 30 |
+
"vocab_size": 33
|
| 31 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79369d989baf8df87877c7c397141e889cd796096e0df074ca23a5e56edceab4
|
| 3 |
+
size 2609497900
|