Update README.md
README.md CHANGED
````diff
@@ -11,7 +11,7 @@ tags:
 - promoter-prediction
 - phage
 ---
-## ProkBERT-mini-
+## ProkBERT-mini-c-phage Model

 This finetuned model is specifically designed for promoter identification and is based on the [ProkBERT-mini-c model](https://huggingface.co/neuralbioinfo/prokbert-mini-long).

@@ -37,14 +37,14 @@ The following example demonstrates how to use the ProkBERT-mini-promoter model f
 ```python
 from prokbert.prokbert_tokenizer import ProkBERTTokenizer
 from transformers import MegatronBertForSequenceClassification
-finetuned_model = "neuralbioinfo/prokbert-mini-
+finetuned_model = "neuralbioinfo/prokbert-mini-c-phage"
 kmer = 1
 shift= 1

 tok_params = {'kmer' : kmer,
               'shift' : shift}
 tokenizer = ProkBERTTokenizer(tokenization_params=tok_params)
-model =
+model = MegatronBertForSequenceClassification.from_pretrained(finetuned_model)
 sequence = 'CACCGCATGGAGATCGGCACCTACTTCGACAAGCTGGAGGCGCTGCTGAAGGAGTGGTACGAGGCGCGCGGGGGTGAGGCATGACGGACTGGCAAGAGGAGCAGCGTCAGCGC'
 inputs = tokenizer(sequence, return_tensors="pt")
 # Ensure that inputs have a batch dimension
````