Update README.md
Browse files
README.md
CHANGED
|
```diff
@@ -10,7 +10,7 @@ ProtST for binary localization
 ## Running script
 ```python
 from transformers import AutoModel, AutoTokenizer, HfArgumentParser, TrainingArguments, Trainer
-from transformers.data.data_collator import
+from transformers.data.data_collator import DataCollatorWithPadding
 from transformers.trainer_pt_utils import get_parameter_names
 from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
 from datasets import load_dataset
@@ -125,8 +125,6 @@ if __name__ == "__main__":
     for split in ["train", "validation", "test"]:
         raw_dataset[split] = raw_dataset[split].map(func_tokenize_protein, batched=False, remove_columns=["Unnamed: 0", "prot_seq", "localization"])

-    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.0)
-    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

     transformers.utils.logging.set_verbosity_info()
```