onboard pruned state dict, remove tokenizer, remove inference api, update readme
Browse files
- README.md +15 -4
- bert_layers.py +1 -1
- config.json +3 -3
- pytorch_model.bin +2 -2
- special_tokens_map.json +0 -7
- tokenizer.json +0 -0
- tokenizer_config.json +0 -13
- vocab.txt +0 -0
README.md
CHANGED
|
@@ -3,6 +3,7 @@ license: apache-2.0
|
|
| 3 |
language:
|
| 4 |
- en
|
| 5 |
pipeline_tag: fill-mask
|
|
|
|
| 6 |
---
|
| 7 |
|
| 8 |
# Monarch Mixer-BERT
|
|
@@ -14,17 +15,27 @@ Check out our [GitHub](https://github.com/HazyResearch/m2/tree/main) for instruc
|
|
| 14 |
|
| 15 |
## How to use
|
| 16 |
|
| 17 |
-
|
| 18 |
```python
|
| 19 |
from transformers import AutoModelForMaskedLM
|
| 20 |
mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
|
| 21 |
```
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
You can use this model with a pipeline for masked language modeling:
|
| 24 |
```python
|
| 25 |
-
from transformers import pipeline
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
```
|
| 29 |
|
| 30 |
### Remote Code
|
|
|
|
| 3 |
language:
|
| 4 |
- en
|
| 5 |
pipeline_tag: fill-mask
|
| 6 |
+
inference: false
|
| 7 |
---
|
| 8 |
|
| 9 |
# Monarch Mixer-BERT
|
|
|
|
| 15 |
|
| 16 |
## How to use
|
| 17 |
|
| 18 |
+
You can load this model using Hugging Face `AutoModelForMaskedLM`:
|
| 19 |
```python
|
| 20 |
from transformers import AutoModelForMaskedLM
|
| 21 |
mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
|
| 22 |
```
|
| 23 |
|
| 24 |
+
This model uses the Hugging Face `bert-base-uncased` tokenizer:
|
| 25 |
+
```python
|
| 26 |
+
from transformers import BertTokenizer
|
| 27 |
+
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
You can use this model with a pipeline for masked language modeling:
|
| 31 |
```python
|
| 32 |
+
from transformers import AutoModelForMaskedLM, BertTokenizer, pipeline
|
| 33 |
+
|
| 34 |
+
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 35 |
+
mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
|
| 36 |
+
|
| 37 |
+
unmasker = pipeline('fill-mask', model=mlm, tokenizer=tokenizer)
|
| 38 |
+
unmasker('Every morning, I enjoy a cup of [MASK] to start my day.')
|
| 39 |
```
|
| 40 |
|
| 41 |
### Remote Code
|
bert_layers.py
CHANGED
|
@@ -245,6 +245,7 @@ class BertLayer(nn.Module):
|
|
| 245 |
hyena_filter_dropout=config.hyena_filter_dropout,
|
| 246 |
hyena_filter_order=config.hyena_filter_order,
|
| 247 |
residual_long_conv=config.residual_long_conv,
|
|
|
|
| 248 |
)
|
| 249 |
|
| 250 |
if config.use_glu_mlp:
|
|
@@ -887,4 +888,3 @@ class BertForSequenceClassification(BertPreTrainedModel):
|
|
| 887 |
hidden_states=None,
|
| 888 |
attentions=None,
|
| 889 |
)
|
| 890 |
-
|
|
|
|
| 245 |
hyena_filter_dropout=config.hyena_filter_dropout,
|
| 246 |
hyena_filter_order=config.hyena_filter_order,
|
| 247 |
residual_long_conv=config.residual_long_conv,
|
| 248 |
+
hyena_training_additions=config.hyena_training_additions,
|
| 249 |
)
|
| 250 |
|
| 251 |
if config.use_glu_mlp:
|
|
|
|
| 888 |
hidden_states=None,
|
| 889 |
attentions=None,
|
| 890 |
)
|
|
|
config.json
CHANGED
|
@@ -5,6 +5,7 @@
|
|
| 5 |
"BertForMaskedLM"
|
| 6 |
],
|
| 7 |
"attention_probs_dropout_prob": 0.0,
|
|
|
|
| 8 |
"auto_map": {
|
| 9 |
"AutoConfig": "configuration_bert.BertConfig",
|
| 10 |
"AutoModelForMaskedLM": "bert_layers.BertForMaskedLM"
|
|
@@ -27,7 +28,6 @@
|
|
| 27 |
"transformers_version": "4.28.1",
|
| 28 |
"type_vocab_size": 2,
|
| 29 |
"use_cache": true,
|
| 30 |
-
"vocab_size": 30522,
|
| 31 |
"long_conv_l_max": 128,
|
| 32 |
"long_conv_kernel_learning_rate": 1e-3,
|
| 33 |
"hyena_lr_pos_emb": 1e-5,
|
|
@@ -35,10 +35,10 @@
|
|
| 35 |
"hyena_wd": 0.1,
|
| 36 |
"hyena_emb_dim": 5,
|
| 37 |
"hyena_filter_order": 128,
|
| 38 |
-
"bidirectional": true,
|
| 39 |
"residual_long_conv": true,
|
| 40 |
"use_glu_mlp": true,
|
| 41 |
"use_monarch_mlp": true,
|
| 42 |
"monarch_mlp_nblocks": 4,
|
| 43 |
-
"use_positional_encodings" : true
|
|
|
|
| 44 |
}
|
|
|
|
| 5 |
"BertForMaskedLM"
|
| 6 |
],
|
| 7 |
"attention_probs_dropout_prob": 0.0,
|
| 8 |
+
"bidirectional": true,
|
| 9 |
"auto_map": {
|
| 10 |
"AutoConfig": "configuration_bert.BertConfig",
|
| 11 |
"AutoModelForMaskedLM": "bert_layers.BertForMaskedLM"
|
|
|
|
| 28 |
"transformers_version": "4.28.1",
|
| 29 |
"type_vocab_size": 2,
|
| 30 |
"use_cache": true,
|
|
|
|
| 31 |
"long_conv_l_max": 128,
|
| 32 |
"long_conv_kernel_learning_rate": 1e-3,
|
| 33 |
"hyena_lr_pos_emb": 1e-5,
|
|
|
|
| 35 |
"hyena_wd": 0.1,
|
| 36 |
"hyena_emb_dim": 5,
|
| 37 |
"hyena_filter_order": 128,
|
|
|
|
| 38 |
"residual_long_conv": true,
|
| 39 |
"use_glu_mlp": true,
|
| 40 |
"use_monarch_mlp": true,
|
| 41 |
"monarch_mlp_nblocks": 4,
|
| 42 |
+
"use_positional_encodings" : true,
|
| 43 |
+
"vocab_size": 30528
|
| 44 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0155ae9c0b8923f8ea79e768fb70bb1af75af0cd4adb0b166ea288e0d8732117
|
| 3 |
+
size 1036917225
|
special_tokens_map.json
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cls_token": "[CLS]",
|
| 3 |
-
"mask_token": "[MASK]",
|
| 4 |
-
"pad_token": "[PAD]",
|
| 5 |
-
"sep_token": "[SEP]",
|
| 6 |
-
"unk_token": "[UNK]"
|
| 7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"clean_up_tokenization_spaces": true,
|
| 3 |
-
"cls_token": "[CLS]",
|
| 4 |
-
"do_lower_case": true,
|
| 5 |
-
"mask_token": "[MASK]",
|
| 6 |
-
"model_max_length": 512,
|
| 7 |
-
"pad_token": "[PAD]",
|
| 8 |
-
"sep_token": "[SEP]",
|
| 9 |
-
"strip_accents": null,
|
| 10 |
-
"tokenize_chinese_chars": true,
|
| 11 |
-
"tokenizer_class": "BertTokenizer",
|
| 12 |
-
"unk_token": "[UNK]"
|
| 13 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vocab.txt
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|