Upload DisamBert
Browse files
- DisamBert.py +13 -7
- config.json +2 -0
- model.safetensors +1 -1
DisamBert.py
CHANGED
|
@@ -25,20 +25,26 @@ class LexicalExample:
|
|
| 25 |
class PaddedBatch:
|
| 26 |
input_ids: torch.Tensor
|
| 27 |
attention_mask: torch.Tensor
|
| 28 |
-
|
| 29 |
|
| 30 |
class DisamBert(PreTrainedModel):
|
| 31 |
def __init__(self, config:PreTrainedConfig):
|
| 32 |
super().__init__(config)
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
@classmethod
|
| 40 |
def from_base(cls, base_id: ModelURI):
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def init_classifier(self, entities: Generator[LexicalExample]) -> None:
|
|
|
|
| 25 |
class PaddedBatch:
|
| 26 |
input_ids: torch.Tensor
|
| 27 |
attention_mask: torch.Tensor
|
| 28 |
+
|
| 29 |
|
| 30 |
class DisamBert(PreTrainedModel):
|
| 31 |
def __init__(self, config:PreTrainedConfig):
|
| 32 |
super().__init__(config)
|
| 33 |
+
if config.init_basemodel:
|
| 34 |
+
self.BaseModel = AutoModel.from_pretrained(config.name_or_path,device_map="auto")
|
| 35 |
+
with self.BaseModel.device:
|
| 36 |
+
self.classifier_head = nn.UninitializedParameter()
|
| 37 |
+
self.__entities = None
|
| 38 |
+
config.init_basemodel = False
|
| 39 |
+
self.tokenizer = AutoTokenizer.from_pretrained(config.tokenizer_path)
|
| 40 |
+
|
| 41 |
|
| 42 |
@classmethod
|
| 43 |
def from_base(cls, base_id: ModelURI):
|
| 44 |
+
config = AutoConfig.from_pretrained(base_id)
|
| 45 |
+
config.init_basemodel = True
|
| 46 |
+
config.tokenizer_path = base_id
|
| 47 |
+
return cls(config)
|
| 48 |
|
| 49 |
|
| 50 |
def init_classifier(self, entities: Generator[LexicalExample]) -> None:
|
config.json
CHANGED
|
@@ -117684,6 +117684,7 @@
|
|
| 117684 |
"gradient_checkpointing": false,
|
| 117685 |
"hidden_activation": "gelu",
|
| 117686 |
"hidden_size": 768,
|
|
|
|
| 117687 |
"initializer_cutoff_factor": 2.0,
|
| 117688 |
"initializer_range": 0.02,
|
| 117689 |
"intermediate_size": 1152,
|
|
@@ -117738,6 +117739,7 @@
|
|
| 117738 |
"sparse_pred_ignore_index": -100,
|
| 117739 |
"sparse_prediction": false,
|
| 117740 |
"tie_word_embeddings": true,
|
|
|
|
| 117741 |
"transformers_version": "5.0.0",
|
| 117742 |
"vocab_size": 50368
|
| 117743 |
}
|
|
|
|
| 117684 |
"gradient_checkpointing": false,
|
| 117685 |
"hidden_activation": "gelu",
|
| 117686 |
"hidden_size": 768,
|
| 117687 |
+
"init_basemodel": false,
|
| 117688 |
"initializer_cutoff_factor": 2.0,
|
| 117689 |
"initializer_range": 0.02,
|
| 117690 |
"intermediate_size": 1152,
|
|
|
|
| 117739 |
"sparse_pred_ignore_index": -100,
|
| 117740 |
"sparse_prediction": false,
|
| 117741 |
"tie_word_embeddings": true,
|
| 117742 |
+
"tokenizer_path": "answerdotai/ModernBERT-base",
|
| 117743 |
"transformers_version": "5.0.0",
|
| 117744 |
"vocab_size": 50368
|
| 117745 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 957523088
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb308140e16c5e25eadcd0b9b5a5b5aa2fd8a1e6ef2cdb12368f758087afff93
|
| 3 |
size 957523088
|