add model
Browse files- config.json +4 -4
- configuration_distilbert_ane.py +2 -4
- modeling_distilbert_ane.py +17 -17
config.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"activation": "gelu",
|
| 3 |
"architectures": [
|
| 4 |
-
"DistilBertForSequenceClassification_ANE"
|
| 5 |
],
|
| 6 |
"attention_dropout": 0.1,
|
| 7 |
"auto_map": {
|
| 8 |
-
"AutoConfig": "configuration_distilbert_ane.DistilBertConfig_ANE",
|
| 9 |
-
"AutoModelForSequenceClassification": "modeling_distilbert_ane.DistilBertForSequenceClassification_ANE"
|
| 10 |
},
|
| 11 |
"dim": 768,
|
| 12 |
"dropout": 0.1,
|
|
@@ -22,7 +22,7 @@
|
|
| 22 |
"POSITIVE": 1
|
| 23 |
},
|
| 24 |
"max_position_embeddings": 512,
|
| 25 |
-
"model_type": "distilbert_ane",
|
| 26 |
"n_heads": 12,
|
| 27 |
"n_layers": 6,
|
| 28 |
"output_past": true,
|
|
|
|
| 1 |
{
|
| 2 |
"activation": "gelu",
|
| 3 |
"architectures": [
|
| 4 |
+
"DistilBertForSequenceClassification"
|
| 5 |
],
|
| 6 |
"attention_dropout": 0.1,
|
| 7 |
"auto_map": {
|
| 8 |
+
"AutoConfig": "configuration_distilbert_ane.DistilBertConfig",
|
| 9 |
+
"AutoModelForSequenceClassification": "modeling_distilbert_ane.DistilBertForSequenceClassification"
|
| 10 |
},
|
| 11 |
"dim": 768,
|
| 12 |
"dropout": 0.1,
|
|
|
|
| 22 |
"POSITIVE": 1
|
| 23 |
},
|
| 24 |
"max_position_embeddings": 512,
|
| 25 |
+
"model_type": "distilbert",
|
| 26 |
"n_heads": 12,
|
| 27 |
"n_layers": 6,
|
| 28 |
"output_past": true,
|
configuration_distilbert_ane.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
-
from transformers import DistilBertConfig
|
| 2 |
-
|
| 3 |
-
class DistilBertConfig_ANE(DistilBertConfig):
|
| 4 |
-
model_type = "distilbert_ane"
|
| 5 |
|
|
|
|
| 6 |
def __init__(self, **kwargs):
|
| 7 |
super().__init__(**kwargs)
|
|
|
|
| 1 |
+
from transformers.models.distilbert import configuration_distilbert
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
class DistilBertConfig(configuration_distilbert.DistilBertConfig):
|
| 4 |
def __init__(self, **kwargs):
|
| 5 |
super().__init__(**kwargs)
|
modeling_distilbert_ane.py
CHANGED
|
@@ -43,7 +43,7 @@ import torch
|
|
| 43 |
import torch.nn as nn
|
| 44 |
|
| 45 |
from transformers.models.distilbert import modeling_distilbert
|
| 46 |
-
from .configuration_distilbert_ane import DistilBertConfig_ANE
|
| 47 |
|
| 48 |
# Note: Original implementation of distilbert uses an epsilon value of 1e-12
|
| 49 |
# which is not friendly with the float16 precision that ANE uses by default
|
|
@@ -317,8 +317,8 @@ class Transformer(modeling_distilbert.Transformer):
|
|
| 317 |
[TransformerBlock(config) for _ in range(config.n_layers)]))
|
| 318 |
|
| 319 |
|
| 320 |
-
class DistilBertModel_ANE(modeling_distilbert.DistilBertModel):
|
| 321 |
-
config_class = DistilBertConfig_ANE
|
| 322 |
|
| 323 |
def __init__(self, config):
|
| 324 |
super().__init__(config)
|
|
@@ -332,14 +332,14 @@ class DistilBertModel_ANE(modeling_distilbert.DistilBertModel):
|
|
| 332 |
raise NotImplementedError
|
| 333 |
|
| 334 |
|
| 335 |
-
class DistilBertForMaskedLM_ANE(modeling_distilbert.DistilBertForMaskedLM):
|
| 336 |
-
config_class = DistilBertConfig_ANE
|
| 337 |
|
| 338 |
def __init__(self, config):
|
| 339 |
super().__init__(config)
|
| 340 |
from transformers.activations import get_activation
|
| 341 |
setattr(self, 'activation', get_activation(config.activation))
|
| 342 |
-
setattr(self, 'distilbert', DistilBertModel_ANE(config))
|
| 343 |
setattr(self, 'vocab_transform', nn.Conv2d(config.dim, config.dim, 1))
|
| 344 |
setattr(self, 'vocab_layer_norm', LayerNormANE(config.dim, eps=EPS))
|
| 345 |
setattr(self, 'vocab_projector',
|
|
@@ -390,13 +390,13 @@ class DistilBertForMaskedLM_ANE(modeling_distilbert.DistilBertForMaskedLM):
|
|
| 390 |
return ((mlm_loss, ) + output) if mlm_loss is not None else output
|
| 391 |
|
| 392 |
|
| 393 |
-
class DistilBertForSequenceClassification_ANE(
|
| 394 |
modeling_distilbert.DistilBertForSequenceClassification):
|
| 395 |
-
config_class = DistilBertConfig_ANE
|
| 396 |
|
| 397 |
def __init__(self, config):
|
| 398 |
super().__init__(config)
|
| 399 |
-
setattr(self, 'distilbert', DistilBertModel_ANE(config))
|
| 400 |
setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
|
| 401 |
setattr(self, 'classifier', nn.Conv2d(config.dim, config.num_labels,
|
| 402 |
1))
|
|
@@ -441,13 +441,13 @@ class DistilBertForSequenceClassification_ANE(
|
|
| 441 |
return ((loss, ) + output) if loss is not None else output
|
| 442 |
|
| 443 |
|
| 444 |
-
class DistilBertForQuestionAnswering_ANE(
|
| 445 |
modeling_distilbert.DistilBertForQuestionAnswering):
|
| 446 |
-
config_class = DistilBertConfig_ANE
|
| 447 |
|
| 448 |
def __init__(self, config):
|
| 449 |
super().__init__(config)
|
| 450 |
-
setattr(self, 'distilbert', DistilBertModel_ANE(config))
|
| 451 |
setattr(self, 'qa_outputs', nn.Conv2d(config.dim, config.num_labels,
|
| 452 |
1))
|
| 453 |
|
|
@@ -497,12 +497,12 @@ class DistilBertForQuestionAnswering_ANE(
|
|
| 497 |
return ((total_loss, ) + output) if total_loss is not None else output
|
| 498 |
|
| 499 |
|
| 500 |
-
class DistilBertForTokenClassification_ANE(
|
| 501 |
modeling_distilbert.DistilBertForTokenClassification):
|
| 502 |
|
| 503 |
def __init__(self, config):
|
| 504 |
super().__init__(config)
|
| 505 |
-
setattr(self, 'distilbert', DistilBertModel_ANE(config))
|
| 506 |
setattr(self, 'classifier',
|
| 507 |
nn.Conv2d(config.hidden_size, config.num_labels, 1))
|
| 508 |
|
|
@@ -544,13 +544,13 @@ class DistilBertForTokenClassification_ANE(
|
|
| 544 |
return ((loss, ) + output) if loss is not None else output
|
| 545 |
|
| 546 |
|
| 547 |
-
class DistilBertForMultipleChoice_ANE(
|
| 548 |
modeling_distilbert.DistilBertForMultipleChoice):
|
| 549 |
-
config_class = DistilBertConfig_ANE
|
| 550 |
|
| 551 |
def __init__(self, config):
|
| 552 |
super().__init__(config)
|
| 553 |
-
setattr(self, 'distilbert', DistilBertModel_ANE(config))
|
| 554 |
setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
|
| 555 |
setattr(self, 'classifier', nn.Conv2d(config.dim, 1, 1))
|
| 556 |
|
|
|
|
| 43 |
import torch.nn as nn
|
| 44 |
|
| 45 |
from transformers.models.distilbert import modeling_distilbert
|
| 46 |
+
from .configuration_distilbert_ane import DistilBertConfig
|
| 47 |
|
| 48 |
# Note: Original implementation of distilbert uses an epsilon value of 1e-12
|
| 49 |
# which is not friendly with the float16 precision that ANE uses by default
|
|
|
|
| 317 |
[TransformerBlock(config) for _ in range(config.n_layers)]))
|
| 318 |
|
| 319 |
|
| 320 |
+
class DistilBertModel(modeling_distilbert.DistilBertModel):
|
| 321 |
+
config_class = DistilBertConfig
|
| 322 |
|
| 323 |
def __init__(self, config):
|
| 324 |
super().__init__(config)
|
|
|
|
| 332 |
raise NotImplementedError
|
| 333 |
|
| 334 |
|
| 335 |
+
class DistilBertForMaskedLM(modeling_distilbert.DistilBertForMaskedLM):
|
| 336 |
+
config_class = DistilBertConfig
|
| 337 |
|
| 338 |
def __init__(self, config):
|
| 339 |
super().__init__(config)
|
| 340 |
from transformers.activations import get_activation
|
| 341 |
setattr(self, 'activation', get_activation(config.activation))
|
| 342 |
+
setattr(self, 'distilbert', DistilBertModel(config))
|
| 343 |
setattr(self, 'vocab_transform', nn.Conv2d(config.dim, config.dim, 1))
|
| 344 |
setattr(self, 'vocab_layer_norm', LayerNormANE(config.dim, eps=EPS))
|
| 345 |
setattr(self, 'vocab_projector',
|
|
|
|
| 390 |
return ((mlm_loss, ) + output) if mlm_loss is not None else output
|
| 391 |
|
| 392 |
|
| 393 |
+
class DistilBertForSequenceClassification(
|
| 394 |
modeling_distilbert.DistilBertForSequenceClassification):
|
| 395 |
+
config_class = DistilBertConfig
|
| 396 |
|
| 397 |
def __init__(self, config):
|
| 398 |
super().__init__(config)
|
| 399 |
+
setattr(self, 'distilbert', DistilBertModel(config))
|
| 400 |
setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
|
| 401 |
setattr(self, 'classifier', nn.Conv2d(config.dim, config.num_labels,
|
| 402 |
1))
|
|
|
|
| 441 |
return ((loss, ) + output) if loss is not None else output
|
| 442 |
|
| 443 |
|
| 444 |
+
class DistilBertForQuestionAnswering(
|
| 445 |
modeling_distilbert.DistilBertForQuestionAnswering):
|
| 446 |
+
config_class = DistilBertConfig
|
| 447 |
|
| 448 |
def __init__(self, config):
|
| 449 |
super().__init__(config)
|
| 450 |
+
setattr(self, 'distilbert', DistilBertModel(config))
|
| 451 |
setattr(self, 'qa_outputs', nn.Conv2d(config.dim, config.num_labels,
|
| 452 |
1))
|
| 453 |
|
|
|
|
| 497 |
return ((total_loss, ) + output) if total_loss is not None else output
|
| 498 |
|
| 499 |
|
| 500 |
+
class DistilBertForTokenClassification(
|
| 501 |
modeling_distilbert.DistilBertForTokenClassification):
|
| 502 |
|
| 503 |
def __init__(self, config):
|
| 504 |
super().__init__(config)
|
| 505 |
+
setattr(self, 'distilbert', DistilBertModel(config))
|
| 506 |
setattr(self, 'classifier',
|
| 507 |
nn.Conv2d(config.hidden_size, config.num_labels, 1))
|
| 508 |
|
|
|
|
| 544 |
return ((loss, ) + output) if loss is not None else output
|
| 545 |
|
| 546 |
|
| 547 |
+
class DistilBertForMultipleChoice(
|
| 548 |
modeling_distilbert.DistilBertForMultipleChoice):
|
| 549 |
+
config_class = DistilBertConfig
|
| 550 |
|
| 551 |
def __init__(self, config):
|
| 552 |
super().__init__(config)
|
| 553 |
+
setattr(self, 'distilbert', DistilBertModel(config))
|
| 554 |
setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
|
| 555 |
setattr(self, 'classifier', nn.Conv2d(config.dim, 1, 1))
|
| 556 |
|