Upload CubeLM
Browse files
- CubeConfig.py +4 -10
- CubeLM.py +3 -2
CubeConfig.py
CHANGED
|
@@ -1,8 +1,4 @@
|
|
| 1 |
-
#from transformers import PretrainedConfig
|
| 2 |
from transformers import GPT2Config
|
| 3 |
-
from cubeLM.tokenizer import vocab
|
| 4 |
-
|
| 5 |
-
vocab_size = len(vocab)
|
| 6 |
|
| 7 |
|
| 8 |
class CubeConfig(GPT2Config):
|
|
@@ -10,10 +6,10 @@ class CubeConfig(GPT2Config):
|
|
| 10 |
|
| 11 |
def __init__(
|
| 12 |
self,
|
| 13 |
-
vocab_size=
|
| 14 |
-
bos_token_id=
|
| 15 |
-
eos_token_id=
|
| 16 |
-
pad_token_id=
|
| 17 |
n_positions=40,
|
| 18 |
n_embd=512,
|
| 19 |
n_layer=8,
|
|
@@ -29,5 +25,3 @@ class CubeConfig(GPT2Config):
|
|
| 29 |
self.bos_token_id = bos_token_id
|
| 30 |
self.eos_token_id = eos_token_id
|
| 31 |
self.pad_token_id = pad_token_id
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
| 1 |
from transformers import GPT2Config
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
class CubeConfig(GPT2Config):
|
|
|
|
| 6 |
|
| 7 |
def __init__(
|
| 8 |
self,
|
| 9 |
+
vocab_size=16,
|
| 10 |
+
bos_token_id=15,
|
| 11 |
+
eos_token_id=15,
|
| 12 |
+
pad_token_id=15,
|
| 13 |
n_positions=40,
|
| 14 |
n_embd=512,
|
| 15 |
n_layer=8,
|
|
|
|
| 25 |
self.bos_token_id = bos_token_id
|
| 26 |
self.eos_token_id = eos_token_id
|
| 27 |
self.pad_token_id = pad_token_id
|
|
|
|
|
|
CubeLM.py
CHANGED
|
@@ -7,10 +7,11 @@ from transformers import (
|
|
| 7 |
GPT2Model,
|
| 8 |
GenerationMixin,
|
| 9 |
GPT2PreTrainedModel,
|
| 10 |
-
PreTrainedModel
|
| 11 |
)
|
| 12 |
from transformers.utils import ModelOutput
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
@dataclass
|
|
|
|
| 7 |
GPT2Model,
|
| 8 |
GenerationMixin,
|
| 9 |
GPT2PreTrainedModel,
|
| 10 |
+
PreTrainedModel,
|
| 11 |
)
|
| 12 |
from transformers.utils import ModelOutput
|
| 13 |
+
|
| 14 |
+
IGNORE_INDEX = -100
|
| 15 |
|
| 16 |
|
| 17 |
@dataclass
|