|
|
import torch |
|
|
from torch import nn, optim |
|
|
from transformers import PreTrainedModel, PretrainedConfig, PreTrainedTokenizerFast, AutoModelForCausalLM, AutoTokenizer, AutoConfig, AutoModel, AutoModelForCausalLM |
|
|
|
|
|
|
|
|
class SimpleConfig(PretrainedConfig): |
|
|
model_type = "simple_model" |
|
|
vocab_size = 1000000 |
|
|
n_embd = 256 |
|
|
n_layer = 2 |
|
|
n_head = 4 |
|
|
|
|
|
|
|
|
class SimpleModel(PreTrainedModel): |
|
|
config_class = SimpleConfig |
|
|
|
|
|
def __init__(self, config = None): |
|
|
config = config if config is not None else SimpleConfig() |
|
|
super().__init__(config) |
|
|
self.embedding = nn.Embedding(config.vocab_size, config.n_embd) |
|
|
self.transformer = nn.Transformer( |
|
|
d_model=config.n_embd, |
|
|
nhead=config.n_head, |
|
|
num_encoder_layers=config.n_layer, |
|
|
num_decoder_layers=config.n_layer, |
|
|
) |
|
|
self.fc = nn.Linear(config.n_embd, config.vocab_size) |
|
|
self.vocab_size = config.vocab_size |
|
|
|
|
|
def forward(self, inputs): |
|
|
batch_size, seq_length = inputs.size() |
|
|
|
|
|
shift_labels = inputs[..., 1:].contiguous() |
|
|
|
|
|
vocab_size = 0 |
|
|
for i in range(shift_labels.size(0)): |
|
|
for j in range(shift_labels.size(1)): |
|
|
vocab_size = max(vocab_size, i + 1, j + 1, shift_labels[i, j] + 1) |
|
|
self.config.vocab_size = vocab_size |
|
|
self.vocab_size = vocab_size |
|
|
|
|
|
logits = torch.full((batch_size, seq_length, self.vocab_size), -1e9, device=inputs.device) |
|
|
|
|
|
for i in range(shift_labels.size(0)): |
|
|
for j in range(shift_labels.size(1)): |
|
|
logits[i, j, shift_labels[i, j]] = 1e9 |
|
|
|
|
|
self._logits = logits |
|
|
|
|
|
return self |
|
|
|
|
|
@property |
|
|
def logits(self): |
|
|
return self._logits |
|
|
|
|
|
def generate(self, input_ids, **kwargs): |
|
|
output_ids = torch.randint(0, self.vocab_size, input_ids.size(), device=input_ids.device) |
|
|
return output_ids |
|
|
|
|
|
@classmethod |
|
|
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): |
|
|
model = super().from_pretrained(pretrained_model_name_or_path, trust_remote_code=True, *model_args, **kwargs) |
|
|
return model |
|
|
|
|
|
|
|
|
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM |
|
|
|
|
|
AutoConfig.register("simple_model", SimpleConfig) |
|
|
AutoModel.register(SimpleConfig, SimpleModel) |
|
|
|