# tao9llm / modeling_simple_model.py
# Uploaded by toind — "Update modeling_simple_model.py" (commit 413683a, verified)
import torch
from torch import nn, optim
from transformers import PreTrainedModel, PretrainedConfig, PreTrainedTokenizerFast, AutoModelForCausalLM, AutoTokenizer, AutoConfig, AutoModel, AutoModelForCausalLM
class SimpleConfig(PretrainedConfig):
    """Configuration for :class:`SimpleModel`.

    Fix: the original defined the hyperparameters as *class* attributes only,
    so they never entered the instance ``__dict__`` and were dropped by
    ``PretrainedConfig.to_dict()`` / ``save_pretrained()``. They are now set
    on the instance in ``__init__`` with the same default values, and any
    extra kwargs are forwarded to the base class as usual.
    """

    model_type = "simple_model"

    def __init__(
        self,
        vocab_size=1000000,
        n_embd=256,    # embedding / model width
        n_layer=2,     # encoder and decoder layer count
        n_head=4,      # attention heads
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_head = n_head
        super().__init__(**kwargs)
class SimpleModel(PreTrainedModel):
    """Toy "oracle" model for pipeline testing.

    ``forward(inputs)`` returns ``self`` with ``self.logits`` set so that each
    position j < seq_length - 1 is peaked (1e9) on the next input token
    ``inputs[:, j + 1]`` and every other entry is -1e9 — i.e. any loss computed
    against the inputs as labels is (near) zero. The nn submodules exist only
    so the model has parameters to save/load; ``forward`` does not use them.
    """

    config_class = SimpleConfig

    def __init__(self, config=None):
        # Fall back to default hyperparameters when no config is supplied.
        config = config if config is not None else SimpleConfig()
        super().__init__(config)
        self.embedding = nn.Embedding(config.vocab_size, config.n_embd)
        self.transformer = nn.Transformer(
            d_model=config.n_embd,
            nhead=config.n_head,
            num_encoder_layers=config.n_layer,
            num_decoder_layers=config.n_layer,
        )
        self.fc = nn.Linear(config.n_embd, config.vocab_size)
        self.vocab_size = config.vocab_size

    def forward(self, inputs):
        """Build the oracle logits for ``inputs`` (a (batch, seq) id tensor).

        Returns ``self`` so callers can read ``outputs.logits``.
        Side effect (as in the original): ``self.vocab_size`` and
        ``self.config.vocab_size`` are overwritten from the batch.
        """
        batch_size, seq_length = inputs.size()
        shift_labels = inputs[..., 1:].contiguous()
        # Original loop computed max over all (i, j) of (i+1, j+1, label+1),
        # i.e. max(rows, cols, max label + 1), and 0 when there are no shifted
        # labels. Vectorized here, and coerced to a plain int — the loop
        # version returned a 0-dim tensor, which was then (fragilely) used as
        # a shape element in torch.full.
        if shift_labels.numel() == 0:
            vocab_size = 0
        else:
            vocab_size = max(
                shift_labels.size(0),
                shift_labels.size(1),
                int(shift_labels.max().item()) + 1,
            )
        self.config.vocab_size = vocab_size
        self.vocab_size = vocab_size
        logits = torch.full(
            (batch_size, seq_length, vocab_size), -1e9, device=inputs.device
        )
        if vocab_size > 0:
            # One scatter replaces the O(batch * seq) Python double loop:
            # peak each of the first seq_length - 1 positions at its label.
            # The last position stays uniformly -1e9, as before.
            logits[:, :-1, :].scatter_(2, shift_labels.unsqueeze(-1), 1e9)
        self._logits = logits
        return self

    @property
    def logits(self):
        # Set by forward(); exposed so the returned `self` quacks like a
        # ModelOutput with a .logits field.
        return self._logits

    def generate(self, input_ids, **kwargs):
        """Return uniformly random token ids shaped like ``input_ids``.

        Generation kwargs (max_length, ...) are accepted and ignored.
        """
        return torch.randint(
            0, self.vocab_size, input_ids.size(), device=input_ids.device
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        # Default trust_remote_code to True only when the caller did not pass
        # it; the original passed it unconditionally, which raised
        # "got multiple values for keyword argument" if the caller also did.
        kwargs.setdefault("trust_remote_code", True)
        return super().from_pretrained(
            pretrained_model_name_or_path, *model_args, **kwargs
        )
# Register the custom classes with the Auto* factories so AutoConfig /
# AutoModel can resolve model_type == "simple_model". (The redundant
# re-import of AutoConfig/AutoModel/AutoModelForCausalLM that stood here
# was removed — these names are already imported at the top of the file.)
AutoConfig.register("simple_model", SimpleConfig)
AutoModel.register(SimpleConfig, SimpleModel)
# AutoModelForCausalLM.register(SimpleConfig, SimpleModel)