# model.py
from typing import Iterable, List

import torch
import torch.nn as nn


# Scratch Tokenizer
class ScratchTokenizer:
    """Minimal whitespace tokenizer with four reserved special-token ids.

    Ids 0-3 are reserved for padding, start-of-sequence, end-of-sequence and
    unknown-word markers; every other word gets the next free id the first
    time ``build_vocab`` sees it.
    """

    PAD_IDX = 0
    SOS_IDX = 1
    EOS_IDX = 2
    UNK_IDX = 3

    # NOTE(review): the previous revision used the empty string for all four
    # special tokens, so the dict literal collapsed to a single key. These
    # conventional names are presumed to be the intended ones -- confirm.
    SPECIAL_TOKENS = ("<PAD>", "<SOS>", "<EOS>", "<UNK>")

    def __init__(self):
        self.word2idx = {tok: idx for idx, tok in enumerate(self.SPECIAL_TOKENS)}
        self.idx2word = dict(enumerate(self.SPECIAL_TOKENS))
        self.vocab_size = len(self.SPECIAL_TOKENS)

    def build_vocab(self, texts: Iterable[str]) -> None:
        """Add every unseen whitespace-separated word in *texts* to the vocab."""
        for text in texts:
            for word in text.split():
                if word not in self.word2idx:
                    self.word2idx[word] = self.vocab_size
                    self.idx2word[self.vocab_size] = word
                    self.vocab_size += 1

    def encode(self, text: str, max_len: int = 200) -> List[int]:
        """Convert *text* to exactly ``max_len`` token ids.

        The result is ``[SOS] + word ids + [EOS]``, truncated so that the two
        markers always fit, then right-padded with the padding id. Words
        missing from the vocabulary map to the unknown id.

        Raises:
            ValueError: if ``max_len`` is too small to hold SOS and EOS.
        """
        if max_len < 2:
            raise ValueError(f"max_len must be at least 2, got {max_len}")
        word_ids = [self.word2idx.get(word, self.UNK_IDX) for word in text.split()]
        ids = [self.SOS_IDX, *word_ids[: max_len - 2], self.EOS_IDX]
        return ids + [self.PAD_IDX] * (max_len - len(ids))

    def decode(self, tokens: Iterable[int]) -> str:
        """Join the words for *tokens* with single spaces, dropping padding.

        Each id is coerced with ``int()`` so a 1-D tensor of ids can be passed
        directly: 0-dim tensors hash by identity and would otherwise never
        match a dictionary key. Ids missing from the vocabulary decode to the
        empty string.
        """
        ids = (int(tok) for tok in tokens)
        return " ".join(self.idx2word.get(idx, "") for idx in ids if idx > self.PAD_IDX)


# Transformer Model
class GPTModel(nn.Module):
    """Token + learned-position embeddings feeding an ``nn.TransformerDecoder``.

    ``forward`` embeds both ``src`` and ``tgt`` with the same embedding table,
    uses the embedded ``src`` as the decoder memory, and projects the decoder
    output to vocabulary logits.
    """

    def __init__(self, vocab_size, embed_size=256, num_heads=8, num_layers=6, max_len=200):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # One learned vector per position, broadcast over the batch dimension.
        self.pos_embedding = nn.Parameter(torch.randn(1, max_len, embed_size))
        self.transformer = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model=embed_size, nhead=num_heads),
            num_layers=num_layers,
        )
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def _embed(self, ids: torch.Tensor, name: str) -> torch.Tensor:
        """Return token plus positional embeddings for a (batch, seq) id tensor."""
        seq_len = ids.size(1)
        max_len = self.pos_embedding.size(1)
        if seq_len > max_len:
            # Without this check the addition below fails with an opaque
            # broadcasting error.
            raise ValueError(
                f"{name} sequence length {seq_len} exceeds max_len {max_len}"
            )
        return self.embedding(ids) + self.pos_embedding[:, :seq_len, :]

    def forward(self, src: torch.Tensor, tgt: torch.Tensor) -> torch.Tensor:
        """Compute vocabulary logits for each target position.

        Args:
            src: integer ids of shape (batch, src_len); used as decoder memory.
            tgt: integer ids of shape (batch, tgt_len).

        Returns:
            Tensor of shape (batch, tgt_len, vocab_size).

        Raises:
            ValueError: if either sequence is longer than ``max_len``.
        """
        src_emb = self._embed(src, "src")
        tgt_emb = self._embed(tgt, "tgt")

        # Causal mask: -inf strictly above the diagonal, 0 elsewhere. Built
        # directly rather than via nn.Transformer.generate_square_subsequent_mask,
        # which can only be called on the class where it is a static method.
        tgt_len = tgt.size(1)
        tgt_mask = torch.triu(
            torch.full(
                (tgt_len, tgt_len),
                float("-inf"),
                dtype=tgt_emb.dtype,
                device=tgt.device,
            ),
            diagonal=1,
        )

        # The decoder layers use the default sequence-first layout, so move
        # from (batch, seq, embed) to (seq, batch, embed) and back.
        # NOTE(review): padding positions are not masked out of attention here;
        # confirm whether the training loop compensates.
        decoded = self.transformer(
            tgt_emb.permute(1, 0, 2),
            src_emb.permute(1, 0, 2),
            tgt_mask=tgt_mask,
        )
        return self.fc_out(decoded.permute(1, 0, 2))