In [1]:
import os
os.makedirs("tinygpt_base", exist_ok=True)
print("Folder recreated!")


Folder recreated!


In [2]:
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="Abdurrahmanesc/tinygpt-base-model",
    local_dir="./tinygpt_base",
    local_dir_use_symlinks=False
)

print("Model pulled from HF!")


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

.gitattributes: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

modeling_tinygpt.py: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

__init__.py:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

TinyGPT_Base_Model_Workflow.ipynb: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/674 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Model pulled from HF!


In [3]:
tinygpt_config_text = """
from transformers.configuration_utils import PretrainedConfig

class TinyGPTConfig(PretrainedConfig):
    model_type = "tinygpt"

    def __init__(
        self,
        vocab_size=30522,
        n_layers=4,
        n_heads=4,
        d_model=256,
        d_ff=1024,
        max_seq_len=512,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.d_model = d_model
        self.d_ff = d_ff
        self.max_seq_len = max_seq_len
"""

with open("./tinygpt_base/configuration_tinygpt.py", "w") as f:
    f.write(tinygpt_config_text)

print("configuration_tinygpt.py created!")


configuration_tinygpt.py created!


In [4]:
tinygpt_model_text = """
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from transformers.modeling_outputs import CausalLMOutput
from transformers.modeling_utils import PreTrainedModel
from configuration_tinygpt import TinyGPTConfig # Changed from relative to absolute import

class TinyGPTForCausalLM(PreTrainedModel):
    config_class = TinyGPTConfig

    def __init__(self, config):
        super().__init__(config)

        self.embed = nn.Embedding(config.vocab_size, config.d_model)
        self.pos_embed = nn.Embedding(config.max_seq_len, config.d_model)

        self.blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=config.d_model,
                nhead=config.n_heads,
                dim_feedforward=config.d_ff,
                batch_first=True
            )
            for _ in range(config.n_layers)
        ])

        self.norm = nn.LayerNorm(config.d_model)
        self.lm_head = nn.Linear(config.d_model, config.vocab_size)

        self.post_init()

    def forward(self, input_ids, labels=None):
        B, T = input_ids.shape
        positions = torch.arange(T, device=input_ids.device).unsqueeze(0)

        x = self.embed(input_ids) + self.pos_embed(positions)

        for blk in self.blocks:
            x = blk(x)

        x = self.norm(x)
        logits = self.lm_head(x)

        loss = None
        if labels is not None:
            shift_logits = logits[:, :-1, :].contiguous()
            shift_labels = labels[:, 1:].contiguous()
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                            shift_labels.view(-1))

        return CausalLMOutput(
            loss=loss,
            logits=logits
        )

    @torch.no_grad()
    def generate(self, input_ids, max_new_tokens=50):
        for _ in range(max_new_tokens):
            logits = self.forward(input_ids).logits
            next_token = torch.argmax(logits[:, -1, :], dim=-1)
            input_ids = torch.cat([input_ids, next_token[:, None]], dim=1)
        return input_ids
"""

with open("./tinygpt_base/modeling_tinygpt.py", "w") as f:
    f.write(tinygpt_model_text)

print("modeling_tinygpt.py created!")

modeling_tinygpt.py created!


In [5]:
!pip install --upgrade transformers



In [6]:
import sys
import os

# Add the directory containing the custom model files to sys.path
model_dir = "./tinygpt_base"
if model_dir not in sys.path:
    sys.path.insert(0, model_dir)

# Explicitly import the custom configuration and model classes
from configuration_tinygpt import TinyGPTConfig
from modeling_tinygpt import TinyGPTForCausalLM

# Register the custom model with AutoConfig and AutoModelForCausalLM
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

AutoConfig.register("tinygpt", TinyGPTConfig)
AutoModelForCausalLM.register(TinyGPTConfig, TinyGPTForCausalLM)

# Now, load the model from the local directory (trust_remote_code=True is no longer needed after explicit registration)
model = AutoModelForCausalLM.from_pretrained(
    model_dir
)

tokenizer = AutoTokenizer.from_pretrained("Abdurrahmanesc/tinygpt-base-model")


inputs = tokenizer("Hello TinyGPT!", return_tensors="pt")
outputs = model.generate(inputs["input_ids"], max_new_tokens=30)

print(tokenizer.decode(outputs[0]))

Some weights of TinyGPTForCausalLM were not initialized from the model checkpoint at ./tinygpt_base and are newly initialized: ['blocks.0.linear1.bias', 'blocks.0.linear1.weight', 'blocks.0.linear2.bias', 'blocks.0.linear2.weight', 'blocks.0.norm1.bias', 'blocks.0.norm1.weight', 'blocks.0.norm2.bias', 'blocks.0.norm2.weight', 'blocks.0.self_attn.in_proj_bias', 'blocks.0.self_attn.in_proj_weight', 'blocks.0.self_attn.out_proj.bias', 'blocks.0.self_attn.out_proj.weight', 'blocks.1.linear1.bias', 'blocks.1.linear1.weight', 'blocks.1.linear2.bias', 'blocks.1.linear2.weight', 'blocks.1.norm1.bias', 'blocks.1.norm1.weight', 'blocks.1.norm2.bias', 'blocks.1.norm2.weight', 'blocks.1.self_attn.in_proj_bias', 'blocks.1.self_attn.in_proj_weight', 'blocks.1.self_attn.out_proj.bias', 'blocks.1.self_attn.out_proj.weight', 'blocks.2.linear1.bias', 'blocks.2.linear1.weight', 'blocks.2.linear2.bias', 'blocks.2.linear2.weight', 'blocks.2.norm1.bias', 'blocks.2.norm1.weight', 'blocks.2.norm2.bias', 'bloc

tokenizer_config.json:   0%|          | 0.00/674 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Hello TinyGPT! Admir?).vaspectHer vain breakdown beaten Washington offending taste unacceptable healthier startling�CHATovtable chicken�essel Sask Grayag outright339 managerserveDi1988
