dilip025
/

mini-gpt1

@@ -14,7 +14,7 @@ tags:
 ---
 # Mini GPT1 Clone
-This is a decoder-only transformer model (GPT1-style) trained from scratch using PyTorch.
 ## Model Details
@@ -32,14 +32,90 @@ Trained using `ByteLevelBPETokenizer` from the `tokenizers` library.
 ## Inference Example
 ```python
-from transformers import PreTrainedTokenizerFast, AutoModelForCausalLM
 import torch
-tokenizer = PreTrainedTokenizerFast(tokenizer_file="tokenizer/tokenizer.json")
-model = AutoModelForCausalLM.from_pretrained("dilip025/mini-gpt1")
-prompt = "Once upon a time,"
-input_ids = tokenizer(prompt, return_tensors="pt").input_ids
-outputs = model.generate(input_ids, max_length=50)
-print(tokenizer.decode(outputs[0], skip_special_tokens=True))

 ---
 # Mini GPT1 Clone
+This is a custom decoder-only transformer model (GPT1-style) trained from scratch using PyTorch.
 ## Model Details
 ## Inference Example
+Run this example in Google Colab: https://colab.research.google.com
 ```python
+# Clone the model repo (weights, tokenizer files, model code) unless it is already present.
+import os
+if not os.path.exists("mini-gpt1"):
+    !git clone https://huggingface.co/dilip025/mini-gpt1
+# Install dependencies -- uncomment the next line if they are not installed yet.
+# !pip install torch tokenizers
+# Make the cloned repo importable (guarded so re-running the cell adds no duplicate entry).
+import sys
+if "mini-gpt1" not in sys.path:
+    sys.path.append("mini-gpt1")
+# Imports
+from model_code.decoder_only_transformer import DecoderOnlyTransformer
+from tokenizers import ByteLevelBPETokenizer
 import torch
+# Load the byte-level BPE tokenizer from the vocab/merges files shipped in the repo.
+tokenizer = ByteLevelBPETokenizer(
+    "mini-gpt1/vocab.json",
+    "mini-gpt1/merges.txt",
+)
+# Model config -- presumably the values used at training time; load_state_dict below
+# raises if the checkpoint's tensor shapes do not match the model built from them.
+vocab_size = 35000
+max_len = 128
+embed_dim = 512
+num_heads = 8
+depth = 6
+ff_dim = 2048
+# Device: use the GPU when one is available, otherwise fall back to CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Build the model and load the trained weights.
+model = DecoderOnlyTransformer(
+    vocab_size=vocab_size,
+    max_len=max_len,
+    embed_dim=embed_dim,
+    num_heads=num_heads,
+    depth=depth,
+    ff_dim=ff_dim,
+).to(device)
+# weights_only=True restricts unpickling to tensors and plain containers, so a
+# downloaded checkpoint cannot execute arbitrary code while being loaded.
+state_dict = torch.load("mini-gpt1/pytorch_model.bin", map_location=device, weights_only=True)
+model.load_state_dict(state_dict)
+model.eval()
+# 💡 Your generation function with temperature & top-k
+def generate(model, tokenizer, prompt, max_length=50, temperature=1.0, top_k=50, max_context=None):
+    """Sample a continuation of `prompt` from `model`, one token at a time.
+
+    Args:
+        model: Module that maps a [1, T] tensor of token ids to logits of
+            shape [1, T, vocab_size]. It is switched to eval mode.
+        tokenizer: Object whose `encode(text)` result has an `.ids` list and
+            which provides `decode(list_of_ids)`. If it also has
+            `token_to_id` and knows an '[EOS]' token, sampling stops as soon
+            as that token is drawn.
+        prompt: Text to continue; must encode to at least one token.
+        max_length: Maximum number of new tokens to sample.
+        temperature: Positive value the logits are divided by before
+            sampling; values above 1 flatten the distribution.
+        top_k: Sample only from the `top_k` highest-scoring tokens
+            (clamped to the vocabulary size). None disables the filter.
+        max_context: If given, only the last `max_context` tokens are fed to
+            the model at each step. Useful when the model has a fixed
+            maximum sequence length. None feeds the whole sequence.
+
+    Returns:
+        Whatever `tokenizer.decode` returns for the prompt ids followed by
+        the sampled ids (including the '[EOS]' id if it was drawn).
+
+    Raises:
+        ValueError: If `temperature` is not positive, `top_k` or
+            `max_context` is smaller than 1, or the prompt encodes to no
+            tokens.
+    """
+    if temperature <= 0:
+        raise ValueError("temperature must be > 0")
+    if top_k is not None and top_k < 1:
+        raise ValueError("top_k must be >= 1 or None")
+    if max_context is not None and max_context < 1:
+        raise ValueError("max_context must be >= 1 or None")
+    model.eval()
+    device = next(model.parameters()).device
+    prompt_ids = tokenizer.encode(prompt).ids
+    if not prompt_ids:
+        raise ValueError("prompt must encode to at least one token")
+    generated = torch.tensor([prompt_ids], dtype=torch.long, device=device)
+    # Resolve the optional [EOS] id once instead of on every iteration.
+    eos_id = tokenizer.token_to_id('[EOS]') if hasattr(tokenizer, 'token_to_id') else None
+    # Inference only: skip autograd bookkeeping so memory does not grow per step.
+    with torch.no_grad():
+        for _ in range(max_length):
+            context = generated if max_context is None else generated[:, -max_context:]
+            logits = model(context)  # [1, T, vocab_size]
+            next_token_logits = logits[:, -1, :] / temperature
+            if top_k is not None:
+                # Clamp k so a top_k above the vocabulary size does not make topk raise.
+                k = min(top_k, next_token_logits.size(-1))
+                values, indices = torch.topk(next_token_logits, k)
+                # Everything outside the top k gets -inf, i.e. zero probability.
+                filtered = torch.full_like(next_token_logits, float('-inf'))
+                filtered.scatter_(1, indices, values)
+                next_token_logits = filtered
+            probs = torch.softmax(next_token_logits, dim=-1)
+            next_token = torch.multinomial(probs, num_samples=1)
+            generated = torch.cat((generated, next_token), dim=1)
+            # Optional: stop on [EOS] token
+            if eos_id is not None and next_token.item() == eos_id:
+                break
+    return tokenizer.decode(generated[0].tolist())
+# 🔥 Example inference -- run this in a second cell to see gibberish ;)
+prompt = "He told me a story"
+output = generate(
+    model,
+    tokenizer,
+    prompt,
+    max_length=100,
+    temperature=1.2,
+    top_k=40,
+)
+print("Generated Output:\n", output)