# Modded NanoGPT Model

This is a GPT-2 style model trained with modifications from modded-nanogpt.

## Model Config

- Layers: 2
- Heads: 4
- Embedding dimension: 64
- Vocab size: 50304
- Squared MLP: False
- Bilinear: False
- Gated: False
- Expansion factor: 4

## Training

- Training step: 500

## Usage

```python
from huggingface_hub import hf_hub_download
import torch
from train_gpt2 import GPT, GPTConfig
import json

# Download config
config_path = hf_hub_download(repo_id="Elriggs/gpt2-debug-baseline", filename="config.json")
with open(config_path) as f:
    config_dict = json.load(f)

# Remove non-GPTConfig fields
config_dict.pop('step', None)

# Create model
config = GPTConfig(**config_dict)
model = GPT(config)

# Download and load weights
weights_path = hf_hub_download(repo_id="Elriggs/gpt2-debug-baseline", filename="pytorch_model.bin")
state_dict = torch.load(weights_path, map_location='cpu')
model.load_state_dict(state_dict)
model.eval()
```