nishantup
/

gpt2-slm-instruct

{
  "architecture": "Raschka GPTModel (separate W_query/W_key/W_value, no weight tying)",
  "model_type": "instruction-tuned (SFT)",
  "base_model": "nishantup/nanogpt-slm-124m (gpt_slm_best.pth)",
  "model_config": {
    "vocab_size": 50257,
    "context_length": 256,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.0,
    "qkv_bias": false
  },
  "total_parameters_millions": 163.2,
  "tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
  "framework": "PyTorch",
  "prompt_format": "Alpaca (### Instruction / ### Input / ### Response)",
  "training": {
    "dataset": "Alpaca-format instruction dataset (1,100 examples)",
    "epochs": 2,
    "optimizer": "AdamW (lr=5e-5, weight_decay=0.1)",
    "max_length": 256
  }
}