{
  "architecture": "nanoGPT (custom, trained from scratch)",
  "model_type": "classifier (binary spam detection)",
  "base_model": "nishantup/nanogpt-slm-124m",
  "model_config": {
    "block_size": 256,
    "vocab_size": 50257,
    "n_layer": 12,
    "n_head": 12,
    "n_embd": 768,
    "dropout": 0.0,
    "bias": true
  },
  "classifier_config": {
    "num_classes": 2,
    "labels": {
      "0": "not spam",
      "1": "spam"
    },
    "max_length": 120,
    "pad_token_id": 50256,
    "classification_head": "lm_head replaced with Linear(768, 2)"
  },
  "total_parameters_millions": 123.9,
  "tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
  "framework": "PyTorch",
  "training": {
    "dataset": "UCI SMS Spam Collection (balanced: 747 spam + 747 ham)",
    "split": "70% train / 10% test / 20% validation",
    "epochs": 5,
    "optimizer": "AdamW (lr=5e-5, weight_decay=0.1)",
    "max_length": 120,
    "frozen_layers": "All except last transformer block, final LayerNorm, and lm_head"
  }
}