{
  "architecture": "Raschka GPTModel (separate W_query/W_key/W_value, no weight tying)",
  "model_type": "instruction-tuned (SFT)",
  "base_model": "nishantup/nanogpt-slm-124m (gpt_slm_best.pth)",
  "model_config": {
    "vocab_size": 50257,
    "context_length": 256,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.0,
    "qkv_bias": false
  },
  "total_parameters_millions": 163.2,
  "tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
  "framework": "PyTorch",
  "prompt_format": "Alpaca (### Instruction / ### Input / ### Response)",
  "training": {
    "dataset": "Alpaca-format instruction dataset (1,100 examples)",
    "epochs": 2,
    "optimizer": "AdamW (lr=5e-5, weight_decay=0.1)",
    "max_length": 256
  }
}