Base-mini / config.json
QuantaSparkLabs's picture
Update config.json
3d213d5 verified
Raw
History Blame Contribute Delete
881 Bytes
{
"model_type": "basemini",
"model_name": "Basemini",
"architectures": [
"TinyGPTForCausalLM"
],
"auto_map": {
"AutoConfig": "configuration_tiny_gpt.TinyGPTConfig",
"AutoModel": "modeling_tiny_gpt.TinyGPTModel",
"AutoModelForCausalLM": "modeling_tiny_gpt.TinyGPTForCausalLM"
},
"vocab_size": 466,
"ctx_len": 128,
"max_position_embeddings": 128,
"n_layer": 4,
"num_hidden_layers": 4,
"n_head": 4,
"num_attention_heads": 4,
"n_embd": 384,
"hidden_size": 384,
"dropout": 0.0,
"attention_backend": "torch",
"available_attention_backends": [
"sage",
"torch",
"flash2",
"flash3"
],
"trained_attention_backend": "torch",
"torch_fallback": true,
"torch_dtype": "float16",
"transformers_version": "custom",
"pad_token_id": 0,
"bos_token_id": 5,
"eos_token_id": 6,
"sep_token_id": 2,
"unk_token_id": 1
}