Quark-270m-Base / config.json
ThingsAI's picture
Upload 5 files
aef7e9d verified
raw
history blame contribute delete
560 Bytes
{
"model_type": "quark",
"architectures": [
"QuarkForCausalLM"
],
"auto_map": {
"AutoConfig": "configuration_quark.QuarkConfig",
"AutoModelForCausalLM": "modeling_quark.QuarkForCausalLM"
},
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"pretrain_step": 16000,
"pretrain_loss": 4.5235,
"vocab_size": 65536,
"d_model": 768,
"n_heads": 12,
"n_kv_heads": 4,
"n_layers": 32,
"d_ff": 2048,
"head_dim": 64,
"max_seq_len": 2048,
"rope_theta": 10000.0,
"rms_eps": 1e-05,
"qkv_bias": true,
"dropout": 0.0
}