Quark-135m-Bilingual / config.json
ThingsAI's picture
Update config.json
2dfe12b verified
raw
history blame contribute delete
766 Bytes
{
"model_type": "quark",
"architectures": ["QuarkForCausalLM"],
"auto_map": {
"AutoConfig": "configuration_quark.QuarkConfig",
"AutoModelForCausalLM": "modeling_quark.QuarkForCausalLM"
},
"vocab_size": 65537,
"d_model": 576,
"n_heads": 9,
"n_kv_heads": 3,
"n_layers": 30,
"d_ff": 1536,
"head_dim": 64,
"max_seq_len": 2048,
"rope_theta": 10000.0,
"rms_eps": 1e-5,
"qkv_bias": true,
"dropout": 0.0,
"torch_dtype": "bfloat16",
"tie_word_embeddings": true,
"sft_dataset": "MBZUAI/Bactrian-X (it+en)",
"sft_steps": 4000,
"sft_loss": 1.9,
"base_pretrain": "15.7B tokens bilingual IT+EN",
"tokenizer": "ThingAI/QuarkTokenizer",
"languages": ["it", "en"],
"special_tokens": ["<|user|>", "<|assistant|>", "<|end|>"]
}