{
  "model_type": "quark",
  "architectures": ["QuarkForCausalLM"],
  "auto_map": {
    "AutoConfig": "configuration_quark.QuarkConfig",
    "AutoModelForCausalLM": "modeling_quark.QuarkForCausalLM"
  },
  "vocab_size": 65537,
  "d_model": 576,
  "n_heads": 9,
  "n_kv_heads": 3,
  "n_layers": 30,
  "d_ff": 1536,
  "head_dim": 64,
  "max_seq_len": 2048,
  "rope_theta": 10000.0,
  "rms_eps": 1e-5,
  "qkv_bias": true,
  "dropout": 0.0,
  "torch_dtype": "bfloat16",
  "tie_word_embeddings": true,
  "sft_dataset": "MBZUAI/Bactrian-X (it+en)",
  "sft_steps": 4000,
  "sft_loss": 1.9,
  "base_pretrain": "15.7B tokens bilingual IT+EN",
  "tokenizer": "ThingAI/QuarkTokenizer",
  "languages": ["it", "en"],
  "special_tokens": ["<|user|>", "<|assistant|>", "<|end|>"]
}