llama3.2-1B-intel-npu / config.json
{
  "_name_or_path": "llama3_2_1b",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "asym": false,
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bigdl_transformers_low_bit": "sym_int4_rtn",
  "bos_token_id": 128000,
  "const_parameter": true,
  "cos_sin_input": true,
  "embedding_post": true,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "fused_layers": 2,
  "group_size": 0,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "kv_len": 1023,
  "lm_head_low_bit": "sym_int4_rtn",
  "max_position_embeddings": 131072,
  "max_prompt_len": 512,
  "mixed_precision": false,
  "mlp_bias": false,
  "model_type": "llama",
  "n_splits_down_proj": 1,
  "n_splits_linear": 1,
  "num_attention_heads": 32,
  "num_head": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "optimize_model": true,
  "pretraining_tp": 1,
  "qkv_bias": false,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "float16",
  "transformers_version": "4.45.0",
  "transpose_value_cache": true,
  "use_cache": true,
  "use_prefill_sdp": false,
  "vocab_size": 128256,
  "weight_idx": 5,
  "weight_num": 7
}