Update config.json
config.json CHANGED (+18 -18)
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "Phi-4-multimodal-instruct",
   "architectures": [
     "Phi4MMForCausalLM"
   ],
@@ -50,7 +50,7 @@
   "auto_map": {
     "AutoConfig": "configuration_phi4mm.Phi4MMConfig",
     "AutoModelForCausalLM": "modeling_phi4mm.Phi4MMForCausalLM",
-    "AutoTokenizer": "
+    "AutoTokenizer": "Xenova/gpt-4o"
   },
   "bos_token_id": 199999,
   "embd_layer": {
@@ -80,11 +80,22 @@
   "full_attn_mod": 1,
   "hidden_act": "silu",
   "hidden_size": 3072,
-  "img_processor": null,
   "initializer_range": 0.02,
   "intermediate_size": 8192,
   "interpolate_factor": 1,
   "lm_head_bias": false,
+  "vision_lora": {
+    "dp": 0.0,
+    "layer": "layers.*((self_attn\\.(qkv_proj|o_proj))|(mlp\\.(gate_up|down)_proj))",
+    "lora_alpha": 512,
+    "r": 256
+  },
+  "speech_lora": {
+    "dp": 0.01,
+    "layer": "((layers.*self_attn\\.(qkv|o)_proj)|(layers.*mlp\\.(gate_up|down)_proj))",
+    "lora_alpha": 640,
+    "r": 320
+  },
   "max_position_embeddings": 131072,
   "mlp_bias": false,
   "model_type": "phi4mm",
@@ -201,21 +212,10 @@
   },
   "rope_theta": 10000.0,
   "sliding_window": 262144,
-  "speech_lora": {
-    "dp": 0.01,
-    "layer": "((layers.*self_attn\\.(qkv|o)_proj)|(layers.*mlp\\.(gate_up|down)_proj))",
-    "lora_alpha": 640,
-    "r": 320
-  },
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.46.1",
   "use_cache": true,
-  "vision_lora": {
-    "dp": 0.0,
-    "layer": "layers.*((self_attn\\.(qkv_proj|o_proj))|(mlp\\.(gate_up|down)_proj))",
-    "lora_alpha": 512,
-    "r": 256
-  },
-  "vocab_size": 200064
-}
+  "vocab_size": 200064,
+  "_attn_implementation": "flash_attention_2"
+}
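
For context, a minimal loading sketch (not part of the commit). It assumes this config.json ships in the microsoft/Phi-4-multimodal-instruct repository together with the configuration_phi4mm.py and modeling_phi4mm.py files referenced by "auto_map", which is why trust_remote_code=True is required; the repo id and the availability of flash-attn are assumptions, not stated in the diff.

import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "microsoft/Phi-4-multimodal-instruct"  # assumed repo id for this config

# "auto_map" routes AutoConfig / AutoModelForCausalLM to the custom Phi4MM classes
# shipped in the repo, so remote code must be trusted.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type)  # "phi4mm"

# The commit bakes "_attn_implementation": "flash_attention_2" into the config;
# the same choice can be requested explicitly at load time (needs flash-attn installed).
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in the config
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
)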
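The "layer" fields in the relocated vision_lora / speech_lora blocks read as regular expressions over module names (the doubled backslashes are JSON string escaping). Below is a small illustration under that assumption; the module names are hypothetical examples of typical decoder-layer naming, not taken from this repo.

import re

# Patterns copied from the config above (JSON "\\." unescapes to "\.").
VISION_LORA_LAYER = r"layers.*((self_attn\.(qkv_proj|o_proj))|(mlp\.(gate_up|down)_proj))"
SPEECH_LORA_LAYER = r"((layers.*self_attn\.(qkv|o)_proj)|(layers.*mlp\.(gate_up|down)_proj))"

# Hypothetical module names, for illustration only.
names = [
    "model.layers.0.self_attn.qkv_proj",
    "model.layers.0.self_attn.o_proj",
    "model.layers.0.mlp.gate_up_proj",
    "model.layers.0.mlp.down_proj",
    "model.embed_tokens",  # should not be targeted by either adapter
]

for name in names:
    vision = bool(re.search(VISION_LORA_LAYER, name))
    speech = bool(re.search(SPEECH_LORA_LAYER, name))
    print(f"{name}: vision_lora={vision} speech_lora={speech}")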