SKNahin committed
Commit ce6429f · verified · 1 Parent(s): c3c9ad7

Update config.json

Files changed (1): config.json (+18 -18)
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/home/azureuser/phi4/phi4-works/phi4-mini/output_bn_new-13/checkpoint-4000",
+  "_name_or_path": "Phi-4-multimodal-instruct",
   "architectures": [
     "Phi4MMForCausalLM"
   ],
@@ -50,7 +50,7 @@
   "auto_map": {
     "AutoConfig": "configuration_phi4mm.Phi4MMConfig",
     "AutoModelForCausalLM": "modeling_phi4mm.Phi4MMForCausalLM",
-    "AutoTokenizer": "microsoft/Phi-4-multimodal-instruct--Xenova/gpt-4o"
+    "AutoTokenizer": "Xenova/gpt-4o"
   },
   "bos_token_id": 199999,
   "embd_layer": {
@@ -80,11 +80,22 @@
   "full_attn_mod": 1,
   "hidden_act": "silu",
   "hidden_size": 3072,
-  "img_processor": null,
   "initializer_range": 0.02,
   "intermediate_size": 8192,
   "interpolate_factor": 1,
   "lm_head_bias": false,
+  "vision_lora": {
+    "dp": 0.0,
+    "layer": "layers.*((self_attn\\.(qkv_proj|o_proj))|(mlp\\.(gate_up|down)_proj))",
+    "lora_alpha": 512,
+    "r": 256
+  },
+  "speech_lora": {
+    "dp": 0.01,
+    "layer": "((layers.*self_attn\\.(qkv|o)_proj)|(layers.*mlp\\.(gate_up|down)_proj))",
+    "lora_alpha": 640,
+    "r": 320
+  },
   "max_position_embeddings": 131072,
   "mlp_bias": false,
   "model_type": "phi4mm",
@@ -201,21 +212,10 @@
   },
   "rope_theta": 10000.0,
   "sliding_window": 262144,
-  "speech_lora": {
-    "dp": 0.01,
-    "layer": "((layers.*self_attn\\.(qkv|o)_proj)|(layers.*mlp\\.(gate_up|down)_proj))",
-    "lora_alpha": 640,
-    "r": 320
-  },
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.48.3",
+  "transformers_version": "4.46.1",
   "use_cache": true,
-  "vision_lora": {
-    "dp": 0.0,
-    "layer": "layers.*((self_attn\\.(qkv_proj|o_proj))|(mlp\\.(gate_up|down)_proj))",
-    "lora_alpha": 512,
-    "r": 256
-  },
-  "vocab_size": 200064
-}
+  "vocab_size": 200064,
+  "_attn_implementation": "flash_attention_2"
+}
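
A note on the retargeted `auto_map`: with `"AutoTokenizer": "Xenova/gpt-4o"`, tokenizer resolution no longer goes through the composite `microsoft/Phi-4-multimodal-instruct--Xenova/gpt-4o` id. Below is a minimal loading sketch, assuming this config.json ships in a repo alongside the `configuration_phi4mm.py` / `modeling_phi4mm.py` files it names; the repo id is a placeholder, not the actual repository.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder repo id -- substitute the repository that carries this config.json.
repo = "your-org/your-phi4mm-repo"

# trust_remote_code=True is required because "auto_map" routes AutoConfig and
# AutoModelForCausalLM to the custom configuration_phi4mm / modeling_phi4mm
# code shipped with the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)
```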
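The `vision_lora` and `speech_lora` blocks are moved earlier in the file with their contents unchanged. Each selects target modules via the `layer` regex; `r` is the adapter rank, `lora_alpha` the scaling factor (2×r for both adapters), and `dp` presumably the dropout rate. A small sketch of which module names those patterns match, using representative (hypothetical) Phi-4 module paths:

```python
import re

# Regexes as they appear in config.json ("\\." in JSON is "\." in the pattern).
vision_pat = r"layers.*((self_attn\.(qkv_proj|o_proj))|(mlp\.(gate_up|down)_proj))"
speech_pat = r"((layers.*self_attn\.(qkv|o)_proj)|(layers.*mlp\.(gate_up|down)_proj))"

# Hypothetical, representative module paths for illustration only.
candidates = [
    "model.layers.0.self_attn.qkv_proj",  # attention in-projection
    "model.layers.0.self_attn.o_proj",    # attention out-projection
    "model.layers.0.mlp.gate_up_proj",    # fused MLP gate/up projection
    "model.layers.0.mlp.down_proj",       # MLP down projection
    "model.layers.0.input_layernorm",     # not a LoRA target
]

for name in candidates:
    v = bool(re.search(vision_pat, name))
    s = bool(re.search(speech_pat, name))
    print(f"{name:40s} vision={v} speech={s}")
```

Both adapters hit the same projection families (qkv/o and gate_up/down); they differ only in rank, scaling, and dropout.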
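Finally, the added `"_attn_implementation": "flash_attention_2"` makes FlashAttention-2 the default at load time, alongside the existing `"torch_dtype": "bfloat16"`. A hedged sketch of overriding that default on hardware without FlashAttention support, via the standard `from_pretrained` kwarg (placeholder repo id as above):

```python
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "your-org/your-phi4mm-repo",     # placeholder repo id
    torch_dtype=torch.bfloat16,      # matches "torch_dtype" in config.json
    attn_implementation="eager",     # override the flash_attention_2 default
    trust_remote_code=True,
)
```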