SamMikaelson committed on
Commit
520014b
·
verified ·
1 Parent(s): e174be1

Add config.json with auto_map for standalone loading

Browse files
Files changed (1) hide show
  1. config.json +119 -0
config.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepseek-ai/DeepSeek-OCR",
3
+ "candidate_resolutions": [
4
+ [
5
+ 1024,
6
+ 1024
7
+ ]
8
+ ],
9
+ "global_view_pos": "head",
10
+ "architectures": [
11
+ "DeepseekOCRForCausalLM"
12
+ ],
13
+ "auto_map": {
14
+ "AutoConfig": "modeling_deepseekocr.DeepseekOCRConfig",
15
+ "AutoModel": "modeling_deepseekocr.DeepseekOCRForCausalLM",
16
+ "AutoModelForCausalLM": "modeling_deepseekocr.DeepseekOCRForCausalLM"
17
+ },
18
+ "language_config": {
19
+ "architectures": [
20
+ "DeepseekV2ForCausalLM"
21
+ ],
22
+ "auto_map": {
23
+ "AutoConfig": "configuration_deepseekv2.DeepseekV2Config",
24
+ "AutoModel": "modeling_deepseek.DeepseekV2Model",
25
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
26
+ },
27
+ "bos_token_id": 0,
28
+ "eos_token_id": 1,
29
+ "first_k_dense_replace": 1,
30
+ "hidden_size": 1280,
31
+ "intermediate_size": 6848,
32
+ "kv_lora_rank": null,
33
+ "lm_head": true,
34
+ "max_position_embeddings": 8192,
35
+ "moe_intermediate_size": 896,
36
+ "n_group": 1,
37
+ "n_routed_experts": 64,
38
+ "n_shared_experts": 2,
39
+ "num_attention_heads": 10,
40
+ "num_experts_per_tok": 6,
41
+ "num_hidden_layers": 12,
42
+ "num_key_value_heads": 10,
43
+ "q_lora_rank": null,
44
+ "qk_nope_head_dim": 0,
45
+ "qk_rope_head_dim": 0,
46
+ "rm_head": false,
47
+ "topk_group": 1,
48
+ "topk_method": "greedy",
49
+ "torch_dtype": "bfloat16",
50
+ "use_mla": false,
51
+ "v_head_dim": 0,
52
+ "vocab_size": 129280
53
+ },
54
+ "model_type": "deepseek_vl_v2",
55
+ "projector_config": {
56
+ "input_dim": 2048,
57
+ "model_type": "mlp_projector",
58
+ "n_embed": 1280,
59
+ "projector_type": "linear"
60
+ },
61
+ "tile_tag": "2D",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.46.3",
64
+ "vision_config": {
65
+ "image_size": 1024,
66
+ "mlp_ratio": 3.7362,
67
+ "model_name": "deeplip_b_l",
68
+ "model_type": "vision",
69
+ "width": {
70
+ "clip-l-14-224": {
71
+ "heads": 16,
72
+ "image_size": 224,
73
+ "layers": 24,
74
+ "patch_size": 14,
75
+ "width": 1024
76
+ },
77
+ "sam_vit_b": {
78
+ "downsample_channels": [
79
+ 512,
80
+ 1024
81
+ ],
82
+ "global_attn_indexes": [
83
+ 2,
84
+ 5,
85
+ 8,
86
+ 11
87
+ ],
88
+ "heads": 12,
89
+ "layers": 12,
90
+ "width": 768
91
+ }
92
+ }
93
+ },
94
+ "bos_token_id": 0,
95
+ "eos_token_id": 1,
96
+ "first_k_dense_replace": 1,
97
+ "hidden_size": 1280,
98
+ "intermediate_size": 6848,
99
+ "kv_lora_rank": null,
100
+ "lm_head": true,
101
+ "max_position_embeddings": 8192,
102
+ "moe_intermediate_size": 896,
103
+ "n_group": 1,
104
+ "n_routed_experts": 64,
105
+ "n_shared_experts": 2,
106
+ "num_attention_heads": 10,
107
+ "num_experts_per_tok": 6,
108
+ "num_hidden_layers": 12,
109
+ "num_key_value_heads": 10,
110
+ "q_lora_rank": null,
111
+ "qk_nope_head_dim": 0,
112
+ "qk_rope_head_dim": 0,
113
+ "rm_head": false,
114
+ "topk_group": 1,
115
+ "topk_method": "greedy",
116
+ "use_mla": false,
117
+ "v_head_dim": 0,
118
+ "vocab_size": 129280
119
+ }