prince-canuma committed on
Commit
ad07a2d
·
verified ·
1 Parent(s): 0f5a231

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -9,11 +9,12 @@ tags:
9
  - custom_code
10
  - mlx
11
  license: mit
 
12
  ---
13
 
14
  # mlx-community/DeepSeek-OCR-6bit
15
- This model was converted to MLX format from [`deepseek-ai/DeepSeek-OCR`](https://huggingface.co/deepseek-ai/DeepSeek-OCR) using mlx-vlm version **0.3.5**.
16
- Refer to the [original model card](https://huggingface.co/deepseek-ai/DeepSeek-OCR) for more details on the model.
17
  ## Use with mlx
18
 
19
  ```bash
 
9
  - custom_code
10
  - mlx
11
  license: mit
12
+ library_name: transformers
13
  ---
14
 
15
  # mlx-community/DeepSeek-OCR-6bit
16
+ This model was converted to MLX format from [`prince-canuma/DeepSeek-OCR`](https://huggingface.co/prince-canuma/DeepSeek-OCR) using mlx-vlm version **0.3.10**.
17
+ Refer to the [original model card](https://huggingface.co/prince-canuma/DeepSeek-OCR) for more details on the model.
18
  ## Use with mlx
19
 
20
  ```bash
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% for message in messages %}{% if message['role'] == 'user' %}{% elif message['role'] == 'assistant' %}{% endif %}{{message['content']}} {% endfor %}{% if add_generation_prompt %}{% endif %}
config.json CHANGED
@@ -1,14 +1,11 @@
1
  {
2
- "_attn_implementation_autoset": false,
3
- "add_cross_attention": false,
4
  "architectures": [
5
  "DeepseekOCRForCausalLM"
6
  ],
7
- "attention_bias": false,
8
- "attention_dropout": 0.0,
9
- "aux_loss_alpha": 0.001,
10
- "bad_words_ids": null,
11
- "begin_suppress_tokens": null,
12
  "bos_token_id": 0,
13
  "candidate_resolutions": [
14
  [
@@ -16,40 +13,21 @@
16
  1024
17
  ]
18
  ],
19
- "chunk_size_feed_forward": 0,
20
- "cross_attention_hidden_size": null,
21
- "decoder_start_token_id": null,
22
- "diversity_penalty": 0.0,
23
- "do_sample": false,
24
- "early_stopping": false,
25
- "encoder_no_repeat_ngram_size": 0,
26
  "eos_token_id": 1,
27
- "ep_size": 1,
28
- "exponential_decay_length_penalty": null,
29
- "finetuning_task": null,
30
  "first_k_dense_replace": 1,
31
- "forced_bos_token_id": null,
32
- "forced_eos_token_id": null,
33
  "global_view_pos": "head",
34
- "hidden_act": "silu",
35
  "hidden_size": 1280,
36
- "id2label": {
37
- "0": "LABEL_0",
38
- "1": "LABEL_1"
39
- },
40
- "initializer_range": 0.02,
41
  "intermediate_size": 6848,
42
- "is_decoder": false,
43
- "is_encoder_decoder": false,
44
  "kv_lora_rank": null,
45
- "label2id": {
46
- "LABEL_0": 0,
47
- "LABEL_1": 1
48
- },
49
  "language_config": {
50
  "architectures": [
51
  "DeepseekV2ForCausalLM"
52
  ],
 
 
 
 
 
53
  "bos_token_id": 0,
54
  "eos_token_id": 1,
55
  "first_k_dense_replace": 1,
@@ -77,40 +55,23 @@
77
  "v_head_dim": 0,
78
  "vocab_size": 129280
79
  },
80
- "length_penalty": 1.0,
81
  "lm_head": true,
82
- "max_length": 20,
83
  "max_position_embeddings": 8192,
84
- "min_length": 0,
85
- "model_type": "DeepseekOCR",
86
  "moe_intermediate_size": 896,
87
- "moe_layer_freq": 1,
88
  "n_group": 1,
89
  "n_routed_experts": 64,
90
  "n_shared_experts": 2,
91
- "no_repeat_ngram_size": 0,
92
- "norm_topk_prob": false,
93
  "num_attention_heads": 10,
94
- "num_beam_groups": 1,
95
- "num_beams": 1,
96
  "num_experts_per_tok": 6,
97
  "num_hidden_layers": 12,
98
  "num_key_value_heads": 10,
99
- "num_return_sequences": 1,
100
- "output_attentions": false,
101
- "output_hidden_states": false,
102
- "output_scores": false,
103
- "pad_token_id": null,
104
- "prefix": null,
105
- "pretraining_tp": 1,
106
- "problem_type": null,
107
  "projector_config": {
108
  "input_dim": 2048,
109
  "model_type": "mlp_projector",
110
  "n_embed": 1280,
111
  "projector_type": "linear"
112
  },
113
- "pruned_heads": {},
114
  "q_lora_rank": null,
115
  "qk_nope_head_dim": 0,
116
  "qk_rope_head_dim": 0,
@@ -124,35 +85,11 @@
124
  "bits": 6,
125
  "mode": "affine"
126
  },
127
- "remove_invalid_values": false,
128
- "repetition_penalty": 1.0,
129
- "return_dict": true,
130
- "return_dict_in_generate": false,
131
  "rm_head": false,
132
- "rms_norm_eps": 1e-06,
133
- "rope_scaling": null,
134
- "rope_theta": 10000.0,
135
- "routed_scaling_factor": 1.0,
136
- "scoring_func": "softmax",
137
- "sep_token_id": null,
138
- "seq_aux": true,
139
- "suppress_tokens": null,
140
- "task_specific_params": null,
141
- "temperature": 1.0,
142
- "tf_legacy_loss": false,
143
- "tie_encoder_decoder": false,
144
- "tie_word_embeddings": false,
145
  "tile_tag": "2D",
146
- "tokenizer_class": null,
147
- "top_k": 50,
148
- "top_p": 1.0,
149
  "topk_group": 1,
150
  "topk_method": "greedy",
151
- "torchscript": false,
152
  "transformers_version": "4.46.3",
153
- "typical_p": 1.0,
154
- "use_bfloat16": false,
155
- "use_cache": true,
156
  "use_mla": false,
157
  "v_head_dim": 0,
158
  "vision_config": {
 
1
  {
 
 
2
  "architectures": [
3
  "DeepseekOCRForCausalLM"
4
  ],
5
+ "auto_map": {
6
+ "AutoConfig": "modeling_deepseekocr.DeepseekOCRConfig",
7
+ "AutoModel": "modeling_deepseekocr.DeepseekOCRForCausalLM"
8
+ },
 
9
  "bos_token_id": 0,
10
  "candidate_resolutions": [
11
  [
 
13
  1024
14
  ]
15
  ],
 
 
 
 
 
 
 
16
  "eos_token_id": 1,
 
 
 
17
  "first_k_dense_replace": 1,
 
 
18
  "global_view_pos": "head",
 
19
  "hidden_size": 1280,
 
 
 
 
 
20
  "intermediate_size": 6848,
 
 
21
  "kv_lora_rank": null,
 
 
 
 
22
  "language_config": {
23
  "architectures": [
24
  "DeepseekV2ForCausalLM"
25
  ],
26
+ "auto_map": {
27
+ "AutoConfig": "configuration_deepseekv2.DeepseekV2Config",
28
+ "AutoModel": "modeling_deepseek.DeepseekV2Model",
29
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
30
+ },
31
  "bos_token_id": 0,
32
  "eos_token_id": 1,
33
  "first_k_dense_replace": 1,
 
55
  "v_head_dim": 0,
56
  "vocab_size": 129280
57
  },
 
58
  "lm_head": true,
 
59
  "max_position_embeddings": 8192,
60
+ "model_type": "deepseekocr",
 
61
  "moe_intermediate_size": 896,
 
62
  "n_group": 1,
63
  "n_routed_experts": 64,
64
  "n_shared_experts": 2,
 
 
65
  "num_attention_heads": 10,
 
 
66
  "num_experts_per_tok": 6,
67
  "num_hidden_layers": 12,
68
  "num_key_value_heads": 10,
 
 
 
 
 
 
 
 
69
  "projector_config": {
70
  "input_dim": 2048,
71
  "model_type": "mlp_projector",
72
  "n_embed": 1280,
73
  "projector_type": "linear"
74
  },
 
75
  "q_lora_rank": null,
76
  "qk_nope_head_dim": 0,
77
  "qk_rope_head_dim": 0,
 
85
  "bits": 6,
86
  "mode": "affine"
87
  },
 
 
 
 
88
  "rm_head": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  "tile_tag": "2D",
 
 
 
90
  "topk_group": 1,
91
  "topk_method": "greedy",
 
92
  "transformers_version": "4.46.3",
 
 
 
93
  "use_mla": false,
94
  "v_head_dim": 0,
95
  "vision_config": {
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37a43d9f02d100675c38a78f9f7b042e36008313aea91b3aec78aa66a30e8ce2
3
- size 3084471389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e820660fdfd4de509bfa4affad11a34891bd8654055b391f41a0c752daf09073
3
+ size 3185320765
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff