danielhanchen committed
Commit aad5bbd · verified · 1 Parent(s): 57d94f7

Add files using upload-large-folder tool

README.md CHANGED
@@ -1,16 +1,30 @@
  ---
- base_model: deepseek-ai/DeepSeek-Prover-V2-671B
- language:
- - en
- library_name: transformers
  tags:
- - deepseek
  - unsloth
- - transformers
- license: mit
+ base_model:
+ - deepseek-ai/DeepSeek-Prover-V2-671B
+ library_name: transformers
  ---
-
- # deepseek-ai/DeepSeek-Prover-V2-671B
-
+ <div>
+ <p style="margin-top: 0;margin-bottom: 0;">
+ <em><a href="https://docs.unsloth.ai/basics/unsloth-dynamic-v2.0-gguf">Unsloth Dynamic 2.0</a> achieves superior accuracy & outperforms other leading quants.</em>
+ </p>
+ <div style="display: flex; gap: 5px; align-items: center; ">
+ <a href="https://github.com/unslothai/unsloth/">
+ <img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
+ </a>
+ <a href="https://discord.gg/unsloth">
+ <img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
+ </a>
+ <a href="https://docs.unsloth.ai/basics/qwen3-how-to-run-and-fine-tune">
+ <img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
+ </a>
+ </div>
+ </div>
+
+ <!-- markdownlint-disable first-line-h1 -->
+ <!-- markdownlint-disable html -->
+ <!-- markdownlint-disable no-duplicate-header -->
 
  <div align="center">
  <img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek-V3" />
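The README change above only swaps front-matter metadata (base_model, library_name, tags) and adds the Unsloth banner, so everything this commit touches can be reviewed without pulling the 671B weights. A minimal sketch, assuming `huggingface_hub` is installed; the repository id is a placeholder (the base model named in the front matter is used purely as an example):

```python
# Sketch: download only the small metadata files touched by this commit.
# REPO_ID is a placeholder -- substitute this repository's own id.
from huggingface_hub import snapshot_download

REPO_ID = "deepseek-ai/DeepSeek-Prover-V2-671B"  # placeholder

local_dir = snapshot_download(
    repo_id=REPO_ID,
    allow_patterns=[
        "README.md",
        "config.json",
        "chat_template.jinja",
        "special_tokens_map.json",
        "tokenizer_config.json",
    ],
)
print(local_dir)  # path to the locally cached metadata files
```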
chat_template.jinja ADDED
@@ -0,0 +1,14 @@
+ {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
+
+ ' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '
+ ' + '```json' + '
+ ' + tool['function']['arguments'] + '
+ ' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '
+ ' + '```json' + '
+ ' + tool['function']['arguments'] + '
+ ' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'
+ ' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '
+ ' + '```json' + '
+ ' + tool['function']['arguments'] + '
+ ' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'
+ <|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}
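The template added above wraps each user turn as `<|User|>…<|Assistant|>` and only appends a trailing `<|Assistant|>` when the last message is not already a user turn. A minimal sketch of rendering it through `transformers`, assuming the base repo's tokenizer files ship this template; the expected output in the final comment is inferred from reading the template, not a captured run:

```python
# Sketch: render the new chat template with transformers' apply_chat_template.
# Assumes the tokenizer of deepseek-ai/DeepSeek-Prover-V2-671B carries this
# template (only tokenizer files are fetched, not the 671B weights).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Prover-V2-671B")

messages = [
    {"role": "system", "content": "You are a Lean 4 proof assistant."},
    {"role": "user", "content": "Prove that 1 + 1 = 2."},
]

prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Expected shape (inferred from the template above, not a captured run):
# <|begin▁of▁sentence|>You are a Lean 4 proof assistant.<|User|>Prove that 1 + 1 = 2.<|Assistant|>
```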
config.json CHANGED
@@ -13,6 +13,7 @@
  "eos_token_id": 1,
  "ep_size": 1,
  "first_k_dense_replace": 3,
+ "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 7168,
  "initializer_range": 0.02,
@@ -31,7 +32,10 @@
  "num_hidden_layers": 61,
  "num_key_value_heads": 128,
  "num_nextn_predict_layers": 1,
+ "pad_token_id": 2,
+ "pretraining_tp": 1,
  "q_lora_rank": 1536,
+ "qk_head_dim": 192,
  "qk_nope_head_dim": 128,
  "qk_rope_head_dim": 64,
  "quantization_config": {
@@ -44,13 +48,15 @@
  ]
  },
  "rms_norm_eps": 1e-06,
+ "rope_interleave": true,
  "rope_scaling": {
- "beta_fast": 32,
- "beta_slow": 1,
- "factor": 40,
+ "beta_fast": 32.0,
+ "beta_slow": 1.0,
+ "factor": 40.0,
  "mscale": 1.0,
  "mscale_all_dim": 1.0,
  "original_max_position_embeddings": 4096,
+ "rope_type": "yarn",
  "type": "yarn"
  },
  "rope_theta": 10000,
@@ -60,7 +66,8 @@
  "topk_group": 4,
  "topk_method": "noaux_tc",
  "torch_dtype": "bfloat16",
- "transformers_version": "4.46.3",
+ "transformers_version": "4.52.3",
+ "unsloth_fixed": true,
  "use_cache": true,
  "v_head_dim": 128,
  "vocab_size": 129280
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "bos_token": {
+ "content": "<|begin▁of▁sentence|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|end▁of▁sentence|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|▁pad▁|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
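A small sketch to check that the bos/eos/pad strings declared above are what the tokenizer reports, again assuming the base repo's tokenizer files; the ids in the comments come from the config.json diff in this commit (eos_token_id 1, pad_token_id 2), not from a captured run:

```python
# Sketch: confirm the special tokens from special_tokens_map.json are wired
# into the tokenizer (same base-repo assumption as the template example).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Prover-V2-671B")

print(repr(tokenizer.bos_token))  # '<|begin▁of▁sentence|>'
print(repr(tokenizer.eos_token))  # '<|end▁of▁sentence|>'  (eos_token_id 1 per config.json)
print(repr(tokenizer.pad_token))  # '<|▁pad▁|>'            (pad_token_id 2 per config.json)
```

Having a dedicated `<|▁pad▁|>` entry means batched inference or fine-tuning does not have to reuse the eos token for padding.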
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff