shenzhanyou committed on
Commit
4e2b671
·
verified ·
1 Parent(s): bd84fdc

Add files using upload-large-folder tool

Browse files
Files changed (49) hide show
  1. config.json +55 -0
  2. configuration.json +1 -0
  3. generation_config.json +9 -0
  4. model-00003-of-000163.safetensors +3 -0
  5. model-00004-of-000163.safetensors +3 -0
  6. model-00005-of-000163.safetensors +3 -0
  7. model-00010-of-000163.safetensors +3 -0
  8. model-00012-of-000163.safetensors +3 -0
  9. model-00014-of-000163.safetensors +3 -0
  10. model-00027-of-000163.safetensors +3 -0
  11. model-00035-of-000163.safetensors +3 -0
  12. model-00039-of-000163.safetensors +3 -0
  13. model-00040-of-000163.safetensors +3 -0
  14. model-00041-of-000163.safetensors +3 -0
  15. model-00049-of-000163.safetensors +3 -0
  16. model-00050-of-000163.safetensors +3 -0
  17. model-00056-of-000163.safetensors +3 -0
  18. model-00059-of-000163.safetensors +3 -0
  19. model-00061-of-000163.safetensors +3 -0
  20. model-00062-of-000163.safetensors +3 -0
  21. model-00077-of-000163.safetensors +3 -0
  22. model-00078-of-000163.safetensors +3 -0
  23. model-00079-of-000163.safetensors +3 -0
  24. model-00084-of-000163.safetensors +3 -0
  25. model-00085-of-000163.safetensors +3 -0
  26. model-00086-of-000163.safetensors +3 -0
  27. model-00090-of-000163.safetensors +3 -0
  28. model-00092-of-000163.safetensors +3 -0
  29. model-00096-of-000163.safetensors +3 -0
  30. model-00103-of-000163.safetensors +3 -0
  31. model-00104-of-000163.safetensors +3 -0
  32. model-00111-of-000163.safetensors +3 -0
  33. model-00116-of-000163.safetensors +3 -0
  34. model-00118-of-000163.safetensors +3 -0
  35. model-00126-of-000163.safetensors +3 -0
  36. model-00127-of-000163.safetensors +3 -0
  37. model-00130-of-000163.safetensors +3 -0
  38. model-00131-of-000163.safetensors +3 -0
  39. model-00132-of-000163.safetensors +3 -0
  40. model-00136-of-000163.safetensors +3 -0
  41. model-00137-of-000163.safetensors +3 -0
  42. model-00139-of-000163.safetensors +3 -0
  43. model-00151-of-000163.safetensors +3 -0
  44. model-00153-of-000163.safetensors +3 -0
  45. model-00155-of-000163.safetensors +3 -0
  46. model-00161-of-000163.safetensors +3 -0
  47. model.safetensors.index.json +0 -0
  48. tokenizer.json +0 -0
  49. tokenizer_config.json +34 -0
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 1,
9
+ "ep_size": 1,
10
+ "first_k_dense_replace": 3,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 7168,
13
+ "index_head_dim": 128,
14
+ "index_n_heads": 64,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 18432,
17
+ "kv_lora_rank": 512,
18
+ "max_position_embeddings": 163840,
19
+ "model_type": "deepseek_v3",
20
+ "moe_intermediate_size": 2048,
21
+ "moe_layer_freq": 1,
22
+ "n_group": 8,
23
+ "n_routed_experts": 256,
24
+ "n_shared_experts": 1,
25
+ "norm_topk_prob": true,
26
+ "num_attention_heads": 128,
27
+ "num_experts_per_tok": 8,
28
+ "num_hidden_layers": 61,
29
+ "num_key_value_heads": 128,
30
+ "num_nextn_predict_layers": 1,
31
+ "q_lora_rank": 1536,
32
+ "qk_nope_head_dim": 128,
33
+ "qk_rope_head_dim": 64,
34
+ "rms_norm_eps": 1e-06,
35
+ "rope_scaling": {
36
+ "beta_fast": 32,
37
+ "beta_slow": 1,
38
+ "factor": 40,
39
+ "mscale": 1.0,
40
+ "mscale_all_dim": 1.0,
41
+ "original_max_position_embeddings": 4096,
42
+ "type": "yarn"
43
+ },
44
+ "rope_theta": 10000,
45
+ "routed_scaling_factor": 2.5,
46
+ "scoring_func": "sigmoid",
47
+ "tie_word_embeddings": false,
48
+ "topk_group": 4,
49
+ "topk_method": "noaux_tc",
50
+ "torch_dtype": "bfloat16",
51
+ "transformers_version": "4.44.2",
52
+ "use_cache": true,
53
+ "v_head_dim": 128,
54
+ "vocab_size": 129280
55
+ }
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework": "pytorch", "task": "others", "allow_remote": true}
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "do_sample": true,
6
+ "temperature": 1.0,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.46.3"
9
+ }
model-00003-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1d2b328ca326dab29e4618ab6426bd2e4b8e0ffaf244d2ffc7ee9a5827d30e
3
+ size 8602554152
model-00004-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d87f6e612b980b7e97ec4797401876fabcc79a252350ba43e6e8adc144d1a86
3
+ size 8569458784
model-00005-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62690a06fe52986037446bda5d227fc2ff22db705da6d5338f6d53845b5eff61
3
+ size 8602554040
model-00010-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9455ccd9e3aa141943b5162d5821ff31670a96c4ecac14f39bd29401c9bab7
3
+ size 8602553936
model-00012-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d4055de0379fe5d067e5fcf48e633cfb032d06e615ee3991dd6a86fd71f2f1f
3
+ size 2965443072
model-00014-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c0c8c667b8e6def0a7c7f8c1e93b0de0a675dfdaf55745678c591017e50813
3
+ size 8602554128
model-00027-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1f0ff3d55316aaf4f540b0c548a651da4e9d61a45e1edbede9e61d7572daa2
3
+ size 8602554312
model-00035-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35781342d3359759a6376e58c54e8d00add7bab24692982cd26128509ed2c5e1
3
+ size 8569401424
model-00039-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeffdf24a29567054e8bd8d66714ef8d06eeeb404aba23b3f3e1a2e64a835220
3
+ size 8602554448
model-00040-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b44e7148e003f36c2c19900c235e4a2fd616cf5863b7768e1cd6d6c8e7aafd9
3
+ size 8569459088
model-00041-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1f0f57aa18f3f87af03e8c1b5497ce01e1271ccc94e4bf2d3ea9eee72a679c8
3
+ size 8602554320
model-00049-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f146bdd8f3955426077c00d21742d37995a413231a5962d4121fbc81fd6680e9
3
+ size 8602554312
model-00050-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79fcf33b868b8b36c3225342522728968a79e79fc2ce3a9dde06a8b80370372b
3
+ size 8602554448
model-00056-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a15307377d70dc557d51617703be037e9e871e6233d232471a3080ae0d575836
3
+ size 3728809784
model-00059-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9483ffb2cc87a676a46767f179b3062c9e1a842b4e88f549c2f6f62c3f8ee774
3
+ size 8569459184
model-00061-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd122c96b548af179e0b8bc74c44b6041ad8ea7320f51950d9d60e77db3d798
3
+ size 8602554448
model-00062-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9614734373a011e6d9e2295094809e332004d945dcc3ffe268e294ca3ef8ad7d
3
+ size 8569459088
model-00077-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b553ec9e3a712949c233ff2ae27ce046d9f18249771a39e15e02be6d1f716bf8
3
+ size 8602554448
model-00078-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e32be473ad68e36b18012e683f2bba1726b382215310f51c5c98a9f585a77565
3
+ size 3728809784
model-00079-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21591177bcda9be2ac8e2d83f9b19f7e4b8243cfba7fe6ac7888f3f5aaab8514
3
+ size 8569401424
model-00084-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b1879ac207e33a6f25f9404fa561d5688633209fac9d9e1319f9bf864afe85
3
+ size 8569459088
model-00085-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e582e58537519b930ba5aff1d12ad7f52118f5e64c8940a58107c3dffdedbfd9
3
+ size 8602554320
model-00086-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd1593c23ca9d56557ca31b6b70cd212790380a990f561b6a079ce57e9e1bc0
3
+ size 8602554448
model-00090-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:809ba7cd493f8436b0af990b644c33e0ba58cc34a4498b0e4ccc7f1ef11587e2
3
+ size 8602554216
model-00092-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dda20441e4441c7455e879a3dea280c6a502db8d7aa797552ae0e1bc424884f
3
+ size 8569459104
model-00096-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f2094573946ff6a217b1b9064fe0539f7999a08aeb35289a9189e23e80c006e
3
+ size 8602554408
model-00103-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a7992bf4d5eb32c911222cf6dcdfe800733dec1225b21d5cba74cd7b1a835c
3
+ size 8569459184
model-00104-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadef98b6592480b68603880edfe059fb8bac33e41397b9af31e73e045531995
3
+ size 8602554224
model-00111-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:033fe54945084afe6e88646ee610ff5cb494b4009ddbaa324a6f9ed701122996
3
+ size 8569459192
model-00116-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3b736ef3b71b5699a4a23701d6ab1cf49231ce972b55864cb1f51301c876bfb
3
+ size 8602554448
model-00118-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6700eec03362e56d1729eee325f93e83e2f9903a13c744bb80be172ee1cbc2e0
3
+ size 8602554408
model-00126-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a37b3840f05846b23494f2c63427590cde60848e425765018d962416b79e9079
3
+ size 8602554224
model-00127-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc654e03f13ab8d88632b70f64c34c89762a93fe687efa691ff2227572736449
3
+ size 8602554448
model-00130-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eba484f52e112d6b4d37305188445047c38b03a321c61ee655b4b8c731a243b
3
+ size 8602554448
model-00131-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:108a76cd355af76932ef9afd79d4d04ba7bf7f49ed0ca6916e3bea127ebe60ca
3
+ size 8569459000
model-00132-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa86408b2a47c4bd83ef0033b6cb193b7c22c7e3e72c94ac81d5f6d70f73fb3
3
+ size 8602554416
model-00136-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c2a278d604c95913ad1ee7aa4c2a25ff94e8c85bbbacfe3a837a650649cb92
3
+ size 8569459104
model-00137-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec2560e9ca39e8d24ee7ea0ffaadd3ef368018d97de563eb1f5b95931884ebb
3
+ size 8602554312
model-00139-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f95d439a52439e443025fc6173c92adef23e733135ec55a6df218b6ba1e54788
3
+ size 8569459008
model-00151-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d14eb182f24271bb26d4c5806cbf91b2829dd7177267ad287347df3b8105c8f
3
+ size 8602554416
model-00153-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d59b8c221471318f072795c92620eb45575df3c8ca6a7520427e415a82f0330b
3
+ size 8602554216
model-00155-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b708798b42f6fbf412692921d9123e8815af8e5a080fc8f94c52daaed70619c
3
+ size 8569459104
model-00161-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:730a92ee0344ff4303cfceddc7949894047f9303efa7d98bb956e922acfe8866
3
+ size 8576865144
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|begin▁of▁sentence|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|end▁of▁sentence|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": true,
22
+ "model_max_length": 131072,
23
+ "pad_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<|end▁of▁sentence|>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "sp_model_kwargs": {},
32
+ "unk_token": null,
33
+ "tokenizer_class": "LlamaTokenizerFast"
34
+ }