mikasenghaas committed on
Commit
b63c508
·
verified ·
1 Parent(s): 98c96c6

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. config.json +74 -0
  2. generation_config.json +9 -0
  3. model-00001-of-00070.safetensors +3 -0
  4. model-00002-of-00070.safetensors +3 -0
  5. model-00003-of-00070.safetensors +3 -0
  6. model-00004-of-00070.safetensors +3 -0
  7. model-00005-of-00070.safetensors +3 -0
  8. model-00006-of-00070.safetensors +3 -0
  9. model-00007-of-00070.safetensors +3 -0
  10. model-00008-of-00070.safetensors +3 -0
  11. model-00009-of-00070.safetensors +3 -0
  12. model-00010-of-00070.safetensors +3 -0
  13. model-00011-of-00070.safetensors +3 -0
  14. model-00012-of-00070.safetensors +3 -0
  15. model-00013-of-00070.safetensors +3 -0
  16. model-00014-of-00070.safetensors +3 -0
  17. model-00015-of-00070.safetensors +3 -0
  18. model-00016-of-00070.safetensors +3 -0
  19. model-00017-of-00070.safetensors +3 -0
  20. model-00018-of-00070.safetensors +3 -0
  21. model-00019-of-00070.safetensors +3 -0
  22. model-00020-of-00070.safetensors +3 -0
  23. model-00021-of-00070.safetensors +3 -0
  24. model-00022-of-00070.safetensors +3 -0
  25. model-00023-of-00070.safetensors +3 -0
  26. model-00024-of-00070.safetensors +3 -0
  27. model-00025-of-00070.safetensors +3 -0
  28. model-00026-of-00070.safetensors +3 -0
  29. model-00027-of-00070.safetensors +3 -0
  30. model-00028-of-00070.safetensors +3 -0
  31. model-00029-of-00070.safetensors +3 -0
  32. model-00030-of-00070.safetensors +3 -0
  33. model-00031-of-00070.safetensors +3 -0
  34. model-00032-of-00070.safetensors +3 -0
  35. model-00033-of-00070.safetensors +3 -0
  36. model-00034-of-00070.safetensors +3 -0
  37. model-00035-of-00070.safetensors +3 -0
  38. model-00036-of-00070.safetensors +3 -0
  39. model-00037-of-00070.safetensors +3 -0
  40. model-00038-of-00070.safetensors +3 -0
  41. model-00039-of-00070.safetensors +3 -0
  42. model-00040-of-00070.safetensors +3 -0
  43. model-00041-of-00070.safetensors +3 -0
  44. model-00042-of-00070.safetensors +3 -0
  45. model-00043-of-00070.safetensors +3 -0
  46. model-00044-of-00070.safetensors +3 -0
  47. model-00045-of-00070.safetensors +3 -0
  48. model-00046-of-00070.safetensors +3 -0
  49. model-00047-of-00070.safetensors +3 -0
  50. model-00048-of-00070.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "deepseek-ai/DeepSeek-R1-0528--configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "deepseek-ai/DeepSeek-R1-0528--modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "deepseek-ai/DeepSeek-R1-0528--modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "head_dim": 64,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 7168,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 18432,
21
+ "kv_lora_rank": 512,
22
+ "max_position_embeddings": 163840,
23
+ "model_type": "deepseek_v3",
24
+ "moe_intermediate_size": 2048,
25
+ "moe_layer_freq": 1,
26
+ "n_group": 8,
27
+ "n_routed_experts": 256,
28
+ "n_shared_experts": 1,
29
+ "norm_topk_prob": true,
30
+ "num_attention_heads": 128,
31
+ "num_experts_per_tok": 8,
32
+ "num_hidden_layers": 30,
33
+ "num_key_value_heads": 128,
34
+ "num_nextn_predict_layers": 1,
35
+ "num_shards": 2,
36
+ "pretraining_tp": 1,
37
+ "q_lora_rank": 1536,
38
+ "qk_head_dim": 192,
39
+ "qk_nope_head_dim": 128,
40
+ "qk_rope_head_dim": 64,
41
+ "quantization_config": {
42
+ "activation_scheme": "dynamic",
43
+ "modules_to_not_convert": null,
44
+ "quant_method": "fp8",
45
+ "weight_block_size": [
46
+ 128,
47
+ 128
48
+ ]
49
+ },
50
+ "rms_norm_eps": 1e-06,
51
+ "rope_interleave": true,
52
+ "rope_scaling": {
53
+ "beta_fast": 32,
54
+ "beta_slow": 1,
55
+ "factor": 40,
56
+ "mscale": 1.0,
57
+ "mscale_all_dim": 1.0,
58
+ "original_max_position_embeddings": 4096,
59
+ "rope_type": "yarn",
60
+ "type": "yarn"
61
+ },
62
+ "rope_theta": 10000,
63
+ "routed_scaling_factor": 2.5,
64
+ "scoring_func": "sigmoid",
65
+ "shard_idx": 1,
66
+ "tie_word_embeddings": false,
67
+ "topk_group": 4,
68
+ "topk_method": "noaux_tc",
69
+ "torch_dtype": "bfloat16",
70
+ "transformers_version": "4.51.3",
71
+ "use_cache": true,
72
+ "v_head_dim": 128,
73
+ "vocab_size": 129280
74
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "do_sample": true,
5
+ "eos_token_id": 1,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.51.3"
9
+ }
model-00001-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eeb4ac7459dbf84ccfb9b72f14cbfe1072746c61a54eabcb8d3151b4ab9fb77
3
+ size 4991596776
model-00002-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fea998905fd9f1e17e89805a92dd4b78bb7929e338d04154e20c88cc79fc7ed
3
+ size 4991916176
model-00003-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:594312fc514f672820b916b83018407fed94f6551cfda45545ea40c4ecadc040
3
+ size 4991882320
model-00004-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf0b21f3c38f89362ff042924ca659612f67e1246fa61853de384222a2139fce
3
+ size 4991915968
model-00005-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b991aba490184e025db3e82b9da7143c3f9fd8f1b2c04f037e03cec88ded036
3
+ size 4977202048
model-00006-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24727f5a2b1c5937e6fc9c274622388b7d1b4a7013afed5d823f0ac08eff7d01
3
+ size 4991913936
model-00007-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae935d6033b7b988fa01ef686d49ea6389918ba6e972977300029936cbaf5fbf
3
+ size 4991916376
model-00008-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59913aaaf99db6b7c5fae2b0590f531515909ede0cd681cf097e87d179b0883
3
+ size 4991882320
model-00009-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3cc24a0eb74cd958cbdaa07c59d689f43d1da86c0622ebbebb1fa52ee3c8ab
3
+ size 4991916232
model-00010-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07efc1aa03985c116ec2101cd91e837b3eb5299f00fa1c508140a47bbb6492fb
3
+ size 4991882320
model-00011-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81742bb39062eb32911faa6d2d646a17c1ad10f846ccfc900a79e2fac74dd5f8
3
+ size 4991916016
model-00012-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90dc872887779c96c3c2d55332602bf49d3cb3bf7777fb6140eca3b2c39c9f5d
3
+ size 4991882336
model-00013-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd49be740913e5d55305472e0a743785f07c1cc1a2d26fc31fd647d685d0e536
3
+ size 4991915792
model-00014-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf83571d595cc27dd1a04e30fee02dcd1441b3c63b99c6fabb4d3f2e0995816
3
+ size 4991916376
model-00015-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d808771e1cea49648bd4ed30bb446face4ba60d3207c6d51287cf6c94ed4c9
3
+ size 4991882320
model-00016-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8941d7cfb261bf6903519883defa8909ecddd15b85a2fe56e2e88123917f843
3
+ size 4991916280
model-00017-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb9b8f03406354e8081e8afe74a51df638e2a1e073e5deab94d0c06c740ce4c7
3
+ size 4991882320
model-00018-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acd7efb7b626a0e21d0b08bd6c577e644eede66f0f912d64b25e5634e815d545
3
+ size 4991916080
model-00019-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d99308e9184d39a0b83de932bb257fb49347eeed649513d596bd8174d3d11d3f
3
+ size 4991882320
model-00020-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3175dead9c19d4e52ab1f1feb0491a94ba060abbdebb6aaec0379a138c582e1
3
+ size 4991915872
model-00021-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb9365f25b4a575b1cbd7db2c8cdc30afd727e5118577c938af184af42fe612
3
+ size 4991916368
model-00022-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2682eace252e73df3c4c82a6fd4af438a6043b8d80608da8449a9d7bf4cba7c7
3
+ size 4991882328
model-00023-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0beab83d107eeeefc9af891d5194d71b7d6d18ff887d67c0f9297f683bf2fab
3
+ size 4991916344
model-00024-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99b14606e4b895902f5eb61e10955f613f476a430c7f134ec5b900b784e25694
3
+ size 4991882288
model-00025-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209f87403e2b95430fe605a79dee3b5cbe00c020b952f460de4043d96f4dd2cb
3
+ size 4991916816
model-00026-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bc8968c6fc6e09927f6bc0b82c690bc820176ce6363d0d8072f36f8e28a754a
3
+ size 4991882984
model-00027-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae50fc929aea2b19025ba9c5eb4c70021250c984990be6e0e403fe519c2d626e
3
+ size 4991916600
model-00028-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce1d785b8771bb1cc18231e6f04d3feedca6794c7f5753ab408b4aeb0aafc121
3
+ size 4991917056
model-00029-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a78fdded97488afa947a41bcfad44d7c0768586cb5ab61e505cecd06bdc269e3
3
+ size 4991883080
model-00030-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:905a71c69d47604116dbad9b372bb18f7c1b239a23836dbc0ebb9e0f97f953ef
3
+ size 4991917056
model-00031-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdc526cb20ae04cbf12ecd05c7818b51e87abf13c42fb871f5504f1ee9ef007b
3
+ size 4991882984
model-00032-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49aadc74991a2d4d2516a337b87bc32f496145e93fce8bab9c8ff82503df2bd8
3
+ size 4991916864
model-00033-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9ecdadae896cdeccbe021923ff1fd074f6aa232a04f652afb399cdfc6f1109c
3
+ size 4991882984
model-00034-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1f9c916e8785b109db59140e0ff584bf9a59f63428e5b1d6c9a8f41df54b281
3
+ size 4991916664
model-00035-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f861bb66e5dd495589c14522e0eaec9d7d7317caede1809457a8e26b6c4c730d
3
+ size 4991883048
model-00036-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39162b08baf47aeb2f8792735e264782eed7b4e6e89a16faff27e74e4c09ee40
3
+ size 4991916392
model-00037-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7351501e9b3a11d256d021f21441d887fdba6ccebbbbe91fd577db74c721f5ab
3
+ size 4991917048
model-00038-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09af1d84ab7bac04cbcfee7ece967c37ff067fe83ffb7aa3181906ec95b7b7a
3
+ size 4991882984
model-00039-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbdcd85e99bed954898ab9a8e58174a4c1ad2fe190128716e67a51f38d3f9c9e
3
+ size 4991916928
model-00040-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1772ae7813947f95e64b99c33cd1c58436c01d8104da667a28725e86e9cdf3
3
+ size 4991882984
model-00041-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7903fbd70d8150b9fe675de77d7a48dd01364f3eb114ff61e2c6fda3ae39b04
3
+ size 4991916720
model-00042-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021fbb8f0b4fa755757ef2487f989983a378846b8bb0d66b4a5388b4f047c208
3
+ size 4991882992
model-00043-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f095d5e7d812f8e5c51413b1da706295710c9d7d9d6c9b980cf4503cacde01f
3
+ size 4991916504
model-00044-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:107f0ef985c76d57fe7b7b177e8b4adf04e1664b64a35b21ba97c6ad44c4ccb5
3
+ size 4991917056
model-00045-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27ea5807dbc09033131a363805976cc906258cfd3dc65864797e688a0ba01044
3
+ size 4991882984
model-00046-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c623efad3587c862f84104d64d48a2a4949637a457077a7d0cc0f2cc0dbf24f
3
+ size 4991916984
model-00047-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92d05a58b11b82097cf52e711ca0591c0b8830fcf9d9f267226af0b8af772da9
3
+ size 4991882984
model-00048-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47f57602ee16f9e5c63a07f61f4043f2620a9a54eb0c4f781d9bf11cb16a8c07
3
+ size 4991916768