atamb123 committed on
Commit
4ba8fa7
·
verified ·
1 Parent(s): dde4ef2

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. config.json +67 -0
  2. model-00001-of-00135.safetensors +3 -0
  3. model-00002-of-00135.safetensors +3 -0
  4. model-00003-of-00135.safetensors +3 -0
  5. model-00004-of-00135.safetensors +3 -0
  6. model-00005-of-00135.safetensors +3 -0
  7. model-00006-of-00135.safetensors +3 -0
  8. model-00007-of-00135.safetensors +3 -0
  9. model-00008-of-00135.safetensors +3 -0
  10. model-00009-of-00135.safetensors +3 -0
  11. model-00010-of-00135.safetensors +3 -0
  12. model-00011-of-00135.safetensors +3 -0
  13. model-00012-of-00135.safetensors +3 -0
  14. model-00013-of-00135.safetensors +3 -0
  15. model-00014-of-00135.safetensors +3 -0
  16. model-00015-of-00135.safetensors +3 -0
  17. model-00017-of-00135.safetensors +3 -0
  18. model-00018-of-00135.safetensors +3 -0
  19. model-00019-of-00135.safetensors +3 -0
  20. model-00020-of-00135.safetensors +3 -0
  21. model-00023-of-00135.safetensors +3 -0
  22. model-00025-of-00135.safetensors +3 -0
  23. model-00026-of-00135.safetensors +3 -0
  24. model-00032-of-00135.safetensors +3 -0
  25. model-00033-of-00135.safetensors +3 -0
  26. model-00035-of-00135.safetensors +3 -0
  27. model-00036-of-00135.safetensors +3 -0
  28. model-00037-of-00135.safetensors +3 -0
  29. model-00040-of-00135.safetensors +3 -0
  30. model-00043-of-00135.safetensors +3 -0
  31. model-00044-of-00135.safetensors +3 -0
  32. model-00045-of-00135.safetensors +3 -0
  33. model-00047-of-00135.safetensors +3 -0
  34. model-00049-of-00135.safetensors +3 -0
  35. model-00050-of-00135.safetensors +3 -0
  36. model-00052-of-00135.safetensors +3 -0
  37. model-00053-of-00135.safetensors +3 -0
  38. model-00054-of-00135.safetensors +3 -0
  39. model-00055-of-00135.safetensors +3 -0
  40. model-00057-of-00135.safetensors +3 -0
  41. model-00058-of-00135.safetensors +3 -0
  42. model-00061-of-00135.safetensors +3 -0
  43. model-00062-of-00135.safetensors +3 -0
  44. model-00065-of-00135.safetensors +3 -0
  45. model-00067-of-00135.safetensors +3 -0
  46. model-00078-of-00135.safetensors +3 -0
  47. model-00079-of-00135.safetensors +3 -0
  48. model.safetensors.index.json +0 -0
  49. tokenizer.json +0 -0
  50. tokenizer_config.json +35 -0
config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 7168,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 18432,
20
+ "kv_lora_rank": 512,
21
+ "max_position_embeddings": 163840,
22
+ "model_type": "deepseek_v3",
23
+ "moe_intermediate_size": 2048,
24
+ "moe_layer_freq": 1,
25
+ "n_group": 8,
26
+ "n_routed_experts": 256,
27
+ "n_shared_experts": 1,
28
+ "norm_topk_prob": true,
29
+ "num_attention_heads": 128,
30
+ "num_experts_per_tok": 8,
31
+ "num_hidden_layers": 61,
32
+ "num_key_value_heads": 128,
33
+ "num_nextn_predict_layers": 1,
34
+ "q_lora_rank": 1536,
35
+ "qk_nope_head_dim": 128,
36
+ "qk_rope_head_dim": 64,
37
+ "quantization_config": {
38
+ "activation_scheme": "dynamic",
39
+ "fmt": "e4m3",
40
+ "quant_method": "fp8",
41
+ "weight_block_size": [
42
+ 128,
43
+ 128
44
+ ]
45
+ },
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "beta_fast": 32,
49
+ "beta_slow": 1,
50
+ "factor": 40,
51
+ "mscale": 1.0,
52
+ "mscale_all_dim": 1.0,
53
+ "original_max_position_embeddings": 4096,
54
+ "type": "yarn"
55
+ },
56
+ "rope_theta": 10000,
57
+ "routed_scaling_factor": 2.5,
58
+ "scoring_func": "sigmoid",
59
+ "tie_word_embeddings": false,
60
+ "topk_group": 4,
61
+ "topk_method": "noaux_tc",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.33.1",
64
+ "use_cache": true,
65
+ "v_head_dim": 128,
66
+ "vocab_size": 129280
67
+ }
model-00001-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1557108716c68ce40dd73a9d8f866ec16f975f90e64db7f2537492d5ba6c089
3
+ size 4894921824
model-00002-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07d56b50dcfaad791e544e378e2ed5344c0715455ed924c814e6f5fab6808de4
3
+ size 4933686504
model-00003-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875817555c2cee522b3069cfcb57219fd67c318098d840821de8044090771acf
3
+ size 4933686504
model-00004-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a24c5b60ce6fd1457ec45d1f586835095c10b59ed86387b6e41674ed36a8be
3
+ size 4933686504
model-00005-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87809b8a14a999502800d864ffcc2fc17b8869a8e645b3d7dbd3861d3120a30
3
+ size 4993477416
model-00006-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:113ecbef28a9e9902e24ab664e51990204bdaba85fd5c97e42e72072604cdc19
3
+ size 4992422104
model-00007-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d525e848c5df0a1e9670e626cb21a4db4d44bf310c49ef448664029fc29a356a
3
+ size 4992422104
model-00008-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f884b7a5eab24975ad96a9b430f25e87914cb426247c14b3445f4b58fe5b98e
3
+ size 4992421944
model-00009-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b253edaf85ee05fbef4f8add1b6a124d6f3ae7cfeb8b27d2dffcc55896ec95
3
+ size 4992422056
model-00010-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa87bcaf14a490fdde69f4a758838ad1c31eb4db919dccf5549d2e05d98860a3
3
+ size 4992422104
model-00011-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:255a6bb04fd3010faf32db4e955c7108f6cb87e009b207469e54898b9712d9e2
3
+ size 4992422104
model-00012-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86550f96bc2067dec0b544744dde5d35f1cb531fe176694d32ada92202fd05eb
3
+ size 4992421880
model-00013-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5e82b430a82fec465650cfced8df7a8f789b1009a2ee8a1504a140643b89088
3
+ size 4996092864
model-00014-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6789c07d132956ef37a031fb96dc8ff9acb31c8e9d2a5f152d824e936b8c9d2b
3
+ size 4983984800
model-00015-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964d8c0b2c684b733f91d1f6ea8c921f72333424a4a8e081e9970ec51d030f5d
3
+ size 4933686496
model-00017-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85c0f9591ae3e2eb5e5ac9e9ddd593d45e4116d40beabf042ffe771fdfbbaded
3
+ size 4933686504
model-00018-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9de4939e9b9c6bfc15d9e57aae9759aebbcaff47dfd739a227077e110e98d842
3
+ size 4933686504
model-00019-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:decaa96119dfed359d459c3339af8b6d5757bc38c84c315a2926b3f42ced03ca
3
+ size 4933686288
model-00020-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56b4b255189638160cace987b673f04875c7d9b0d40f3f81c0292866b6d905c
3
+ size 4933686072
model-00023-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59f64c158b70f540fd312f723a2478a98e422d0a2fa6bf8f4fcc161ca6f088b
3
+ size 4992525496
model-00025-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4187241ee99e3c444d75b28ec973cc3aa08c386c3a3364d5a3733bf36eb20e4
3
+ size 4992525496
model-00026-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab2f5aab5bb97c0426a70d25d3c90200412ff3053a7286005b8b67b3660de5e
3
+ size 4992525480
model-00032-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83ab02e071804bc38e2d39bf1a750f7be4b9f46e94f2c8c73338f20c91ec895
3
+ size 4992525496
model-00033-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62daa64b2b6e17890a2febad2367e99104d1ae93abc79c1e34ae98b3483fa0dc
3
+ size 4992525304
model-00035-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7baa6956d1513a3c72da102912bc785be67a611e105029f361bb8b337cff8725
3
+ size 4992525496
model-00036-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48ebf0d363d1a3b3984e02dc682977beeb866e60f14ae78c316430821542e459
3
+ size 4992525208
model-00037-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b1899809272b57c2ca4521f8aeca014299e570200ea68daf530f61a9c6b840b
3
+ size 4992525024
model-00040-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa075001a27e1387f1325510c9422a6e02a13838fed61840d58a6b32d6c3f0b
3
+ size 4992525496
model-00043-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6427ed3a9efe86df41eba06f50dcf598c19b0839dca85183ea4c520962407de
3
+ size 4992525304
model-00044-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6de1783d2e52738e2806c96995592ca02ce0f0d6bfcb87ab5098d3fe56925cc
3
+ size 4992525496
model-00045-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b22b0a353386294b439406c44c424687dc08dfec75d26d0c2e1d7f6369c67978
3
+ size 4992525496
model-00047-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35aa8d421ba6537bf7d9548c118bcd28aabdfecddacd5bf86edec3767b698f63
3
+ size 4992525496
model-00049-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5fd8a9860b03986297e1a441c7b0956f4937b5a7f3ae0e8611ec687304b14c5
3
+ size 4992525304
model-00050-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be2008029e707f926b7d754e5fbfae392e614044b29fedb749eab1ab586d2174
3
+ size 4992525496
model-00052-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e710dd06ba1407045b627439f1cc38fdefa355d04c319801de276d31a65c9980
3
+ size 4992525304
model-00053-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:415614eed81bca012e6db5274f6523bee2dfcdc1cce49be4d560ed3fa87f7b25
3
+ size 4992524976
model-00054-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38cd55212accf78324b67081667c95ab743cda9ea2fba94aab5e82d72a1d1264
3
+ size 4992526000
model-00055-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40629e2452be910bc1f9c919da92102386ec03733cd0bdf754a23b7992d6ba63
3
+ size 4992526104
model-00057-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67742cb3f97577a7d3f602d94d8ba48866dc60036bd1555444ee8897f5ca659d
3
+ size 4992526104
model-00058-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8bcb345ec9f48accef40770083d2fe57401d8157408b433f33dfe935c527b6
3
+ size 4992526088
model-00061-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfbc3dd632acc0e59be10eef4f4d303ab2d1e0abf9628a116d4331f49dbe515f
3
+ size 4992526080
model-00062-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ab0ffcb7f612505ce01795f480ea480c52283c3c35fd1e606717447efb19f7
3
+ size 4992525992
model-00065-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182bd2e29ac8736928d963f8a99a45f6d88e3d2cf490df51392a87dcebb1b2ca
3
+ size 4992526040
model-00067-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:728cc88e6a81f281ec2d9bd7ddaa35f13d75d964b8efb9a02cf7ed3d4234c2eb
3
+ size 4992526080
model-00078-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5c44bbd6fb9d2d3b9156566bc6125bff6d24d7e204a5546ed1e46a1421a7439
3
+ size 4992526176
model-00079-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22c1f6a8e65653ac60165cb223c4690b2bd572589c10bc0100723dc85e756009
3
+ size 4992525984
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|begin▁of▁sentence|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|end▁of▁sentence|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": true,
22
+ "model_max_length": 131072,
23
+ "pad_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<|end▁of▁sentence|>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "sp_model_kwargs": {},
32
+ "unk_token": null,
33
+ "tokenizer_class": "LlamaTokenizerFast",
34
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + 
'<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}"
35
+ }