UnicornChan committed on
Commit
4d63f9c
·
verified ·
1 Parent(s): f315c20

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. config.json +150 -0
  2. model-00001.safetensors +3 -0
  3. model-00002.safetensors +3 -0
  4. model-00003.safetensors +3 -0
  5. model-00006.safetensors +3 -0
  6. model-00007.safetensors +3 -0
  7. model-00008.safetensors +3 -0
  8. model-00009.safetensors +3 -0
  9. model-00010.safetensors +3 -0
  10. model-00012.safetensors +3 -0
  11. model-00013.safetensors +3 -0
  12. model-00014.safetensors +3 -0
  13. model-00016.safetensors +3 -0
  14. model-00017.safetensors +3 -0
  15. model-00018.safetensors +3 -0
  16. model-00020.safetensors +3 -0
  17. model-00021.safetensors +3 -0
  18. model-00022.safetensors +3 -0
  19. model-00023.safetensors +3 -0
  20. model-00025.safetensors +3 -0
  21. model-00026.safetensors +3 -0
  22. model-00027.safetensors +3 -0
  23. model-00028.safetensors +3 -0
  24. model-00029.safetensors +3 -0
  25. model-00030.safetensors +3 -0
  26. model-00031.safetensors +3 -0
  27. model-00033.safetensors +3 -0
  28. model-00034.safetensors +3 -0
  29. model-00036.safetensors +3 -0
  30. model-00037.safetensors +3 -0
  31. model-00040.safetensors +3 -0
  32. model-00042.safetensors +3 -0
  33. model-00043.safetensors +3 -0
  34. model-00044.safetensors +3 -0
  35. model-00045.safetensors +3 -0
  36. model-00046.safetensors +3 -0
  37. model-00047.safetensors +3 -0
  38. model-00048.safetensors +3 -0
  39. model-00049.safetensors +3 -0
  40. model-00050.safetensors +3 -0
  41. model-00051.safetensors +3 -0
  42. model-00052.safetensors +3 -0
  43. model-00053.safetensors +3 -0
  44. model-00054.safetensors +3 -0
  45. model-00055.safetensors +3 -0
  46. model-00056.safetensors +3 -0
  47. model-00057.safetensors +3 -0
  48. model-00058.safetensors +3 -0
  49. model-00059.safetensors +3 -0
  50. model-00060.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": false,
3
+ "_name_or_path": "",
4
+ "add_cross_attention": false,
5
+ "architectures": [
6
+ "DeepseekV3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
12
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
13
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
14
+ },
15
+ "aux_loss_alpha": 0.001,
16
+ "bad_words_ids": null,
17
+ "begin_suppress_tokens": null,
18
+ "bos_token_id": 163584,
19
+ "chunk_size_feed_forward": 0,
20
+ "cross_attention_hidden_size": null,
21
+ "decoder_start_token_id": null,
22
+ "diversity_penalty": 0.0,
23
+ "do_sample": false,
24
+ "early_stopping": false,
25
+ "encoder_no_repeat_ngram_size": 0,
26
+ "eos_token_id": 163586,
27
+ "ep_size": 1,
28
+ "exponential_decay_length_penalty": null,
29
+ "finetuning_task": null,
30
+ "first_k_dense_replace": 1,
31
+ "forced_bos_token_id": null,
32
+ "forced_eos_token_id": null,
33
+ "hidden_act": "silu",
34
+ "hidden_size": 7168,
35
+ "id2label": {
36
+ "0": "LABEL_0",
37
+ "1": "LABEL_1"
38
+ },
39
+ "initializer_range": 0.02,
40
+ "intermediate_size": 18432,
41
+ "is_decoder": false,
42
+ "is_encoder_decoder": false,
43
+ "kv_lora_rank": 512,
44
+ "label2id": {
45
+ "LABEL_0": 0,
46
+ "LABEL_1": 1
47
+ },
48
+ "length_penalty": 1.0,
49
+ "max_length": 20,
50
+ "max_position_embeddings": 262144,
51
+ "min_length": 0,
52
+ "model_type": "kimi_k2",
53
+ "moe_intermediate_size": 2048,
54
+ "moe_layer_freq": 1,
55
+ "n_group": 1,
56
+ "n_routed_experts": 384,
57
+ "n_shared_experts": 1,
58
+ "no_repeat_ngram_size": 0,
59
+ "norm_topk_prob": true,
60
+ "num_attention_heads": 64,
61
+ "num_beam_groups": 1,
62
+ "num_beams": 1,
63
+ "num_experts_per_tok": 8,
64
+ "num_hidden_layers": 61,
65
+ "num_key_value_heads": 64,
66
+ "num_nextn_predict_layers": 0,
67
+ "num_return_sequences": 1,
68
+ "output_attentions": false,
69
+ "output_hidden_states": false,
70
+ "output_scores": false,
71
+ "pad_token_id": 163839,
72
+ "prefix": null,
73
+ "pretraining_tp": 1,
74
+ "problem_type": null,
75
+ "pruned_heads": {},
76
+ "q_lora_rank": 1536,
77
+ "qk_nope_head_dim": 128,
78
+ "qk_rope_head_dim": 64,
79
+ "quantization_config": {
80
+ "config_groups": {
81
+ "group_0": {
82
+ "input_activations": null,
83
+ "output_activations": null,
84
+ "targets": [
85
+ "Linear"
86
+ ],
87
+ "weights": {
88
+ "actorder": null,
89
+ "block_structure": null,
90
+ "dynamic": false,
91
+ "group_size": 32,
92
+ "num_bits": 4,
93
+ "observer": "minmax",
94
+ "observer_kwargs": {},
95
+ "strategy": "group",
96
+ "symmetric": true,
97
+ "type": "int"
98
+ }
99
+ }
100
+ },
101
+ "format": "pack-quantized",
102
+ "ignore": [
103
+ "lm_head",
104
+ "re:.*self_attn.*",
105
+ "re:.*shared_experts.*",
106
+ "re:.*mlp\\.(gate|up|gate_up|down)_proj.*"
107
+ ],
108
+ "kv_cache_scheme": null,
109
+ "quant_method": "compressed-tensors",
110
+ "quantization_status": "compressed"
111
+ },
112
+ "remove_invalid_values": false,
113
+ "repetition_penalty": 1.0,
114
+ "return_dict": true,
115
+ "return_dict_in_generate": false,
116
+ "rms_norm_eps": 1e-05,
117
+ "rope_scaling": {
118
+ "beta_fast": 1.0,
119
+ "beta_slow": 1.0,
120
+ "factor": 64.0,
121
+ "mscale": 1.0,
122
+ "mscale_all_dim": 1.0,
123
+ "original_max_position_embeddings": 4096,
124
+ "type": "yarn"
125
+ },
126
+ "rope_theta": 50000.0,
127
+ "routed_scaling_factor": 2.827,
128
+ "scoring_func": "sigmoid",
129
+ "sep_token_id": null,
130
+ "seq_aux": true,
131
+ "suppress_tokens": null,
132
+ "task_specific_params": null,
133
+ "temperature": 1.0,
134
+ "tf_legacy_loss": false,
135
+ "tie_encoder_decoder": false,
136
+ "tie_word_embeddings": false,
137
+ "tokenizer_class": null,
138
+ "top_k": 50,
139
+ "top_p": 1.0,
140
+ "topk_group": 1,
141
+ "topk_method": "noaux_tc",
142
+ "torch_dtype": "bfloat16",
143
+ "torchscript": false,
144
+ "transformers_version": "4.51.3",
145
+ "typical_p": 1.0,
146
+ "use_bfloat16": false,
147
+ "use_cache": true,
148
+ "v_head_dim": 128,
149
+ "vocab_size": 163840
150
+ }
model-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55209560c20fc81470e0dfaef2c7575f8d565113536dc9b073c74b2a0f64c24c
3
+ size 8484522656
model-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05bdfda5fc956b612c8d76fd40e59b83b22874e64f072ca51218a37bda10959f
3
+ size 8484522656
model-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:996026aedd810de51f64dc3f9264245692de211aa4f611ce4ac0567f901bd49d
3
+ size 8484522656
model-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3356270c2b23499ba45357b2086bed726c74401f0641c9d4f32bccda3a9962bc
3
+ size 8484522656
model-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c23c959b9127c791aaa6fce3fb3693bb9524a0c89c72928592fdb7dbe56fe5
3
+ size 8484522656
model-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c825d31efd6b7dc99eedb729665edeaed0e7ef4c41290101057e22415453e770
3
+ size 8484522656
model-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3f5a27332b3312ac0a69af3b786b75a7eb9aede4df2c0c5f3406ff15032505
3
+ size 8484522656
model-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a6a2234f3bc6f761f878be6c9eabf5aaca196c0b093e5498898e716ffec44be
3
+ size 8484527264
model-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9511470fec2991ebe20b930e9f7400f71ee87d8dc52c323e08163750e0402dac
3
+ size 8484527264
model-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4ebfa7fe4692ba55617c8e1a50aba30ff1ab8253aa6fa0fb351d469b8e69b4
3
+ size 8484527264
model-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ecaff3aeb8e9bbc859ba7a66ea64d69893d99336dd3e68e3c5d8230f8a4613
3
+ size 8484527264
model-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34b5835ec9f993cccf15988d88fe57c0ccfaffeb3442a531078d4b67ff138b06
3
+ size 8484527264
model-00017.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac324e958b2c83f5183f84ed872652f7556eec540fc1ece8b3ee3c932b6b2fc
3
+ size 8484527264
model-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841e6b8453d4a473bb03dc9e388b27434519983c2f49d54891b7b78c9dba8e4e
3
+ size 8484527264
model-00020.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376feba9a49a1f6d43f7aafe48356065c9a64cea74763e3d1dc8b33dea798570
3
+ size 8484527264
model-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57faba219fe667ff5803b266fec0e58b7eef5c3435bd1dcc7152f6be2b7b726e
3
+ size 8484527264
model-00022.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c58b3ef78d5a2503ea1ede03dd7969a87ae0cca3a2918313b6addae9a01287ba
3
+ size 8484527264
model-00023.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fd796a705aaeee2f54ca7973560a3b0521032566aa635d462b86793260b03b7
3
+ size 8484527264
model-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37be56a29bb6e7fca430ba0693fa79b40a77bc47f05e5e0e2799f9a642e2e3a1
3
+ size 8484527264
model-00026.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:754dc6acc39782bfb75eb44a6c7a59d43fa164fbbbe9a670ae501433ee9f7471
3
+ size 8484527264
model-00027.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23808b167bb0bb483ecae754614c6c0328e5288b7b9266e896e1132a28a697e1
3
+ size 8484527264
model-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd65bfdb7f0849c4251504e3483fd88a388f8a8b335b10b1c7882261506d4523
3
+ size 8484527264
model-00029.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e253d32fd16a3b10d4a191c24c962646c03c9e0b431315a7037b5cbf2fc7c1
3
+ size 8484527264
model-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e0d6b7cd99541e9362638da3033273ed51dffb920326fa3a6a8794167dd48ae
3
+ size 8484527264
model-00031.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d01682dfdb7ba97392a046f88f7b55f74eb412fc3c893c0b3d0b2e476c2e092
3
+ size 8484527264
model-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ddfd16b682a63116acf39ce7dfa2d48efe9cf37a09708e05046d57d8f58e75d
3
+ size 8484527264
model-00034.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02e98b0d75600a9060d38acf17351d0c39e282e802e38d6d8eb3936ffbd875f8
3
+ size 8484527264
model-00036.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee13abeca3af257ef41c098c2399414dee049931fbdee408ee843ed1a6899e79
3
+ size 8484527264
model-00037.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1331f6351f97aad8041ffc2e8fb6abe84192ef7cfaa27e7e073b71469db4855
3
+ size 8484527264
model-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c63c149e2e3b8d482020bd720d7f17be6cacdd27c7cf37ed05485509dc93be
3
+ size 8484527264
model-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b927ddbc64c9a41fac30a246e57339554edb9227894b0ac5c2a216964372e9f1
3
+ size 8484527264
model-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f46786f504efd88956f9201b85404966267b02e5e107ae17a5b96f9ece0233
3
+ size 8484527264
model-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:332dad28398984d9beb1e917e36f3d904fb7b4a73e2ec0de595d06492abf148d
3
+ size 8484527264
model-00045.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a6ef048e6963c39fd0a839badd21c829ef801058a0c4a9e285495f45f9036b4
3
+ size 8484527264
model-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2133e6e9b74d2b114aa1b13eb037ed4f970cb350d0a8cdaa51913ecf7e5c1ae
3
+ size 8484527264
model-00047.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9a94511dd893eb0dfd364327a8c849482d4459d7dcf6883f014948fe12d8aff
3
+ size 8484527264
model-00048.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbe1feaa0497edc3823976528119f9517404e8b65927dca32ed1c92872952b33
3
+ size 8484527264
model-00049.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e719b25029b0c08a129f40e4b064733eb2c0f21273b9dd43130a14597848f030
3
+ size 8484527264
model-00050.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce6a9b71fc3b293796b6daba84ad177a26046949049dca834c69ed458af1e77
3
+ size 8484527264
model-00051.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d820c15fe558751da2b5d5d3dec03a6e3d2fd3f5ad2acad833275d6f72545bec
3
+ size 8484527264
model-00052.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45710207e4959b1dda989db06a2802a8e3e09e9bd2a19bae2602c0d1863d812
3
+ size 8484527264
model-00053.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b5f841cbc80c7e1d68bc7c671b01d135ee31bc27260fc5cee5c8b7af427572
3
+ size 8484527264
model-00054.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c544e5841d97a4455699a0686b78f7e8f1511a75ecc9909a1f2f2ec1dd69cf28
3
+ size 8484527264
model-00055.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f9191309c4e6fe09fc7a6530bc0531091536769b689d2db89b04a011d818659
3
+ size 8484527264
model-00056.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c714aea11c57f34da68ffc3dd16380e8914b6e3b3de902d2d1b81ff3a1fe915
3
+ size 8484527264
model-00057.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c3f72fa2cdc55a20458e972752a3689e4f3f303e4e51b9a1fc4392aaa57398d
3
+ size 8484527264
model-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb2b86950e377d34e52c9234212595874f7d008377ed4b4def44a505a2311bf
3
+ size 8484527264
model-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a4866796ad38857b64243b8ded34526cbfb618454275a7d6af70a6015be604
3
+ size 8484527264
model-00060.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d825d8d4229a1752c4a9d245bd521fa34ba3190690542a54d280ca75153bb5a
3
+ size 8484527264