new model (#1)
Browse files- new model (72ca591d9e3b058d04e9dd027036249ac42ffa9c)
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- LICENSE +21 -0
- config.json +234 -0
- generation_config.json +9 -0
- model-00001-of-000163.safetensors +3 -0
- model-00002-of-000163.safetensors +3 -0
- model-00003-of-000163.safetensors +3 -0
- model-00004-of-000163.safetensors +3 -0
- model-00005-of-000163.safetensors +3 -0
- model-00006-of-000163.safetensors +3 -0
- model-00007-of-000163.safetensors +3 -0
- model-00008-of-000163.safetensors +3 -0
- model-00009-of-000163.safetensors +3 -0
- model-00010-of-000163.safetensors +3 -0
- model-00011-of-000163.safetensors +3 -0
- model-00012-of-000163.safetensors +3 -0
- model-00013-of-000163.safetensors +3 -0
- model-00014-of-000163.safetensors +3 -0
- model-00015-of-000163.safetensors +3 -0
- model-00016-of-000163.safetensors +3 -0
- model-00017-of-000163.safetensors +3 -0
- model-00018-of-000163.safetensors +3 -0
- model-00019-of-000163.safetensors +3 -0
- model-00020-of-000163.safetensors +3 -0
- model-00021-of-000163.safetensors +3 -0
- model-00022-of-000163.safetensors +3 -0
- model-00023-of-000163.safetensors +3 -0
- model-00024-of-000163.safetensors +3 -0
- model-00025-of-000163.safetensors +3 -0
- model-00026-of-000163.safetensors +3 -0
- model-00027-of-000163.safetensors +3 -0
- model-00028-of-000163.safetensors +3 -0
- model-00029-of-000163.safetensors +3 -0
- model-00030-of-000163.safetensors +3 -0
- model-00031-of-000163.safetensors +3 -0
- model-00032-of-000163.safetensors +3 -0
- model-00033-of-000163.safetensors +3 -0
- model-00034-of-000163.safetensors +3 -0
- model-00035-of-000163.safetensors +3 -0
- model-00036-of-000163.safetensors +3 -0
- model-00037-of-000163.safetensors +3 -0
- model-00038-of-000163.safetensors +3 -0
- model-00039-of-000163.safetensors +3 -0
- model-00040-of-000163.safetensors +3 -0
- model-00041-of-000163.safetensors +3 -0
- model-00042-of-000163.safetensors +3 -0
- model-00043-of-000163.safetensors +3 -0
- model-00044-of-000163.safetensors +3 -0
- model-00045-of-000163.safetensors +3 -0
- model-00046-of-000163.safetensors +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2023 DeepSeek
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
config.json
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DeepseekV32ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"eos_token_id": 1,
|
| 9 |
+
"ep_size": 1,
|
| 10 |
+
"first_k_dense_replace": 3,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 7168,
|
| 13 |
+
"index_head_dim": 128,
|
| 14 |
+
"index_n_heads": 64,
|
| 15 |
+
"index_topk": 2048,
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 18432,
|
| 18 |
+
"kv_lora_rank": 512,
|
| 19 |
+
"max_position_embeddings": 163840,
|
| 20 |
+
"model_type": "deepseek_v32",
|
| 21 |
+
"moe_intermediate_size": 2048,
|
| 22 |
+
"moe_layer_freq": 1,
|
| 23 |
+
"n_group": 8,
|
| 24 |
+
"n_routed_experts": 256,
|
| 25 |
+
"n_shared_experts": 1,
|
| 26 |
+
"norm_topk_prob": true,
|
| 27 |
+
"num_attention_heads": 128,
|
| 28 |
+
"num_experts_per_tok": 8,
|
| 29 |
+
"num_hidden_layers": 61,
|
| 30 |
+
"num_key_value_heads": 128,
|
| 31 |
+
"num_nextn_predict_layers": 1,
|
| 32 |
+
"q_lora_rank": 1536,
|
| 33 |
+
"qk_nope_head_dim": 128,
|
| 34 |
+
"qk_rope_head_dim": 64,
|
| 35 |
+
"quantization_config": {
|
| 36 |
+
"algo_config": null,
|
| 37 |
+
"exclude": [
|
| 38 |
+
"model.layers.0.self_attn.indexer.weights_proj.weight",
|
| 39 |
+
"model.layers.1.self_attn.indexer.weights_proj.weight",
|
| 40 |
+
"model.layers.2.self_attn.indexer.weights_proj.weight",
|
| 41 |
+
"model.layers.3.self_attn.indexer.weights_proj.weight",
|
| 42 |
+
"model.layers.4.self_attn.indexer.weights_proj.weight",
|
| 43 |
+
"model.layers.5.self_attn.indexer.weights_proj.weight",
|
| 44 |
+
"model.layers.6.self_attn.indexer.weights_proj.weight",
|
| 45 |
+
"model.layers.7.self_attn.indexer.weights_proj.weight",
|
| 46 |
+
"model.layers.8.self_attn.indexer.weights_proj.weight",
|
| 47 |
+
"model.layers.9.self_attn.indexer.weights_proj.weight",
|
| 48 |
+
"model.layers.10.self_attn.indexer.weights_proj.weight",
|
| 49 |
+
"model.layers.11.self_attn.indexer.weights_proj.weight",
|
| 50 |
+
"model.layers.12.self_attn.indexer.weights_proj.weight",
|
| 51 |
+
"model.layers.13.self_attn.indexer.weights_proj.weight",
|
| 52 |
+
"model.layers.14.self_attn.indexer.weights_proj.weight",
|
| 53 |
+
"model.layers.15.self_attn.indexer.weights_proj.weight",
|
| 54 |
+
"model.layers.16.self_attn.indexer.weights_proj.weight",
|
| 55 |
+
"model.layers.17.self_attn.indexer.weights_proj.weight",
|
| 56 |
+
"model.layers.18.self_attn.indexer.weights_proj.weight",
|
| 57 |
+
"model.layers.19.self_attn.indexer.weights_proj.weight",
|
| 58 |
+
"model.layers.20.self_attn.indexer.weights_proj.weight",
|
| 59 |
+
"model.layers.21.self_attn.indexer.weights_proj.weight",
|
| 60 |
+
"model.layers.22.self_attn.indexer.weights_proj.weight",
|
| 61 |
+
"model.layers.23.self_attn.indexer.weights_proj.weight",
|
| 62 |
+
"model.layers.24.self_attn.indexer.weights_proj.weight",
|
| 63 |
+
"model.layers.25.self_attn.indexer.weights_proj.weight",
|
| 64 |
+
"model.layers.26.self_attn.indexer.weights_proj.weight",
|
| 65 |
+
"model.layers.27.self_attn.indexer.weights_proj.weight",
|
| 66 |
+
"model.layers.28.self_attn.indexer.weights_proj.weight",
|
| 67 |
+
"model.layers.29.self_attn.indexer.weights_proj.weight",
|
| 68 |
+
"model.layers.30.self_attn.indexer.weights_proj.weight",
|
| 69 |
+
"model.layers.31.self_attn.indexer.weights_proj.weight",
|
| 70 |
+
"model.layers.32.self_attn.indexer.weights_proj.weight",
|
| 71 |
+
"model.layers.33.self_attn.indexer.weights_proj.weight",
|
| 72 |
+
"model.layers.34.self_attn.indexer.weights_proj.weight",
|
| 73 |
+
"model.layers.35.self_attn.indexer.weights_proj.weight",
|
| 74 |
+
"model.layers.36.self_attn.indexer.weights_proj.weight",
|
| 75 |
+
"model.layers.37.self_attn.indexer.weights_proj.weight",
|
| 76 |
+
"model.layers.38.self_attn.indexer.weights_proj.weight",
|
| 77 |
+
"model.layers.39.self_attn.indexer.weights_proj.weight",
|
| 78 |
+
"model.layers.40.self_attn.indexer.weights_proj.weight",
|
| 79 |
+
"model.layers.41.self_attn.indexer.weights_proj.weight",
|
| 80 |
+
"model.layers.42.self_attn.indexer.weights_proj.weight",
|
| 81 |
+
"model.layers.43.self_attn.indexer.weights_proj.weight",
|
| 82 |
+
"model.layers.44.self_attn.indexer.weights_proj.weight",
|
| 83 |
+
"model.layers.45.self_attn.indexer.weights_proj.weight",
|
| 84 |
+
"model.layers.46.self_attn.indexer.weights_proj.weight",
|
| 85 |
+
"model.layers.47.self_attn.indexer.weights_proj.weight",
|
| 86 |
+
"model.layers.48.self_attn.indexer.weights_proj.weight",
|
| 87 |
+
"model.layers.49.self_attn.indexer.weights_proj.weight",
|
| 88 |
+
"model.layers.50.self_attn.indexer.weights_proj.weight",
|
| 89 |
+
"model.layers.51.self_attn.indexer.weights_proj.weight",
|
| 90 |
+
"model.layers.52.self_attn.indexer.weights_proj.weight",
|
| 91 |
+
"model.layers.53.self_attn.indexer.weights_proj.weight",
|
| 92 |
+
"model.layers.54.self_attn.indexer.weights_proj.weight",
|
| 93 |
+
"model.layers.55.self_attn.indexer.weights_proj.weight",
|
| 94 |
+
"model.layers.56.self_attn.indexer.weights_proj.weight",
|
| 95 |
+
"model.layers.57.self_attn.indexer.weights_proj.weight",
|
| 96 |
+
"model.layers.58.self_attn.indexer.weights_proj.weight",
|
| 97 |
+
"model.layers.59.self_attn.indexer.weights_proj.weight",
|
| 98 |
+
"model.layers.60.self_attn.indexer.weights_proj.weight",
|
| 99 |
+
"model.layers.61.self_attn.indexer.weights_proj.weight",
|
| 100 |
+
"model.layers.3.mlp.gate.weight",
|
| 101 |
+
"model.layers.4.mlp.gate.weight",
|
| 102 |
+
"model.layers.5.mlp.gate.weight",
|
| 103 |
+
"model.layers.6.mlp.gate.weight",
|
| 104 |
+
"model.layers.7.mlp.gate.weight",
|
| 105 |
+
"model.layers.8.mlp.gate.weight",
|
| 106 |
+
"model.layers.9.mlp.gate.weight",
|
| 107 |
+
"model.layers.10.mlp.gate.weight",
|
| 108 |
+
"model.layers.11.mlp.gate.weight",
|
| 109 |
+
"model.layers.12.mlp.gate.weight",
|
| 110 |
+
"model.layers.13.mlp.gate.weight",
|
| 111 |
+
"model.layers.14.mlp.gate.weight",
|
| 112 |
+
"model.layers.15.mlp.gate.weight",
|
| 113 |
+
"model.layers.16.mlp.gate.weight",
|
| 114 |
+
"model.layers.17.mlp.gate.weight",
|
| 115 |
+
"model.layers.18.mlp.gate.weight",
|
| 116 |
+
"model.layers.19.mlp.gate.weight",
|
| 117 |
+
"model.layers.20.mlp.gate.weight",
|
| 118 |
+
"model.layers.21.mlp.gate.weight",
|
| 119 |
+
"model.layers.22.mlp.gate.weight",
|
| 120 |
+
"model.layers.23.mlp.gate.weight",
|
| 121 |
+
"model.layers.24.mlp.gate.weight",
|
| 122 |
+
"model.layers.25.mlp.gate.weight",
|
| 123 |
+
"model.layers.26.mlp.gate.weight",
|
| 124 |
+
"model.layers.27.mlp.gate.weight",
|
| 125 |
+
"model.layers.28.mlp.gate.weight",
|
| 126 |
+
"model.layers.29.mlp.gate.weight",
|
| 127 |
+
"model.layers.30.mlp.gate.weight",
|
| 128 |
+
"model.layers.31.mlp.gate.weight",
|
| 129 |
+
"model.layers.32.mlp.gate.weight",
|
| 130 |
+
"model.layers.33.mlp.gate.weight",
|
| 131 |
+
"model.layers.34.mlp.gate.weight",
|
| 132 |
+
"model.layers.35.mlp.gate.weight",
|
| 133 |
+
"model.layers.36.mlp.gate.weight",
|
| 134 |
+
"model.layers.37.mlp.gate.weight",
|
| 135 |
+
"model.layers.38.mlp.gate.weight",
|
| 136 |
+
"model.layers.39.mlp.gate.weight",
|
| 137 |
+
"model.layers.40.mlp.gate.weight",
|
| 138 |
+
"model.layers.41.mlp.gate.weight",
|
| 139 |
+
"model.layers.42.mlp.gate.weight",
|
| 140 |
+
"model.layers.43.mlp.gate.weight",
|
| 141 |
+
"model.layers.44.mlp.gate.weight",
|
| 142 |
+
"model.layers.45.mlp.gate.weight",
|
| 143 |
+
"model.layers.46.mlp.gate.weight",
|
| 144 |
+
"model.layers.47.mlp.gate.weight",
|
| 145 |
+
"model.layers.48.mlp.gate.weight",
|
| 146 |
+
"model.layers.49.mlp.gate.weight",
|
| 147 |
+
"model.layers.50.mlp.gate.weight",
|
| 148 |
+
"model.layers.51.mlp.gate.weight",
|
| 149 |
+
"model.layers.52.mlp.gate.weight",
|
| 150 |
+
"model.layers.53.mlp.gate.weight",
|
| 151 |
+
"model.layers.54.mlp.gate.weight",
|
| 152 |
+
"model.layers.55.mlp.gate.weight",
|
| 153 |
+
"model.layers.56.mlp.gate.weight",
|
| 154 |
+
"model.layers.57.mlp.gate.weight",
|
| 155 |
+
"model.layers.58.mlp.gate.weight",
|
| 156 |
+
"model.layers.59.mlp.gate.weight",
|
| 157 |
+
"model.layers.60.mlp.gate.weight",
|
| 158 |
+
"model.layers.61.mlp.gate.weight",
|
| 159 |
+
"lm_head",
|
| 160 |
+
"model.layers.61.eh_proj.weight",
|
| 161 |
+
"model.layers.61.shared_head.head"
|
| 162 |
+
],
|
| 163 |
+
"export": {
|
| 164 |
+
"kv_cache_group": [],
|
| 165 |
+
"min_kv_scale": 0.0,
|
| 166 |
+
"pack_method": "reorder",
|
| 167 |
+
"weight_format": "real_quantized",
|
| 168 |
+
"weight_merge_groups": null
|
| 169 |
+
},
|
| 170 |
+
"global_quant_config": {
|
| 171 |
+
"bias": null,
|
| 172 |
+
"input_tensors": {
|
| 173 |
+
"ch_axis": 1,
|
| 174 |
+
"dtype": "fp8_e4m3",
|
| 175 |
+
"group_size": null,
|
| 176 |
+
"is_dynamic": true,
|
| 177 |
+
"is_scale_quant": false,
|
| 178 |
+
"mx_element_dtype": null,
|
| 179 |
+
"observer_cls": "PerChannelMinMaxObserver",
|
| 180 |
+
"qscheme": "per_channel",
|
| 181 |
+
"round_method": null,
|
| 182 |
+
"scale_calculation_mode": null,
|
| 183 |
+
"scale_format": null,
|
| 184 |
+
"scale_type": null,
|
| 185 |
+
"symmetric": null
|
| 186 |
+
},
|
| 187 |
+
"output_tensors": null,
|
| 188 |
+
"target_device": null,
|
| 189 |
+
"weight": {
|
| 190 |
+
"ch_axis": 0,
|
| 191 |
+
"dtype": "fp8_e4m3",
|
| 192 |
+
"group_size": null,
|
| 193 |
+
"is_dynamic": false,
|
| 194 |
+
"is_scale_quant": false,
|
| 195 |
+
"mx_element_dtype": null,
|
| 196 |
+
"observer_cls": "PerChannelMinMaxObserver",
|
| 197 |
+
"qscheme": "per_channel",
|
| 198 |
+
"round_method": null,
|
| 199 |
+
"scale_calculation_mode": null,
|
| 200 |
+
"scale_format": null,
|
| 201 |
+
"scale_type": null,
|
| 202 |
+
"symmetric": null
|
| 203 |
+
}
|
| 204 |
+
},
|
| 205 |
+
"kv_cache_quant_config": {},
|
| 206 |
+
"layer_quant_config": {},
|
| 207 |
+
"layer_type_quant_config": {},
|
| 208 |
+
"quant_method": "quark",
|
| 209 |
+
"quant_mode": "eager_mode",
|
| 210 |
+
"softmax_quant_spec": null,
|
| 211 |
+
"version": "0.10+b8ad5c1d29"
|
| 212 |
+
},
|
| 213 |
+
"rms_norm_eps": 1e-06,
|
| 214 |
+
"rope_scaling": {
|
| 215 |
+
"beta_fast": 32,
|
| 216 |
+
"beta_slow": 1,
|
| 217 |
+
"factor": 40,
|
| 218 |
+
"mscale": 1.0,
|
| 219 |
+
"mscale_all_dim": 1.0,
|
| 220 |
+
"original_max_position_embeddings": 4096,
|
| 221 |
+
"type": "yarn"
|
| 222 |
+
},
|
| 223 |
+
"rope_theta": 10000,
|
| 224 |
+
"routed_scaling_factor": 2.5,
|
| 225 |
+
"scoring_func": "sigmoid",
|
| 226 |
+
"tie_word_embeddings": false,
|
| 227 |
+
"topk_group": 4,
|
| 228 |
+
"topk_method": "noaux_tc",
|
| 229 |
+
"torch_dtype": "bfloat16",
|
| 230 |
+
"transformers_version": "4.44.2",
|
| 231 |
+
"use_cache": true,
|
| 232 |
+
"v_head_dim": 128,
|
| 233 |
+
"vocab_size": 129280
|
| 234 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 0,
|
| 4 |
+
"eos_token_id": 1,
|
| 5 |
+
"do_sample": true,
|
| 6 |
+
"temperature": 1.0,
|
| 7 |
+
"top_p": 0.95,
|
| 8 |
+
"transformers_version": "4.46.3"
|
| 9 |
+
}
|
model-00001-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd42f0e746b6d25721ad0c24da49e4ba8b25afaae53b9ce0fb77315fd0f0cdb7
|
| 3 |
+
size 5233944152
|
model-00002-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:725ee2a4ca02719321e6e1cd2d3a9e24fc8ae01054fb9ccb76ba4ab317ef026d
|
| 3 |
+
size 4303534408
|
model-00003-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:641ab947e7c261ecdfd22bf2fa9cad957d4a621ee63d0c1e226ae12cdb129292
|
| 3 |
+
size 4303524584
|
model-00004-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:992680d9866443abd39d90152a093102e5f37cf432d69dd23fccaa6f3178c701
|
| 3 |
+
size 4303314880
|
model-00005-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be1189d443978eff401410f4684f85ebe5761957fda65f9b743ae0e86a669f0b
|
| 3 |
+
size 4303534600
|
model-00006-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e676510186fb9b017981e7a61ac538b2d5ce53e1d831d7f9388a5b6b714f104
|
| 3 |
+
size 4308346688
|
model-00007-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b918b10874ad926b88dd4310a2282be90923201beb8cd930590f687070c444a1
|
| 3 |
+
size 4313187232
|
model-00008-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2d340218476724d447614076b590752cfa030654a1d9c6b5b29f6c9e0aa5544
|
| 3 |
+
size 4303534784
|
model-00009-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c39d7625d032a3b585ca80197b24d67e85e3c8b39eaa82464ddbb60f06b5ee00
|
| 3 |
+
size 4303315080
|
model-00010-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6fb9650ddf06c028ab80ae19f18f6014b860d248882e847f98a3455656deb25
|
| 3 |
+
size 4303524144
|
model-00011-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1969e95f0417c78e012b8d2759855bd4691445df045182241cf00d49fff23c0e
|
| 3 |
+
size 4303534832
|
model-00012-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c608d9c9c9271a95c477d1508c688d842f844dd94010ed4c9c3f09d7c17be4d4
|
| 3 |
+
size 1483530632
|
model-00013-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c3b4474d58ba215dd915a531ff6255e9bcd29dd1666432b52f019df41af8018
|
| 3 |
+
size 4303257096
|
model-00014-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c0e3079e0f1dda755b3e410dbd4d5209243a4eb60ed6b06fc33d43072ad5835
|
| 3 |
+
size 4303524536
|
model-00015-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d77fda420226cb5c02779bf329ca55b1a20d2b7d0eefcd4d2e9ad5f6d974de59
|
| 3 |
+
size 4303315096
|
model-00016-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08a342ba9b1e7ce6af8f2ddee8ff294846ee19b7fec50af854b3163a3aeca9b0
|
| 3 |
+
size 4303534384
|
model-00017-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4f8b796127f1a1d96ac270f0833d6b142acfa7cbe3b5c3d531fee3a26f598a9
|
| 3 |
+
size 4303534832
|
model-00018-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6582387f7369c415b9991022df3a595bc9a12ceaf5225fd71ab4d4e8ca537e50
|
| 3 |
+
size 4303314904
|
model-00019-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40ef7ff2f5b5269be75041875d06419ffd59db0f9978a3ce13ff803e92b11dc9
|
| 3 |
+
size 4303524328
|
model-00020-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b178f600dd7539839bc048e2628e3ed031dde5009be6b5e92cf759de04336d19
|
| 3 |
+
size 4303534832
|
model-00021-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81fbc3904f2ec7375e5def71aacfce186c932e2df1b5ddbaea02a391668a73e6
|
| 3 |
+
size 4303315264
|
model-00022-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:385566636d03f117ef15f3e47eaa6faea5d2f0918d9b6e826e0783209cdd0fd4
|
| 3 |
+
size 4303535344
|
model-00023-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e053a687b73b961345a69caec55e870326421badf6f1708fb123a7c11233090
|
| 3 |
+
size 4303315696
|
model-00024-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fc3a61bef721b425f4b43e930a796a002c2f6b4b3f44dc6f51642bc37115b1d
|
| 3 |
+
size 4303524704
|
model-00025-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbb0253f8b4acb3403612dcb67e991c36b50956ce302f2a349cc885ae38ee941
|
| 3 |
+
size 4303535424
|
model-00026-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c39f47edd8c0701523e524b8624f9d99f18f8122e5b1036b8a7c52d60028f861
|
| 3 |
+
size 4303315512
|
model-00027-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b50ef4023fc43b4a295eaa73e3ac1d31fbb9351bc4995482930ecc1b0dcff2d
|
| 3 |
+
size 4303535136
|
model-00028-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c95498741ddebba9d7eecc04ddbf71e42b62dd5227475d6ee199e5cb63b40dcf
|
| 3 |
+
size 4303525176
|
model-00029-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fec91210b3572fa22197a8d2febb0312dadf2a24dc00a39135dd28a374a25c0e
|
| 3 |
+
size 4303315320
|
model-00030-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c33582c18dfd09974e6ee9e058b9af42eaff508aeca69308e69c09081c22816e
|
| 3 |
+
size 4303535328
|
model-00031-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bc708da0d89e4dbb938670f219618ad0ecb9d4e39ee61470d4d5e762e5a51dc
|
| 3 |
+
size 4303315720
|
model-00032-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51de11a67f79eece379d60760cc8a86d15f0db85cd3133e82fb2a9e048224291
|
| 3 |
+
size 4303534928
|
model-00033-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2fa7f408aa164545110de5b06aa416c315b98fd2614980dd1d3d4c36d25ef87
|
| 3 |
+
size 4303525176
|
model-00034-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b02f2fa8bcb4c995882c6dd0184867c25e1d4b5de940597e9d50117fd01cb90
|
| 3 |
+
size 1865417632
|
model-00035-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d04a040085f57d45364a9f859de826a88178506e2c1bff267252aabae80ad7a
|
| 3 |
+
size 4303257680
|
model-00036-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eab3caca85d1597a34ed94999b0b4bd9a8055843f19b42996a4e30979535423b
|
| 3 |
+
size 4303525128
|
model-00037-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35f563a538dc6618ab3282c07a77224962f6571a309bb8bea23f91639c536112
|
| 3 |
+
size 4303315672
|
model-00038-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a09e205cb41dd9423dc7b56680e9c1276b4190ad8ba4a881178ae75e8d319f4
|
| 3 |
+
size 4303534976
|
model-00039-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:252086cb613c2abb3072cd2af17cc5851a503c26f8e59304a442e9b3531fd86c
|
| 3 |
+
size 4303535416
|
model-00040-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6483d57a8e49ac0d8a17e5f0cc5bc5459061e48f5654f06cd414ea804e7ee49b
|
| 3 |
+
size 4303315480
|
model-00041-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c6ac11f251b2d9724f86be27b1ed6d76083a7c2297749b119edd228b423bad6
|
| 3 |
+
size 4303524912
|
model-00042-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adb84f63d462953161b178aa7f18c9dfcd1f622579ebf3a81b3c3a2b6b232c3e
|
| 3 |
+
size 4303535424
|
model-00043-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6429b1326ff1fb4ba63a772cfcb8ef0385540c5ab7754851e154a61ca6e348b6
|
| 3 |
+
size 4303315296
|
model-00044-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee56b05a52f1e1de2a4f08ebdf430174a50e0e2a24289b5110f9c63d9b10c089
|
| 3 |
+
size 4303535344
|
model-00045-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e071bcd280689c028f7e6cebc27dcfb8d2ff9fd9bc98702c9e48c00b908864bc
|
| 3 |
+
size 4303315696
|
model-00046-of-000163.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62a76c1d7ef18ff4eb23d288dbe8c894a0dcaaa44bf90ccc4ead17843cea2baa
|
| 3 |
+
size 4303524704
|