Upload model trained with Unsloth
Browse filesUpload model trained with Unsloth 2x faster
- config.json +124 -0
- generation_config.json +8 -0
- model-00001-of-00021.safetensors +3 -0
- model-00002-of-00021.safetensors +3 -0
- model-00003-of-00021.safetensors +3 -0
- model-00004-of-00021.safetensors +3 -0
- model-00005-of-00021.safetensors +3 -0
- model-00006-of-00021.safetensors +3 -0
- model-00007-of-00021.safetensors +3 -0
- model-00008-of-00021.safetensors +3 -0
- model-00009-of-00021.safetensors +3 -0
- model-00010-of-00021.safetensors +3 -0
- model-00011-of-00021.safetensors +3 -0
- model-00012-of-00021.safetensors +3 -0
- model-00013-of-00021.safetensors +3 -0
- model-00014-of-00021.safetensors +3 -0
- model-00015-of-00021.safetensors +3 -0
- model-00016-of-00021.safetensors +3 -0
- model-00017-of-00021.safetensors +3 -0
- model-00018-of-00021.safetensors +3 -0
- model-00019-of-00021.safetensors +3 -0
- model-00020-of-00021.safetensors +3 -0
- model-00021-of-00021.safetensors +3 -0
- model.safetensors.index.json +0 -0
config.json
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen2ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 151643,
|
| 7 |
+
"dtype": "bfloat16",
|
| 8 |
+
"eos_token_id": 151643,
|
| 9 |
+
"hidden_act": "silu",
|
| 10 |
+
"hidden_size": 8192,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 29568,
|
| 13 |
+
"layer_types": [
|
| 14 |
+
"full_attention",
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention",
|
| 60 |
+
"full_attention",
|
| 61 |
+
"full_attention",
|
| 62 |
+
"full_attention",
|
| 63 |
+
"full_attention",
|
| 64 |
+
"full_attention",
|
| 65 |
+
"full_attention",
|
| 66 |
+
"full_attention",
|
| 67 |
+
"full_attention",
|
| 68 |
+
"full_attention",
|
| 69 |
+
"full_attention",
|
| 70 |
+
"full_attention",
|
| 71 |
+
"full_attention",
|
| 72 |
+
"full_attention",
|
| 73 |
+
"full_attention",
|
| 74 |
+
"full_attention",
|
| 75 |
+
"full_attention",
|
| 76 |
+
"full_attention",
|
| 77 |
+
"full_attention",
|
| 78 |
+
"full_attention",
|
| 79 |
+
"full_attention",
|
| 80 |
+
"full_attention",
|
| 81 |
+
"full_attention",
|
| 82 |
+
"full_attention",
|
| 83 |
+
"full_attention",
|
| 84 |
+
"full_attention",
|
| 85 |
+
"full_attention",
|
| 86 |
+
"full_attention",
|
| 87 |
+
"full_attention",
|
| 88 |
+
"full_attention",
|
| 89 |
+
"full_attention",
|
| 90 |
+
"full_attention",
|
| 91 |
+
"full_attention",
|
| 92 |
+
"full_attention",
|
| 93 |
+
"full_attention"
|
| 94 |
+
],
|
| 95 |
+
"max_position_embeddings": 131072,
|
| 96 |
+
"max_window_layers": 80,
|
| 97 |
+
"model_type": "qwen2",
|
| 98 |
+
"num_attention_heads": 64,
|
| 99 |
+
"num_hidden_layers": 80,
|
| 100 |
+
"num_key_value_heads": 8,
|
| 101 |
+
"pad_token_id": 151665,
|
| 102 |
+
"quantization_config": {
|
| 103 |
+
"bnb_4bit_compute_dtype": "bfloat16",
|
| 104 |
+
"bnb_4bit_quant_type": "nf4",
|
| 105 |
+
"bnb_4bit_use_double_quant": true,
|
| 106 |
+
"llm_int8_enable_fp32_cpu_offload": false,
|
| 107 |
+
"llm_int8_has_fp16_weight": false,
|
| 108 |
+
"llm_int8_skip_modules": null,
|
| 109 |
+
"llm_int8_threshold": 6.0,
|
| 110 |
+
"load_in_4bit": true,
|
| 111 |
+
"load_in_8bit": false,
|
| 112 |
+
"quant_method": "bitsandbytes"
|
| 113 |
+
},
|
| 114 |
+
"rms_norm_eps": 1e-05,
|
| 115 |
+
"rope_scaling": null,
|
| 116 |
+
"rope_theta": 1000000.0,
|
| 117 |
+
"sliding_window": null,
|
| 118 |
+
"tie_word_embeddings": false,
|
| 119 |
+
"transformers_version": "4.57.3",
|
| 120 |
+
"unsloth_version": "2026.1.1",
|
| 121 |
+
"use_cache": true,
|
| 122 |
+
"use_sliding_window": false,
|
| 123 |
+
"vocab_size": 152064
|
| 124 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"eos_token_id": 151643,
|
| 4 |
+
"max_length": 131072,
|
| 5 |
+
"max_new_tokens": 2048,
|
| 6 |
+
"pad_token_id": 151665,
|
| 7 |
+
"transformers_version": "4.57.3"
|
| 8 |
+
}
|
model-00001-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7bbc9e00227a70a86999ea48ec27d72bd64a3047fd7b5c6a94d1f7adde8bb26
|
| 3 |
+
size 2491416720
|
model-00002-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46df2cb5abf5107efcffa95fbf2cd1df3aa3fc2cc308404df15b409ac65abad5
|
| 3 |
+
size 1889219600
|
model-00003-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99d4b1048c6da70820a687124b778afbdd1fe6ccfe5ab4d41fff2e8fb49d3a60
|
| 3 |
+
size 1936253606
|
model-00004-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4f4752a5ce43179d30e1a182215e9b20d87894a57ec82ff6b31fbf0f4987169
|
| 3 |
+
size 1936253712
|
model-00005-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95bd127f71f7dd2c3e6df8d9dc5154694b5d4e010793c6e51d623577dfc5167c
|
| 3 |
+
size 1979587778
|
model-00006-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c17373b37ae44f7074bd353268476f24ceb27abbf4430e3dacc6218c8446f98
|
| 3 |
+
size 1970875175
|
model-00007-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2be80b06e4fcb7b152d91ccb8addfdcafc32087fd751744ff4363a0c346b35d
|
| 3 |
+
size 1936253784
|
model-00008-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f09d1ee85091982bae012d8d75adfa5c0f0aeb55072402eecb3578b9738c061
|
| 3 |
+
size 1979587767
|
model-00009-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cc109614fa3a09c7a8adf0a6c5b1ceecd99e4a6c62203b8f4102a61a5c9a462
|
| 3 |
+
size 1970875170
|
model-00010-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d415e242378fb1679fdd6ec5e9b20d9b96995358bafaf39b43cd85d6f4669ca
|
| 3 |
+
size 1936253786
|
model-00011-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01a0467d6005478c5705fc649fb9383637159522780d2e3ccad89fa1d962e8d8
|
| 3 |
+
size 1979587772
|
model-00012-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe216445b6ed341fa5b409563c149fc95e3717baa982eb1a76cc40db2afb6ba9
|
| 3 |
+
size 1970875171
|
model-00013-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfbb922d54fd987747bc252631ac013a7fdeb141b7c48681d44eb1056e62f891
|
| 3 |
+
size 1936253785
|
model-00014-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c0e46ce18c6e0e252783003badb2cfa91dcb7fbd6b951948bfa9dc9a4adc6b6
|
| 3 |
+
size 1979587776
|
model-00015-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23e4196a607374480efce597acb735b69ae4f2f6e6b281d24356cc913e679d7b
|
| 3 |
+
size 1970875170
|
model-00016-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fd23319455d9fd12ebdf4e00f73a1a26980a80b98cc0a049b72217d90b6be9a
|
| 3 |
+
size 1936253778
|
model-00017-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce38092bb78aaf07ec85769b147fcd6fec41d9ca0dd6571f0207d5836c8882b8
|
| 3 |
+
size 1979587770
|
model-00018-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd4c00cd5982caf37fb68eafb76b6d543f5358eabe6f70bfcfc740727d70a2f7
|
| 3 |
+
size 1970875172
|
model-00019-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d0e5e43baca0b3395ba8bc20c10c96848d4b6a53d4d62c4ea00c79dd72cbba0
|
| 3 |
+
size 1936253786
|
model-00020-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b23ce310f122819e85af06d64ed91c9327943cee4f11cc853d4775aa6a8a16f
|
| 3 |
+
size 2491416704
|
model-00021-of-00021.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8db181f946af7d7b1f1da94cecf43c856493de44d3b4fd11386e04374fc1004
|
| 3 |
+
size 1030654473
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|