Madras1 committed on
Commit
fc25680
·
verified ·
1 Parent(s): 7082700

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

config.json ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8192,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 29568,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention",
72
+ "full_attention",
73
+ "full_attention",
74
+ "full_attention",
75
+ "full_attention",
76
+ "full_attention",
77
+ "full_attention",
78
+ "full_attention",
79
+ "full_attention",
80
+ "full_attention",
81
+ "full_attention",
82
+ "full_attention",
83
+ "full_attention",
84
+ "full_attention",
85
+ "full_attention",
86
+ "full_attention",
87
+ "full_attention",
88
+ "full_attention",
89
+ "full_attention",
90
+ "full_attention",
91
+ "full_attention",
92
+ "full_attention",
93
+ "full_attention"
94
+ ],
95
+ "max_position_embeddings": 131072,
96
+ "max_window_layers": 80,
97
+ "model_type": "qwen2",
98
+ "num_attention_heads": 64,
99
+ "num_hidden_layers": 80,
100
+ "num_key_value_heads": 8,
101
+ "pad_token_id": 151665,
102
+ "quantization_config": {
103
+ "bnb_4bit_compute_dtype": "bfloat16",
104
+ "bnb_4bit_quant_type": "nf4",
105
+ "bnb_4bit_use_double_quant": true,
106
+ "llm_int8_enable_fp32_cpu_offload": false,
107
+ "llm_int8_has_fp16_weight": false,
108
+ "llm_int8_skip_modules": null,
109
+ "llm_int8_threshold": 6.0,
110
+ "load_in_4bit": true,
111
+ "load_in_8bit": false,
112
+ "quant_method": "bitsandbytes"
113
+ },
114
+ "rms_norm_eps": 1e-05,
115
+ "rope_scaling": null,
116
+ "rope_theta": 1000000.0,
117
+ "sliding_window": null,
118
+ "tie_word_embeddings": false,
119
+ "transformers_version": "4.57.3",
120
+ "unsloth_version": "2026.1.1",
121
+ "use_cache": true,
122
+ "use_sliding_window": false,
123
+ "vocab_size": 152064
124
+ }
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_length": 131072,
5
+ "max_new_tokens": 2048,
6
+ "pad_token_id": 151665,
7
+ "transformers_version": "4.57.3"
8
+ }
model-00001-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7bbc9e00227a70a86999ea48ec27d72bd64a3047fd7b5c6a94d1f7adde8bb26
3
+ size 2491416720
model-00002-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46df2cb5abf5107efcffa95fbf2cd1df3aa3fc2cc308404df15b409ac65abad5
3
+ size 1889219600
model-00003-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99d4b1048c6da70820a687124b778afbdd1fe6ccfe5ab4d41fff2e8fb49d3a60
3
+ size 1936253606
model-00004-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f4752a5ce43179d30e1a182215e9b20d87894a57ec82ff6b31fbf0f4987169
3
+ size 1936253712
model-00005-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95bd127f71f7dd2c3e6df8d9dc5154694b5d4e010793c6e51d623577dfc5167c
3
+ size 1979587778
model-00006-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c17373b37ae44f7074bd353268476f24ceb27abbf4430e3dacc6218c8446f98
3
+ size 1970875175
model-00007-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2be80b06e4fcb7b152d91ccb8addfdcafc32087fd751744ff4363a0c346b35d
3
+ size 1936253784
model-00008-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f09d1ee85091982bae012d8d75adfa5c0f0aeb55072402eecb3578b9738c061
3
+ size 1979587767
model-00009-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc109614fa3a09c7a8adf0a6c5b1ceecd99e4a6c62203b8f4102a61a5c9a462
3
+ size 1970875170
model-00010-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d415e242378fb1679fdd6ec5e9b20d9b96995358bafaf39b43cd85d6f4669ca
3
+ size 1936253786
model-00011-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a0467d6005478c5705fc649fb9383637159522780d2e3ccad89fa1d962e8d8
3
+ size 1979587772
model-00012-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe216445b6ed341fa5b409563c149fc95e3717baa982eb1a76cc40db2afb6ba9
3
+ size 1970875171
model-00013-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfbb922d54fd987747bc252631ac013a7fdeb141b7c48681d44eb1056e62f891
3
+ size 1936253785
model-00014-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c0e46ce18c6e0e252783003badb2cfa91dcb7fbd6b951948bfa9dc9a4adc6b6
3
+ size 1979587776
model-00015-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23e4196a607374480efce597acb735b69ae4f2f6e6b281d24356cc913e679d7b
3
+ size 1970875170
model-00016-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd23319455d9fd12ebdf4e00f73a1a26980a80b98cc0a049b72217d90b6be9a
3
+ size 1936253778
model-00017-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce38092bb78aaf07ec85769b147fcd6fec41d9ca0dd6571f0207d5836c8882b8
3
+ size 1979587770
model-00018-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd4c00cd5982caf37fb68eafb76b6d543f5358eabe6f70bfcfc740727d70a2f7
3
+ size 1970875172
model-00019-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d0e5e43baca0b3395ba8bc20c10c96848d4b6a53d4d62c4ea00c79dd72cbba0
3
+ size 1936253786
model-00020-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b23ce310f122819e85af06d64ed91c9327943cee4f11cc853d4775aa6a8a16f
3
+ size 2491416704
model-00021-of-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8db181f946af7d7b1f1da94cecf43c856493de44d3b4fd11386e04374fc1004
3
+ size 1030654473
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff