Mungert committed
Commit ba41b6a · verified · 0 Parent(s)

Super-squash history to reclaim storage
.gitattributes ADDED
@@ -0,0 +1,70 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-f16.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-f16_q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16_q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-f16_q6_k.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16_q6_k.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-f16_q4_k.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16_q4_k.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q3_k_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_k_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q6_k_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q3_k_s.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_s.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_k_s.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q6_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_0.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_1.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_0_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_1_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_0.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_1.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_0_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_1_l.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_xs.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_xxs.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_s.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_m.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-iq4_xs.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-iq4_nl.gguf filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B.imatrix filter=lfs diff=lfs merge=lfs -text
+ HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16.gguf filter=lfs diff=lfs merge=lfs -text
HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:013a5ab31ad010b74f3a0d0e32221c3cd13e96773ae18a89f10d4dc31433a4f0
+ size 1136722944
HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16_q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdc57c2c9276c9cb9d4b693af9469157d1a9d57a9ca57e95ecdddefc036c2097
+ size 712049664
HyperCLOVAX-SEED-Text-Instruct-0.5B-f16_q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5205d5513aaacc68f65f53fb1531ee9e202ced3592f9aa6fc2c23be47b5ddb8c
+ size 712049664
HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c60055572fb3137ca96023d2c9dbb38daa0cf394b411a58955a8c3280b02ecc
+ size 284280160
HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84010df18f39e53a62b8eaeaa0c1bdd11866b9acdc2c1b14dd1cca9eb4f6fcb4
+ size 276759904
HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_xs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:539698937f21c681d48c21601fe6622a10083c669932e80c390c876cebf79493
+ size 266143072
HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_xxs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d4b46fb2c6feef07e59d570d64fddcd76f8f9793c317e38963b138fc01d03af
+ size 254641504
HyperCLOVAX-SEED-Text-Instruct-0.5B-iq4_nl.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6c0c0222bfc9399e682110e92e80f35a1f545db8a6ea4c07a3ab8ef6dea22b6
+ size 351962464
HyperCLOVAX-SEED-Text-Instruct-0.5B-iq4_xs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34f2ab94a03ba537a5b9260f6f469b1d8777ee4995bf1263a0cb55cc27d910f0
+ size 337806688
HyperCLOVAX-SEED-Text-Instruct-0.5B-q3_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7ccbc891ade8ccb777c93991f9a97221ad1ad9a9251e159b59c0796f7bfe555
+ size 313820512
HyperCLOVAX-SEED-Text-Instruct-0.5B-q3_k_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48cc0c00b794d52a00eac3d5b163e93af0db0db2db8748b8477357a145a555d8
+ size 281232736
HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e3820e057c78ee62f1fbe9a91f79a3cddfec593d4a3a93c25087d85363b4b7b
+ size 322766176
HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_1.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a7c36e3d16c1a52a4fe98873d895659504323599122abe20eb20c92bea6da8a5
+ size 358155616
HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:314880238197184dde5a637eb9a4e383e9e496ea0e4ddc1bfd74928f9cccdcb6
+ size 365577568
HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:35288d0571fc53a5a2c3be4fb3ee09cf7de32d5d32e4c3f557fc93b76e49810a
+ size 357598560
HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06767a5fe1b0c59e06aef073224ae4e50370447515ecd782ed38890a171ddb1c
+ size 393545056
HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_1.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88279f53002d8897c0b75f0cac4a5ef5d90afc4168a241faf1550aff22bf61cc
+ size 428934496
HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31ef42f2f58b27ea90697b5be65e48298c28a88800c33f582ead1ea079c423ec
+ size 418612576
HyperCLOVAX-SEED-Text-Instruct-0.5B-q5_k_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:668462ad54e52e11857da30fb10f231321d15477a271a6a6632a3ae4ad82a813
+ size 414434656
HyperCLOVAX-SEED-Text-Instruct-0.5B-q6_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:836258aa50a6946c63cb154c84c6b31c963c59ab1024bc62d4b1d030a7312e9a
+ size 468747616
HyperCLOVAX-SEED-Text-Instruct-0.5B-q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ce1341ac0a9aa78c6331748876ba17246d167477976ad0b8c570c25bccecfa6
+ size 605881344
HyperCLOVAX-SEED-Text-Instruct-0.5B.imatrix ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ed0679206462d97f1dc7c0450e26c4a73b5d0f7502be3ea3eaa408233134773
+ size 1086948
README.md ADDED
@@ -0,0 +1,280 @@
+ ---
+ license: other
+ license_name: hyperclovax-seed
+ license_link: LICENSE
+ pipeline_tag: text-generation
+ library_name: transformers
+ ---
+
+ # <span style="color: #7FFF7F;">HyperCLOVAX-SEED-Text-Instruct-0.5B GGUF Models</span>
+
+ ## <span style="color: #7F7FFF;">Model Generation Details</span>
+
+ This model was generated using [llama.cpp](https://github.com/ggerganov/llama.cpp) at commit [`5e7d95e2`](https://github.com/ggerganov/llama.cpp/commit/5e7d95e22e386d316f7f659b74c9c34b65507912).
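+
+ As a rough guide, the sketch below shows how a GGUF quantization like the ones in this repo can be produced with llama.cpp's own tools. It is a minimal illustration, not the exact pipeline used for this repo: the local paths, the build location of `llama-quantize`, and the use of a local snapshot of the base model are all assumptions.
+
+ ```python
+ # Minimal reproduction sketch (illustrative paths, not the exact commands used for this repo).
+ # Assumes a built local llama.cpp checkout and a local snapshot of the base model.
+ import subprocess
+
+ # 1) Convert the Hugging Face checkpoint to an F16 GGUF file.
+ subprocess.run([
+     "python", "llama.cpp/convert_hf_to_gguf.py",
+     "path/to/HyperCLOVAX-SEED-Text-Instruct-0.5B",  # local model snapshot (assumption)
+     "--outfile", "HyperCLOVAX-SEED-Text-Instruct-0.5B-f16.gguf",
+     "--outtype", "f16",
+ ], check=True)
+
+ # 2) Quantize the F16 file down to Q4_K_M.
+ subprocess.run([
+     "llama.cpp/build/bin/llama-quantize",  # build path is an assumption
+     "HyperCLOVAX-SEED-Text-Instruct-0.5B-f16.gguf",
+     "HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_m.gguf",
+     "Q4_K_M",
+ ], check=True)
+ ```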
+
+ ## **Choosing the Right Model Format**
+
+ Selecting the correct model format depends on your **hardware capabilities** and **memory constraints**.
+
+ ### **BF16 (Brain Float 16) – Use if BF16 acceleration is available**
+ - A 16-bit floating-point format designed for **faster computation** while retaining good precision.
+ - Provides a **dynamic range similar to FP32** with **lower memory usage**.
+ - Recommended if your hardware supports **BF16 acceleration** (check your device's specs).
+ - Ideal for **high-performance inference** with a **reduced memory footprint** compared to FP32.
+
+ 📌 **Use BF16 if:**
+ ✔ Your hardware has native **BF16 support** (e.g., newer GPUs, TPUs).
+ ✔ You want **higher precision** while saving memory.
+ ✔ You plan to **requantize** the model into another format.
+
+ 📌 **Avoid BF16 if:**
+ ❌ Your hardware does **not** support BF16 (it may fall back to FP32 and run slower).
+ ❌ You need compatibility with older devices that lack BF16 optimization.
+
+ ---
+
+ ### **F16 (Float 16) – More widely supported than BF16**
+ - A 16-bit floating-point format with **high precision**, but a smaller range of values than BF16.
+ - Works on most devices with **FP16 acceleration support** (including many GPUs and some CPUs).
+ - Slightly lower numerical precision than BF16, but generally sufficient for inference.
+
+ 📌 **Use F16 if:**
+ ✔ Your hardware supports **FP16** but **not BF16**.
+ ✔ You need a **balance between speed, memory usage, and accuracy**.
+ ✔ You are running on a **GPU** or another device optimized for FP16 computations.
+
+ 📌 **Avoid F16 if:**
+ ❌ Your device lacks **native FP16 support** (it may run slower than expected).
+ ❌ You have memory limitations.
+
+ ---
+
+ ### **Quantized Models (Q4_K, Q6_K, Q8_0, etc.) – For CPU & Low-VRAM Inference**
+ Quantization reduces model size and memory usage while maintaining as much accuracy as possible.
+ - **Lower-bit models (Q4_K)** → **Best for minimal memory usage**, but may have lower precision.
+ - **Higher-bit models (Q6_K, Q8_0)** → **Better accuracy**, but require more memory.
+
+ 📌 **Use Quantized Models if:**
+ ✔ You are running inference on a **CPU** and need an optimized model.
+ ✔ Your device has **low VRAM** and cannot load full-precision models.
+ ✔ You want to reduce the **memory footprint** while keeping reasonable accuracy.
+
+ 📌 **Avoid Quantized Models if:**
+ ❌ You need **maximum accuracy** (full-precision models are better for this).
+ ❌ Your hardware has enough VRAM for higher-precision formats (BF16/F16).
+
+ ---
+
+ ### **Very Low-Bit Quantization (IQ3_XS, IQ3_S, IQ3_M, Q4_K, Q4_0)**
+ These models are optimized for **extreme memory efficiency**, making them ideal for **low-power devices** or **large-scale deployments** where memory is a critical constraint.
+
+ - **IQ3_XS**: Ultra-low-bit quantization (3-bit) with **extreme memory efficiency**.
+   - **Use case**: Best for **ultra-low-memory devices** where even Q4_K is too large.
+   - **Trade-off**: Lower accuracy compared to higher-bit quantizations.
+
+ - **IQ3_S**: Small block size for **maximum memory efficiency**.
+   - **Use case**: Best for **low-memory devices** where **IQ3_XS** is too aggressive.
+
+ - **IQ3_M**: Medium block size for better accuracy than **IQ3_S**.
+   - **Use case**: Suitable for **low-memory devices** where **IQ3_S** is too limiting.
+
+ - **Q4_K**: 4-bit quantization with **block-wise optimization** for better accuracy.
+   - **Use case**: Best for **low-memory devices** where **Q6_K** is too large.
+
+ - **Q4_0**: Pure 4-bit quantization, optimized for **ARM devices**.
+   - **Use case**: Best for **ARM-based devices** or **low-memory environments**.
+
+ ---
+
+ ### **Summary Table: Model Format Selection**
+
+ | Model Format | Precision | Memory Usage | Device Requirements | Best Use Case |
+ |--------------|-----------|--------------|---------------------|---------------|
+ | **BF16** | Highest | High | BF16-supported GPU/CPUs | High-speed inference with reduced memory |
+ | **F16** | High | High | FP16-supported devices | GPU inference when BF16 isn't available |
+ | **Q4_K** | Medium-Low | Low | CPU or low-VRAM devices | Best for memory-constrained environments |
+ | **Q6_K** | Medium | Moderate | CPU with more memory | Better accuracy while still being quantized |
+ | **Q8_0** | High | Moderate | CPU or GPU with enough VRAM | Best accuracy among quantized models |
+ | **IQ3_XS** | Very Low | Very Low | Ultra-low-memory devices | Extreme memory efficiency, low accuracy |
+ | **Q4_0** | Low | Low | ARM or low-memory devices | llama.cpp can optimize for ARM devices |
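+
+ Once you have picked a file, a minimal local-inference sketch looks like the following. It assumes the `llama-cpp-python` bindings are installed (`pip install llama-cpp-python`) and that the chosen GGUF file is already in the working directory; the thread count is just an example value:
+
+ ```python
+ # Minimal local-inference sketch using the llama-cpp-python bindings (assumed installed).
+ from llama_cpp import Llama
+
+ llm = Llama(
+     model_path="HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_m.gguf",  # any variant from this repo
+     n_ctx=4096,   # the model supports up to a 4K context
+     n_threads=4,  # tune for your CPU
+ )
+
+ out = llm.create_chat_completion(
+     messages=[{"role": "user", "content": "Introduce yourself in one sentence."}],
+     max_tokens=128,
+ )
+ print(out["choices"][0]["message"]["content"])
+ ```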
+
+ ---
+
+ ## **Included Files & Details**
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16.gguf`
+ - Model weights preserved in **BF16**.
+ - Use this if you want to **requantize** the model into a different format.
+ - Best if your device supports **BF16 acceleration**.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-f16.gguf`
+ - Model weights stored in **F16**.
+ - Use if your device supports **FP16**, especially if BF16 is not available.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-bf16_q8_0.gguf`
+ - **Output & embeddings** remain in **BF16**.
+ - All other layers quantized to **Q8_0**.
+ - Use if your device supports **BF16** and you want a quantized version.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-f16_q8_0.gguf`
+ - **Output & embeddings** remain in **F16**.
+ - All other layers quantized to **Q8_0**.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k.gguf`
+ - **Output & embeddings** quantized to **Q8_0**.
+ - All other layers quantized to **Q4_K**.
+ - Good for **CPU inference** with limited memory.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_s.gguf`
+ - Smallest **Q4_K** variant, using less memory at the cost of accuracy.
+ - Best for **very low-memory setups**.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-q6_k.gguf`
+ - **Output & embeddings** quantized to **Q8_0**.
+ - All other layers quantized to **Q6_K**.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-q8_0.gguf`
+ - Fully **Q8_0** quantized model for better accuracy.
+ - Requires **more memory** but offers higher precision.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_xs.gguf`
+ - **IQ3_XS** quantization, optimized for **extreme memory efficiency**.
+ - Best for **ultra-low-memory devices**.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-iq3_m.gguf`
+ - **IQ3_M** quantization, offering a **medium block size** for better accuracy.
+ - Suitable for **low-memory devices**.
+
+ ### `HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_0.gguf`
+ - Pure **Q4_0** quantization, optimized for **ARM devices**.
+ - Best for **low-memory environments**.
+ - Prefer **IQ4_NL** if you need better accuracy.
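+
+ To fetch a single file rather than cloning the whole repo, `huggingface_hub` works well. A small sketch, assuming this repo's id follows the usual `Mungert/<model>-GGUF` pattern (verify the exact repo id on the model page):
+
+ ```python
+ # Download one quantization variant from the Hub (repo id assumed; check the model page).
+ from huggingface_hub import hf_hub_download
+
+ path = hf_hub_download(
+     repo_id="Mungert/HyperCLOVAX-SEED-Text-Instruct-0.5B-GGUF",  # assumption
+     filename="HyperCLOVAX-SEED-Text-Instruct-0.5B-q4_k_m.gguf",
+ )
+ print("Downloaded to:", path)
+ ```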
+
+ # <span id="testllm" style="color: #7F7FFF;">🚀 If you find these models useful</span>
+ ❤ **Please click "Like" if you find this useful!**
+ Help me test my **AI-Powered Network Monitor Assistant** with **quantum-ready security checks**:
+ 👉 [Quantum Network Monitor](https://readyforquantum.com/dashboard/?assistant=open&utm_source=huggingface&utm_medium=referral&utm_campaign=huggingface_repo_readme)
+
+ 💬 **How to test**:
+ Choose an **AI assistant type**:
+ - `TurboLLM` (GPT-4o-mini)
+ - `HugLLM` (Hugging Face open-source)
+ - `TestLLM` (experimental CPU-only)
+
+ ### **What I'm Testing**
+ I'm pushing the limits of **small open-source models for AI network monitoring**, specifically:
+ - **Function calling** against live network services
+ - **How small can a model go** while still handling:
+   - Automated **Nmap scans**
+   - **Quantum-readiness checks**
+   - **Network monitoring tasks**
+
+ 🟡 **TestLLM** – Current experimental model (llama.cpp on 2 CPU threads):
+ - ✅ **Zero-configuration setup**
+ - ⏳ 30s load time (slow inference but **no API costs**)
+ - 🔧 **Help wanted!** If you're into **edge-device AI**, let's collaborate!
+
+ ### **Other Assistants**
+ 🟢 **TurboLLM** – Uses **gpt-4o-mini** for:
+ - **Creating custom cmd processors** to run .NET code on Quantum Network Monitor Agents
+ - **Real-time network diagnostics and monitoring**
+ - **Security audits**
+ - **Penetration testing** (Nmap/Metasploit)
+
+ 🔵 **HugLLM** – Latest open-source models:
+ - 🌐 Runs on the Hugging Face Inference API
+
+ ### 💡 **Example commands you could test**:
+ 1. `"Give me info on my website's SSL certificate"`
+ 2. `"Check if my server is using quantum safe encryption for communication"`
+ 3. `"Run a comprehensive security audit on my server"`
+ 4. `"Create a cmd processor to .. (whatever you want)"` (note: you need to install a Quantum Network Monitor Agent to run the .NET code. This is a very flexible and powerful feature, so use it with caution!)
+
+ ### Final Word
+
+ I fund the servers used to create these model files, run the Quantum Network Monitor service, and pay for inference from Novita and OpenAI, all out of my own pocket. All the code behind the model creation and the Quantum Network Monitor project is [open source](https://github.com/Mungert69). Feel free to use whatever you find helpful.
+
+ If you appreciate the work, please consider [buying me a coffee](https://www.buymeacoffee.com/mahadeva) ☕. Your support helps cover service costs and allows me to raise token limits for everyone.
+
+ I'm also open to job opportunities or sponsorship.
+
+ Thank you! 😊
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/65265ab8f8db96cffcb969dc/szGAraJ_ZawG0kozH5yPi.png)
+
+ ## Overview
+
+ HyperCLOVAX-SEED-Text-Instruct-0.5B is a text-to-text model with instruction-following capabilities that excels at understanding the Korean language and culture. Compared to external competitors of similar scale, it demonstrates improved mathematical performance and a substantial enhancement in Korean language capability. It is currently the smallest model released by the HyperCLOVA X team, representing a lightweight solution suitable for deployment in resource-constrained environments such as edge devices. It supports a maximum context length of 4K tokens and functions as a versatile small model applicable to a wide range of tasks. The total cost of a single training run for HyperCLOVAX-SEED-Text-Instruct-0.5B was 4.358K A100 GPU hours (approximately USD 6.537K), which is 39 times lower than the cost of training the `QWEN2.5-0.5B-instruct` model.
+
+ ## Basic Information
+
+ - **Architecture**: Transformer-based (dense model)
+ - **Parameters**: 0.57 B (total); 0.45 B (excluding token embeddings; embeddings are tied)
+ - **Input/Output Format**: Text / Text
+ - **Maximum Context Length**: 4K tokens
+ - **Knowledge Cutoff Date**: Trained on data up to January 2025
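+
+ A minimal sketch for sanity-checking the advertised context length against the base model's configuration (the `max_position_embeddings` attribute name is the usual transformers convention, assumed to apply here):
+
+ ```python
+ # Verify the 4K-context claim from the base model's config.
+ from transformers import AutoConfig
+
+ cfg = AutoConfig.from_pretrained("naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B")
+ print(cfg.max_position_embeddings)  # expected: 4096
+ ```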
+
+ ## Training and Data
+
+ The training dataset for HyperCLOVAX-SEED-Text-Instruct-0.5B consists of diverse sources, including the high-quality data accumulated during the development of HyperCLOVA X. Training was conducted in three main stages:
+ 1. **Pretraining**: Knowledge acquisition using high-quality data and a high-performance pretrained model.
+ 2. **Rejection Sampling Fine-Tuning (RFT)**: Enhancement of multi-domain knowledge and complex reasoning capabilities.
+ 3. **Supervised Fine-Tuning (SFT)**: Improvement of instruction-following proficiency.
+
+ ## Training Cost
+
+ HyperCLOVAX-SEED-Text-Instruct-0.5B leveraged HyperCLOVA X's lightweight training process and high-quality data to achieve significantly lower training costs than industry-leading competitors of similar scale. Excluding the SFT stage, a single pretraining run incurred:
+
+ | Pretraining Cost Category | HyperCLOVAX-SEED-Text-Instruct-0.5B | QWEN2.5-0.5B-instruct |
+ |---------------------------|-------------------------------------|-----------------------|
+ | **A100 GPU Hours** | 4.358 K | 169.257 K |
+ | **Cost (USD)** | 6.537 K | 253.886 K |
+
+ This represents approximately a 39× reduction in pretraining cost relative to `QWEN2.5-0.5B-instruct`.
+
+ ## Benchmarks
+
+ | **Model** | **KMMLU (5-shot, acc)** | **HAE-RAE (5-shot, acc)** | **CLIcK (5-shot, acc)** | **KoBEST (5-shot, acc)** |
+ | --- | --- | --- | --- | --- |
+ | HyperCLOVAX-SEED-Text-Base-0.5B | 0.4181 | 0.6370 | 0.5373 | 0.6963 |
+ | HyperCLOVAX-SEED-Text-Instruct-0.5B | 0.3815 | 0.5619 | 0.4446 | 0.6299 |
+ | QWEN2.5-0.5B-instruct | 0.2968 | 0.3428 | 0.3805 | 0.5025 |
+
+ ## HuggingFace Usage Example
+
+ ### Python Code
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model = AutoModelForCausalLM.from_pretrained("naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B").to(device="cuda")
+ tokenizer = AutoTokenizer.from_pretrained("naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B")
+
+ chat = [
+     {"role": "tool_list", "content": ""},
+     # System prompt (Korean): 'The AI language model is named "CLOVA X" and was made by NAVER. / Today is Thursday, April 24, 2025.'
+     {"role": "system", "content": "- AI 언어모델의 이름은 \"CLOVA X\" 이며 네이버에서 만들었다.\n- 오늘은 2025년 04월 24일(목)이다."},
+     # User turn (Korean): 'Explain the relationship between the Schrödinger equation and quantum mechanics in as much detail as possible.'
+     {"role": "user", "content": "슈뢰딩거 방정식과 양자역학의 관계를 최대한 자세히 알려줘."},
+ ]
+
+ inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_dict=True, return_tensors="pt")
+ inputs = inputs.to(device="cuda")
+ output_ids = model.generate(**inputs, max_length=1024, stop_strings=["<|endofturn|>", "<|stop|>"], repetition_penalty=1.2, tokenizer=tokenizer)
+ print(tokenizer.batch_decode(output_ids))
+ ```
+
+ ### Result
+ ```bash
+ ['<|im_start|>tool_list\n<|im_end|>\n<|im_start|>system\n- AI 언어모델의 이름은 "CLOVA X" 이며 네이버에서 만들었다.\n- 오늘은 2025년 04월 24일(목)이다.<|im_end|>\n<|im_start|>user\n슈뢰딩거 방정식과 양자역학의 관계를 최대한 자세히 알려줘.<|im_end|>\n<|im_start|>assistant\n양자역학은 슈뢰딩거 방정식을 통해 물질과 에너지, 공간 등의 현상을 설명합니다.\n\n**1. 슈뢰딩거 방정식**\n\n슈뢰딩거는 파동함수를 이용하여 입자의 위치와 운동량을 계산할 수 있다고 주장했습니다. 이를 위해 다음과 같은 식으로 표현됩니다:\n\n$$\\frac{\\partial \\psi}{\\partial t} = iH \\nabla^2 \\psi + V(x)\\psi $$\n\n여기서 $\\psi$는 파동함수이고 $i$는 허수 단위입니다. 여기서 $t$는 시간, $x$는 공간 좌표이며, $H$는 해밀턴 상수로 시스템의 에너지를 나타냅니다. 또한 $V(x)$는 외부 힘이나 장벽에 의해 영향을 받는 부분을 나타내는 함수로, 일반적으로 전위장을 사용합니다.\n\n**2. 양자역학과 슈뢰딩거 방정식의 관계**\n\n양자역학에서는 슈뢰딩거 방정식이 매우 중요한 역할을 합니다. 이는 모든 물리적 시스템이 불확정성 원리에 따라 행동을 하며, 이러한 시스템들은 확률적으로 상태를 가질 수밖에 없기 때문입니다. 따라서 슈뢰딩거 방정식은 양자역학을 수학적으로 모델링하는 핵심적인 도구 중 하나입니다.\n\n예를 들어, 원자핵 내의 전자들의 상태는 슈뢰딩거 방정식에 의해 결정되며, 이는 물리학적 법칙을 따르는 것으로 보입니다. 또한, 광전 효과에서도 슈뢰딩거 방정식은 빛이 물질 내에서 어떻게 흡수되고 반사되는지를 예측하는데 사용됩니다.\n\n**3. 응용 분야**\n\n슈뢰딩거 방정식은 다양한 분야에서 활용되고 있습니다. 예를 들면, 반도체 기술에서의 트랜지스터 설계, 핵물리학에서의 방사성 붕괴 연구 등이 있으며, 이는 모두 슈뢰딩거 방정식을 기반으로 한 이론적 기반 위에서 이루어집니다.\n\n또한, 현대 과학 기술의 발전에도 큰 기여를 하고 있는데, 특히 인공지능(AI), 컴퓨터 시뮬레이션 등에서 복잡한 문제를 해결하고 새로운 지식을 창출하기 위한 기초가 되고 있습니다.\n\n결론적으로, 슈뢰딩거 방정식은 양자역학의 기본 개념들을 이해하고 해석하며, 그 결과로서 많은 혁신적이고 실용적인 기술을 가능하게 했습니다. 이는 양자역학의 중요성을 보여주는 대표적인 예시라고 할 수 있습니다.<|im_end|><|endofturn|>']
+ ```