Add 4-bit quantization for Qwen3-1.7B
#5
by
Paulescu
- opened
- Qwen3-1.7B-GGUF/Q4_0.json +14 -0
Qwen3-1.7B-GGUF/Q4_0.json
ADDED
@@ -0,0 +1,14 @@
+{
+  "inference_type": "llama.cpp/text-to-text",
+  "schema_version": "1.0.0",
+  "load_time_parameters": {
+    "model": "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_0.gguf"
+  },
+  "generation_time_parameters": {
+    "sampling_parameters": {
+      "temperature": 0.7,
+      "top_p": 0.8,
+      "repetition_penalty": 1.5
+    }
+  }
+}