Add 4-bit quantization for Qwen3-1.7B

#5
by Paulescu - opened
Files changed (1)
  1. Qwen3-1.7B-GGUF/Q4_0.json +14 -0
Qwen3-1.7B-GGUF/Q4_0.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+{
+  "inference_type": "llama.cpp/text-to-text",
+  "schema_version": "1.0.0",
+  "load_time_parameters": {
+    "model": "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_0.gguf"
+  },
+  "generation_time_parameters": {
+    "sampling_parameters": {
+      "temperature": 0.7,
+      "top_p": 0.8,
+      "repetition_penalty": 1.5
+    }
+  }
+}