Spaces:
Running
Running
Upgrade model precision to Q8_0 for better quality
Browse files
- Falcon-H1 100M: Q4_K_M → Q8_0
- Gemma-3 270M: Q4_K_M → Q8_0
- ERNIE-4.5 0.3B: Q4_K_M → Q8_0
- Granite-4.0 350M: Q4_K_M → Q8_0
- LFM2 350M: Q4_K_M → Q8_0
- Hunyuan 0.5B: Q4_K_M → Q8_0
- BitCPM4 0.5B: kept at q4_0 (highest available)
- Qwen3 0.6B: kept at Q4_K_M (original precision)
Higher precision = better output quality at cost of slightly larger downloads and slower inference
app.py
CHANGED
|
@@ -33,31 +33,31 @@ AVAILABLE_MODELS = {
|
|
| 33 |
"falcon_h1_100m": {
|
| 34 |
"name": "Falcon-H1 100M",
|
| 35 |
"repo_id": "mradermacher/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
|
| 36 |
-
"filename": "*Q4_K_M.gguf",
|
| 37 |
"max_context": 32768,
|
| 38 |
},
|
| 39 |
"gemma3_270m": {
|
| 40 |
"name": "Gemma-3 270M",
|
| 41 |
"repo_id": "unsloth/gemma-3-270m-it-qat-GGUF",
|
| 42 |
-
"filename": "*Q4_K_M.gguf",
|
| 43 |
"max_context": 32768,
|
| 44 |
},
|
| 45 |
"ernie_300m": {
|
| 46 |
"name": "ERNIE-4.5 0.3B (131K Context)",
|
| 47 |
"repo_id": "unsloth/ERNIE-4.5-0.3B-PT-GGUF",
|
| 48 |
-
"filename": "*Q4_K_M.gguf",
|
| 49 |
"max_context": 131072,
|
| 50 |
},
|
| 51 |
"granite_350m": {
|
| 52 |
"name": "Granite-4.0 350M",
|
| 53 |
"repo_id": "unsloth/granite-4.0-h-350m-GGUF",
|
| 54 |
-
"filename": "*Q4_K_M.gguf",
|
| 55 |
"max_context": 32768,
|
| 56 |
},
|
| 57 |
"lfm2_350m": {
|
| 58 |
"name": "LFM2 350M",
|
| 59 |
"repo_id": "LiquidAI/LFM2-350M-GGUF",
|
| 60 |
-
"filename": "*Q4_K_M.gguf",
|
| 61 |
"max_context": 32768,
|
| 62 |
},
|
| 63 |
"bitcpm4_500m": {
|
|
@@ -69,7 +69,7 @@ AVAILABLE_MODELS = {
|
|
| 69 |
"hunyuan_500m": {
|
| 70 |
"name": "Hunyuan 0.5B (256K Context)",
|
| 71 |
"repo_id": "mradermacher/Hunyuan-0.5B-Instruct-GGUF",
|
| 72 |
-
"filename": "*Q4_K_M.gguf",
|
| 73 |
"max_context": 262144,
|
| 74 |
},
|
| 75 |
"qwen3_600m_q4": {
|
|
|
|
| 33 |
"falcon_h1_100m": {
|
| 34 |
"name": "Falcon-H1 100M",
|
| 35 |
"repo_id": "mradermacher/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
|
| 36 |
+
"filename": "*Q8_0.gguf",
|
| 37 |
"max_context": 32768,
|
| 38 |
},
|
| 39 |
"gemma3_270m": {
|
| 40 |
"name": "Gemma-3 270M",
|
| 41 |
"repo_id": "unsloth/gemma-3-270m-it-qat-GGUF",
|
| 42 |
+
"filename": "*Q8_0.gguf",
|
| 43 |
"max_context": 32768,
|
| 44 |
},
|
| 45 |
"ernie_300m": {
|
| 46 |
"name": "ERNIE-4.5 0.3B (131K Context)",
|
| 47 |
"repo_id": "unsloth/ERNIE-4.5-0.3B-PT-GGUF",
|
| 48 |
+
"filename": "*Q8_0.gguf",
|
| 49 |
"max_context": 131072,
|
| 50 |
},
|
| 51 |
"granite_350m": {
|
| 52 |
"name": "Granite-4.0 350M",
|
| 53 |
"repo_id": "unsloth/granite-4.0-h-350m-GGUF",
|
| 54 |
+
"filename": "*Q8_0.gguf",
|
| 55 |
"max_context": 32768,
|
| 56 |
},
|
| 57 |
"lfm2_350m": {
|
| 58 |
"name": "LFM2 350M",
|
| 59 |
"repo_id": "LiquidAI/LFM2-350M-GGUF",
|
| 60 |
+
"filename": "*Q8_0.gguf",
|
| 61 |
"max_context": 32768,
|
| 62 |
},
|
| 63 |
"bitcpm4_500m": {
|
|
|
|
| 69 |
"hunyuan_500m": {
|
| 70 |
"name": "Hunyuan 0.5B (256K Context)",
|
| 71 |
"repo_id": "mradermacher/Hunyuan-0.5B-Instruct-GGUF",
|
| 72 |
+
"filename": "*Q8_0.gguf",
|
| 73 |
"max_context": 262144,
|
| 74 |
},
|
| 75 |
"qwen3_600m_q4": {
|