Luigi committed on
Commit
bcda366
·
1 Parent(s): f98d497

Upgrade model precision to Q8_0 for better quality

Browse files

- Falcon-H1 100M: Q4_K_M → Q8_0
- Gemma-3 270M: Q4_K_M → Q8_0
- ERNIE-4.5 0.3B: Q4_K_M → Q8_0
- Granite-4.0 350M: Q4_K_M → Q8_0
- LFM2 350M: Q4_K_M → Q8_0
- Hunyuan 0.5B: Q4_K_M → Q8_0
- BitCPM4 0.5B: kept at q4_0 (highest available)
- Qwen3 0.6B: kept at Q4_K_M (original precision)

Higher precision = better output quality at cost of slightly larger downloads and slower inference

Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -33,31 +33,31 @@ AVAILABLE_MODELS = {
33
  "falcon_h1_100m": {
34
  "name": "Falcon-H1 100M",
35
  "repo_id": "mradermacher/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
36
- "filename": "*Q4_K_M.gguf",
37
  "max_context": 32768,
38
  },
39
  "gemma3_270m": {
40
  "name": "Gemma-3 270M",
41
  "repo_id": "unsloth/gemma-3-270m-it-qat-GGUF",
42
- "filename": "*Q4_K_M.gguf",
43
  "max_context": 32768,
44
  },
45
  "ernie_300m": {
46
  "name": "ERNIE-4.5 0.3B (131K Context)",
47
  "repo_id": "unsloth/ERNIE-4.5-0.3B-PT-GGUF",
48
- "filename": "*Q4_K_M.gguf",
49
  "max_context": 131072,
50
  },
51
  "granite_350m": {
52
  "name": "Granite-4.0 350M",
53
  "repo_id": "unsloth/granite-4.0-h-350m-GGUF",
54
- "filename": "*Q4_K_M.gguf",
55
  "max_context": 32768,
56
  },
57
  "lfm2_350m": {
58
  "name": "LFM2 350M",
59
  "repo_id": "LiquidAI/LFM2-350M-GGUF",
60
- "filename": "*Q4_K_M.gguf",
61
  "max_context": 32768,
62
  },
63
  "bitcpm4_500m": {
@@ -69,7 +69,7 @@ AVAILABLE_MODELS = {
69
  "hunyuan_500m": {
70
  "name": "Hunyuan 0.5B (256K Context)",
71
  "repo_id": "mradermacher/Hunyuan-0.5B-Instruct-GGUF",
72
- "filename": "*Q4_K_M.gguf",
73
  "max_context": 262144,
74
  },
75
  "qwen3_600m_q4": {
 
33
  "falcon_h1_100m": {
34
  "name": "Falcon-H1 100M",
35
  "repo_id": "mradermacher/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
36
+ "filename": "*Q8_0.gguf",
37
  "max_context": 32768,
38
  },
39
  "gemma3_270m": {
40
  "name": "Gemma-3 270M",
41
  "repo_id": "unsloth/gemma-3-270m-it-qat-GGUF",
42
+ "filename": "*Q8_0.gguf",
43
  "max_context": 32768,
44
  },
45
  "ernie_300m": {
46
  "name": "ERNIE-4.5 0.3B (131K Context)",
47
  "repo_id": "unsloth/ERNIE-4.5-0.3B-PT-GGUF",
48
+ "filename": "*Q8_0.gguf",
49
  "max_context": 131072,
50
  },
51
  "granite_350m": {
52
  "name": "Granite-4.0 350M",
53
  "repo_id": "unsloth/granite-4.0-h-350m-GGUF",
54
+ "filename": "*Q8_0.gguf",
55
  "max_context": 32768,
56
  },
57
  "lfm2_350m": {
58
  "name": "LFM2 350M",
59
  "repo_id": "LiquidAI/LFM2-350M-GGUF",
60
+ "filename": "*Q8_0.gguf",
61
  "max_context": 32768,
62
  },
63
  "bitcpm4_500m": {
 
69
  "hunyuan_500m": {
70
  "name": "Hunyuan 0.5B (256K Context)",
71
  "repo_id": "mradermacher/Hunyuan-0.5B-Instruct-GGUF",
72
+ "filename": "*Q8_0.gguf",
73
  "max_context": 262144,
74
  },
75
  "qwen3_600m_q4": {