Spaces:
Running
Running
feat: Add 4 new models (1B-3B range) with various quantizations
- Granite 3.1 1B-A400M (Q8_0, 1.18 GB) - MoE architecture
- Granite 3.3 2B (Q4_K_M, 1.55 GB) - IBM official
- Youtu-LLM 2B (Q8_0, 2.09 GB) - Tencent, toggle reasoning
- Granite 3.1 3B-A800M (Q4_K_M, 2.02 GB) - MoE architecture
All models:
- Ordered by parameter count (1B → 2B → 2B → 3B)
- Under 4GB limit for HF Spaces
- Community-recommended inference settings
app.py
CHANGED
|
@@ -122,9 +122,9 @@ AVAILABLE_MODELS = {
|
|
| 122 |
},
|
| 123 |
},
|
| 124 |
"qwen3_600m_q4": {
|
| 125 |
-
"name": "Qwen3 0.6B Q4 (
|
| 126 |
"repo_id": "unsloth/Qwen3-0.6B-GGUF",
|
| 127 |
-
"filename": "*
|
| 128 |
"max_context": 32768,
|
| 129 |
"default_temperature": 0.6,
|
| 130 |
"supports_toggle": True,
|
|
@@ -132,7 +132,21 @@ AVAILABLE_MODELS = {
|
|
| 132 |
"temperature": 0.6,
|
| 133 |
"top_p": 0.95,
|
| 134 |
"top_k": 20,
|
| 135 |
-
"repeat_penalty": 1.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
},
|
| 137 |
},
|
| 138 |
"falcon_h1_1.5b_q4": {
|
|
@@ -149,9 +163,9 @@ AVAILABLE_MODELS = {
|
|
| 149 |
},
|
| 150 |
},
|
| 151 |
"qwen3_1.7b_q4": {
|
| 152 |
-
"name": "Qwen3 1.7B Q4",
|
| 153 |
"repo_id": "unsloth/Qwen3-1.7B-GGUF",
|
| 154 |
-
"filename": "*
|
| 155 |
"max_context": 32768,
|
| 156 |
"default_temperature": 0.6,
|
| 157 |
"supports_toggle": True,
|
|
@@ -159,18 +173,60 @@ AVAILABLE_MODELS = {
|
|
| 159 |
"temperature": 0.6,
|
| 160 |
"top_p": 0.95,
|
| 161 |
"top_k": 20,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
"repeat_penalty": 1.05,
|
| 163 |
},
|
| 164 |
},
|
| 165 |
"lfm2_2_6b_transcript": {
|
| 166 |
-
"name": "LFM2 2.6B Transcript",
|
| 167 |
-
"repo_id": "
|
| 168 |
-
"filename": "*
|
| 169 |
-
"max_context":
|
| 170 |
"default_temperature": 0.6,
|
| 171 |
"supports_toggle": False,
|
| 172 |
"inference_settings": {
|
| 173 |
-
"temperature": 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
"top_p": 0.9,
|
| 175 |
"top_k": 40,
|
| 176 |
"repeat_penalty": 1.1,
|
|
|
|
| 122 |
},
|
| 123 |
},
|
| 124 |
"qwen3_600m_q4": {
|
| 125 |
+
"name": "Qwen3 0.6B Q4 (32K Context)",
|
| 126 |
"repo_id": "unsloth/Qwen3-0.6B-GGUF",
|
| 127 |
+
"filename": "*Q4_0.gguf",
|
| 128 |
"max_context": 32768,
|
| 129 |
"default_temperature": 0.6,
|
| 130 |
"supports_toggle": True,
|
|
|
|
| 132 |
"temperature": 0.6,
|
| 133 |
"top_p": 0.95,
|
| 134 |
"top_k": 20,
|
| 135 |
+
"repeat_penalty": 1.0,
|
| 136 |
+
},
|
| 137 |
+
},
|
| 138 |
+
"granite_3_1_1b_q8": {
|
| 139 |
+
"name": "Granite 3.1 1B-A400M Instruct (128K Context)",
|
| 140 |
+
"repo_id": "bartowski/granite-3.1-1b-a400m-instruct-GGUF",
|
| 141 |
+
"filename": "*Q8_0.gguf",
|
| 142 |
+
"max_context": 131072,
|
| 143 |
+
"default_temperature": 0.7,
|
| 144 |
+
"supports_toggle": False,
|
| 145 |
+
"inference_settings": {
|
| 146 |
+
"temperature": 0.7,
|
| 147 |
+
"top_p": 0.9,
|
| 148 |
+
"top_k": 40,
|
| 149 |
+
"repeat_penalty": 1.1,
|
| 150 |
},
|
| 151 |
},
|
| 152 |
"falcon_h1_1.5b_q4": {
|
|
|
|
| 163 |
},
|
| 164 |
},
|
| 165 |
"qwen3_1.7b_q4": {
|
| 166 |
+
"name": "Qwen3 1.7B Q4 (32K Context)",
|
| 167 |
"repo_id": "unsloth/Qwen3-1.7B-GGUF",
|
| 168 |
+
"filename": "*Q4_0.gguf",
|
| 169 |
"max_context": 32768,
|
| 170 |
"default_temperature": 0.6,
|
| 171 |
"supports_toggle": True,
|
|
|
|
| 173 |
"temperature": 0.6,
|
| 174 |
"top_p": 0.95,
|
| 175 |
"top_k": 20,
|
| 176 |
+
"repeat_penalty": 1.0,
|
| 177 |
+
},
|
| 178 |
+
},
|
| 179 |
+
"granite_3_3_2b_q4": {
|
| 180 |
+
"name": "Granite 3.3 2B Instruct (128K Context)",
|
| 181 |
+
"repo_id": "ibm-granite/granite-3.3-2b-instruct-GGUF",
|
| 182 |
+
"filename": "*Q4_K_M.gguf",
|
| 183 |
+
"max_context": 131072,
|
| 184 |
+
"default_temperature": 0.7,
|
| 185 |
+
"supports_toggle": False,
|
| 186 |
+
"inference_settings": {
|
| 187 |
+
"temperature": 0.7,
|
| 188 |
+
"top_p": 0.9,
|
| 189 |
+
"top_k": 40,
|
| 190 |
+
"repeat_penalty": 1.1,
|
| 191 |
+
},
|
| 192 |
+
},
|
| 193 |
+
"youtu_llm_2b_q8": {
|
| 194 |
+
"name": "Youtu-LLM 2B (128K Context)",
|
| 195 |
+
"repo_id": "tencent/Youtu-LLM-2B-GGUF",
|
| 196 |
+
"filename": "*Q8_0.gguf",
|
| 197 |
+
"max_context": 131072,
|
| 198 |
+
"default_temperature": 0.7,
|
| 199 |
+
"supports_toggle": True,
|
| 200 |
+
"inference_settings": {
|
| 201 |
+
"temperature": 0.7,
|
| 202 |
+
"top_p": 0.8,
|
| 203 |
+
"top_k": 20,
|
| 204 |
"repeat_penalty": 1.05,
|
| 205 |
},
|
| 206 |
},
|
| 207 |
"lfm2_2_6b_transcript": {
|
| 208 |
+
"name": "LFM2 2.6B Transcript (8K Context)",
|
| 209 |
+
"repo_id": "LiquidAI/LFM-2.6B-Transcript-GGUF",
|
| 210 |
+
"filename": "*Q4_0.gguf",
|
| 211 |
+
"max_context": 8192,
|
| 212 |
"default_temperature": 0.6,
|
| 213 |
"supports_toggle": False,
|
| 214 |
"inference_settings": {
|
| 215 |
+
"temperature": 0.6,
|
| 216 |
+
"top_p": 0.95,
|
| 217 |
+
"top_k": 20,
|
| 218 |
+
"repeat_penalty": 1.1,
|
| 219 |
+
},
|
| 220 |
+
},
|
| 221 |
+
"granite_3_1_3b_q4": {
|
| 222 |
+
"name": "Granite 3.1 3B-A800M Instruct (128K Context)",
|
| 223 |
+
"repo_id": "bartowski/granite-3.1-3b-a800m-instruct-GGUF",
|
| 224 |
+
"filename": "*Q4_K_M.gguf",
|
| 225 |
+
"max_context": 131072,
|
| 226 |
+
"default_temperature": 0.7,
|
| 227 |
+
"supports_toggle": False,
|
| 228 |
+
"inference_settings": {
|
| 229 |
+
"temperature": 0.7,
|
| 230 |
"top_p": 0.9,
|
| 231 |
"top_k": 40,
|
| 232 |
"repeat_penalty": 1.1,
|