Spaces:
Running
Running
Reorder models by total parameter count (smallest to largest)
Browse files
Correct order:
100M → 270M → 300M → 350M → 350M → 500M → 500M → 600M → 1.5B → 1.7B → 2.6B → 4B → 7B
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -30,19 +30,6 @@ MAX_USABLE_CTX = 32768
|
|
| 30 |
|
| 31 |
# Available models registry - ordered by parameter count (smallest to largest)
|
| 32 |
AVAILABLE_MODELS = {
|
| 33 |
-
"granite4_tiny_q3": {
|
| 34 |
-
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 35 |
-
"repo_id": "unsloth/granite-4.0-h-tiny-GGUF",
|
| 36 |
-
"filename": "*Q3_K_M.gguf",
|
| 37 |
-
"max_context": 131072,
|
| 38 |
-
"supports_toggle": False,
|
| 39 |
-
"inference_settings": {
|
| 40 |
-
"temperature": 0.0,
|
| 41 |
-
"top_p": 1.0,
|
| 42 |
-
"top_k": 0,
|
| 43 |
-
"repeat_penalty": 1.1,
|
| 44 |
-
},
|
| 45 |
-
},
|
| 46 |
"falcon_h1_100m": {
|
| 47 |
"name": "Falcon-H1 100M",
|
| 48 |
"repo_id": "mradermacher/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
|
|
@@ -152,19 +139,6 @@ AVAILABLE_MODELS = {
|
|
| 152 |
"repeat_penalty": 1.05,
|
| 153 |
},
|
| 154 |
},
|
| 155 |
-
"lfm2_2_6b_transcript": {
|
| 156 |
-
"name": "LFM2 2.6B Transcript",
|
| 157 |
-
"repo_id": "mradermacher/LFM2-2.6B-Transcript-GGUF",
|
| 158 |
-
"filename": "*Q4_K_M.gguf",
|
| 159 |
-
"max_context": 32768,
|
| 160 |
-
"supports_toggle": False,
|
| 161 |
-
"inference_settings": {
|
| 162 |
-
"temperature": 0.3,
|
| 163 |
-
"top_p": 0.9,
|
| 164 |
-
"top_k": 40,
|
| 165 |
-
"repeat_penalty": 1.1,
|
| 166 |
-
},
|
| 167 |
-
},
|
| 168 |
"qwen3_1.7b_q4": {
|
| 169 |
"name": "Qwen3 1.7B Q4",
|
| 170 |
"repo_id": "unsloth/Qwen3-1.7B-GGUF",
|
|
@@ -178,6 +152,19 @@ AVAILABLE_MODELS = {
|
|
| 178 |
"repeat_penalty": 1.05,
|
| 179 |
},
|
| 180 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
"qwen3_4b_thinking_q3": {
|
| 182 |
"name": "Qwen3 4B Thinking (256K Context)",
|
| 183 |
"repo_id": "unsloth/Qwen3-4B-Thinking-2507-GGUF",
|
|
@@ -191,6 +178,19 @@ AVAILABLE_MODELS = {
|
|
| 191 |
"repeat_penalty": 1.0,
|
| 192 |
},
|
| 193 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
}
|
| 195 |
|
| 196 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
|
|
| 30 |
|
| 31 |
# Available models registry - ordered by parameter count (smallest to largest)
|
| 32 |
AVAILABLE_MODELS = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"falcon_h1_100m": {
|
| 34 |
"name": "Falcon-H1 100M",
|
| 35 |
"repo_id": "mradermacher/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
|
|
|
|
| 139 |
"repeat_penalty": 1.05,
|
| 140 |
},
|
| 141 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
"qwen3_1.7b_q4": {
|
| 143 |
"name": "Qwen3 1.7B Q4",
|
| 144 |
"repo_id": "unsloth/Qwen3-1.7B-GGUF",
|
|
|
|
| 152 |
"repeat_penalty": 1.05,
|
| 153 |
},
|
| 154 |
},
|
| 155 |
+
"lfm2_2_6b_transcript": {
|
| 156 |
+
"name": "LFM2 2.6B Transcript",
|
| 157 |
+
"repo_id": "mradermacher/LFM2-2.6B-Transcript-GGUF",
|
| 158 |
+
"filename": "*Q4_K_M.gguf",
|
| 159 |
+
"max_context": 32768,
|
| 160 |
+
"supports_toggle": False,
|
| 161 |
+
"inference_settings": {
|
| 162 |
+
"temperature": 0.3,
|
| 163 |
+
"top_p": 0.9,
|
| 164 |
+
"top_k": 40,
|
| 165 |
+
"repeat_penalty": 1.1,
|
| 166 |
+
},
|
| 167 |
+
},
|
| 168 |
"qwen3_4b_thinking_q3": {
|
| 169 |
"name": "Qwen3 4B Thinking (256K Context)",
|
| 170 |
"repo_id": "unsloth/Qwen3-4B-Thinking-2507-GGUF",
|
|
|
|
| 178 |
"repeat_penalty": 1.0,
|
| 179 |
},
|
| 180 |
},
|
| 181 |
+
"granite4_tiny_q3": {
|
| 182 |
+
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 183 |
+
"repo_id": "unsloth/granite-4.0-h-tiny-GGUF",
|
| 184 |
+
"filename": "*Q3_K_M.gguf",
|
| 185 |
+
"max_context": 131072,
|
| 186 |
+
"supports_toggle": False,
|
| 187 |
+
"inference_settings": {
|
| 188 |
+
"temperature": 0.0,
|
| 189 |
+
"top_p": 1.0,
|
| 190 |
+
"top_k": 0,
|
| 191 |
+
"repeat_penalty": 1.1,
|
| 192 |
+
},
|
| 193 |
+
},
|
| 194 |
}
|
| 195 |
|
| 196 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|