Luigi committed on
Commit
70126c5
·
1 Parent(s): cc9a1a9

feat: Add 4 new models (1B-3B range) with various quantizations

Browse files

- Granite 3.1 1B-A400M (Q8_0, 1.18 GB) - MoE architecture
- Granite 3.3 2B (Q4_K_M, 1.55 GB) - IBM official
- Youtu-LLM 2B (Q8_0, 2.09 GB) - Tencent, toggle reasoning
- Granite 3.1 3B-A800M (Q4_K_M, 2.02 GB) - MoE architecture

All models:
- Ordered by parameter count (1B → 2B → 2B → 3B)
- Under 4GB limit for HF Spaces
- Community-recommended inference settings

Files changed (1) hide show
  1. app.py +66 -10
app.py CHANGED
@@ -122,9 +122,9 @@ AVAILABLE_MODELS = {
122
  },
123
  },
124
  "qwen3_600m_q4": {
125
- "name": "Qwen3 0.6B Q4 (Default)",
126
  "repo_id": "unsloth/Qwen3-0.6B-GGUF",
127
- "filename": "*Q4_K_M.gguf",
128
  "max_context": 32768,
129
  "default_temperature": 0.6,
130
  "supports_toggle": True,
@@ -132,7 +132,21 @@ AVAILABLE_MODELS = {
132
  "temperature": 0.6,
133
  "top_p": 0.95,
134
  "top_k": 20,
135
- "repeat_penalty": 1.05,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  },
137
  },
138
  "falcon_h1_1.5b_q4": {
@@ -149,9 +163,9 @@ AVAILABLE_MODELS = {
149
  },
150
  },
151
  "qwen3_1.7b_q4": {
152
- "name": "Qwen3 1.7B Q4",
153
  "repo_id": "unsloth/Qwen3-1.7B-GGUF",
154
- "filename": "*Q4_K_M.gguf",
155
  "max_context": 32768,
156
  "default_temperature": 0.6,
157
  "supports_toggle": True,
@@ -159,18 +173,60 @@ AVAILABLE_MODELS = {
159
  "temperature": 0.6,
160
  "top_p": 0.95,
161
  "top_k": 20,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  "repeat_penalty": 1.05,
163
  },
164
  },
165
  "lfm2_2_6b_transcript": {
166
- "name": "LFM2 2.6B Transcript",
167
- "repo_id": "mradermacher/LFM2-2.6B-Transcript-GGUF",
168
- "filename": "*Q4_K_M.gguf",
169
- "max_context": 32768,
170
  "default_temperature": 0.6,
171
  "supports_toggle": False,
172
  "inference_settings": {
173
- "temperature": 0.3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  "top_p": 0.9,
175
  "top_k": 40,
176
  "repeat_penalty": 1.1,
 
122
  },
123
  },
124
  "qwen3_600m_q4": {
125
+ "name": "Qwen3 0.6B Q4 (32K Context)",
126
  "repo_id": "unsloth/Qwen3-0.6B-GGUF",
127
+ "filename": "*Q4_0.gguf",
128
  "max_context": 32768,
129
  "default_temperature": 0.6,
130
  "supports_toggle": True,
 
132
  "temperature": 0.6,
133
  "top_p": 0.95,
134
  "top_k": 20,
135
+ "repeat_penalty": 1.0,
136
+ },
137
+ },
138
+ "granite_3_1_1b_q8": {
139
+ "name": "Granite 3.1 1B-A400M Instruct (128K Context)",
140
+ "repo_id": "bartowski/granite-3.1-1b-a400m-instruct-GGUF",
141
+ "filename": "*Q8_0.gguf",
142
+ "max_context": 131072,
143
+ "default_temperature": 0.7,
144
+ "supports_toggle": False,
145
+ "inference_settings": {
146
+ "temperature": 0.7,
147
+ "top_p": 0.9,
148
+ "top_k": 40,
149
+ "repeat_penalty": 1.1,
150
  },
151
  },
152
  "falcon_h1_1.5b_q4": {
 
163
  },
164
  },
165
  "qwen3_1.7b_q4": {
166
+ "name": "Qwen3 1.7B Q4 (32K Context)",
167
  "repo_id": "unsloth/Qwen3-1.7B-GGUF",
168
+ "filename": "*Q4_0.gguf",
169
  "max_context": 32768,
170
  "default_temperature": 0.6,
171
  "supports_toggle": True,
 
173
  "temperature": 0.6,
174
  "top_p": 0.95,
175
  "top_k": 20,
176
+ "repeat_penalty": 1.0,
177
+ },
178
+ },
179
+ "granite_3_3_2b_q4": {
180
+ "name": "Granite 3.3 2B Instruct (128K Context)",
181
+ "repo_id": "ibm-granite/granite-3.3-2b-instruct-GGUF",
182
+ "filename": "*Q4_K_M.gguf",
183
+ "max_context": 131072,
184
+ "default_temperature": 0.7,
185
+ "supports_toggle": False,
186
+ "inference_settings": {
187
+ "temperature": 0.7,
188
+ "top_p": 0.9,
189
+ "top_k": 40,
190
+ "repeat_penalty": 1.1,
191
+ },
192
+ },
193
+ "youtu_llm_2b_q8": {
194
+ "name": "Youtu-LLM 2B (128K Context)",
195
+ "repo_id": "tencent/Youtu-LLM-2B-GGUF",
196
+ "filename": "*Q8_0.gguf",
197
+ "max_context": 131072,
198
+ "default_temperature": 0.7,
199
+ "supports_toggle": True,
200
+ "inference_settings": {
201
+ "temperature": 0.7,
202
+ "top_p": 0.8,
203
+ "top_k": 20,
204
  "repeat_penalty": 1.05,
205
  },
206
  },
207
  "lfm2_2_6b_transcript": {
208
+ "name": "LFM2 2.6B Transcript (8K Context)",
209
+ "repo_id": "LiquidAI/LFM-2.6B-Transcript-GGUF",
210
+ "filename": "*Q4_0.gguf",
211
+ "max_context": 8192,
212
  "default_temperature": 0.6,
213
  "supports_toggle": False,
214
  "inference_settings": {
215
+ "temperature": 0.6,
216
+ "top_p": 0.95,
217
+ "top_k": 20,
218
+ "repeat_penalty": 1.1,
219
+ },
220
+ },
221
+ "granite_3_1_3b_q4": {
222
+ "name": "Granite 3.1 3B-A800M Instruct (128K Context)",
223
+ "repo_id": "bartowski/granite-3.1-3b-a800m-instruct-GGUF",
224
+ "filename": "*Q4_K_M.gguf",
225
+ "max_context": 131072,
226
+ "default_temperature": 0.7,
227
+ "supports_toggle": False,
228
+ "inference_settings": {
229
+ "temperature": 0.7,
230
  "top_p": 0.9,
231
  "top_k": 40,
232
  "repeat_penalty": 1.1,