Spaces:
Running
Running
feat: Reorder models by parameter count and move model dropdown in UI
Browse files
- Models now ordered by parameter count (0.6B → 30B) ascending
- Moved model dropdown to top level (below Output Language, above Upload File)
- Model selection now more prominent and accessible
- Removed duplicate enable_reasoning checkbox from Advanced Settings
- Added thinking headroom calculation for max_tokens
app.py
CHANGED
|
@@ -190,20 +190,6 @@ AVAILABLE_MODELS = {
|
|
| 190 |
"repeat_penalty": 1.0,
|
| 191 |
},
|
| 192 |
},
|
| 193 |
-
"qwen3_30b_thinking_q1": {
|
| 194 |
-
"name": "Qwen3 30B Thinking (256K Context)",
|
| 195 |
-
"repo_id": "unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF",
|
| 196 |
-
"filename": "*TQ1_0.gguf",
|
| 197 |
-
"max_context": 262144,
|
| 198 |
-
"default_temperature": 0.6,
|
| 199 |
-
"supports_toggle": False, # Thinking-only mode
|
| 200 |
-
"inference_settings": {
|
| 201 |
-
"temperature": 0.6,
|
| 202 |
-
"top_p": 0.95,
|
| 203 |
-
"top_k": 20,
|
| 204 |
-
"repeat_penalty": 1.0,
|
| 205 |
-
},
|
| 206 |
-
},
|
| 207 |
"granite4_tiny_q3": {
|
| 208 |
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 209 |
"repo_id": "unsloth/granite-4.0-h-tiny-GGUF",
|
|
@@ -246,6 +232,20 @@ AVAILABLE_MODELS = {
|
|
| 246 |
"repeat_penalty": 1.05,
|
| 247 |
},
|
| 248 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
}
|
| 250 |
|
| 251 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
@@ -1019,6 +1019,24 @@ def create_interface():
|
|
| 1019 |
info="Choose the target language for your summary"
|
| 1020 |
)
|
| 1021 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1022 |
gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">📤</span> Upload File</div>')
|
| 1023 |
|
| 1024 |
file_input = gr.File(
|
|
@@ -1030,19 +1048,6 @@ def create_interface():
|
|
| 1030 |
|
| 1031 |
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
| 1032 |
with gr.Group(elem_classes=["advanced-settings"]):
|
| 1033 |
-
model_dropdown = gr.Dropdown(
|
| 1034 |
-
choices=[(info["name"], key) for key, info in AVAILABLE_MODELS.items()],
|
| 1035 |
-
value=DEFAULT_MODEL_KEY,
|
| 1036 |
-
label="Model",
|
| 1037 |
-
info="Smaller = faster. Large files need models with bigger context."
|
| 1038 |
-
)
|
| 1039 |
-
enable_reasoning = gr.Checkbox(
|
| 1040 |
-
value=True,
|
| 1041 |
-
label="Enable Reasoning Mode",
|
| 1042 |
-
info="Uses /think for deeper analysis (slower) or /no_think for direct output (faster). Only available for Qwen3 models.",
|
| 1043 |
-
interactive=True,
|
| 1044 |
-
visible=AVAILABLE_MODELS[DEFAULT_MODEL_KEY].get("supports_toggle", False)
|
| 1045 |
-
)
|
| 1046 |
temperature_slider = gr.Slider(
|
| 1047 |
minimum=0.0,
|
| 1048 |
maximum=2.0,
|
|
|
|
| 190 |
"repeat_penalty": 1.0,
|
| 191 |
},
|
| 192 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
"granite4_tiny_q3": {
|
| 194 |
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 195 |
"repo_id": "unsloth/granite-4.0-h-tiny-GGUF",
|
|
|
|
| 232 |
"repeat_penalty": 1.05,
|
| 233 |
},
|
| 234 |
},
|
| 235 |
+
"qwen3_30b_thinking_q1": {
|
| 236 |
+
"name": "Qwen3 30B Thinking (256K Context)",
|
| 237 |
+
"repo_id": "unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF",
|
| 238 |
+
"filename": "*TQ1_0.gguf",
|
| 239 |
+
"max_context": 262144,
|
| 240 |
+
"default_temperature": 0.6,
|
| 241 |
+
"supports_toggle": False, # Thinking-only mode
|
| 242 |
+
"inference_settings": {
|
| 243 |
+
"temperature": 0.6,
|
| 244 |
+
"top_p": 0.95,
|
| 245 |
+
"top_k": 20,
|
| 246 |
+
"repeat_penalty": 1.0,
|
| 247 |
+
},
|
| 248 |
+
},
|
| 249 |
}
|
| 250 |
|
| 251 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
|
|
| 1019 |
info="Choose the target language for your summary"
|
| 1020 |
)
|
| 1021 |
|
| 1022 |
+
# Model Selection - Moved to top level for easy access
|
| 1023 |
+
gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🤖</span> Model</div>')
|
| 1024 |
+
|
| 1025 |
+
model_dropdown = gr.Dropdown(
|
| 1026 |
+
choices=[(info["name"], key) for key, info in AVAILABLE_MODELS.items()],
|
| 1027 |
+
value=DEFAULT_MODEL_KEY,
|
| 1028 |
+
label="Select Model",
|
| 1029 |
+
info="Models ordered by size (0.6B to 30B). Smaller = faster. Large files need bigger context."
|
| 1030 |
+
)
|
| 1031 |
+
|
| 1032 |
+
enable_reasoning = gr.Checkbox(
|
| 1033 |
+
value=True,
|
| 1034 |
+
label="Enable Reasoning Mode",
|
| 1035 |
+
info="Uses /think for deeper analysis (slower) or /no_think for direct output (faster). Only available for Qwen3 models.",
|
| 1036 |
+
interactive=True,
|
| 1037 |
+
visible=AVAILABLE_MODELS[DEFAULT_MODEL_KEY].get("supports_toggle", False)
|
| 1038 |
+
)
|
| 1039 |
+
|
| 1040 |
gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">📤</span> Upload File</div>')
|
| 1041 |
|
| 1042 |
file_input = gr.File(
|
|
|
|
| 1048 |
|
| 1049 |
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
| 1050 |
with gr.Group(elem_classes=["advanced-settings"]):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1051 |
temperature_slider = gr.Slider(
|
| 1052 |
minimum=0.0,
|
| 1053 |
maximum=2.0,
|