Luigi committed on
Commit
53bb554
·
1 Parent(s): 510e0aa

feat: Reorder models by parameter count and move model dropdown in UI

Browse files

- Models now ordered by parameter count (0.1B → 30B) ascending
- Moved model dropdown to top level (below Output Language, above Upload File)
- Model selection now more prominent and accessible
- Removed duplicate enable_reasoning checkbox from Advanced Settings
- Added thinking headroom calculation for max_tokens

Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -190,20 +190,6 @@ AVAILABLE_MODELS = {
190
  "repeat_penalty": 1.0,
191
  },
192
  },
193
- "qwen3_30b_thinking_q1": {
194
- "name": "Qwen3 30B Thinking (256K Context)",
195
- "repo_id": "unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF",
196
- "filename": "*TQ1_0.gguf",
197
- "max_context": 262144,
198
- "default_temperature": 0.6,
199
- "supports_toggle": False, # Thinking-only mode
200
- "inference_settings": {
201
- "temperature": 0.6,
202
- "top_p": 0.95,
203
- "top_k": 20,
204
- "repeat_penalty": 1.0,
205
- },
206
- },
207
  "granite4_tiny_q3": {
208
  "name": "Granite 4.0 Tiny 7B (128K Context)",
209
  "repo_id": "unsloth/granite-4.0-h-tiny-GGUF",
@@ -246,6 +232,20 @@ AVAILABLE_MODELS = {
246
  "repeat_penalty": 1.05,
247
  },
248
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  }
250
 
251
  DEFAULT_MODEL_KEY = "qwen3_600m_q4"
@@ -1019,6 +1019,24 @@ def create_interface():
1019
  info="Choose the target language for your summary"
1020
  )
1021
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1022
  gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">📤</span> Upload File</div>')
1023
 
1024
  file_input = gr.File(
@@ -1030,19 +1048,6 @@ def create_interface():
1030
 
1031
  with gr.Accordion("⚙️ Advanced Settings", open=False):
1032
  with gr.Group(elem_classes=["advanced-settings"]):
1033
- model_dropdown = gr.Dropdown(
1034
- choices=[(info["name"], key) for key, info in AVAILABLE_MODELS.items()],
1035
- value=DEFAULT_MODEL_KEY,
1036
- label="Model",
1037
- info="Smaller = faster. Large files need models with bigger context."
1038
- )
1039
- enable_reasoning = gr.Checkbox(
1040
- value=True,
1041
- label="Enable Reasoning Mode",
1042
- info="Uses /think for deeper analysis (slower) or /no_think for direct output (faster). Only available for Qwen3 models.",
1043
- interactive=True,
1044
- visible=AVAILABLE_MODELS[DEFAULT_MODEL_KEY].get("supports_toggle", False)
1045
- )
1046
  temperature_slider = gr.Slider(
1047
  minimum=0.0,
1048
  maximum=2.0,
 
190
  "repeat_penalty": 1.0,
191
  },
192
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  "granite4_tiny_q3": {
194
  "name": "Granite 4.0 Tiny 7B (128K Context)",
195
  "repo_id": "unsloth/granite-4.0-h-tiny-GGUF",
 
232
  "repeat_penalty": 1.05,
233
  },
234
  },
235
+ "qwen3_30b_thinking_q1": {
236
+ "name": "Qwen3 30B Thinking (256K Context)",
237
+ "repo_id": "unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF",
238
+ "filename": "*TQ1_0.gguf",
239
+ "max_context": 262144,
240
+ "default_temperature": 0.6,
241
+ "supports_toggle": False, # Thinking-only mode
242
+ "inference_settings": {
243
+ "temperature": 0.6,
244
+ "top_p": 0.95,
245
+ "top_k": 20,
246
+ "repeat_penalty": 1.0,
247
+ },
248
+ },
249
  }
250
 
251
  DEFAULT_MODEL_KEY = "qwen3_600m_q4"
 
1019
  info="Choose the target language for your summary"
1020
  )
1021
 
1022
+ # Model Selection - Moved to top level for easy access
1023
+ gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🤖</span> Model</div>')
1024
+
1025
+ model_dropdown = gr.Dropdown(
1026
+ choices=[(info["name"], key) for key, info in AVAILABLE_MODELS.items()],
1027
+ value=DEFAULT_MODEL_KEY,
1028
+ label="Select Model",
1029
+ info="Models ordered by size (0.6B to 30B). Smaller = faster. Large files need bigger context."
1030
+ )
1031
+
1032
+ enable_reasoning = gr.Checkbox(
1033
+ value=True,
1034
+ label="Enable Reasoning Mode",
1035
+ info="Uses /think for deeper analysis (slower) or /no_think for direct output (faster). Only available for Qwen3 models.",
1036
+ interactive=True,
1037
+ visible=AVAILABLE_MODELS[DEFAULT_MODEL_KEY].get("supports_toggle", False)
1038
+ )
1039
+
1040
  gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">📤</span> Upload File</div>')
1041
 
1042
  file_input = gr.File(
 
1048
 
1049
  with gr.Accordion("⚙️ Advanced Settings", open=False):
1050
  with gr.Group(elem_classes=["advanced-settings"]):
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
  temperature_slider = gr.Slider(
1052
  minimum=0.0,
1053
  maximum=2.0,