Claw Web committed on
Commit
04f258d
·
1 Parent(s): 4d96cf9

feat: DeepInfra Qwen3-Coder-480B-Turbo + provider UI + thinking mode + 30+ model aliases

Browse files

- Default model: Qwen3-Coder-480B-A35B-Instruct-Turbo (comparable to Claude Sonnet)
- Added DeepInfra as provider with API key/base URL fields in Settings
- Added 30+ model aliases (qwen-coder, hermes, nemotron, step, llama-4, etc.)
- Added MODEL_CONTEXT_WINDOWS entries for the newly supported DeepInfra models
- Added reasoning_content handling for thinking models (Qwen3 Thinking, DeepSeek-R1)
- Dynamic max_tokens cap: 32,000 for HuggingFace; 65,536 for DeepInfra and every other provider
- Quick Select updated: Qwen Coder 480B, Hermes 70B, Step Flash, Nemotron, Llama 4
- Provider selector: DeepInfra, HuggingFace, OpenRouter, OpenAI, Groq, Ollama
- All previous fixes verified intact

client/src/components/SettingsPanel.tsx CHANGED
@@ -178,7 +178,7 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) {
178
  className="font-mono text-sm"
179
  />
180
  <p className="text-[10px] text-muted-foreground mt-1">
181
- Use aliases (mimo, llama, deepseek, qwen) or full model IDs. Default: MiMo-V2-Flash
182
  </p>
183
  </div>
184
 
@@ -188,12 +188,14 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) {
188
  </label>
189
  <div className="grid grid-cols-2 gap-1.5">
190
  {[
191
- { id: "XiaomiMiMo/MiMo-V2-Flash", label: "MiMo Flash" },
192
- { id: "Qwen/Qwen3-8B", label: "Qwen3 8B" },
193
- { id: "meta-llama/Llama-3.3-70B-Instruct", label: "Llama 70B" },
194
- { id: "deepseek-ai/DeepSeek-V3.2", label: "DeepSeek V3" },
195
- { id: "deepseek-ai/DeepSeek-R1", label: "DeepSeek R1" },
196
- { id: "Qwen/Qwen3-Coder-30B-A3B-Instruct", label: "Qwen Coder" },
 
 
197
  ].map((m) => (
198
  <button
199
  key={m.id}
@@ -222,6 +224,91 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) {
222
  <Save className="size-3.5 mr-1.5" />
223
  Save Model
224
  </Button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  </div>
226
  )}
227
 
 
178
  className="font-mono text-sm"
179
  />
180
  <p className="text-[10px] text-muted-foreground mt-1">
181
+ Use aliases (qwen-coder, deepseek, hermes, llama) or full model IDs. Default: Qwen3-Coder-480B-Turbo
182
  </p>
183
  </div>
184
 
 
188
  </label>
189
  <div className="grid grid-cols-2 gap-1.5">
190
  {[
191
+ { id: "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo", label: "\u2B50 Qwen Coder 480B" },
192
+ { id: "Qwen/Qwen3-235B-A22B-Instruct-2507", label: "Qwen3 235B" },
193
+ { id: "deepseek-ai/DeepSeek-V3.2", label: "DeepSeek V3.2" },
194
+ { id: "NousResearch/Hermes-3-Llama-3.1-70B", label: "\uD83D\uDD13 Hermes 70B" },
195
+ { id: "stepfun-ai/Step-3.5-Flash", label: "Step 3.5 Flash" },
196
+ { id: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B", label: "Nemotron 120B" },
197
+ { id: "meta-llama/Llama-4-Maverick-17B-128E", label: "Llama 4 1M ctx" },
198
+ { id: "Qwen/Qwen3.5-397B-A17B", label: "Qwen3.5 397B" },
199
  ].map((m) => (
200
  <button
201
  key={m.id}
 
224
  <Save className="size-3.5 mr-1.5" />
225
  Save Model
226
  </Button>
227
+
228
+ {/* API Provider */}
229
+ <div className="pt-3 border-t border-border">
230
+ <label className="text-xs font-medium text-muted-foreground mb-1.5 block">
231
+ API Provider
232
+ </label>
233
+ <div className="grid grid-cols-3 gap-1.5">
234
+ {[
235
+ { id: "deepinfra", label: "DeepInfra" },
236
+ { id: "huggingface", label: "HuggingFace" },
237
+ { id: "openrouter", label: "OpenRouter" },
238
+ { id: "openai", label: "OpenAI" },
239
+ { id: "groq", label: "Groq" },
240
+ { id: "ollama", label: "Ollama" },
241
+ ].map((p) => (
242
+ <button
243
+ key={p.id}
244
+ onClick={() => {
245
+ setFormState((s) => ({ ...s, apiProvider: p.id }));
246
+ saveField("apiProvider", p.id);
247
+ }}
248
+ className={cn(
249
+ "text-xs px-2 py-1.5 rounded-md border transition-colors",
250
+ formState.apiProvider === p.id
251
+ ? "border-primary bg-primary/10 text-primary"
252
+ : "border-border hover:border-primary/50"
253
+ )}
254
+ >
255
+ {p.label}
256
+ </button>
257
+ ))}
258
+ </div>
259
+ </div>
260
+
261
+ {/* API Key */}
262
+ <div>
263
+ <label className="text-xs font-medium text-muted-foreground mb-1.5 block">
264
+ API Key
265
+ </label>
266
+ <Input
267
+ type="password"
268
+ value={formState.apiKey || ""}
269
+ onChange={(e) =>
270
+ setFormState((s) => ({ ...s, apiKey: e.target.value }))
271
+ }
272
+ placeholder="Enter your API key"
273
+ className="font-mono text-sm"
274
+ />
275
+ <Button
276
+ size="sm"
277
+ className="mt-1.5"
278
+ onClick={() => saveField("apiKey", formState.apiKey)}
279
+ disabled={updateSettings.isPending}
280
+ >
281
+ <Save className="size-3.5 mr-1.5" />
282
+ Save Key
283
+ </Button>
284
+ </div>
285
+
286
+ {/* API Base URL (optional) */}
287
+ <div>
288
+ <label className="text-xs font-medium text-muted-foreground mb-1.5 block">
289
+ API Base URL (optional)
290
+ </label>
291
+ <Input
292
+ value={formState.apiBaseUrl || ""}
293
+ onChange={(e) =>
294
+ setFormState((s) => ({ ...s, apiBaseUrl: e.target.value }))
295
+ }
296
+ placeholder="Auto-detected from provider"
297
+ className="font-mono text-sm"
298
+ />
299
+ <p className="text-[10px] text-muted-foreground mt-1">
300
+ Leave empty to use provider default. For custom endpoints only.
301
+ </p>
302
+ <Button
303
+ size="sm"
304
+ className="mt-1.5"
305
+ onClick={() => saveField("apiBaseUrl", formState.apiBaseUrl)}
306
+ disabled={updateSettings.isPending}
307
+ >
308
+ <Save className="size-3.5 mr-1.5" />
309
+ Save URL
310
+ </Button>
311
+ </div>
312
  </div>
313
  )}
314
 
server/_core/llm.ts CHANGED
@@ -265,7 +265,7 @@ function resolveApiKey(overrideKey?: string): string {
265
 
266
  // ─── Default model ─────────────────────────────────────────────────────────
267
 
268
- const DEFAULT_MODEL = process.env.DEFAULT_MODEL || "XiaomiMiMo/MiMo-V2-Flash";
269
  const DEFAULT_MAX_TOKENS = 16384;
270
 
271
  // ─── Non-streaming invoke (for slash commands) ─────────────────────────────
 
265
 
266
  // ─── Default model ─────────────────────────────────────────────────────────
267
 
268
+ const DEFAULT_MODEL = process.env.DEFAULT_MODEL || "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo";
269
  const DEFAULT_MAX_TOKENS = 16384;
270
 
271
  // ─── Non-streaming invoke (for slash commands) ─────────────────────────────
server/runtime/agent.ts CHANGED
@@ -18,18 +18,46 @@ import { execSync } from "child_process";
18
 
19
  // Context window sizes for known models (used for proactive compaction)
20
  const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
 
21
  "XiaomiMiMo/MiMo-V2-Flash": 262144,
 
 
 
 
 
 
 
22
  "Qwen/Qwen3-Coder-Next": 131072,
 
23
  "Qwen/Qwen3-8B": 32768,
24
  "Qwen/Qwen3-Coder-30B-A3B-Instruct": 131072,
 
25
  "meta-llama/Llama-3.3-70B-Instruct": 131072,
26
- "deepseek-ai/DeepSeek-V3.2": 131072,
 
 
 
 
27
  "deepseek-ai/DeepSeek-R1": 131072,
 
 
 
 
 
 
 
 
 
28
  "claude-opus-4-6": 200000,
29
  "claude-sonnet-4-6": 200000,
 
30
  "gpt-5.4": 1048576,
31
  "gpt-4.1": 1048576,
 
32
  "grok-3": 131072,
 
 
 
33
  };
34
 
35
  const DEFAULT_CONTEXT_WINDOW = 131072;
@@ -214,35 +242,55 @@ const RETRY_DELAY_MS = 2000; // fixed 2 second interval, no backoff
214
  function resolveApiConfig(config: AgentConfig) {
215
  // ─── HARDCODED FALLBACK — always works even if settings are corrupted ───
216
  const FALLBACK_URL = "https://router.huggingface.co/v1";
217
- const FALLBACK_MODEL = "XiaomiMiMo/MiMo-V2-Flash";
218
 
219
  // Resolve model aliases (used for both default and custom paths)
220
  const aliasMap: Record<string, string> = {
221
- // Xiaomi MiMo (default)
222
  mimo: "XiaomiMiMo/MiMo-V2-Flash",
223
  "mimo-flash": "XiaomiMiMo/MiMo-V2-Flash",
224
  "mimo-v2": "XiaomiMiMo/MiMo-V2-Flash",
225
- // Qwen models
226
- "qwen-coder": "Qwen/Qwen3-Coder-Next",
 
 
 
 
 
 
227
  "qwen3-8b": "Qwen/Qwen3-8B",
228
- "qwen3-coder": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
229
  // Llama
230
  llama: "meta-llama/Llama-3.3-70B-Instruct",
231
  "llama-70b": "meta-llama/Llama-3.3-70B-Instruct",
 
232
  // DeepSeek
233
  deepseek: "deepseek-ai/DeepSeek-V3.2",
234
- "deepseek-r1": "deepseek-ai/DeepSeek-R1",
235
- // OpenAI GPT-5.x family (if user has OpenAI key)
 
 
 
 
 
 
 
 
 
 
236
  "gpt5": "gpt-5.4",
237
  "gpt-5": "gpt-5.4",
238
  "gpt54": "gpt-5.4",
239
- // Anthropic aliases (for compatibility)
240
  opus: "claude-opus-4-6",
241
  sonnet: "claude-sonnet-4-6",
242
  haiku: "claude-haiku-4-5-20251213",
243
  // xAI
244
  grok: "grok-3",
245
  "grok-3": "grok-3",
 
 
 
246
  };
247
 
248
  // Treat empty, null, masked, or built-in providers as "use server default"
@@ -265,11 +313,14 @@ function resolveApiConfig(config: AgentConfig) {
265
  let baseUrl = config.apiBaseUrl || "";
266
  if (!baseUrl) {
267
  const providers: Record<string, string> = {
 
268
  huggingface: "https://router.huggingface.co/v1",
269
  xai: "https://api.x.ai/v1",
270
  openrouter: "https://openrouter.ai/api/v1",
271
  openai: "https://api.openai.com/v1",
272
  anthropic: "https://api.anthropic.com/v1",
 
 
273
  ollama: "http://localhost:11434/v1",
274
  };
275
  baseUrl = providers[config.apiProvider] || FALLBACK_URL;
@@ -415,6 +466,14 @@ export async function runAgentLoop(
415
  }
416
 
417
  // Build API request
 
 
 
 
 
 
 
 
418
  const payload: Record<string, unknown> = {
419
  model: apiConfig.model,
420
  messages: conversationMessages.map((m) => {
@@ -426,12 +485,17 @@ export async function runAgentLoop(
426
  }),
427
  tools: allTools,
428
  tool_choice: "auto",
429
- max_tokens: Math.min(cfg.maxTokens, 32000), // HuggingFace Router limit: 0-32000
430
- temperature: cfg.temperature,
431
  top_p: cfg.topP,
432
  stream: true,
433
  };
434
 
 
 
 
 
 
435
  sendSSE(res, "message_start", { iteration: iterations });
436
 
437
  try {
@@ -980,6 +1044,12 @@ async function processStream(
980
  continue;
981
  }
982
 
 
 
 
 
 
 
983
  // Text content streaming
984
  if (delta.content) {
985
  content += delta.content;
 
18
 
19
  // Context window sizes for known models (used for proactive compaction)
20
  const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
21
+ // Xiaomi MiMo
22
  "XiaomiMiMo/MiMo-V2-Flash": 262144,
23
+ // Qwen models (DeepInfra + HuggingFace)
24
+ "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": 262144,
25
+ "Qwen/Qwen3-Coder-480B-A35B-Instruct": 262144,
26
+ "Qwen/Qwen3-235B-A22B-Instruct-2507": 262144,
27
+ "Qwen/Qwen3-235B-A22B-Thinking-2507": 262144,
28
+ "Qwen/Qwen3.5-397B-A17B": 262144,
29
+ "Qwen/Qwen3.5-122B-A10B": 262144,
30
  "Qwen/Qwen3-Coder-Next": 131072,
31
+ "Qwen/Qwen3-32B": 40960,
32
  "Qwen/Qwen3-8B": 32768,
33
  "Qwen/Qwen3-Coder-30B-A3B-Instruct": 131072,
34
+ // Meta Llama
35
  "meta-llama/Llama-3.3-70B-Instruct": 131072,
36
+ "meta-llama/Llama-4-Maverick-17B-128E": 1048576,
37
+ "meta-llama/Llama-4-Scout-17B-16E": 327680,
38
+ // DeepSeek
39
+ "deepseek-ai/DeepSeek-V3.2": 163840,
40
+ "deepseek-ai/DeepSeek-V3.1": 163840,
41
  "deepseek-ai/DeepSeek-R1": 131072,
42
+ "deepseek-ai/DeepSeek-R1-0528": 163840,
43
+ // NVIDIA Nemotron
44
+ "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B": 262144,
45
+ // StepFun
46
+ "stepfun-ai/Step-3.5-Flash": 262144,
47
+ // NousResearch (uncensored)
48
+ "NousResearch/Hermes-3-Llama-3.1-70B": 131072,
49
+ "NousResearch/Hermes-3-Llama-3.1-405B": 131072,
50
+ // Anthropic
51
  "claude-opus-4-6": 200000,
52
  "claude-sonnet-4-6": 200000,
53
+ // OpenAI
54
  "gpt-5.4": 1048576,
55
  "gpt-4.1": 1048576,
56
+ // xAI
57
  "grok-3": 131072,
58
+ // Google
59
+ "google/gemini-2.5-flash": 1000000,
60
+ "google/gemini-2.5-pro": 1000000,
61
  };
62
 
63
  const DEFAULT_CONTEXT_WINDOW = 131072;
 
242
  function resolveApiConfig(config: AgentConfig) {
243
  // ─── HARDCODED FALLBACK — always works even if settings are corrupted ───
244
  const FALLBACK_URL = "https://router.huggingface.co/v1";
245
+ const FALLBACK_MODEL = "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo";
246
 
247
  // Resolve model aliases (used for both default and custom paths)
248
  const aliasMap: Record<string, string> = {
249
+ // Xiaomi MiMo
250
  mimo: "XiaomiMiMo/MiMo-V2-Flash",
251
  "mimo-flash": "XiaomiMiMo/MiMo-V2-Flash",
252
  "mimo-v2": "XiaomiMiMo/MiMo-V2-Flash",
253
+ // Qwen models (DeepInfra)
254
+ "qwen-coder": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
255
+ "qwen-coder-turbo": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
256
+ "qwen-coder-480b": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
257
+ "qwen3-235b": "Qwen/Qwen3-235B-A22B-Instruct-2507",
258
+ "qwen3-thinking": "Qwen/Qwen3-235B-A22B-Thinking-2507",
259
+ "qwen3.5": "Qwen/Qwen3.5-397B-A17B",
260
+ "qwen3-32b": "Qwen/Qwen3-32B",
261
  "qwen3-8b": "Qwen/Qwen3-8B",
262
+ "qwen3-coder": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
263
  // Llama
264
  llama: "meta-llama/Llama-3.3-70B-Instruct",
265
  "llama-70b": "meta-llama/Llama-3.3-70B-Instruct",
266
+ "llama-4": "meta-llama/Llama-4-Maverick-17B-128E",
267
  // DeepSeek
268
  deepseek: "deepseek-ai/DeepSeek-V3.2",
269
+ "deepseek-r1": "deepseek-ai/DeepSeek-R1-0528",
270
+ "deepseek-v3": "deepseek-ai/DeepSeek-V3.2",
271
+ // NVIDIA
272
+ nemotron: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B",
273
+ // StepFun
274
+ step: "stepfun-ai/Step-3.5-Flash",
275
+ "step-flash": "stepfun-ai/Step-3.5-Flash",
276
+ // Uncensored
277
+ hermes: "NousResearch/Hermes-3-Llama-3.1-70B",
278
+ "hermes-405b": "NousResearch/Hermes-3-Llama-3.1-405B",
279
+ uncensored: "NousResearch/Hermes-3-Llama-3.1-70B",
280
+ // OpenAI GPT-5.x family
281
  "gpt5": "gpt-5.4",
282
  "gpt-5": "gpt-5.4",
283
  "gpt54": "gpt-5.4",
284
+ // Anthropic aliases
285
  opus: "claude-opus-4-6",
286
  sonnet: "claude-sonnet-4-6",
287
  haiku: "claude-haiku-4-5-20251213",
288
  // xAI
289
  grok: "grok-3",
290
  "grok-3": "grok-3",
291
+ // Google
292
+ gemini: "google/gemini-2.5-flash",
293
+ "gemini-pro": "google/gemini-2.5-pro",
294
  };
295
 
296
  // Treat empty, null, masked, or built-in providers as "use server default"
 
313
  let baseUrl = config.apiBaseUrl || "";
314
  if (!baseUrl) {
315
  const providers: Record<string, string> = {
316
+ deepinfra: "https://api.deepinfra.com/v1/openai",
317
  huggingface: "https://router.huggingface.co/v1",
318
  xai: "https://api.x.ai/v1",
319
  openrouter: "https://openrouter.ai/api/v1",
320
  openai: "https://api.openai.com/v1",
321
  anthropic: "https://api.anthropic.com/v1",
322
+ groq: "https://api.groq.com/openai/v1",
323
+ cerebras: "https://api.cerebras.ai/v1",
324
  ollama: "http://localhost:11434/v1",
325
  };
326
  baseUrl = providers[config.apiProvider] || FALLBACK_URL;
 
466
  }
467
 
468
  // Build API request
469
+ // Determine max_tokens limit based on provider
470
+ const isDeepInfra = apiConfig.url.includes("deepinfra.com");
471
+ const isHuggingFace = apiConfig.url.includes("huggingface.co");
472
+ const maxTokensLimit = isHuggingFace ? 32000 : (isDeepInfra ? 65536 : 65536);
473
+
474
+ // Detect if model supports thinking/reasoning mode (Qwen3 Thinking, DeepSeek-R1)
475
+ const isThinkingModel = apiConfig.model.includes("Thinking") || apiConfig.model.includes("R1");
476
+
477
  const payload: Record<string, unknown> = {
478
  model: apiConfig.model,
479
  messages: conversationMessages.map((m) => {
 
485
  }),
486
  tools: allTools,
487
  tool_choice: "auto",
488
+ max_tokens: Math.min(cfg.maxTokens, maxTokensLimit),
489
+ temperature: isThinkingModel ? 0.6 : cfg.temperature, // thinking models need lower temp
490
  top_p: cfg.topP,
491
  stream: true,
492
  };
493
 
494
+ // Enable thinking/reasoning for supported models (Qwen3 Thinking, DeepSeek-R1)
495
+ if (isThinkingModel && isDeepInfra) {
496
+ (payload as any).extra_body = { enable_thinking: true };
497
+ }
498
+
499
  sendSSE(res, "message_start", { iteration: iterations });
500
 
501
  try {
 
1044
  continue;
1045
  }
1046
 
1047
+ // Reasoning/thinking content (Qwen3 Thinking, DeepSeek-R1)
1048
+ // These models return reasoning in delta.reasoning_content before the actual response
1049
+ if (delta.reasoning_content) {
1050
+ sendSSE(res, "thinking_delta", { text: delta.reasoning_content });
1051
+ }
1052
+
1053
  // Text content streaming
1054
  if (delta.content) {
1055
  content += delta.content;