claw-web-v2

Sleeping

Claw Web committed on Apr 3

Commit

04f258d

1 Parent(s): 4d96cf9

feat: DeepInfra Qwen3-Coder-480B-Turbo + provider UI + thinking mode + 30+ model aliases

- Default model: Qwen3-Coder-480B-A35B-Instruct-Turbo (comparable to Claude Sonnet)
- Added DeepInfra as provider with API key/base URL fields in Settings
- Added 30+ model aliases (qwen-coder, hermes, nemotron, step, llama-4, etc.)
- Added MODEL_CONTEXT_WINDOWS for all DeepInfra models
- Added reasoning_content handling for thinking models (Qwen3 Thinking, DeepSeek-R1)
- Dynamic max_tokens cap: 32,000 for HuggingFace, 65,536 for DeepInfra and all other providers
- Quick Select updated: Qwen Coder 480B, Hermes 70B, Step Flash, Nemotron, Llama 4
- Provider selector: DeepInfra, HuggingFace, OpenRouter, OpenAI, Groq, Ollama
- All previous fixes verified intact

Files changed (3) hide show

client/src/components/SettingsPanel.tsx +94 -7
server/_core/llm.ts +1 -1
server/runtime/agent.ts +81 -11

client/src/components/SettingsPanel.tsx CHANGED Viewed

@@ -178,7 +178,7 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) {
                   className="font-mono text-sm"
                 />
                 <p className="text-[10px] text-muted-foreground mt-1">
-                  Use aliases (mimo, llama, deepseek, qwen) or full model IDs. Default: MiMo-V2-Flash
                 </p>
               </div>
@@ -188,12 +188,14 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) {
                 </label>
                 <div className="grid grid-cols-2 gap-1.5">
                   {[
-                    { id: "XiaomiMiMo/MiMo-V2-Flash", label: "MiMo Flash" },
-                    { id: "Qwen/Qwen3-8B", label: "Qwen3 8B" },
-                    { id: "meta-llama/Llama-3.3-70B-Instruct", label: "Llama 70B" },
-                    { id: "deepseek-ai/DeepSeek-V3.2", label: "DeepSeek V3" },
-                    { id: "deepseek-ai/DeepSeek-R1", label: "DeepSeek R1" },
-                    { id: "Qwen/Qwen3-Coder-30B-A3B-Instruct", label: "Qwen Coder" },
                   ].map((m) => (
                     <button
                       key={m.id}
@@ -222,6 +224,91 @@ export function SettingsPanel({ open, onClose }: SettingsPanelProps) {
                 <Save className="size-3.5 mr-1.5" />
                 Save Model
               </Button>
             </div>
           )}

                   className="font-mono text-sm"
                 />
                 <p className="text-[10px] text-muted-foreground mt-1">
+                  Use aliases (qwen-coder, deepseek, hermes, llama) or full model IDs. Default: Qwen3-Coder-480B-Turbo
                 </p>
               </div>
                 </label>
                 <div className="grid grid-cols-2 gap-1.5">
                   {[
+                    { id: "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo", label: "\u2B50 Qwen Coder 480B" },
+                    { id: "Qwen/Qwen3-235B-A22B-Instruct-2507", label: "Qwen3 235B" },
+                    { id: "deepseek-ai/DeepSeek-V3.2", label: "DeepSeek V3.2" },
+                    { id: "NousResearch/Hermes-3-Llama-3.1-70B", label: "\uD83D\uDD13 Hermes 70B" },
+                    { id: "stepfun-ai/Step-3.5-Flash", label: "Step 3.5 Flash" },
+                    { id: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B", label: "Nemotron 120B" },
+                    { id: "meta-llama/Llama-4-Maverick-17B-128E", label: "Llama 4 1M ctx" },
+                    { id: "Qwen/Qwen3.5-397B-A17B", label: "Qwen3.5 397B" },
                   ].map((m) => (
                     <button
                       key={m.id}
                 <Save className="size-3.5 mr-1.5" />
                 Save Model
               </Button>
+              {/* API Provider */}
+              <div className="pt-3 border-t border-border">
+                <label className="text-xs font-medium text-muted-foreground mb-1.5 block">
+                  API Provider
+                </label>
+                <div className="grid grid-cols-3 gap-1.5">
+                  {[
+                    { id: "deepinfra", label: "DeepInfra" },
+                    { id: "huggingface", label: "HuggingFace" },
+                    { id: "openrouter", label: "OpenRouter" },
+                    { id: "openai", label: "OpenAI" },
+                    { id: "groq", label: "Groq" },
+                    { id: "ollama", label: "Ollama" },
+                  ].map((p) => (
+                    <button
+                      key={p.id}
+                      onClick={() => {
+                        setFormState((s) => ({ ...s, apiProvider: p.id }));
+                        saveField("apiProvider", p.id);
+                      }}
+                      className={cn(
+                        "text-xs px-2 py-1.5 rounded-md border transition-colors",
+                        formState.apiProvider === p.id
+                          ? "border-primary bg-primary/10 text-primary"
+                          : "border-border hover:border-primary/50"
+                      )}
+                    >
+                      {p.label}
+                    </button>
+                  ))}
+                </div>
+              </div>
+              {/* API Key */}
+              <div>
+                <label className="text-xs font-medium text-muted-foreground mb-1.5 block">
+                  API Key
+                </label>
+                <Input
+                  type="password"
+                  value={formState.apiKey || ""}
+                  onChange={(e) =>
+                    setFormState((s) => ({ ...s, apiKey: e.target.value }))
+                  }
+                  placeholder="Enter your API key"
+                  className="font-mono text-sm"
+                />
+                <Button
+                  size="sm"
+                  className="mt-1.5"
+                  onClick={() => saveField("apiKey", formState.apiKey)}
+                  disabled={updateSettings.isPending}
+                >
+                  <Save className="size-3.5 mr-1.5" />
+                  Save Key
+                </Button>
+              </div>
+              {/* API Base URL (optional) */}
+              <div>
+                <label className="text-xs font-medium text-muted-foreground mb-1.5 block">
+                  API Base URL (optional)
+                </label>
+                <Input
+                  value={formState.apiBaseUrl || ""}
+                  onChange={(e) =>
+                    setFormState((s) => ({ ...s, apiBaseUrl: e.target.value }))
+                  }
+                  placeholder="Auto-detected from provider"
+                  className="font-mono text-sm"
+                />
+                <p className="text-[10px] text-muted-foreground mt-1">
+                  Leave empty to use provider default. For custom endpoints only.
+                </p>
+                <Button
+                  size="sm"
+                  className="mt-1.5"
+                  onClick={() => saveField("apiBaseUrl", formState.apiBaseUrl)}
+                  disabled={updateSettings.isPending}
+                >
+                  <Save className="size-3.5 mr-1.5" />
+                  Save URL
+                </Button>
+              </div>
             </div>
           )}

server/_core/llm.ts CHANGED Viewed

@@ -265,7 +265,7 @@ function resolveApiKey(overrideKey?: string): string {
 // ─── Default model ─────────────────────────────────────────────────────────
-const DEFAULT_MODEL = process.env.DEFAULT_MODEL || "XiaomiMiMo/MiMo-V2-Flash";
 const DEFAULT_MAX_TOKENS = 16384;
 // ─── Non-streaming invoke (for slash commands) ─────────────────────────────

 // ─── Default model ─────────────────────────────────────────────────────────
+const DEFAULT_MODEL = process.env.DEFAULT_MODEL || "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo";
 const DEFAULT_MAX_TOKENS = 16384;
 // ─── Non-streaming invoke (for slash commands) ─────────────────────────────

server/runtime/agent.ts CHANGED Viewed

@@ -18,18 +18,46 @@ import { execSync } from "child_process";
 // Context window sizes for known models (used for proactive compaction)
 const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
   "XiaomiMiMo/MiMo-V2-Flash": 262144,
   "Qwen/Qwen3-Coder-Next": 131072,
   "Qwen/Qwen3-8B": 32768,
   "Qwen/Qwen3-Coder-30B-A3B-Instruct": 131072,
   "meta-llama/Llama-3.3-70B-Instruct": 131072,
-  "deepseek-ai/DeepSeek-V3.2": 131072,
   "deepseek-ai/DeepSeek-R1": 131072,
   "claude-opus-4-6": 200000,
   "claude-sonnet-4-6": 200000,
   "gpt-5.4": 1048576,
   "gpt-4.1": 1048576,
   "grok-3": 131072,
 };
 const DEFAULT_CONTEXT_WINDOW = 131072;
@@ -214,35 +242,55 @@ const RETRY_DELAY_MS = 2000; // fixed 2 second interval, no backoff
 function resolveApiConfig(config: AgentConfig) {
   // ─── HARDCODED FALLBACK — always works even if settings are corrupted ───
   const FALLBACK_URL = "https://router.huggingface.co/v1";
-  const FALLBACK_MODEL = "XiaomiMiMo/MiMo-V2-Flash";
   // Resolve model aliases (used for both default and custom paths)
   const aliasMap: Record<string, string> = {
-    // Xiaomi MiMo (default)
     mimo: "XiaomiMiMo/MiMo-V2-Flash",
     "mimo-flash": "XiaomiMiMo/MiMo-V2-Flash",
     "mimo-v2": "XiaomiMiMo/MiMo-V2-Flash",
-    // Qwen models
-    "qwen-coder": "Qwen/Qwen3-Coder-Next",
     "qwen3-8b": "Qwen/Qwen3-8B",
-    "qwen3-coder": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
     // Llama
     llama: "meta-llama/Llama-3.3-70B-Instruct",
     "llama-70b": "meta-llama/Llama-3.3-70B-Instruct",
     // DeepSeek
     deepseek: "deepseek-ai/DeepSeek-V3.2",
-    "deepseek-r1": "deepseek-ai/DeepSeek-R1",
-    // OpenAI GPT-5.x family (if user has OpenAI key)
     "gpt5": "gpt-5.4",
     "gpt-5": "gpt-5.4",
     "gpt54": "gpt-5.4",
-    // Anthropic aliases (for compatibility)
     opus: "claude-opus-4-6",
     sonnet: "claude-sonnet-4-6",
     haiku: "claude-haiku-4-5-20251213",
     // xAI
     grok: "grok-3",
     "grok-3": "grok-3",
   };
   // Treat empty, null, masked, or built-in providers as "use server default"
@@ -265,11 +313,14 @@ function resolveApiConfig(config: AgentConfig) {
   let baseUrl = config.apiBaseUrl || "";
   if (!baseUrl) {
     const providers: Record<string, string> = {
       huggingface: "https://router.huggingface.co/v1",
       xai: "https://api.x.ai/v1",
       openrouter: "https://openrouter.ai/api/v1",
       openai: "https://api.openai.com/v1",
       anthropic: "https://api.anthropic.com/v1",
       ollama: "http://localhost:11434/v1",
     };
     baseUrl = providers[config.apiProvider] || FALLBACK_URL;
@@ -415,6 +466,14 @@ export async function runAgentLoop(
     }
     // Build API request
     const payload: Record<string, unknown> = {
       model: apiConfig.model,
       messages: conversationMessages.map((m) => {
@@ -426,12 +485,17 @@ export async function runAgentLoop(
       }),
       tools: allTools,
       tool_choice: "auto",
-      max_tokens: Math.min(cfg.maxTokens, 32000), // HuggingFace Router limit: 0-32000
-      temperature: cfg.temperature,
       top_p: cfg.topP,
       stream: true,
     };
     sendSSE(res, "message_start", { iteration: iterations });
     try {
@@ -980,6 +1044,12 @@ async function processStream(
             continue;
           }
           // Text content streaming
           if (delta.content) {
             content += delta.content;

 // Context window sizes for known models (used for proactive compaction)
 const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
+  // Xiaomi MiMo
   "XiaomiMiMo/MiMo-V2-Flash": 262144,
+  // Qwen models (DeepInfra + HuggingFace)
+  "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": 262144,
+  "Qwen/Qwen3-Coder-480B-A35B-Instruct": 262144,
+  "Qwen/Qwen3-235B-A22B-Instruct-2507": 262144,
+  "Qwen/Qwen3-235B-A22B-Thinking-2507": 262144,
+  "Qwen/Qwen3.5-397B-A17B": 262144,
+  "Qwen/Qwen3.5-122B-A10B": 262144,
   "Qwen/Qwen3-Coder-Next": 131072,
+  "Qwen/Qwen3-32B": 40960,
   "Qwen/Qwen3-8B": 32768,
   "Qwen/Qwen3-Coder-30B-A3B-Instruct": 131072,
+  // Meta Llama
   "meta-llama/Llama-3.3-70B-Instruct": 131072,
+  "meta-llama/Llama-4-Maverick-17B-128E": 1048576,
+  "meta-llama/Llama-4-Scout-17B-16E": 327680,
+  // DeepSeek
+  "deepseek-ai/DeepSeek-V3.2": 163840,
+  "deepseek-ai/DeepSeek-V3.1": 163840,
   "deepseek-ai/DeepSeek-R1": 131072,
+  "deepseek-ai/DeepSeek-R1-0528": 163840,
+  // NVIDIA Nemotron
+  "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B": 262144,
+  // StepFun
+  "stepfun-ai/Step-3.5-Flash": 262144,
+  // NousResearch (uncensored)
+  "NousResearch/Hermes-3-Llama-3.1-70B": 131072,
+  "NousResearch/Hermes-3-Llama-3.1-405B": 131072,
+  // Anthropic
   "claude-opus-4-6": 200000,
   "claude-sonnet-4-6": 200000,
+  // OpenAI
   "gpt-5.4": 1048576,
   "gpt-4.1": 1048576,
+  // xAI
   "grok-3": 131072,
+  // Google
+  "google/gemini-2.5-flash": 1000000,
+  "google/gemini-2.5-pro": 1000000,
 };
 const DEFAULT_CONTEXT_WINDOW = 131072;
 function resolveApiConfig(config: AgentConfig) {
   // ─── HARDCODED FALLBACK — always works even if settings are corrupted ───
   const FALLBACK_URL = "https://router.huggingface.co/v1";
+  const FALLBACK_MODEL = "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo";
   // Resolve model aliases (used for both default and custom paths)
   const aliasMap: Record<string, string> = {
+    // Xiaomi MiMo
     mimo: "XiaomiMiMo/MiMo-V2-Flash",
     "mimo-flash": "XiaomiMiMo/MiMo-V2-Flash",
     "mimo-v2": "XiaomiMiMo/MiMo-V2-Flash",
+    // Qwen models (DeepInfra)
+    "qwen-coder": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
+    "qwen-coder-turbo": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
+    "qwen-coder-480b": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+    "qwen3-235b": "Qwen/Qwen3-235B-A22B-Instruct-2507",
+    "qwen3-thinking": "Qwen/Qwen3-235B-A22B-Thinking-2507",
+    "qwen3.5": "Qwen/Qwen3.5-397B-A17B",
+    "qwen3-32b": "Qwen/Qwen3-32B",
     "qwen3-8b": "Qwen/Qwen3-8B",
+    "qwen3-coder": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
     // Llama
     llama: "meta-llama/Llama-3.3-70B-Instruct",
     "llama-70b": "meta-llama/Llama-3.3-70B-Instruct",
+    "llama-4": "meta-llama/Llama-4-Maverick-17B-128E",
     // DeepSeek
     deepseek: "deepseek-ai/DeepSeek-V3.2",
+    "deepseek-r1": "deepseek-ai/DeepSeek-R1-0528",
+    "deepseek-v3": "deepseek-ai/DeepSeek-V3.2",
+    // NVIDIA
+    nemotron: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B",
+    // StepFun
+    step: "stepfun-ai/Step-3.5-Flash",
+    "step-flash": "stepfun-ai/Step-3.5-Flash",
+    // Uncensored
+    hermes: "NousResearch/Hermes-3-Llama-3.1-70B",
+    "hermes-405b": "NousResearch/Hermes-3-Llama-3.1-405B",
+    uncensored: "NousResearch/Hermes-3-Llama-3.1-70B",
+    // OpenAI GPT-5.x family
     "gpt5": "gpt-5.4",
     "gpt-5": "gpt-5.4",
     "gpt54": "gpt-5.4",
+    // Anthropic aliases
     opus: "claude-opus-4-6",
     sonnet: "claude-sonnet-4-6",
     haiku: "claude-haiku-4-5-20251213",
     // xAI
     grok: "grok-3",
     "grok-3": "grok-3",
+    // Google
+    gemini: "google/gemini-2.5-flash",
+    "gemini-pro": "google/gemini-2.5-pro",
   };
   // Treat empty, null, masked, or built-in providers as "use server default"
   let baseUrl = config.apiBaseUrl || "";
   if (!baseUrl) {
     const providers: Record<string, string> = {
+      deepinfra: "https://api.deepinfra.com/v1/openai",
       huggingface: "https://router.huggingface.co/v1",
       xai: "https://api.x.ai/v1",
       openrouter: "https://openrouter.ai/api/v1",
       openai: "https://api.openai.com/v1",
       anthropic: "https://api.anthropic.com/v1",
+      groq: "https://api.groq.com/openai/v1",
+      cerebras: "https://api.cerebras.ai/v1",
       ollama: "http://localhost:11434/v1",
     };
     baseUrl = providers[config.apiProvider] || FALLBACK_URL;
     }
     // Build API request
+    // Determine max_tokens limit based on provider
+    const isDeepInfra = apiConfig.url.includes("deepinfra.com");
+    const isHuggingFace = apiConfig.url.includes("huggingface.co");
+    const maxTokensLimit = isHuggingFace ? 32000 : (isDeepInfra ? 65536 : 65536);
+    // Detect if model supports thinking/reasoning mode (Qwen3 Thinking, DeepSeek-R1)
+    const isThinkingModel = apiConfig.model.includes("Thinking") || apiConfig.model.includes("R1");
     const payload: Record<string, unknown> = {
       model: apiConfig.model,
       messages: conversationMessages.map((m) => {
       }),
       tools: allTools,
       tool_choice: "auto",
+      max_tokens: Math.min(cfg.maxTokens, maxTokensLimit),
+      temperature: isThinkingModel ? 0.6 : cfg.temperature, // thinking models need lower temp
       top_p: cfg.topP,
       stream: true,
     };
+    // Enable thinking/reasoning for supported models (Qwen3 Thinking, DeepSeek-R1)
+    if (isThinkingModel && isDeepInfra) {
+      (payload as any).extra_body = { enable_thinking: true };
+    }
     sendSSE(res, "message_start", { iteration: iterations });
     try {
             continue;
           }
+          // Reasoning/thinking content (Qwen3 Thinking, DeepSeek-R1)
+          // These models return reasoning in delta.reasoning_content before the actual response
+          if (delta.reasoning_content) {
+            sendSSE(res, "thinking_delta", { text: delta.reasoning_content });
+          }
           // Text content streaming
           if (delta.content) {
             content += delta.content;