openskynet / src /auto-reply /reply.directive.directive-behavior.defaults-think-low-reasoning-capable-models-no.test.ts
| import "./reply.directive.directive-behavior.e2e-mocks.js"; | |
| import { describe, expect, it, vi } from "vitest"; | |
| import { loadSessionStore } from "../config/sessions.js"; | |
| import { | |
| assertModelSelection, | |
| installDirectiveBehaviorE2EHooks, | |
| loadModelCatalog, | |
| makeEmbeddedTextResult, | |
| makeWhatsAppDirectiveConfig, | |
| mockEmbeddedTextResult, | |
| replyText, | |
| replyTexts, | |
| runEmbeddedPiAgent, | |
| sessionStorePath, | |
| withTempHome, | |
| } from "./reply.directive.directive-behavior.e2e-harness.js"; | |
| import { runModelDirectiveText } from "./reply.directive.directive-behavior.model-directive-test-utils.js"; | |
| import { getReplyFromConfig } from "./reply.js"; | |
| function makeDefaultModelConfig(home: string) { | |
| return makeWhatsAppDirectiveConfig(home, { | |
| model: { primary: "anthropic/claude-opus-4-5" }, | |
| models: { | |
| "anthropic/claude-opus-4-5": {}, | |
| "openai/gpt-4.1-mini": {}, | |
| }, | |
| }); | |
| } | |
| async function runReplyToCurrentCase(home: string, text: string) { | |
| vi.mocked(runEmbeddedPiAgent).mockResolvedValue(makeEmbeddedTextResult(text)); | |
| const res = await getReplyFromConfig( | |
| { | |
| Body: "ping", | |
| From: "+1004", | |
| To: "+2000", | |
| MessageSid: "msg-123", | |
| }, | |
| {}, | |
| makeWhatsAppDirectiveConfig(home, { model: "anthropic/claude-opus-4-5" }), | |
| ); | |
| return Array.isArray(res) ? res[0] : res; | |
| } | |
| async function expectThinkStatusForReasoningModel(params: { | |
| home: string; | |
| reasoning: boolean; | |
| expectedLevel: "low" | "off"; | |
| }): Promise<void> { | |
| vi.mocked(loadModelCatalog).mockResolvedValueOnce([ | |
| { | |
| id: "claude-opus-4-5", | |
| name: "Opus 4.5", | |
| provider: "anthropic", | |
| reasoning: params.reasoning, | |
| }, | |
| ]); | |
| const res = await getReplyFromConfig( | |
| { Body: "/think", From: "+1222", To: "+1222", CommandAuthorized: true }, | |
| {}, | |
| makeWhatsAppDirectiveConfig(params.home, { model: "anthropic/claude-opus-4-5" }), | |
| ); | |
| const text = replyText(res); | |
| expect(text).toContain(`Current thinking level: ${params.expectedLevel}`); | |
| expect(text).toContain("Options: off, minimal, low, medium, high, adaptive."); | |
| } | |
| function mockReasoningCapableCatalog() { | |
| vi.mocked(loadModelCatalog).mockResolvedValueOnce([ | |
| { | |
| id: "claude-opus-4-5", | |
| name: "Opus 4.5", | |
| provider: "anthropic", | |
| reasoning: true, | |
| }, | |
| ]); | |
| } | |
| async function runReasoningDefaultCase(params: { | |
| home: string; | |
| expectedThinkLevel: "low" | "off"; | |
| expectedReasoningLevel: "off" | "on"; | |
| thinkingDefault?: "off" | "low" | "medium" | "high"; | |
| }) { | |
| vi.mocked(runEmbeddedPiAgent).mockClear(); | |
| mockEmbeddedTextResult("done"); | |
| mockReasoningCapableCatalog(); | |
| await getReplyFromConfig( | |
| { | |
| Body: "hello", | |
| From: "+1004", | |
| To: "+2000", | |
| }, | |
| {}, | |
| makeWhatsAppDirectiveConfig(params.home, { | |
| model: { primary: "anthropic/claude-opus-4-5" }, | |
| ...(params.thinkingDefault ? { thinkingDefault: params.thinkingDefault } : {}), | |
| }), | |
| ); | |
| expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); | |
| const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]; | |
| expect(call?.thinkLevel).toBe(params.expectedThinkLevel); | |
| expect(call?.reasoningLevel).toBe(params.expectedReasoningLevel); | |
| } | |
| describe("directive behavior", () => { | |
| installDirectiveBehaviorE2EHooks(); | |
| it("covers /think status and reasoning defaults for reasoning and non-reasoning models", async () => { | |
| await withTempHome(async (home) => { | |
| await expectThinkStatusForReasoningModel({ | |
| home, | |
| reasoning: true, | |
| expectedLevel: "low", | |
| }); | |
| await expectThinkStatusForReasoningModel({ | |
| home, | |
| reasoning: false, | |
| expectedLevel: "off", | |
| }); | |
| expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); | |
| vi.mocked(runEmbeddedPiAgent).mockClear(); | |
| for (const scenario of [ | |
| { | |
| expectedThinkLevel: "low" as const, | |
| expectedReasoningLevel: "off" as const, | |
| }, | |
| { | |
| expectedThinkLevel: "off" as const, | |
| expectedReasoningLevel: "on" as const, | |
| thinkingDefault: "off" as const, | |
| }, | |
| ]) { | |
| await runReasoningDefaultCase({ | |
| home, | |
| ...scenario, | |
| }); | |
| } | |
| }); | |
| }); | |
| it("renders model list and status variants across catalog/config combinations", async () => { | |
| await withTempHome(async (home) => { | |
| const aliasText = await runModelDirectiveText(home, "/model list"); | |
| expect(aliasText).toContain("Providers:"); | |
| expect(aliasText).toContain("- anthropic"); | |
| expect(aliasText).toContain("- openai"); | |
| expect(aliasText).toContain("Use: /models <provider>"); | |
| expect(aliasText).toContain("Switch: /model <provider/model>"); | |
| vi.mocked(loadModelCatalog).mockResolvedValueOnce([]); | |
| const unavailableCatalogText = await runModelDirectiveText(home, "/model"); | |
| expect(unavailableCatalogText).toContain("Current: anthropic/claude-opus-4-5"); | |
| expect(unavailableCatalogText).toContain("Switch: /model <provider/model>"); | |
| expect(unavailableCatalogText).toContain( | |
| "Browse: /models (providers) or /models <provider> (models)", | |
| ); | |
| expect(unavailableCatalogText).toContain("More: /model status"); | |
| const allowlistedStatusText = await runModelDirectiveText(home, "/model status", { | |
| includeSessionStore: false, | |
| }); | |
| expect(allowlistedStatusText).toContain("anthropic/claude-opus-4-5"); | |
| expect(allowlistedStatusText).toContain("openai/gpt-4.1-mini"); | |
| expect(allowlistedStatusText).not.toContain("claude-sonnet-4-1"); | |
| expect(allowlistedStatusText).toContain("auth:"); | |
| vi.mocked(loadModelCatalog).mockResolvedValue([ | |
| { id: "claude-opus-4-5", name: "Opus 4.5", provider: "anthropic" }, | |
| { id: "gpt-4.1-mini", name: "GPT-4.1 Mini", provider: "openai" }, | |
| { id: "grok-4", name: "Grok 4", provider: "xai" }, | |
| ]); | |
| const noAllowlistText = await runModelDirectiveText(home, "/model list", { | |
| defaults: { | |
| model: { | |
| primary: "anthropic/claude-opus-4-5", | |
| fallbacks: ["openai/gpt-4.1-mini"], | |
| }, | |
| imageModel: { primary: "minimax/MiniMax-M2.5" }, | |
| models: undefined, | |
| }, | |
| }); | |
| expect(noAllowlistText).toContain("Providers:"); | |
| expect(noAllowlistText).toContain("- anthropic"); | |
| expect(noAllowlistText).toContain("- openai"); | |
| expect(noAllowlistText).toContain("- xai"); | |
| expect(noAllowlistText).toContain("Use: /models <provider>"); | |
| vi.mocked(loadModelCatalog).mockResolvedValueOnce([ | |
| { | |
| provider: "anthropic", | |
| id: "claude-opus-4-5", | |
| name: "Claude Opus 4.5", | |
| }, | |
| { provider: "openai", id: "gpt-4.1-mini", name: "GPT-4.1 mini" }, | |
| ]); | |
| const configOnlyProviderText = await runModelDirectiveText(home, "/models minimax", { | |
| defaults: { | |
| models: { | |
| "anthropic/claude-opus-4-5": {}, | |
| "openai/gpt-4.1-mini": {}, | |
| "minimax/MiniMax-M2.5": { alias: "minimax" }, | |
| }, | |
| }, | |
| extra: { | |
| models: { | |
| mode: "merge", | |
| providers: { | |
| minimax: { | |
| baseUrl: "https://api.minimax.io/anthropic", | |
| api: "anthropic-messages", | |
| models: [{ id: "MiniMax-M2.5", name: "MiniMax M2.5" }], | |
| }, | |
| }, | |
| }, | |
| }, | |
| }); | |
| expect(configOnlyProviderText).toContain("Models (minimax"); | |
| expect(configOnlyProviderText).toContain("minimax/MiniMax-M2.5"); | |
| const missingAuthText = await runModelDirectiveText(home, "/model list", { | |
| defaults: { | |
| models: { | |
| "anthropic/claude-opus-4-5": {}, | |
| }, | |
| }, | |
| }); | |
| expect(missingAuthText).toContain("Providers:"); | |
| expect(missingAuthText).not.toContain("missing (missing)"); | |
| expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); | |
| }); | |
| }); | |
| it("sets model override on /model directive", async () => { | |
| await withTempHome(async (home) => { | |
| const storePath = sessionStorePath(home); | |
| await getReplyFromConfig( | |
| { Body: "/model openai/gpt-4.1-mini", From: "+1222", To: "+1222", CommandAuthorized: true }, | |
| {}, | |
| makeWhatsAppDirectiveConfig( | |
| home, | |
| { | |
| model: { primary: "anthropic/claude-opus-4-5" }, | |
| models: { | |
| "anthropic/claude-opus-4-5": {}, | |
| "openai/gpt-4.1-mini": {}, | |
| }, | |
| }, | |
| { session: { store: storePath } }, | |
| ), | |
| ); | |
| assertModelSelection(storePath, { | |
| model: "gpt-4.1-mini", | |
| provider: "openai", | |
| }); | |
| expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); | |
| }); | |
| }); | |
| it("ignores inline /model and /think directives while still running agent content", async () => { | |
| await withTempHome(async (home) => { | |
| mockEmbeddedTextResult("done"); | |
| const inlineModelRes = await getReplyFromConfig( | |
| { | |
| Body: "please sync /model openai/gpt-4.1-mini now", | |
| From: "+1004", | |
| To: "+2000", | |
| }, | |
| {}, | |
| makeDefaultModelConfig(home), | |
| ); | |
| const texts = replyTexts(inlineModelRes); | |
| expect(texts).toContain("done"); | |
| expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); | |
| const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]; | |
| expect(call?.provider).toBe("anthropic"); | |
| expect(call?.model).toBe("claude-opus-4-5"); | |
| vi.mocked(runEmbeddedPiAgent).mockClear(); | |
| mockEmbeddedTextResult("done"); | |
| const inlineThinkRes = await getReplyFromConfig( | |
| { | |
| Body: "please sync /think:high now", | |
| From: "+1004", | |
| To: "+2000", | |
| }, | |
| {}, | |
| makeWhatsAppDirectiveConfig(home, { model: { primary: "anthropic/claude-opus-4-5" } }), | |
| ); | |
| expect(replyTexts(inlineThinkRes)).toContain("done"); | |
| expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); | |
| }); | |
| }); | |
| it("passes elevated defaults when sender is approved", async () => { | |
| await withTempHome(async (home) => { | |
| mockEmbeddedTextResult("done"); | |
| await getReplyFromConfig( | |
| { | |
| Body: "hello", | |
| From: "+1004", | |
| To: "+2000", | |
| Provider: "whatsapp", | |
| SenderE164: "+1004", | |
| }, | |
| {}, | |
| makeWhatsAppDirectiveConfig( | |
| home, | |
| { model: { primary: "anthropic/claude-opus-4-5" } }, | |
| { | |
| tools: { | |
| elevated: { | |
| allowFrom: { whatsapp: ["+1004"] }, | |
| }, | |
| }, | |
| }, | |
| ), | |
| ); | |
| expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); | |
| const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]; | |
| expect(call?.bashElevated).toEqual({ | |
| enabled: true, | |
| allowed: true, | |
| defaultLevel: "on", | |
| }); | |
| }); | |
| }); | |
| it("persists /reasoning off on discord even when model defaults reasoning on", async () => { | |
| await withTempHome(async (home) => { | |
| const storePath = sessionStorePath(home); | |
| mockEmbeddedTextResult("done"); | |
| vi.mocked(loadModelCatalog).mockResolvedValue([ | |
| { | |
| id: "x-ai/grok-4.1-fast", | |
| name: "Grok 4.1 Fast", | |
| provider: "openrouter", | |
| reasoning: true, | |
| }, | |
| ]); | |
| const config = makeWhatsAppDirectiveConfig( | |
| home, | |
| { | |
| model: "openrouter/x-ai/grok-4.1-fast", | |
| }, | |
| { | |
| channels: { | |
| discord: { allowFrom: ["*"] }, | |
| }, | |
| session: { store: storePath }, | |
| }, | |
| ); | |
| const offRes = await getReplyFromConfig( | |
| { | |
| Body: "/reasoning off", | |
| From: "discord:user:1004", | |
| To: "channel:general", | |
| Provider: "discord", | |
| Surface: "discord", | |
| CommandSource: "text", | |
| CommandAuthorized: true, | |
| }, | |
| {}, | |
| config, | |
| ); | |
| expect(replyText(offRes)).toContain("Reasoning visibility disabled."); | |
| const store = loadSessionStore(storePath); | |
| const entry = Object.values(store)[0]; | |
| expect(entry?.reasoningLevel).toBe("off"); | |
| await getReplyFromConfig( | |
| { | |
| Body: "hello", | |
| From: "discord:user:1004", | |
| To: "channel:general", | |
| Provider: "discord", | |
| Surface: "discord", | |
| CommandSource: "text", | |
| CommandAuthorized: true, | |
| }, | |
| {}, | |
| config, | |
| ); | |
| expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); | |
| const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]; | |
| expect(call?.reasoningLevel).toBe("off"); | |
| }); | |
| }); | |
| it("handles reply_to_current tags and explicit reply_to precedence", async () => { | |
| await withTempHome(async (home) => { | |
| for (const replyTag of ["[[reply_to_current]]", "[[ reply_to_current ]]"]) { | |
| const payload = await runReplyToCurrentCase(home, `hello ${replyTag}`); | |
| expect(payload?.text).toBe("hello"); | |
| expect(payload?.replyToId).toBe("msg-123"); | |
| } | |
| vi.mocked(runEmbeddedPiAgent).mockResolvedValue( | |
| makeEmbeddedTextResult("hi [[reply_to_current]] [[reply_to:abc-456]]"), | |
| ); | |
| const res = await getReplyFromConfig( | |
| { | |
| Body: "ping", | |
| From: "+1004", | |
| To: "+2000", | |
| MessageSid: "msg-123", | |
| }, | |
| {}, | |
| makeWhatsAppDirectiveConfig(home, { model: { primary: "anthropic/claude-opus-4-5" } }), | |
| ); | |
| const payload = Array.isArray(res) ? res[0] : res; | |
| expect(payload?.text).toBe("hi"); | |
| expect(payload?.replyToId).toBe("abc-456"); | |
| }); | |
| }); | |
| }); | |