openskynet / src /auto-reply /reply.directive.directive-behavior.defaults-think-low-reasoning-capable-models-no.test.ts

Mirror OpenSkyNet workspace snapshot from Git HEAD

fc93158 verified 17 days ago

13.7 kB

	import "./reply.directive.directive-behavior.e2e-mocks.js";
	import { describe, expect, it, vi } from "vitest";
	import { loadSessionStore } from "../config/sessions.js";
	import {
	assertModelSelection,
	installDirectiveBehaviorE2EHooks,
	loadModelCatalog,
	makeEmbeddedTextResult,
	makeWhatsAppDirectiveConfig,
	mockEmbeddedTextResult,
	replyText,
	replyTexts,
	runEmbeddedPiAgent,
	sessionStorePath,
	withTempHome,
	} from "./reply.directive.directive-behavior.e2e-harness.js";
	import { runModelDirectiveText } from "./reply.directive.directive-behavior.model-directive-test-utils.js";
	import { getReplyFromConfig } from "./reply.js";

	function makeDefaultModelConfig(home: string) {
	return makeWhatsAppDirectiveConfig(home, {
	model: { primary: "anthropic/claude-opus-4-5" },
	models: {
	"anthropic/claude-opus-4-5": {},
	"openai/gpt-4.1-mini": {},
	},
	});
	}

	async function runReplyToCurrentCase(home: string, text: string) {
	vi.mocked(runEmbeddedPiAgent).mockResolvedValue(makeEmbeddedTextResult(text));

	const res = await getReplyFromConfig(
	{
	Body: "ping",
	From: "+1004",
	To: "+2000",
	MessageSid: "msg-123",
	},
	{},
	makeWhatsAppDirectiveConfig(home, { model: "anthropic/claude-opus-4-5" }),
	);

	return Array.isArray(res) ? res[0] : res;
	}

	async function expectThinkStatusForReasoningModel(params: {
	home: string;
	reasoning: boolean;
	expectedLevel: "low" \| "off";
	}): Promise<void> {
	vi.mocked(loadModelCatalog).mockResolvedValueOnce([
	{
	id: "claude-opus-4-5",
	name: "Opus 4.5",
	provider: "anthropic",
	reasoning: params.reasoning,
	},
	]);

	const res = await getReplyFromConfig(
	{ Body: "/think", From: "+1222", To: "+1222", CommandAuthorized: true },
	{},
	makeWhatsAppDirectiveConfig(params.home, { model: "anthropic/claude-opus-4-5" }),
	);

	const text = replyText(res);
	expect(text).toContain(`Current thinking level: ${params.expectedLevel}`);
	expect(text).toContain("Options: off, minimal, low, medium, high, adaptive.");
	}

	function mockReasoningCapableCatalog() {
	vi.mocked(loadModelCatalog).mockResolvedValueOnce([
	{
	id: "claude-opus-4-5",
	name: "Opus 4.5",
	provider: "anthropic",
	reasoning: true,
	},
	]);
	}

	async function runReasoningDefaultCase(params: {
	home: string;
	expectedThinkLevel: "low" \| "off";
	expectedReasoningLevel: "off" \| "on";
	thinkingDefault?: "off" \| "low" \| "medium" \| "high";
	}) {
	vi.mocked(runEmbeddedPiAgent).mockClear();
	mockEmbeddedTextResult("done");
	mockReasoningCapableCatalog();

	await getReplyFromConfig(
	{
	Body: "hello",
	From: "+1004",
	To: "+2000",
	},
	{},
	makeWhatsAppDirectiveConfig(params.home, {
	model: { primary: "anthropic/claude-opus-4-5" },
	...(params.thinkingDefault ? { thinkingDefault: params.thinkingDefault } : {}),
	}),
	);

	expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
	const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
	expect(call?.thinkLevel).toBe(params.expectedThinkLevel);
	expect(call?.reasoningLevel).toBe(params.expectedReasoningLevel);
	}

	describe("directive behavior", () => {
	installDirectiveBehaviorE2EHooks();

	it("covers /think status and reasoning defaults for reasoning and non-reasoning models", async () => {
	await withTempHome(async (home) => {
	await expectThinkStatusForReasoningModel({
	home,
	reasoning: true,
	expectedLevel: "low",
	});
	await expectThinkStatusForReasoningModel({
	home,
	reasoning: false,
	expectedLevel: "off",
	});
	expect(runEmbeddedPiAgent).not.toHaveBeenCalled();

	vi.mocked(runEmbeddedPiAgent).mockClear();

	for (const scenario of [
	{
	expectedThinkLevel: "low" as const,
	expectedReasoningLevel: "off" as const,
	},
	{
	expectedThinkLevel: "off" as const,
	expectedReasoningLevel: "on" as const,
	thinkingDefault: "off" as const,
	},
	]) {
	await runReasoningDefaultCase({
	home,
	...scenario,
	});
	}
	});
	});
	it("renders model list and status variants across catalog/config combinations", async () => {
	await withTempHome(async (home) => {
	const aliasText = await runModelDirectiveText(home, "/model list");
	expect(aliasText).toContain("Providers:");
	expect(aliasText).toContain("- anthropic");
	expect(aliasText).toContain("- openai");
	expect(aliasText).toContain("Use: /models <provider>");
	expect(aliasText).toContain("Switch: /model <provider/model>");

	vi.mocked(loadModelCatalog).mockResolvedValueOnce([]);
	const unavailableCatalogText = await runModelDirectiveText(home, "/model");
	expect(unavailableCatalogText).toContain("Current: anthropic/claude-opus-4-5");
	expect(unavailableCatalogText).toContain("Switch: /model <provider/model>");
	expect(unavailableCatalogText).toContain(
	"Browse: /models (providers) or /models <provider> (models)",
	);
	expect(unavailableCatalogText).toContain("More: /model status");

	const allowlistedStatusText = await runModelDirectiveText(home, "/model status", {
	includeSessionStore: false,
	});
	expect(allowlistedStatusText).toContain("anthropic/claude-opus-4-5");
	expect(allowlistedStatusText).toContain("openai/gpt-4.1-mini");
	expect(allowlistedStatusText).not.toContain("claude-sonnet-4-1");
	expect(allowlistedStatusText).toContain("auth:");

	vi.mocked(loadModelCatalog).mockResolvedValue([
	{ id: "claude-opus-4-5", name: "Opus 4.5", provider: "anthropic" },
	{ id: "gpt-4.1-mini", name: "GPT-4.1 Mini", provider: "openai" },
	{ id: "grok-4", name: "Grok 4", provider: "xai" },
	]);
	const noAllowlistText = await runModelDirectiveText(home, "/model list", {
	defaults: {
	model: {
	primary: "anthropic/claude-opus-4-5",
	fallbacks: ["openai/gpt-4.1-mini"],
	},
	imageModel: { primary: "minimax/MiniMax-M2.5" },
	models: undefined,
	},
	});
	expect(noAllowlistText).toContain("Providers:");
	expect(noAllowlistText).toContain("- anthropic");
	expect(noAllowlistText).toContain("- openai");
	expect(noAllowlistText).toContain("- xai");
	expect(noAllowlistText).toContain("Use: /models <provider>");

	vi.mocked(loadModelCatalog).mockResolvedValueOnce([
	{
	provider: "anthropic",
	id: "claude-opus-4-5",
	name: "Claude Opus 4.5",
	},
	{ provider: "openai", id: "gpt-4.1-mini", name: "GPT-4.1 mini" },
	]);
	const configOnlyProviderText = await runModelDirectiveText(home, "/models minimax", {
	defaults: {
	models: {
	"anthropic/claude-opus-4-5": {},
	"openai/gpt-4.1-mini": {},
	"minimax/MiniMax-M2.5": { alias: "minimax" },
	},
	},
	extra: {
	models: {
	mode: "merge",
	providers: {
	minimax: {
	baseUrl: "https://api.minimax.io/anthropic",
	api: "anthropic-messages",
	models: [{ id: "MiniMax-M2.5", name: "MiniMax M2.5" }],
	},
	},
	},
	},
	});
	expect(configOnlyProviderText).toContain("Models (minimax");
	expect(configOnlyProviderText).toContain("minimax/MiniMax-M2.5");

	const missingAuthText = await runModelDirectiveText(home, "/model list", {
	defaults: {
	models: {
	"anthropic/claude-opus-4-5": {},
	},
	},
	});
	expect(missingAuthText).toContain("Providers:");
	expect(missingAuthText).not.toContain("missing (missing)");
	expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
	});
	});
	it("sets model override on /model directive", async () => {
	await withTempHome(async (home) => {
	const storePath = sessionStorePath(home);

	await getReplyFromConfig(
	{ Body: "/model openai/gpt-4.1-mini", From: "+1222", To: "+1222", CommandAuthorized: true },
	{},
	makeWhatsAppDirectiveConfig(
	home,
	{
	model: { primary: "anthropic/claude-opus-4-5" },
	models: {
	"anthropic/claude-opus-4-5": {},
	"openai/gpt-4.1-mini": {},
	},
	},
	{ session: { store: storePath } },
	),
	);

	assertModelSelection(storePath, {
	model: "gpt-4.1-mini",
	provider: "openai",
	});
	expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
	});
	});
	it("ignores inline /model and /think directives while still running agent content", async () => {
	await withTempHome(async (home) => {
	mockEmbeddedTextResult("done");

	const inlineModelRes = await getReplyFromConfig(
	{
	Body: "please sync /model openai/gpt-4.1-mini now",
	From: "+1004",
	To: "+2000",
	},
	{},
	makeDefaultModelConfig(home),
	);

	const texts = replyTexts(inlineModelRes);
	expect(texts).toContain("done");
	expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
	const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
	expect(call?.provider).toBe("anthropic");
	expect(call?.model).toBe("claude-opus-4-5");
	vi.mocked(runEmbeddedPiAgent).mockClear();

	mockEmbeddedTextResult("done");
	const inlineThinkRes = await getReplyFromConfig(
	{
	Body: "please sync /think:high now",
	From: "+1004",
	To: "+2000",
	},
	{},
	makeWhatsAppDirectiveConfig(home, { model: { primary: "anthropic/claude-opus-4-5" } }),
	);

	expect(replyTexts(inlineThinkRes)).toContain("done");
	expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
	});
	});
	it("passes elevated defaults when sender is approved", async () => {
	await withTempHome(async (home) => {
	mockEmbeddedTextResult("done");

	await getReplyFromConfig(
	{
	Body: "hello",
	From: "+1004",
	To: "+2000",
	Provider: "whatsapp",
	SenderE164: "+1004",
	},
	{},
	makeWhatsAppDirectiveConfig(
	home,
	{ model: { primary: "anthropic/claude-opus-4-5" } },
	{
	tools: {
	elevated: {
	allowFrom: { whatsapp: ["+1004"] },
	},
	},
	},
	),
	);

	expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
	const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
	expect(call?.bashElevated).toEqual({
	enabled: true,
	allowed: true,
	defaultLevel: "on",
	});
	});
	});
	it("persists /reasoning off on discord even when model defaults reasoning on", async () => {
	await withTempHome(async (home) => {
	const storePath = sessionStorePath(home);
	mockEmbeddedTextResult("done");
	vi.mocked(loadModelCatalog).mockResolvedValue([
	{
	id: "x-ai/grok-4.1-fast",
	name: "Grok 4.1 Fast",
	provider: "openrouter",
	reasoning: true,
	},
	]);

	const config = makeWhatsAppDirectiveConfig(
	home,
	{
	model: "openrouter/x-ai/grok-4.1-fast",
	},
	{
	channels: {
	discord: { allowFrom: ["*"] },
	},
	session: { store: storePath },
	},
	);

	const offRes = await getReplyFromConfig(
	{
	Body: "/reasoning off",
	From: "discord:user:1004",
	To: "channel:general",
	Provider: "discord",
	Surface: "discord",
	CommandSource: "text",
	CommandAuthorized: true,
	},
	{},
	config,
	);
	expect(replyText(offRes)).toContain("Reasoning visibility disabled.");

	const store = loadSessionStore(storePath);
	const entry = Object.values(store)[0];
	expect(entry?.reasoningLevel).toBe("off");

	await getReplyFromConfig(
	{
	Body: "hello",
	From: "discord:user:1004",
	To: "channel:general",
	Provider: "discord",
	Surface: "discord",
	CommandSource: "text",
	CommandAuthorized: true,
	},
	{},
	config,
	);

	expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
	const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
	expect(call?.reasoningLevel).toBe("off");
	});
	});
	it("handles reply_to_current tags and explicit reply_to precedence", async () => {
	await withTempHome(async (home) => {
	for (const replyTag of ["[[reply_to_current]]", "[[ reply_to_current ]]"]) {
	const payload = await runReplyToCurrentCase(home, `hello ${replyTag}`);
	expect(payload?.text).toBe("hello");
	expect(payload?.replyToId).toBe("msg-123");
	}

	vi.mocked(runEmbeddedPiAgent).mockResolvedValue(
	makeEmbeddedTextResult("hi [[reply_to_current]] [[reply_to:abc-456]]"),
	);

	const res = await getReplyFromConfig(
	{
	Body: "ping",
	From: "+1004",
	To: "+2000",
	MessageSid: "msg-123",
	},
	{},
	makeWhatsAppDirectiveConfig(home, { model: { primary: "anthropic/claude-opus-4-5" } }),
	);

	const payload = Array.isArray(res) ? res[0] : res;
	expect(payload?.text).toBe("hi");
	expect(payload?.replyToId).toBe("abc-456");
	});
	});
	});