import { completeSimple } from "@mariozechner/pi-ai";
import { describe, expect, it, vi, beforeEach } from "vitest";
import { getApiKeyForModel } from "../agents/model-auth.js";
import { resolveModel } from "../agents/pi-embedded-runner/model.js";
import * as tts from "./tts.js";

// Unit tests for the TTS helpers exported from ./tts.js. The completion client,
// model resolution and API-key lookup are mocked below so that no test reaches
// a real provider.

vi.mock("@mariozechner/pi-ai", () => ({
  completeSimple: vi.fn(),
}));

vi.mock("../agents/pi-embedded-runner/model.js", () => ({
  resolveModel: vi.fn((provider: string, modelId: string) => ({
    model: {
      provider,
      id: modelId,
      name: modelId,
      api: "openai-completions",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    },
    authStorage: { profiles: {} },
    modelRegistry: { find: vi.fn() },
  })),
}));

vi.mock("../agents/model-auth.js", () => ({
  getApiKeyForModel: vi.fn(async () => ({
    apiKey: "test-api-key",
    source: "test",
    mode: "api-key",
  })),
  requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? ""),
}));

const { _test, resolveTtsConfig, maybeApplyTtsToPayload, getTtsProvider } = tts;

const {
  isValidVoiceId,
  isValidOpenAIVoice,
  isValidOpenAIModel,
  OPENAI_TTS_MODELS,
  OPENAI_TTS_VOICES,
  parseTtsDirectives,
  resolveModelOverridePolicy,
  summarizeText,
  resolveOutputFormat,
  resolveEdgeOutputFormat,
} = _test;

describe("tts", () => {
  beforeEach(() => {
    // Reset call history and give the mocked completion a default reply;
    // individual tests override it where they need a different response.
    vi.clearAllMocks();
    vi.mocked(completeSimple).mockResolvedValue({
      content: [{ type: "text", text: "Summary" }],
    });
  });

  describe("isValidVoiceId", () => {
    it("accepts valid ElevenLabs voice IDs", () => {
      expect(isValidVoiceId("pMsXgVXv3BLzUgSXRplE")).toBe(true);
      expect(isValidVoiceId("21m00Tcm4TlvDq8ikWAM")).toBe(true);
      expect(isValidVoiceId("EXAVITQu4vr4xnSDxMaL")).toBe(true);
    });

    it("accepts voice IDs of varying valid lengths", () => {
      expect(isValidVoiceId("a1b2c3d4e5")).toBe(true);
      expect(isValidVoiceId("a".repeat(40))).toBe(true);
    });

    it("rejects too short voice IDs", () => {
      expect(isValidVoiceId("")).toBe(false);
      expect(isValidVoiceId("abc")).toBe(false);
      expect(isValidVoiceId("123456789")).toBe(false);
    });

    it("rejects too long voice IDs", () => {
      expect(isValidVoiceId("a".repeat(41))).toBe(false);
      expect(isValidVoiceId("a".repeat(100))).toBe(false);
    });

    it("rejects voice IDs with invalid characters", () => {
      expect(isValidVoiceId("pMsXgVXv3BLz-gSXRplE")).toBe(false);
      expect(isValidVoiceId("pMsXgVXv3BLz_gSXRplE")).toBe(false);
      expect(isValidVoiceId("pMsXgVXv3BLz gSXRplE")).toBe(false);
      expect(isValidVoiceId("../../../etc/passwd")).toBe(false);
      expect(isValidVoiceId("voice?param=value")).toBe(false);
    });
  });

  describe("isValidOpenAIVoice", () => {
    it("accepts all valid OpenAI voices", () => {
      for (const voice of OPENAI_TTS_VOICES) {
        expect(isValidOpenAIVoice(voice)).toBe(true);
      }
    });

    it("rejects invalid voice names", () => {
      expect(isValidOpenAIVoice("invalid")).toBe(false);
      expect(isValidOpenAIVoice("")).toBe(false);
      expect(isValidOpenAIVoice("ALLOY")).toBe(false);
      expect(isValidOpenAIVoice("alloy ")).toBe(false);
      expect(isValidOpenAIVoice(" alloy")).toBe(false);
    });
  });

  describe("isValidOpenAIModel", () => {
    it("accepts supported models", () => {
      expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
      expect(isValidOpenAIModel("tts-1")).toBe(true);
      expect(isValidOpenAIModel("tts-1-hd")).toBe(true);
    });

    it("rejects unsupported models", () => {
      expect(isValidOpenAIModel("invalid")).toBe(false);
      expect(isValidOpenAIModel("")).toBe(false);
      expect(isValidOpenAIModel("gpt-4")).toBe(false);
    });
  });

  describe("OPENAI_TTS_MODELS", () => {
    it("contains supported models", () => {
      expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
      expect(OPENAI_TTS_MODELS).toContain("tts-1");
      expect(OPENAI_TTS_MODELS).toContain("tts-1-hd");
      expect(OPENAI_TTS_MODELS).toHaveLength(3);
    });

    it("is a non-empty array", () => {
      expect(Array.isArray(OPENAI_TTS_MODELS)).toBe(true);
      expect(OPENAI_TTS_MODELS.length).toBeGreaterThan(0);
    });
  });

  describe("resolveOutputFormat", () => {
    it("uses Opus for Telegram", () => {
      const output = resolveOutputFormat("telegram");
      expect(output.openai).toBe("opus");
      expect(output.elevenlabs).toBe("opus_48000_64");
      expect(output.extension).toBe(".opus");
      expect(output.voiceCompatible).toBe(true);
    });

    it("uses MP3 for other channels", () => {
      const output = resolveOutputFormat("discord");
      expect(output.openai).toBe("mp3");
      expect(output.elevenlabs).toBe("mp3_44100_128");
      expect(output.extension).toBe(".mp3");
      expect(output.voiceCompatible).toBe(false);
    });
  });

  describe("resolveEdgeOutputFormat", () => {
    const baseCfg = {
      agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
      messages: { tts: {} },
    };

    it("uses default output format when edge output format is not configured", () => {
      const config = resolveTtsConfig(baseCfg);
      expect(resolveEdgeOutputFormat(config)).toBe("audio-24khz-48kbitrate-mono-mp3");
    });

    it("uses configured output format when provided", () => {
      const config = resolveTtsConfig({
        ...baseCfg,
        messages: {
          tts: {
            edge: { outputFormat: "audio-24khz-96kbitrate-mono-mp3" },
          },
        },
      });
      expect(resolveEdgeOutputFormat(config)).toBe("audio-24khz-96kbitrate-mono-mp3");
    });
  });

  describe("parseTtsDirectives", () => {
    it("extracts overrides and strips directives when enabled", () => {
      const policy = resolveModelOverridePolicy({ enabled: true });
      const input =
        "Hello [[tts:provider=elevenlabs voiceId=pMsXgVXv3BLzUgSXRplE stability=0.4 speed=1.1]] world\n\n" +
        "[[tts:text]](laughs) Read the song once more.[[/tts:text]]";
      const result = parseTtsDirectives(input, policy);

      expect(result.cleanedText).not.toContain("[[tts:");
      expect(result.ttsText).toBe("(laughs) Read the song once more.");
      expect(result.overrides.provider).toBe("elevenlabs");
      expect(result.overrides.elevenlabs?.voiceId).toBe("pMsXgVXv3BLzUgSXRplE");
      expect(result.overrides.elevenlabs?.voiceSettings?.stability).toBe(0.4);
      expect(result.overrides.elevenlabs?.voiceSettings?.speed).toBe(1.1);
    });

    it("accepts edge as provider override", () => {
      const policy = resolveModelOverridePolicy({ enabled: true });
      const input = "Hello [[tts:provider=edge]] world";
      const result = parseTtsDirectives(input, policy);

      expect(result.overrides.provider).toBe("edge");
    });

    it("keeps text intact when overrides are disabled", () => {
      const policy = resolveModelOverridePolicy({ enabled: false });
      const input = "Hello [[tts:voice=alloy]] world";
      const result = parseTtsDirectives(input, policy);

      expect(result.cleanedText).toBe(input);
      expect(result.overrides.provider).toBeUndefined();
    });
  });

  describe("summarizeText", () => {
    const baseCfg = {
      agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
      messages: { tts: {} },
    };
    const baseConfig = resolveTtsConfig(baseCfg);

    it("summarizes text and returns result with metrics", async () => {
      const mockSummary = "This is a summarized version of the text.";
      vi.mocked(completeSimple).mockResolvedValue({
        content: [{ type: "text", text: mockSummary }],
      });

      const longText = "A".repeat(2000);
      const result = await summarizeText({
        text: longText,
        targetLength: 1500,
        cfg: baseCfg,
        config: baseConfig,
        timeoutMs: 30_000,
      });

      expect(result.summary).toBe(mockSummary);
      expect(result.inputLength).toBe(2000);
      expect(result.outputLength).toBe(mockSummary.length);
      expect(result.latencyMs).toBeGreaterThanOrEqual(0);
      expect(completeSimple).toHaveBeenCalledTimes(1);
    });

    it("calls the summary model with the expected parameters", async () => {
      await summarizeText({
        text: "Long text to summarize",
        targetLength: 500,
        cfg: baseCfg,
        config: baseConfig,
        timeoutMs: 30_000,
      });

      const callArgs = vi.mocked(completeSimple).mock.calls[0];
      expect(callArgs?.[1]?.messages?.[0]?.role).toBe("user");
      expect(callArgs?.[2]?.maxTokens).toBe(250);
      expect(callArgs?.[2]?.temperature).toBe(0.3);
      expect(getApiKeyForModel).toHaveBeenCalledTimes(1);
    });

    it("uses summaryModel override when configured", async () => {
      const cfg = {
        agents: { defaults: { model: { primary: "anthropic/claude-opus-4-5" } } },
        messages: { tts: { summaryModel: "openai/gpt-4.1-mini" } },
      };
      const config = resolveTtsConfig(cfg);

      await summarizeText({
        text: "Long text to summarize",
        targetLength: 500,
        cfg,
        config,
        timeoutMs: 30_000,
      });

      expect(resolveModel).toHaveBeenCalledWith("openai", "gpt-4.1-mini", undefined, cfg);
    });

    it("rejects targetLength below minimum (100)", async () => {
      await expect(
        summarizeText({
          text: "text",
          targetLength: 99,
          cfg: baseCfg,
          config: baseConfig,
          timeoutMs: 30_000,
        }),
      ).rejects.toThrow("Invalid targetLength: 99");
    });

    it("rejects targetLength above maximum (10000)", async () => {
      await expect(
        summarizeText({
          text: "text",
          targetLength: 10001,
          cfg: baseCfg,
          config: baseConfig,
          timeoutMs: 30_000,
        }),
      ).rejects.toThrow("Invalid targetLength: 10001");
    });

    it("accepts targetLength at boundaries", async () => {
      await expect(
        summarizeText({
          text: "text",
          targetLength: 100,
          cfg: baseCfg,
          config: baseConfig,
          timeoutMs: 30_000,
        }),
      ).resolves.toBeDefined();

      await expect(
        summarizeText({
          text: "text",
          targetLength: 10000,
          cfg: baseCfg,
          config: baseConfig,
          timeoutMs: 30_000,
        }),
      ).resolves.toBeDefined();
    });

    it("throws error when no summary is returned", async () => {
      vi.mocked(completeSimple).mockResolvedValue({
        content: [],
      });

      await expect(
        summarizeText({
          text: "text",
          targetLength: 500,
          cfg: baseCfg,
          config: baseConfig,
          timeoutMs: 30_000,
        }),
      ).rejects.toThrow("No summary returned");
    });

    it("throws error when summary content is empty", async () => {
      vi.mocked(completeSimple).mockResolvedValue({
        content: [{ type: "text", text: " " }],
      });

      await expect(
        summarizeText({
          text: "text",
          targetLength: 500,
          cfg: baseCfg,
          config: baseConfig,
          timeoutMs: 30_000,
        }),
      ).rejects.toThrow("No summary returned");
    });
  });

  describe("getTtsProvider", () => {
    const baseCfg = {
      agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
      messages: { tts: {} },
    };

    /** Environment overrides; an `undefined` value means "unset this variable". */
    type EnvOverrides = Record<string, string | undefined>;

    // API-key variables that are always snapshotted and restored around a test.
    const providerEnvKeys = ["OPENAI_API_KEY", "ELEVENLABS_API_KEY", "XI_API_KEY"] as const;

    /**
     * Writes `values` into `process.env`. Variables mapped to `undefined` are
     * deleted rather than assigned, because assigning `undefined` would store
     * the literal string "undefined".
     */
    const applyEnv = (values: EnvOverrides): void => {
      for (const [key, value] of Object.entries(values)) {
        if (value === undefined) {
          delete process.env[key];
        } else {
          process.env[key] = value;
        }
      }
    };

    /** Puts every variable recorded in `snapshot` back to its captured state. */
    const restoreEnv = (snapshot: EnvOverrides): void => {
      applyEnv(snapshot);
    };

    /**
     * Runs `run` with `env` applied on top of the current environment, then
     * restores the provider API-key variables and every key named in `env`,
     * even when `run` throws.
     */
    const withEnv = (env: EnvOverrides, run: () => void): void => {
      const snapshot: EnvOverrides = {};
      for (const key of [...providerEnvKeys, ...Object.keys(env)]) {
        snapshot[key] = process.env[key];
      }

      try {
        applyEnv(env);
        run();
      } finally {
        restoreEnv(snapshot);
      }
    };

    it("prefers OpenAI when no provider is configured and API key exists", () => {
      withEnv(
        {
          OPENAI_API_KEY: "test-openai-key",
          ELEVENLABS_API_KEY: undefined,
          XI_API_KEY: undefined,
        },
        () => {
          const config = resolveTtsConfig(baseCfg);
          const provider = getTtsProvider(config, "/tmp/tts-prefs-openai.json");
          expect(provider).toBe("openai");
        },
      );
    });

    it("prefers ElevenLabs when OpenAI is missing and ElevenLabs key exists", () => {
      withEnv(
        {
          OPENAI_API_KEY: undefined,
          ELEVENLABS_API_KEY: "test-elevenlabs-key",
          XI_API_KEY: undefined,
        },
        () => {
          const config = resolveTtsConfig(baseCfg);
          const provider = getTtsProvider(config, "/tmp/tts-prefs-elevenlabs.json");
          expect(provider).toBe("elevenlabs");
        },
      );
    });

    it("falls back to Edge when no API keys are present", () => {
      withEnv(
        {
          OPENAI_API_KEY: undefined,
          ELEVENLABS_API_KEY: undefined,
          XI_API_KEY: undefined,
        },
        () => {
          const config = resolveTtsConfig(baseCfg);
          const provider = getTtsProvider(config, "/tmp/tts-prefs-edge.json");
          expect(provider).toBe("edge");
        },
      );
    });
  });

  describe("maybeApplyTtsToPayload", () => {
    const baseCfg = {
      agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
      messages: {
        tts: {
          auto: "inbound",
          provider: "openai",
          openai: { apiKey: "test-key", model: "gpt-4o-mini-tts", voice: "alloy" },
        },
      },
    };

    // Same configuration, switched to tag-driven auto-TTS.
    const taggedCfg = {
      ...baseCfg,
      messages: {
        ...baseCfg.messages,
        tts: { ...baseCfg.messages.tts, auto: "tagged" },
      },
    };

    /** Builds a `fetch` stand-in that resolves with a one-byte successful body. */
    const createFetchMock = () =>
      vi.fn(async () => ({
        ok: true,
        arrayBuffer: async () => new ArrayBuffer(1),
      }));

    /**
     * Runs `run` with `globalThis.fetch` replaced by a mock and
     * `OPENCLAW_TTS_PREFS` pointed at a throwaway path.
     *
     * Both are restored in `finally`, so a failing assertion cannot leak the
     * stub into later tests. An originally unset variable is deleted rather
     * than assigned, since assigning `undefined` to `process.env` stores the
     * string "undefined".
     */
    const withStubbedTtsEnv = async (
      run: (fetchMock: ReturnType<typeof createFetchMock>) => Promise<void>,
    ): Promise<void> => {
      const prevPrefs = process.env.OPENCLAW_TTS_PREFS;
      const originalFetch = globalThis.fetch;
      const fetchMock = createFetchMock();

      process.env.OPENCLAW_TTS_PREFS = `/tmp/tts-test-${Date.now()}.json`;
      globalThis.fetch = fetchMock as unknown as typeof fetch;

      try {
        await run(fetchMock);
      } finally {
        globalThis.fetch = originalFetch;
        if (prevPrefs === undefined) {
          delete process.env.OPENCLAW_TTS_PREFS;
        } else {
          process.env.OPENCLAW_TTS_PREFS = prevPrefs;
        }
      }
    };

    it("skips auto-TTS when inbound audio gating is on and the message is not audio", async () => {
      await withStubbedTtsEnv(async (fetchMock) => {
        const payload = { text: "Hello world" };
        const result = await maybeApplyTtsToPayload({
          payload,
          cfg: baseCfg,
          kind: "final",
          inboundAudio: false,
        });

        expect(result).toBe(payload);
        expect(fetchMock).not.toHaveBeenCalled();
      });
    });

    it("attempts auto-TTS when inbound audio gating is on and the message is audio", async () => {
      await withStubbedTtsEnv(async (fetchMock) => {
        const result = await maybeApplyTtsToPayload({
          payload: { text: "Hello world" },
          cfg: baseCfg,
          kind: "final",
          inboundAudio: true,
        });

        expect(result.mediaUrl).toBeDefined();
        expect(fetchMock).toHaveBeenCalledTimes(1);
      });
    });

    it("skips auto-TTS in tagged mode unless a tts tag is present", async () => {
      await withStubbedTtsEnv(async (fetchMock) => {
        const payload = { text: "Hello world" };
        const result = await maybeApplyTtsToPayload({
          payload,
          cfg: taggedCfg,
          kind: "final",
        });

        expect(result).toBe(payload);
        expect(fetchMock).not.toHaveBeenCalled();
      });
    });

    it("runs auto-TTS in tagged mode when tags are present", async () => {
      await withStubbedTtsEnv(async (fetchMock) => {
        const result = await maybeApplyTtsToPayload({
          payload: { text: "[[tts:text]]Hello world[[/tts:text]]" },
          cfg: taggedCfg,
          kind: "final",
        });

        expect(result.mediaUrl).toBeDefined();
        expect(fetchMock).toHaveBeenCalledTimes(1);
      });
    });
  });
});