| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import { Content, GenerativeContentBlob, Part } from "@google/generative-ai"; |
| import { EventEmitter } from "eventemitter3"; |
| import { difference } from "lodash"; |
| import { |
| ClientContentMessage, |
| isInterrupted, |
| isModelTurn, |
| isServerContentMessage, |
| isSetupCompleteMessage, |
| isToolCallCancellationMessage, |
| isToolCallMessage, |
| isTurnComplete, |
| LiveIncomingMessage, |
| ModelTurn, |
| RealtimeInputMessage, |
| ServerContent, |
| SetupMessage, |
| StreamingLog, |
| ToolCall, |
| ToolCallCancellation, |
| ToolResponseMessage, |
| type LiveConfig, |
| } from "../multimodal-live-types"; |
| import { blobToJSON, base64ToArrayBuffer } from "./utils"; |
|
|
| |
| |
| |
/**
 * Map of event names to listener signatures for `MultimodalLiveClient`.
 *
 * Used as the type argument of `EventEmitter` so that `on`/`emit` calls on the
 * client are checked against these signatures.
 */
interface MultimodalLiveClientEventTypes {
  /** Emitted from the socket's "open" handler during `connect()`. */
  open: () => void;
  /** Emitted for every entry produced by the client's `log()` method. */
  log: (log: StreamingLog) => void;
  /** Emitted with the browser `CloseEvent` when the socket closes. */
  close: (event: CloseEvent) => void;
  /** Emitted once per `audio/pcm` inline-data part of a model turn. */
  audio: (data: ArrayBuffer) => void;
  /** Emitted with the parts of a model turn that are not `audio/pcm`. */
  content: (data: ServerContent) => void;
  /** Emitted when server content is flagged as interrupted. */
  interrupted: () => void;
  /** Emitted when a setup-complete message is received. */
  setupcomplete: () => void;
  /** Emitted when server content is flagged as turn-complete. */
  turncomplete: () => void;
  /** Emitted with the payload of an incoming tool-call message. */
  toolcall: (toolCall: ToolCall) => void;
  /** Emitted with the payload of an incoming tool-call cancellation message. */
  toolcallcancellation: (toolcallCancellation: ToolCallCancellation) => void;
}
|
|
/**
 * Options accepted by the `MultimodalLiveClient` constructor.
 */
export type MultimodalLiveAPIClientConnection = {
  /**
   * WebSocket endpoint to connect to. When omitted (or empty) the client
   * derives a `/ws` endpoint from `window.location`.
   */
  url?: string;
  /**
   * API key. NOTE(review): accepted here, but the client constructor in this
   * file never reads it — confirm whether it is still needed.
   */
  apiKey?: string;
};
|
|
| |
| |
| |
| |
| |
/**
 * Event-emitting WebSocket client for the multimodal live API.
 *
 * The client opens a WebSocket, sends a setup message built from the supplied
 * `LiveConfig`, decodes incoming Blob frames via `blobToJSON`, and re-emits
 * them as typed events (see `MultimodalLiveClientEventTypes`). Outgoing
 * messages are JSON-serialised and written straight to the socket.
 */
export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEventTypes> {
  /** The active socket, or `null` while no connection is established. */
  public ws: WebSocket | null = null;
  /** Config passed to the most recent `connect()` call. */
  protected config: LiveConfig | null = null;
  /** Endpoint used by `connect()`. */
  public url: string;

  /**
   * @param connection - Optional connection settings. When `url` is missing
   *   or empty, a `/ws` endpoint on the current page's host is used.
   */
  constructor({ url }: MultimodalLiveAPIClientConnection = {}) {
    super();
    // `||` (not `??`) on purpose: an empty string also falls back to the default.
    this.url = url || MultimodalLiveClient.defaultUrl();
    console.log("🔧 Initializing MultimodalLiveClient with URL:", this.url);
    // Bound so `send` can be handed around as a bare callback.
    this.send = this.send.bind(this);
  }

  /**
   * Builds the default endpoint from the page location: `wss:` when the page
   * is served over https, `ws:` otherwise, same host, path `/ws`.
   */
  private static defaultUrl(): string {
    const scheme = window.location.protocol === "https:" ? "wss:" : "ws:";
    return `${scheme}//${window.location.host}/ws`;
  }

  /**
   * Reduces a close reason to the text following an `ERROR]` marker.
   *
   * The reason is only rewritten when it contains "error" (case-insensitive)
   * and the literal marker `ERROR]` is present; the marker plus the single
   * character after it are dropped. Otherwise the reason is returned as is.
   */
  private static parseCloseReason(raw: string): string {
    if (!raw.toLowerCase().includes("error")) {
      return raw;
    }
    const prelude = "ERROR]";
    const preludeIndex = raw.indexOf(prelude);
    // indexOf reports "not found" as -1, so a marker at index 0 is a valid hit.
    if (preludeIndex === -1) {
      return raw;
    }
    return raw.slice(preludeIndex + prelude.length + 1);
  }

  /**
   * Emits a timestamped `log` event.
   *
   * @param type - Free-form category string, e.g. `"client.send"`.
   * @param message - Log payload.
   */
  log(type: string, message: StreamingLog["message"]) {
    const entry: StreamingLog = {
      date: new Date(),
      type,
      message,
    };
    this.emit("log", entry);
  }

  /**
   * Opens a WebSocket to `this.url` and sends the setup message.
   *
   * @param config - Configuration sent to the server as `{ setup: config }`.
   * @returns A promise that resolves to `true` once the socket is open and the
   *   setup message has been sent. It rejects with an `Error` if the socket
   *   reports an error before opening, or if no config is available when the
   *   socket opens.
   */
  connect(config: LiveConfig): Promise<boolean> {
    console.log("🔌 Attempting WebSocket connection to:", this.url);
    this.config = config;
    console.log("🔗 MultimodalLiveClient: Starting WebSocket connection to:", this.url);

    const ws = new WebSocket(this.url);

    ws.addEventListener("message", (evt: MessageEvent) => {
      console.log(
        "📨 Received WebSocket message:",
        evt.data instanceof Blob ? "Blob data" : evt.data,
      );
      if (evt.data instanceof Blob) {
        console.log("📩 MultimodalLiveClient: Received blob message");
        // `receive` is async; without a catch, a malformed payload or a
        // throwing listener would surface as an unhandled promise rejection.
        this.receive(evt.data).catch((err: unknown) => {
          console.error("failed to process incoming message", err);
        });
      } else {
        console.log("non blob message", evt);
      }
    });

    return new Promise<boolean>((resolve, reject) => {
      const onError = (ev: Event) => {
        this.disconnect(ws);
        const message = `Could not connect to "${this.url}"`;
        this.log(`server.${ev.type}`, message);
        reject(new Error(message));
      };
      ws.addEventListener("error", onError);

      ws.addEventListener("open", (ev: Event) => {
        console.log("✅ WebSocket connection opened successfully");
        if (!this.config) {
          // Nobody owns this socket yet, so close it rather than leak it.
          ws.close();
          reject(new Error("Invalid config sent to `connect(config)`"));
          return;
        }
        console.log("✨ MultimodalLiveClient: WebSocket connection established");

        // Publish the socket and send setup BEFORE emitting "open": listeners
        // that send immediately must find a connected client, and the setup
        // message has to be the first frame on the wire.
        this.ws = ws;
        this.log(`client.${ev.type}`, `connected to socket`);

        const setupMessage: SetupMessage = {
          setup: this.config,
        };
        this._sendDirect(setupMessage);
        this.log("client.send", "setup");

        // From here on, failures are reported through the "close" event
        // instead of rejecting the (already settled) connect promise.
        ws.removeEventListener("error", onError);
        ws.addEventListener("close", (closeEvent: CloseEvent) => {
          console.log(closeEvent);
          this.disconnect(ws);
          const reason = MultimodalLiveClient.parseCloseReason(
            closeEvent.reason || "",
          );
          console.log("📝 Close reason:", reason || "No reason provided");
          this.log(
            `server.${closeEvent.type}`,
            `disconnected ${reason ? `with reason: ${reason}` : ``}`,
          );
          this.emit("close", closeEvent);
        });

        this.emit("open");
        resolve(true);
      });
    });
  }

  /**
   * Closes the active socket.
   *
   * @param ws - When given, the active socket is only closed if it is this
   *   exact instance, so a stale socket's handlers cannot tear down a newer
   *   connection.
   * @returns `true` if a socket was closed, `false` if there was nothing to do.
   */
  disconnect(ws?: WebSocket) {
    console.log("🔌 Attempting to disconnect WebSocket");

    const active = this.ws;
    if (active && (!ws || active === ws)) {
      console.log("🔒 Closing WebSocket connection");
      active.close();
      this.ws = null;
      this.log("client.close", `Disconnected`);
      return true;
    }

    console.log("⚠️ No active WebSocket to disconnect");
    return false;
  }

  /**
   * Decodes one incoming Blob frame and dispatches it as events.
   *
   * Tool calls, tool-call cancellations and setup-complete messages are each
   * emitted and end processing. For server content, `interrupted` ends
   * processing, while `turncomplete` deliberately falls through so a model
   * turn carried by the same message is still handled.
   *
   * @param blob - Raw frame received from the socket.
   */
  protected async receive(blob: Blob) {
    const response: LiveIncomingMessage = (await blobToJSON(
      blob,
    )) as LiveIncomingMessage;
    console.log("📥 Received message:", response);

    if (isToolCallMessage(response)) {
      console.log("🛠️ MultimodalLiveClient: Received tool call");
      this.log("server.toolCall", response);
      this.emit("toolcall", response.toolCall);
      return;
    }

    if (isToolCallCancellationMessage(response)) {
      console.log("🚫 MultimodalLiveClient: Received tool call cancellation");
      this.log("receive.toolCallCancellation", response);
      this.emit("toolcallcancellation", response.toolCallCancellation);
      return;
    }

    if (isSetupCompleteMessage(response)) {
      console.log("🎉 MultimodalLiveClient: Setup complete received");
      this.log("server.send", "setupComplete");
      this.emit("setupcomplete");
      return;
    }

    if (!isServerContentMessage(response)) {
      console.log("received unmatched message", response);
      return;
    }

    const { serverContent } = response;

    if (isInterrupted(serverContent)) {
      this.log("receive.serverContent", "interrupted");
      this.emit("interrupted");
      return;
    }

    if (isTurnComplete(serverContent)) {
      this.log("server.send", "turnComplete");
      this.emit("turncomplete");
      // No return: continue in case the message also carries a model turn.
    }

    if (isModelTurn(serverContent)) {
      const parts: Part[] = serverContent.modelTurn.parts;

      // Audio parts go out as binary "audio" events; everything else is
      // forwarded together as a single "content" event.
      const audioParts = parts.filter(
        (p) => p.inlineData && p.inlineData.mimeType.startsWith("audio/pcm"),
      );
      const otherParts = difference(parts, audioParts);

      for (const part of audioParts) {
        const b64 = part.inlineData?.data;
        if (b64) {
          const data = base64ToArrayBuffer(b64);
          this.emit("audio", data);
          this.log(`server.audio`, `buffer (${data.byteLength})`);
        }
      }

      if (!otherParts.length) {
        return;
      }

      const content: ModelTurn = { modelTurn: { parts: otherParts } };
      this.emit("content", content);
      this.log(`server.content`, response);
    }
  }

  /**
   * Sends realtime media chunks as a `realtimeInput` message.
   *
   * @param chunks - Media blobs to send. Their MIME types are only inspected
   *   to build the log label ("audio", "video", "audio + video" or "unknown").
   * @throws Error if the socket is not connected.
   */
  sendRealtimeInput(chunks: GenerativeContentBlob[]) {
    const hasAudio = chunks.some((chunk) => chunk.mimeType.includes("audio"));
    // Chunks with an image MIME type are labelled "video" in the log.
    const hasVideo = chunks.some((chunk) => chunk.mimeType.includes("image"));

    let message = "unknown";
    if (hasAudio && hasVideo) {
      message = "audio + video";
    } else if (hasAudio) {
      message = "audio";
    } else if (hasVideo) {
      message = "video";
    }

    const data: RealtimeInputMessage = {
      realtimeInput: {
        mediaChunks: chunks,
      },
    };
    this._sendDirect(data);
    this.log(`client.realtimeInput`, message);
  }

  /**
   * Sends a tool response message.
   *
   * @param toolResponse - Payload placed under the `toolResponse` key.
   * @throws Error if the socket is not connected.
   */
  sendToolResponse(toolResponse: ToolResponseMessage["toolResponse"]) {
    const message: ToolResponseMessage = {
      toolResponse,
    };

    this._sendDirect(message);
    this.log(`client.toolResponse`, message);
  }

  /**
   * Sends user content as a single `clientContent` turn.
   *
   * @param parts - One part or a list of parts; a single part is wrapped in
   *   an array.
   * @param turnComplete - Value sent as the message's `turnComplete` flag.
   * @throws Error if the socket is not connected.
   */
  send(parts: Part | Part[], turnComplete: boolean = true) {
    const partList = Array.isArray(parts) ? parts : [parts];
    const content: Content = {
      role: "user",
      parts: partList,
    };

    const clientContentRequest: ClientContentMessage = {
      clientContent: {
        turns: [content],
        turnComplete,
      },
    };

    this._sendDirect(clientContentRequest);
    this.log(`client.send`, clientContentRequest);
  }

  /**
   * Serialises `request` as JSON and writes it to the socket. Used by the
   * higher-level `send*` methods.
   *
   * @param request - Any JSON-serialisable message object.
   * @throws Error if the socket is not connected.
   */
  _sendDirect(request: object) {
    if (!this.ws) {
      throw new Error("WebSocket is not connected");
    }
    const str = JSON.stringify(request);
    this.ws.send(str);
  }
}
|
|