import type { TextGenerationStreamOutput } from "@huggingface/inference";
import type OpenAI from "openai";
import type { Stream } from "openai/streaming";
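
// Adapters that convert OpenAI-style chat completion responses (streaming and
// non-streaming) into the TextGenerationStreamOutput shape consumed downstream.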
/**
 * Transform a stream of OpenAI.Chat.Completions.ChatCompletionChunk into a stream of TextGenerationStreamOutput
 */
export async function* openAIChatToTextGenerationStream(
	completionStream: Stream<OpenAI.Chat.Completions.ChatCompletionChunk>,
	getRouterMetadata?: () => { route?: string; model?: string; provider?: string }
) {
	let generatedText = "";
	let tokenId = 0;
	// Buffer for tool-call argument fragments streamed with empty ids (see the workaround below)
	let toolBuffer = "";
	let metadataYielded = false;
	let thinkOpen = false;
	for await (const completion of completionStream) {
		const retyped = completion as {
			"x-router-metadata"?: { route: string; model: string; provider?: string };
		};

		// Check if this chunk contains router metadata (first chunk from llm-router)
		if (!metadataYielded && retyped["x-router-metadata"]) {
			const metadata = retyped["x-router-metadata"];
			yield {
				token: {
					id: tokenId++,
					text: "",
					logprob: 0,
					special: true,
				},
				generated_text: null,
				details: null,
				routerMetadata: {
					route: metadata.route,
					model: metadata.model,
					provider: metadata.provider,
				},
			} as TextGenerationStreamOutput & {
				routerMetadata: { route: string; model: string; provider?: string };
			};
			metadataYielded = true;

			// Skip processing this chunk as content if it carries no delta content (it was metadata-only)
			if (
				!completion.choices ||
				completion.choices.length === 0 ||
				!completion.choices[0].delta?.content
			) {
				continue;
			}
		}
		const { choices } = completion;
		const delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & {
			reasoning?: string;
			reasoning_content?: string;
		} = choices?.[0]?.delta ?? {};
		const content: string = delta.content ?? "";
		// Providers disagree on the field name used for streamed reasoning tokens
		const reasoning: string =
			typeof delta.reasoning === "string"
				? delta.reasoning
				: typeof delta.reasoning_content === "string"
					? delta.reasoning_content
					: "";
		const last =
			choices?.[0]?.finish_reason === "stop" || choices?.[0]?.finish_reason === "length";

		// If the last token is a stop and the tool buffer is not empty, yield it as the generated_text
		if (choices?.[0]?.finish_reason === "stop" && toolBuffer.length > 0) {
			yield {
				token: {
					id: tokenId++,
					special: true,
					logprob: 0,
					text: "",
				},
				generated_text: toolBuffer,
				details: null,
			} as TextGenerationStreamOutput;
			break;
		}
		// Workaround for providers that stream tool-call parameters as lone fragments
		// with an empty id and a null function name: buffer the argument text instead
		// of emitting it as content
		if (choices?.[0]?.delta?.tool_calls) {
			const calls = Array.isArray(choices[0].delta.tool_calls)
				? choices[0].delta.tool_calls
				: [choices[0].delta.tool_calls];
			if (
				calls.length === 1 &&
				calls[0].index === 0 &&
				calls[0].id === "" &&
				calls[0].type === "function" &&
				!!calls[0].function &&
				calls[0].function.name === null
			) {
				toolBuffer += calls[0].function.arguments;
				continue;
			}
		}
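		// For reference, a fragment matching the guard above looks roughly like this
		// (shape inferred from the checks; the argument text is an assumption):
		//   { index: 0, id: "", type: "function", function: { name: null, arguments: "{\"query\":" } }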
		let combined = "";
		if (reasoning && reasoning.length > 0) {
			// Open a <think> block on the first reasoning token, then append raw reasoning
			if (!thinkOpen) {
				combined += "<think>" + reasoning;
				thinkOpen = true;
			} else {
				combined += reasoning;
			}
		}
		if (content && content.length > 0) {
			const trimmed = content.trim();
			// Allow <think> tags in content to pass through (for models like DeepSeek R1)
			if (thinkOpen && trimmed === "</think>") {
				// The model closed the block itself; close once without duplicating the tag
				combined += "</think>";
				thinkOpen = false;
			} else if (thinkOpen) {
				// First content token after reasoning: close the <think> block ourselves
				combined += "</think>" + content;
				thinkOpen = false;
			} else {
				combined += content;
			}
		}
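		// Illustration (assumed model behavior): for a model that streams reasoning
		// before content, the deltas "Let me", " see", then content "Hi" yield the
		// token texts "<think>Let me", " see", "</think>Hi".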
		// Accumulate the combined token into the full text
		generatedText += combined;
		const output: TextGenerationStreamOutput = {
			token: {
				id: tokenId++,
				text: combined,
				logprob: 0,
				special: last,
			},
			generated_text: last ? generatedText : null,
			details: null,
		};
		yield output;
		// Tools removed: ignore remaining tool_calls deltas
	}
	// If metadata wasn't yielded from chunks (e.g., it came from response headers), yield it at the end
	if (!metadataYielded && getRouterMetadata) {
		const routerMetadata = getRouterMetadata();
		// Yield if we have either complete router metadata or just provider info
		if (
			(routerMetadata && routerMetadata.route && routerMetadata.model) ||
			routerMetadata?.provider
		) {
			yield {
				token: {
					id: tokenId++,
					text: "",
					logprob: 0,
					special: true,
				},
				generated_text: null,
				details: null,
				routerMetadata,
			} as TextGenerationStreamOutput & {
				routerMetadata: { route?: string; model?: string; provider?: string };
			};
		}
	}
}
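
/*
 * Usage sketch (illustrative, not part of the module). The client construction,
 * endpoint URL, and model id below are assumptions about the caller's setup;
 * `openai.chat.completions.create` with `stream: true` returns the
 * Stream<ChatCompletionChunk> this adapter expects.
 *
 *   import OpenAI from "openai";
 *
 *   const openai = new OpenAI({ baseURL: "https://example.test/v1", apiKey: "sk-..." });
 *   const stream = await openai.chat.completions.create({
 *     model: "some-model",
 *     messages: [{ role: "user", content: "Hello" }],
 *     stream: true,
 *   });
 *   for await (const output of openAIChatToTextGenerationStream(stream)) {
 *     process.stdout.write(output.token.text);
 *   }
 */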
/**
 * Transform a non-streaming OpenAI chat completion into a stream of TextGenerationStreamOutput
 */
export async function* openAIChatToTextGenerationSingle(
	completion: OpenAI.Chat.Completions.ChatCompletion,
	getRouterMetadata?: () => { route?: string; model?: string; provider?: string }
) {
	const message: NonNullable<OpenAI.Chat.Completions.ChatCompletion.Choice>["message"] & {
		reasoning?: string;
		reasoning_content?: string;
	} = completion.choices?.[0]?.message ?? {};
	let content: string = message?.content || "";

	// Provider-dependent reasoning shapes (non-streaming)
	const reasoning: string =
		typeof message.reasoning === "string"
			? message.reasoning
			: typeof message.reasoning_content === "string"
				? message.reasoning_content
				: "";
	if (reasoning.length > 0) {
		content = `<think>${reasoning}</think>` + content;
	}

	const tokenId = 0;

	// Yield the whole completion as a single token
	yield {
		token: {
			id: tokenId,
			text: content,
			logprob: 0,
			special: false,
		},
		generated_text: content,
		details: null,
		...(getRouterMetadata
			? (() => {
					const metadata = getRouterMetadata();
					return (metadata && metadata.route && metadata.model) || metadata?.provider
						? { routerMetadata: metadata }
						: {};
				})()
			: {}),
	} as TextGenerationStreamOutput & {
		routerMetadata?: { route?: string; model?: string; provider?: string };
	};
}
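
/*
 * Usage sketch (illustrative): the non-streaming adapter wraps a completed
 * ChatCompletion, e.g. one created without `stream: true`:
 *
 *   const completion = await openai.chat.completions.create({
 *     model: "some-model",
 *     messages: [{ role: "user", content: "Hello" }],
 *   });
 *   for await (const output of openAIChatToTextGenerationSingle(completion)) {
 *     console.log(output.generated_text);
 *   }
 */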