Spaces:
Running
Running
| import type { MediaUnderstandingOutput } from "./types.js"; | |
| const MEDIA_PLACEHOLDER_RE = /^<media:[^>]+>(\s*\([^)]*\))?$/i; | |
| const MEDIA_PLACEHOLDER_TOKEN_RE = /^<media:[^>]+>(\s*\([^)]*\))?\s*/i; | |
| export function extractMediaUserText(body?: string): string | undefined { | |
| const trimmed = body?.trim() ?? ""; | |
| if (!trimmed) { | |
| return undefined; | |
| } | |
| if (MEDIA_PLACEHOLDER_RE.test(trimmed)) { | |
| return undefined; | |
| } | |
| const cleaned = trimmed.replace(MEDIA_PLACEHOLDER_TOKEN_RE, "").trim(); | |
| return cleaned || undefined; | |
| } | |
| function formatSection( | |
| title: string, | |
| kind: "Transcript" | "Description", | |
| text: string, | |
| userText?: string, | |
| ): string { | |
| const lines = [`[${title}]`]; | |
| if (userText) { | |
| lines.push(`User text:\n${userText}`); | |
| } | |
| lines.push(`${kind}:\n${text}`); | |
| return lines.join("\n"); | |
| } | |
| export function formatMediaUnderstandingBody(params: { | |
| body?: string; | |
| outputs: MediaUnderstandingOutput[]; | |
| }): string { | |
| const outputs = params.outputs.filter((output) => output.text.trim()); | |
| if (outputs.length === 0) { | |
| return params.body ?? ""; | |
| } | |
| const userText = extractMediaUserText(params.body); | |
| const sections: string[] = []; | |
| if (userText && outputs.length > 1) { | |
| sections.push(`User text:\n${userText}`); | |
| } | |
| const counts = new Map<MediaUnderstandingOutput["kind"], number>(); | |
| for (const output of outputs) { | |
| counts.set(output.kind, (counts.get(output.kind) ?? 0) + 1); | |
| } | |
| const seen = new Map<MediaUnderstandingOutput["kind"], number>(); | |
| for (const output of outputs) { | |
| const count = counts.get(output.kind) ?? 1; | |
| const next = (seen.get(output.kind) ?? 0) + 1; | |
| seen.set(output.kind, next); | |
| const suffix = count > 1 ? ` ${next}/${count}` : ""; | |
| if (output.kind === "audio.transcription") { | |
| sections.push( | |
| formatSection( | |
| `Audio${suffix}`, | |
| "Transcript", | |
| output.text, | |
| outputs.length === 1 ? userText : undefined, | |
| ), | |
| ); | |
| continue; | |
| } | |
| if (output.kind === "image.description") { | |
| sections.push( | |
| formatSection( | |
| `Image${suffix}`, | |
| "Description", | |
| output.text, | |
| outputs.length === 1 ? userText : undefined, | |
| ), | |
| ); | |
| continue; | |
| } | |
| sections.push( | |
| formatSection( | |
| `Video${suffix}`, | |
| "Description", | |
| output.text, | |
| outputs.length === 1 ? userText : undefined, | |
| ), | |
| ); | |
| } | |
| return sections.join("\n\n").trim(); | |
| } | |
| export function formatAudioTranscripts(outputs: MediaUnderstandingOutput[]): string { | |
| if (outputs.length === 1) { | |
| return outputs[0].text; | |
| } | |
| return outputs.map((output, index) => `Audio ${index + 1}:\n${output.text}`).join("\n\n"); | |
| } | |