ai-lab-tube

Sleeping

ai-lab-tube / src /app /api /v1 /edit /dialogues /processShot.ts

Julian Bilcke

improve prompts

58b1ffb almost 2 years ago

2.95 kB


	import {
	ClapProject,
	ClapSegment,
	getClapAssetSourceType,
	filterSegments,
	ClapSegmentFilteringMode,
	ClapSegmentCategory
	} from "@aitube/clap"
	import { ClapCompletionMode } from "@aitube/client"
	import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"

	import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
	import { getMediaInfo } from "@/app/api/utils/getMediaInfo"

	/**
	 * Generates speech audio for the first dialogue segment attached to a shot,
	 * if that segment does not have an asset yet.
	 *
	 * Steps, as performed below:
	 * 1. collect the segments related to `shotSegment` through `filterSegments`
	 *    (presumably the ones overlapping the shot in time - confirm against
	 *    the `@aitube/clap` documentation),
	 * 2. pick the first one whose category is DIALOGUE,
	 * 3. call `generateSpeechWithParlerTTS` and store the result in `assetUrl`,
	 * 4. when the generated media has audio and lasts more than one second,
	 *    propagate its duration to the segments of this shot,
	 * 5. when `mode` is not FULL, push the dialogue segment into `newerClap`.
	 *
	 * The function mutates `shotSegment`, the matching segments of
	 * `existingClap`, and possibly `newerClap.segments`.
	 *
	 * @param shotSegment - the shot whose dialogue should be voiced
	 * @param existingClap - project providing the segments and the entity index
	 * @param newerClap - project receiving the updated segment in non-FULL mode
	 * @param mode - completion mode; anything other than FULL triggers the push
	 * @param turbo - currently unused by this function
	 * @throws rethrows any error raised while generating or probing the audio
	 */
	export async function processShot({
	  shotSegment,
	  existingClap,
	  newerClap,
	  mode,
	  turbo,
	}: {
	  shotSegment: ClapSegment
	  existingClap: ClapProject
	  newerClap: ClapProject
	  mode: ClapCompletionMode
	  turbo: boolean
	}): Promise<void> {

	  const shotSegments: ClapSegment[] = filterSegments(
	    ClapSegmentFilteringMode.BOTH,
	    shotSegment,
	    existingClap.segments
	  )

	  const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
	    s.category === ClapSegmentCategory.DIALOGUE
	  )

	  // only the first dialogue of the shot is processed
	  const shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0)

	  console.log(`[api/edit/dialogues] processShot: shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotDialogueSegments.length} dialogues)`)

	  // nothing to voice: do not pretend an audio track already exists
	  if (!shotDialogueSegment) {
	    console.log(`[api/edit/dialogues] processShot: there is no dialogue segment for this shot`)
	    return
	  }

	  // an asset is already attached: leave it untouched
	  if (shotDialogueSegment.assetUrl) {
	    console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
	    return
	  }

	  try {
	    // this generates a mp3
	    shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
	      text: shotDialogueSegment.prompt,
	      audioId: getSpeechBackgroundAudioPrompt(
	        shotSegments,
	        existingClap.entityIndex,
	        // TODO: use the entity description if it exists
	        ["high quality", "crisp", "detailed"]
	      ),
	      debug: true,
	    })
	    shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)

	    const { durationInMs, hasAudio } = await getMediaInfo(shotDialogueSegment.assetUrl)

	    // durations of one second or less are ignored
	    if (hasAudio && durationInMs > 1000) {
	      shotDialogueSegment.assetDurationInMs = durationInMs
	      shotSegment.assetDurationInMs = durationInMs

	      // we update the duration of all the segments for this shot only:
	      // iterating over existingClap.segments here would overwrite the
	      // duration of every segment in the whole project
	      for (const segment of shotSegments) {
	        segment.assetDurationInMs = durationInMs
	      }
	    }

	  } catch (err) {
	    console.log(`[api/edit/dialogues] processShot: failed to generate audio: ${err}`)
	    throw err
	  }

	  console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)

	  // if it's partial, we need to manually add it
	  if (mode !== ClapCompletionMode.FULL) {
	    newerClap.segments.push(shotDialogueSegment)
	  }
	}