ai-lab-tube

Sleeping

ai-lab-tube / src /app /api /v1 /edit /entities /index.ts

Julian Bilcke

improve prompts

58b1ffb almost 2 years ago

5.98 kB


	import { ClapProject, ClapSegmentCategory, getClapAssetSourceType, getValidNumber, newEntity } from "@aitube/clap"
	import { ClapCompletionMode, ClapEntityPrompt } from "@aitube/client"

	import { generateImageID } from "./generateImageID"
	import { generateAudioID } from "./generateAudioID"
	import { generateEntityPrompts } from "./generateEntityPrompts"
	import { clapToLatentStory } from "./clapToLatentStory"

	/**
	 * Creates, repairs and completes the entities of a clap project.
	 *
	 * Steps, in order:
	 *  1. If `existingClap` has no entities and `entityPrompts` is empty, entity
	 *     prompts are generated with `generateEntityPrompts` (from the project
	 *     description and its latent story), turned into entities, and assigned
	 *     to the CAMERA segments designated by the generator.
	 *  2. One entity is created for each item of `entityPrompts`.
	 *  3. Every entity of `existingClap` gets its missing image prompt, image id,
	 *     audio prompt and audio id filled in.
	 *
	 * Side effects: `existingClap` is mutated in place (entities are appended,
	 * `entityId` is set on camera segments, missing entity fields are filled).
	 * Unless `mode` is FULL, each entity modified in step 3 that is not already
	 * present in `newerClap.entityIndex` is also added to `newerClap`.
	 *
	 * @param existingClap - project whose entities are created and completed
	 * @param newerClap - project receiving the modified entities; this is the returned object
	 * @param entityPrompts - optional entity prompts to turn into new entities
	 * @param mode - FULL or anything else; every non-FULL mode is handled as PARTIAL
	 * @param turbo - forwarded to `generateEntityPrompts` and `generateImageID`
	 * @returns the `newerClap` object that was passed in
	 * @throws Error when there is still no entity once steps 1 and 2 are done
	 */
	export async function editEntities({
	  existingClap,
	  newerClap,
	  entityPrompts = [],
	  mode = ClapCompletionMode.PARTIAL,
	  turbo = false,
	}: {
	  existingClap: ClapProject
	  newerClap: ClapProject
	  entityPrompts?: ClapEntityPrompt[]
	  mode?: ClapCompletionMode
	  turbo?: boolean
	}): Promise<ClapProject> {

	  // note that we can only handle either FULL or PARTIAL
	  // other modes such as MERGE, REPLACE.. are irrelevant since those are client-side modes
	  // so from a server point of view those correspond to PARTIAL
	  //
	  // it is also worth noting that the use of FULL should be discouraged
	  const isPartial = mode !== ClapCompletionMode.FULL

	  // if we don't have existing entities, and user passed none,
	  // then we need to hallucinate them
	  if (existingClap.entities.length === 0 && entityPrompts.length === 0) {
	    const latentStory = await clapToLatentStory(existingClap)

	    const entityPromptsWithShots = await generateEntityPrompts({
	      prompt: existingClap.meta.description,
	      latentStory,
	      turbo,
	    })

	    // the shot ids returned by the generator are used as positions in this list
	    const allShots = existingClap.segments.filter(s => s.category === ClapSegmentCategory.CAMERA)

	    for (const { entityPrompt, shots: entityShots } of entityPromptsWithShots) {
	      const newEnt = createEntityFromPrompt(entityPrompt)

	      existingClap.entities.push(newEnt)

	      // now let's assign our entity to shots!
	      //
	      // warning: the shot assignment is the responsibility of the LLM.
	      // if the LLM hallucinates non-existing shot ids, it will cause trouble!
	      for (const shotId of entityShots) {
	        const shot = allShots[shotId]
	        if (shot) {
	          shot.entityId = newEnt.id
	        } else {
	          console.log(`[api/v1/edit/entities] warning: the LLM generated a non-existing shot (shot "${shotId}", but we only have ${allShots.length} shots)`)
	        }
	      }
	    }
	  }

	  // otherwise try to add what's new
	  for (const entityPrompt of entityPrompts) {
	    existingClap.entities.push(createEntityFromPrompt(entityPrompt))
	  }

	  if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }

	  // then we try to automatically repair, edit, complete.. all the existing entities
	  //
	  // the entities are processed one at a time, each generation call being awaited
	  // before the next one starts
	  for (const entity of existingClap.entities) {

	    let entityHasBeenModified = false

	    // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
	    // NOTE(review): this is a hard-coded placeholder, identical for every entity
	    if (!entity.imagePrompt) {
	      entity.imagePrompt = "a man with a beard"
	      entityHasBeenModified = true
	    }

	    // TASK 2: GENERATE THE IMAGE ID IF MISSING
	    if (!entity.imageId) {
	      entity.imageId = await generateImageID({
	        prompt: entity.imagePrompt,
	        seed: entity.seed,
	        turbo,
	      })
	      entity.imageSourceType = getClapAssetSourceType(entity.imageId)
	      entityHasBeenModified = true
	    }

	    // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
	    // NOTE(review): same hard-coded placeholder as the image prompt
	    if (!entity.audioPrompt) {
	      entity.audioPrompt = "a man with a beard"
	      entityHasBeenModified = true
	    }

	    // TASK 4: GENERATE THE AUDIO ID IF MISSING

	    // TODO here: call Parler-TTS or a generic audio generator
	    if (!entity.audioId) {
	      entity.audioId = await generateAudioID({
	        prompt: entity.audioPrompt,
	        seed: entity.seed
	      })
	      entity.audioSourceType = getClapAssetSourceType(entity.audioId)
	      entityHasBeenModified = true
	    }

	    // in case we are doing a partial update, only the entities that were
	    // modified and that newerClap doesn't know yet are reported
	    if (isPartial && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
	      newerClap.entities.push(entity)
	      newerClap.entityIndex[entity.id] = entity
	    }
	  }

	  // console.log(`api/edit/entities(): returning the newerClap`)

	  return newerClap
	}

	/**
	 * The subset of an entity prompt that is needed to build an entity.
	 */
	type EntityPromptFields = Pick<
	  ClapEntityPrompt,
	  "name" | "category" | "age" | "variant" | "region" | "identityImage" | "identityVoice"
	>

	/**
	 * Builds a new entity from an entity prompt.
	 *
	 * The name is used as trigger name, label and description. The image and audio
	 * prompts are left empty. The identity image and identity voice become the
	 * image id and audio id, with their source types derived through
	 * `getClapAssetSourceType`.
	 *
	 * @param entityPrompt - the prompt fields describing the entity
	 * @returns the entity created by `newEntity`
	 */
	function createEntityFromPrompt({
	  name,
	  category,
	  age,
	  variant,
	  region,
	  identityImage,
	  identityVoice,
	}: EntityPromptFields) {
	  return newEntity({
	    category,
	    triggerName: name,
	    label: name,
	    description: name,
	    author: "auto",
	    thumbnailUrl: "",

	    imagePrompt: "",
	    imageSourceType: getClapAssetSourceType(identityImage),
	    imageEngine: "SD Lightning",
	    imageId: identityImage,
	    audioPrompt: "",
	    audioSourceType: getClapAssetSourceType(identityVoice),
	    audioEngine: "Parler-TTS", // <- TODO: use OpenVoice 2, that way it can be personalized
	    audioId: identityVoice,

	    // note: using a numeric age should be deprecated,
	    // instead we should be able to specify things using text,
	    // eg. "8 months", "25 years old", "12th century"
	    //
	    // presumably bounds the age to [0, 120] with 25 as fallback; confirm against @aitube/clap
	    age: getValidNumber(age, 0, 120, 25),

	    // TODO: delete gender and appearance, replace by a single concept of "variant"
	    gender: "",
	    appearance: variant,
	    region: region,
	  })
	}