ai-lab-tube

Sleeping

App Files Files Community

Julian Bilcke committed on Apr 29, 2024

Commit

67f97d0

1 Parent(s): 81d1fba

fix the mp3 generator

Browse files

Files changed (3) hide show

src/app/api/generators/speech/generateVoiceWithParlerTTS.ts +7 -2
src/app/api/generators/speech/generateVoiceWithXTTS2.txt +27 -24
src/app/api/v1/edit/dialogues/route.ts +1 -0

src/app/api/generators/speech/generateVoiceWithParlerTTS.ts CHANGED Viewed

@@ -5,6 +5,12 @@ const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-parler-tts-mini.hf.s
 const huggingFaceSpace = "jbilcke-hf/ai-tube-model-parler-tts-mini"
 const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
 export async function generateSpeechWithParlerTTS({
   text,
   audioId,
@@ -17,7 +23,6 @@ export async function generateSpeechWithParlerTTS({
   neverThrow?: boolean
 }): Promise<string> {
   const actualFunction = async () => {
     const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
@@ -58,7 +63,7 @@ export async function generateSpeechWithParlerTTS({
       throw new Error(`the returned audio was empty`)
     }
-    return addBase64Header(data[0] as string, "wav")
   }
   try {

 const huggingFaceSpace = "jbilcke-hf/ai-tube-model-parler-tts-mini"
 const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
+/**
+ * Note: this generates an MP3 file
+ *
+ * @param param0
+ * @returns
+ */
 export async function generateSpeechWithParlerTTS({
   text,
   audioId,
   neverThrow?: boolean
 }): Promise<string> {
   const actualFunction = async () => {
     const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
       throw new Error(`the returned audio was empty`)
     }
+    return addBase64Header(data[0] as string, "mp3")
   }
   try {

src/app/api/generators/speech/generateVoiceWithXTTS2.txt CHANGED Viewed

@@ -1,35 +1,37 @@
-import { StoryLine } from "../../types/structures.mts"
-import { tryApiCalls } from "../../utils/tryApiCalls.mts"
-import { promptToGenerateAudioStory } from "../prompts/prompts.mts"
-import { microserviceApiKey } from "../../config.mts"
-import { addBase64Header } from "../../base64/addBase64.mts"
 // TODO delete this? we don't need an env var for this I think?
-const aiStoryServerApiUrl = `https://jbilcke-hf-ai-story-server.hf.space`
 const huggingFaceSpace = "jbilcke-hf/ai-story-server"
-export async function generateAudioStory({
-  prompt,
-  voice,
-  // maxLines,
-  neverThrow,
-  debug,
 }: {
-  prompt: string
-  voice?: string
-  // maxLines: number
-  neverThrow?: boolean
   debug?: boolean
-}): Promise<StoryLine[]> {
   const actualFunction = async () => {
-    const cropped = prompt.slice(0, 30)
     // console.log(`user requested "${cropped}${cropped !== prompt ? "..." : ""}"`)
     // positivePrompt = filterOutBadWords(positivePrompt)
-    const res = await fetch(aiStoryServerApiUrl + (aiStoryServerApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
@@ -38,12 +40,13 @@ export async function generateAudioStory({
       body: JSON.stringify({
         fn_index: 0, // <- important!
         data: [
-          microserviceApiKey,
           promptToGenerateAudioStory,
           prompt,
           // TODO: add support for custom wav
-          voice === "Julian" ? "Julian" : "Cloée",
           // maxLines,
         ],
@@ -65,7 +68,7 @@ export async function generateAudioStory({
     return stories.map(line => ({
       text: line.text.replaceAll(" .", ".").replaceAll(" ?", "?").replaceAll(" !", "!").trim(),
-      audio: addBase64Header(line.audio, "mp4")
     }))
   }
@@ -80,11 +83,11 @@ export async function generateAudioStory({
       debug,
       failureMessage: "failed to generate the audio story"
     })
-    return result
   } catch (err) {
     if (neverThrow) {
       console.error(`generateAudioStory():`, err)
-      return []
     } else {
       throw err
     }

+import { addBase64Header } from "@/lib/data/addBase64Header"
+import { tryApiCalls } from "../../utils/tryApiCall"
+export type StoryLine = {
+  text: string
+  audio: string // in base64
+}
 // TODO delete this? we don't need an env var for this I think?
+const gradioSpaceApiUrl = `https://jbilcke-hf-ai-story-server.hf.space`
 const huggingFaceSpace = "jbilcke-hf/ai-story-server"
+const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
+export async function generateSpeechWithParlerTTS({
+  text,
+  audioId,
+  debug = false,
+  neverThrow = false,
 }: {
+  text: string
+  audioId: string
   debug?: boolean
+  neverThrow?: boolean
+}): Promise<string> {
   const actualFunction = async () => {
+    const prompt = text.slice(0, 30)
     // console.log(`user requested "${cropped}${cropped !== prompt ? "..." : ""}"`)
     // positivePrompt = filterOutBadWords(positivePrompt)
+    const promptToGenerateAudioStory = ``
+    const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
       body: JSON.stringify({
         fn_index: 0, // <- important!
         data: [
+          apiKey,
           promptToGenerateAudioStory,
           prompt,
           // TODO: add support for custom wav
+          // voice === "Julian" ? "Julian" : "Cloée",
+          "Julian",
           // maxLines,
         ],
     return stories.map(line => ({
       text: line.text.replaceAll(" .", ".").replaceAll(" ?", "?").replaceAll(" !", "!").trim(),
+      audio: addBase64Header(line.audio, "mp3")
     }))
   }
       debug,
       failureMessage: "failed to generate the audio story"
     })
+    return BROKEN
   } catch (err) {
     if (neverThrow) {
       console.error(`generateAudioStory():`, err)
+      return ""
     } else {
       throw err
     }

src/app/api/v1/edit/dialogues/route.ts CHANGED Viewed

@@ -46,6 +46,7 @@ export async function POST(req: NextRequest) {
       // console.log(`[api/generate/dialogues] generating audio..`)
       try {
         shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
           text: shotDialogueSegment.prompt,
           audioId: getSpeechBackgroundAudioPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"]),

       // console.log(`[api/generate/dialogues] generating audio..`)
       try {
+        // this generates an mp3
         shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
           text: shotDialogueSegment.prompt,
           audioId: getSpeechBackgroundAudioPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"]),