Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Files Community

MariaKaiser committed on Mar 19

Commit

ea76117

verified ·

1 Parent(s): c11f46d

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -28

app.py CHANGED Viewed

@@ -96,43 +96,25 @@ class StoryCreationDTO(BaseModel):
 #-----------------------------------------------------------
-def tts_arabic(text: str, audio_file: str) -> str:
-    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
-    out = model.inference(
-        text=text,
-        language="ar",
-        gpt_cond_latent=gpt_cond_latent,
-        speaker_embedding=speaker_embedding,
-        temperature=model.config.temperature,
-        top_k=model.config.top_k,
-        length_penalty=model.config.length_penalty,
-        repetition_penalty=model.config.repetition_penalty,
-        top_p=model.config.top_p,
-    )
-    output_wav = os.path.join(OUTPUT_DIR, "output.wav")
-    torchaudio.save(output_wav, torch.tensor(out["wav"]).unsqueeze(0), 24000)
-    return output_wav
-app = FastAPI(title="EGTTS Arabic TTS API")
-@app.get("/")
-def root():
-    return {"message": "Welcome! Visit /docs for Swagger UI."}
-#-----------------------------------------------------------
 #__________ func to get file from supabase__________________
 import httpx
-async def download_file_from_url(url: str) -> bytes:
     async with httpx.AsyncClient() as client:
         response = await client.get(url)
         if response.status_code != 200:
             raise RuntimeError(f"Failed to fetch file: {response.text}")
-        return response.content
 #-----------------------------------------------------------
@@ -155,6 +137,75 @@ async def test_download(url: str = Query(...)):
         return {"error": str(e)}
 #_________________________________________
 ########## creating a dummy audio file
 import torchaudio
 import torch

 #-----------------------------------------------------------
 #__________ func to get file from supabase__________________
 import httpx
+import tempfile
+async def download_file_from_url(url: str) -> str:
+    """
+    Download the file at ``url`` into a temporary ``.wav`` file and return its path.
+
+    The temporary file is created with ``delete=False``, so it stays on disk
+    after this function returns; the caller is responsible for removing it.
+
+    Raises:
+        RuntimeError: if the server answers with a status code other than 200.
+    """
     async with httpx.AsyncClient() as client:
         response = await client.get(url)
         if response.status_code != 200:
             raise RuntimeError(f"Failed to fetch file: {response.text}")
+    # Save to a temporary file. The ``with`` block closes the handle even when
+    # the write fails, instead of relying on a manual close() call.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
+        temp_file.write(response.content)
+    return temp_file.name
 #-----------------------------------------------------------
         return {"error": str(e)}
 #_________________________________________
+def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
+    """
+    Generate speech for ``text`` using ``audio_file`` as the prosody reference
+    and save the result at ``save_path``.
+
+    Args:
+        text: the text to be said (inference is run with language "ar").
+        audio_file: path to the prosody reference audio.
+        save_path: full output path including the file name, not just a
+            folder. A ``str`` or any ``os.PathLike`` is accepted.
+
+    Returns:
+        The path to the generated audio, as a string.
+    """
+    # Callers in this file pass pathlib.Path objects; normalise so the value
+    # returned below matches the declared ``str`` return type.
+    save_path = os.fspath(save_path)
+    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
+    out = model.inference(
+        text=text,
+        language="ar",
+        gpt_cond_latent=gpt_cond_latent,
+        speaker_embedding=speaker_embedding,
+        temperature=model.config.temperature,
+        top_k=model.config.top_k,
+        length_penalty=model.config.length_penalty,
+        repetition_penalty=model.config.repetition_penalty,
+        top_p=model.config.top_p,
+    )
+    # A bare file name has an empty dirname and os.makedirs("") raises
+    # FileNotFoundError, so only create the parent folder when there is one.
+    parent_dir = os.path.dirname(save_path)
+    if parent_dir:
+        os.makedirs(parent_dir, exist_ok=True)
+    # unsqueeze(0) adds a leading dimension before saving; 24000 is the sample
+    # rate handed to torchaudio.save.
+    torchaudio.save(save_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
+    return save_path
+#_______________generate audios and folder structure_______________________
+async def _synthesize_from_remote_prosody(text: str, prosody_url: str, save_path) -> None:
+    """Download the prosody reference, generate the audio at save_path, then delete the download."""
+    prosody_file = await download_file_from_url(prosody_url)
+    try:
+        inference_by_model(text=text, audio_file=prosody_file, save_path=save_path)
+    finally:
+        # Remove the temporary download even when inference raises, so a
+        # failed generation does not leave the file behind.
+        os.remove(prosody_file)
+async def generate_story_audios(story: StoryCreationDTO, base_output: str = "stories"):
+    """
+    Generates audio files and folders for the entire story.
+
+    Resulting layout:
+        <base_output>/<storyId>/<chapterId>/title.wav
+        <base_output>/<storyId>/<chapterId>/<sceneId>/<sentenceId>.wav
+
+    Args:
+        story: the story whose chapter titles and scene sentences are voiced.
+        base_output: root folder under which the story folder is created.
+
+    An exception from a download or from inference propagates to the caller;
+    files generated before the failure are left in place.
+    """
+    # NOTE(review): inference_by_model is a plain synchronous call inside this
+    # coroutine, so the event loop is blocked while each clip is generated.
+    story_dir = Path(base_output) / story.storyId
+    story_dir.mkdir(parents=True, exist_ok=True)
+    for chapter in story.chapters:
+        chapter_dir = story_dir / chapter.chapterId
+        chapter_dir.mkdir(exist_ok=True)
+        # --- Chapter title audio ---
+        await _synthesize_from_remote_prosody(
+            text=chapter.title.sentence,
+            prosody_url=chapter.title.prosodyReference,
+            save_path=chapter_dir / "title.wav",
+        )
+        for scene in chapter.scenes:
+            scene_dir = chapter_dir / scene.sceneId
+            scene_dir.mkdir(exist_ok=True)
+            # --- Sentences audio ---
+            for sentence in scene.sentences:
+                # The prosody reference audio is downloaded from Supabase
+                await _synthesize_from_remote_prosody(
+                    text=sentence.sentence,
+                    prosody_url=sentence.prosodyReference,
+                    save_path=scene_dir / f"{sentence.sentenceId}.wav",
+                )
+app = FastAPI(title="EGTTS Arabic TTS API")
+@app.get("/")
+def root():
+    """Landing endpoint: returns a welcome message pointing to the Swagger UI at /docs."""
+    welcome = {"message": "Welcome! Visit /docs for Swagger UI."}
+    return welcome
+#-----------------------------------------------------------
 ########## creating a dummy audio file
 import torchaudio
 import torch