Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,43 +96,25 @@ class StoryCreationDTO(BaseModel):
|
|
| 96 |
|
| 97 |
#-----------------------------------------------------------
|
| 98 |
|
| 99 |
-
def tts_arabic(text: str, audio_file: str) -> str:
    """Synthesize Arabic speech for ``text`` and save it as ``output.wav``.

    Conditioning latents are computed from ``audio_file`` with the
    module-level ``model``; every sampling setting is read from
    ``model.config``.

    Args:
        text: Text handed to the model for synthesis.
        audio_file: Path of the reference audio used for conditioning.

    Returns:
        Path of the written file, ``OUTPUT_DIR/output.wav``.
    """
    latents = model.get_conditioning_latents(audio_path=[audio_file])
    gpt_cond_latent, speaker_embedding = latents

    # Sampling settings are forwarded unchanged from the model's own config.
    cfg = model.config
    sampling = {
        "temperature": cfg.temperature,
        "top_k": cfg.top_k,
        "length_penalty": cfg.length_penalty,
        "repetition_penalty": cfg.repetition_penalty,
        "top_p": cfg.top_p,
    }
    result = model.inference(
        text=text,
        language="ar",
        gpt_cond_latent=gpt_cond_latent,
        speaker_embedding=speaker_embedding,
        **sampling,
    )

    destination = os.path.join(OUTPUT_DIR, "output.wav")
    # unsqueeze(0) adds a leading dimension to the waveform before saving;
    # 24000 is the sample rate passed to torchaudio.save.
    waveform = torch.tensor(result["wav"]).unsqueeze(0)
    torchaudio.save(destination, waveform, 24000)
    return destination
|
| 115 |
-
|
| 116 |
-
app = FastAPI(title="EGTTS Arabic TTS API")


@app.get("/")
def root():
    """Return a welcome payload that points visitors to the Swagger UI."""
    welcome = {"message": "Welcome! Visit /docs for Swagger UI."}
    return welcome
|
| 121 |
-
|
| 122 |
-
#-----------------------------------------------------------
|
| 123 |
-
|
| 124 |
#__________ func to get file from supabase__________________
|
| 125 |
|
| 126 |
import httpx
|
|
|
|
| 127 |
|
| 128 |
-
async def download_file_from_url(url: str) ->
|
|
|
|
|
|
|
|
|
|
| 129 |
async with httpx.AsyncClient() as client:
|
| 130 |
response = await client.get(url)
|
| 131 |
-
|
| 132 |
if response.status_code != 200:
|
| 133 |
raise RuntimeError(f"Failed to fetch file: {response.text}")
|
| 134 |
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
#-----------------------------------------------------------
|
| 138 |
|
|
@@ -155,6 +137,75 @@ async def test_download(url: str = Query(...)):
|
|
| 155 |
return {"error": str(e)}
|
| 156 |
#_________________________________________
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
########## creating a dummy audio file
|
| 159 |
import torchaudio
|
| 160 |
import torch
|
|
|
|
| 96 |
|
| 97 |
#-----------------------------------------------------------
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
#__________ func to get file from supabase__________________
|
| 100 |
|
| 101 |
import httpx
|
| 102 |
+
import tempfile
|
| 103 |
|
| 104 |
+
async def download_file_from_url(url: str) -> str:
    """Download a file from ``url`` into a temporary ``.wav`` file.

    The temporary file is created with ``delete=False``, so the caller is
    responsible for removing it once it is no longer needed.

    Args:
        url: Location fetched with an HTTP GET request.

    Returns:
        Filesystem path of the temporary file holding the response body.

    Raises:
        RuntimeError: If the response status code is not 200.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(url)

    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch file: {response.text}")

    # Save to a temporary file. The context manager closes the handle even
    # when the write fails (e.g. disk full).
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        with temp_file:
            temp_file.write(response.content)
    except BaseException:
        # delete=False means nothing else would remove a partially written
        # file, so clean it up (best effort) before propagating the error.
        try:
            os.remove(temp_file.name)
        except OSError:
            pass
        raise
    return temp_file.name
|
| 118 |
|
| 119 |
#-----------------------------------------------------------
|
| 120 |
|
|
|
|
| 137 |
return {"error": str(e)}
|
| 138 |
#_________________________________________
|
| 139 |
|
| 140 |
+
|
| 141 |
+
# Takes the text to speak, the path to the prosody reference audio, and the path where the generated audio is saved; returns the path to the generated audio.
|
| 142 |
+
# (save_path is the full path including the filename, not just a folder.)
|
| 143 |
+
def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
    """Synthesize Arabic speech for ``text`` and write it to ``save_path``.

    Conditioning latents are computed from ``audio_file`` with the
    module-level ``model``; every sampling setting is read from
    ``model.config``.

    Args:
        text: Text handed to the model for synthesis.
        audio_file: Path of the reference (prosody) audio used to compute
            the conditioning latents.
        save_path: Full output path including the file name, not just a
            folder. Path-like objects are accepted and converted to ``str``.

    Returns:
        ``save_path`` as a string.
    """
    # Callers in this file pass pathlib.Path objects; normalise once so the
    # return value matches the annotated ``str``.
    save_path = os.fspath(save_path)

    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
        audio_path=[audio_file]
    )
    out = model.inference(
        text=text,
        language="ar",
        gpt_cond_latent=gpt_cond_latent,
        speaker_embedding=speaker_embedding,
        temperature=model.config.temperature,
        top_k=model.config.top_k,
        length_penalty=model.config.length_penalty,
        repetition_penalty=model.config.repetition_penalty,
        top_p=model.config.top_p,
    )

    # A bare file name has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # unsqueeze(0) adds a leading dimension to the waveform before saving;
    # 24000 is the sample rate passed to torchaudio.save.
    torchaudio.save(save_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
    return save_path
|
| 160 |
+
|
| 161 |
+
#_______________generate audios and folder structure_______________________
|
| 162 |
+
|
| 163 |
+
async def _synthesize_from_reference(text: str, prosody_url: str, save_path: "Path") -> None:
    """Download the prosody reference and synthesize ``text`` into ``save_path``."""
    prosody_file = await download_file_from_url(prosody_url)
    try:
        # NOTE(review): inference_by_model is a plain synchronous call, so the
        # event loop is blocked while each clip is generated.
        inference_by_model(
            text=text,
            audio_file=prosody_file,
            save_path=str(save_path),
        )
    finally:
        # The downloaded reference is a temporary file; remove it even when
        # inference raises so failed runs do not leave files behind.
        os.remove(prosody_file)


async def generate_story_audios(story: StoryCreationDTO, base_output: str = "stories"):
    """Generate the audio files and folder structure for an entire story.

    Resulting layout::

        <base_output>/<storyId>/<chapterId>/title.wav
        <base_output>/<storyId>/<chapterId>/<sceneId>/<sentenceId>.wav

    Args:
        story: Story description; its chapters, scenes and sentences supply
            the text to speak and the prosody reference URL for each clip.
        base_output: Root directory under which the story folder is created.

    Raises:
        RuntimeError: Propagated from ``download_file_from_url`` when a
            prosody reference cannot be fetched.
    """
    # NOTE(review): storyId / chapterId / sceneId / sentenceId are used
    # directly as path components - confirm they are validated upstream.
    story_dir = Path(base_output) / story.storyId
    story_dir.mkdir(parents=True, exist_ok=True)

    for chapter in story.chapters:
        chapter_dir = story_dir / chapter.chapterId
        chapter_dir.mkdir(exist_ok=True)

        # --- Chapter title audio ---
        await _synthesize_from_reference(
            chapter.title.sentence,
            chapter.title.prosodyReference,
            chapter_dir / "title.wav",
        )

        for scene in chapter.scenes:
            scene_dir = chapter_dir / scene.sceneId
            scene_dir.mkdir(exist_ok=True)

            # --- Sentences audio ---
            for sentence in scene.sentences:
                await _synthesize_from_reference(
                    sentence.sentence,
                    sentence.prosodyReference,
                    scene_dir / f"{sentence.sentenceId}.wav",
                )
|
| 199 |
+
|
| 200 |
+
app = FastAPI(title="EGTTS Arabic TTS API")


@app.get("/")
def root():
    """Return a welcome payload that points visitors to the Swagger UI."""
    welcome = {"message": "Welcome! Visit /docs for Swagger UI."}
    return welcome
|
| 205 |
+
|
| 206 |
+
#-----------------------------------------------------------
|
| 207 |
+
|
| 208 |
+
|
| 209 |
########## creating a dummy audio file
|
| 210 |
import torchaudio
|
| 211 |
import torch
|