MariaKaiser committed on
Commit
ea76117
·
verified ·
1 Parent(s): c11f46d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -28
app.py CHANGED
@@ -96,43 +96,25 @@ class StoryCreationDTO(BaseModel):
96
 
97
  #-----------------------------------------------------------
98
 
99
- def tts_arabic(text: str, audio_file: str) -> str:
100
- gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
101
- out = model.inference(
102
- text=text,
103
- language="ar",
104
- gpt_cond_latent=gpt_cond_latent,
105
- speaker_embedding=speaker_embedding,
106
- temperature=model.config.temperature,
107
- top_k=model.config.top_k,
108
- length_penalty=model.config.length_penalty,
109
- repetition_penalty=model.config.repetition_penalty,
110
- top_p=model.config.top_p,
111
- )
112
- output_wav = os.path.join(OUTPUT_DIR, "output.wav")
113
- torchaudio.save(output_wav, torch.tensor(out["wav"]).unsqueeze(0), 24000)
114
- return output_wav
115
-
116
- app = FastAPI(title="EGTTS Arabic TTS API")
117
-
118
- @app.get("/")
119
- def root():
120
- return {"message": "Welcome! Visit /docs for Swagger UI."}
121
-
122
- #-----------------------------------------------------------
123
-
124
  #__________ func to get file from supabase__________________
125
 
126
  import httpx
 
127
 
128
- async def download_file_from_url(url: str) -> bytes:
 
 
 
129
  async with httpx.AsyncClient() as client:
130
  response = await client.get(url)
131
-
132
  if response.status_code != 200:
133
  raise RuntimeError(f"Failed to fetch file: {response.text}")
134
 
135
- return response.content
 
 
 
 
136
 
137
  #-----------------------------------------------------------
138
 
@@ -155,6 +137,75 @@ async def test_download(url: str = Query(...)):
155
  return {"error": str(e)}
156
  #_________________________________________
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  ########## creating a dummy audio file
159
  import torchaudio
160
  import torch
 
96
 
97
  #-----------------------------------------------------------
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  #__________ func to get file from supabase__________________
100
 
101
  import httpx
102
+ import tempfile
103
 
104
async def download_file_from_url(url: str) -> str:
    """Download the file at ``url`` and store it in a temporary ``.wav`` file.

    Args:
        url: Address of the file to fetch with an HTTP GET.

    Returns:
        Path of the temporary file holding the response body. The file is
        created with ``delete=False``, so the caller is responsible for
        removing it once it is no longer needed.

    Raises:
        RuntimeError: If the server answers with a status code other than 200.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(url)

    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch file: {response.text}")

    # delete=False keeps the file on disk after the handle is closed, which is
    # what lets us hand its path back to the caller.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        # The context manager closes the handle even if the write fails.
        with temp_file:
            temp_file.write(response.content)
    except OSError:
        # Do not leave a partially written file behind when the write fails.
        os.unlink(temp_file.name)
        raise
    return temp_file.name
118
 
119
  #-----------------------------------------------------------
120
 
 
137
  return {"error": str(e)}
138
  #_________________________________________
139
 
140
+
141
def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
    """Synthesize ``text`` in Arabic and write the audio to ``save_path``.

    Conditioning latents are computed from ``audio_file`` and passed to
    ``model.inference`` together with the sampling settings stored on
    ``model.config``.

    Args:
        text: The text to be spoken.
        audio_file: Path to the prosody reference audio.
        save_path: Full path of the generated audio, including the filename
            (not just a folder). Path-like objects are accepted as well.

    Returns:
        ``save_path`` as a plain string.
    """
    # Callers in this file pass pathlib.Path objects; normalise so the return
    # value really is the ``str`` the signature promises.
    save_path = os.fspath(save_path)

    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
    out = model.inference(
        text=text,
        language="ar",
        gpt_cond_latent=gpt_cond_latent,
        speaker_embedding=speaker_embedding,
        temperature=model.config.temperature,
        top_k=model.config.top_k,
        length_penalty=model.config.length_penalty,
        repetition_penalty=model.config.repetition_penalty,
        top_p=model.config.top_p,
    )

    # os.path.dirname() is "" for a bare filename and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # 24000 is handed to torchaudio.save as the sample rate.
    # NOTE(review): presumably the model's native output rate -- confirm.
    torchaudio.save(save_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
    return save_path
160
+
161
+ #_______________generate audios and folder structure_______________________
162
+
163
async def _synthesize_from_reference(text: str, reference_url: str, save_path: str) -> None:
    """Download a prosody reference, synthesize ``text`` with it, then delete the download."""
    reference_file = await download_file_from_url(reference_url)
    try:
        inference_by_model(text=text, audio_file=reference_file, save_path=save_path)
    finally:
        # Remove the temporary reference even when inference fails, so a bad
        # sentence does not leave downloaded files behind.
        os.remove(reference_file)


async def generate_story_audios(story: StoryCreationDTO, base_output: str = "stories"):
    """Generate the audio files and folder structure for an entire story.

    The resulting layout is::

        <base_output>/<storyId>/<chapterId>/title.wav
        <base_output>/<storyId>/<chapterId>/<sceneId>/<sentenceId>.wav

    Each clip is synthesized from its own sentence text and its own prosody
    reference, which is downloaded first and deleted again afterwards.

    Args:
        story: The story to render; its chapters, scenes and sentences are
            processed in order.
        base_output: Root directory under which the story folder is created.

    NOTE(review): the ids are used verbatim as path components; presumably
    they are validated upstream -- confirm.
    NOTE(review): ``inference_by_model`` is a synchronous call, so the event
    loop is blocked while each clip is being synthesized.
    """
    story_dir = Path(base_output) / story.storyId
    story_dir.mkdir(parents=True, exist_ok=True)

    for chapter in story.chapters:
        chapter_dir = story_dir / chapter.chapterId
        chapter_dir.mkdir(exist_ok=True)

        # --- Chapter title audio ---
        await _synthesize_from_reference(
            text=chapter.title.sentence,
            reference_url=chapter.title.prosodyReference,
            save_path=str(chapter_dir / "title.wav"),
        )

        for scene in chapter.scenes:
            scene_dir = chapter_dir / scene.sceneId
            scene_dir.mkdir(exist_ok=True)

            # --- Sentences audio ---
            for sentence in scene.sentences:
                await _synthesize_from_reference(
                    text=sentence.sentence,
                    reference_url=sentence.prosodyReference,
                    save_path=str(scene_dir / f"{sentence.sentenceId}.wav"),
                )
199
+
200
# Application object on which the route decorators register their handlers.
# NOTE(review): this assignment sits below the download test endpoint; any
# handler decorated with ``@app...`` above this line would fail with a
# NameError at import time -- confirm that none precedes it.
app = FastAPI(title="EGTTS Arabic TTS API")


@app.get("/")
def root():
    """Landing endpoint: return a welcome message pointing at the Swagger UI."""
    welcome = {"message": "Welcome! Visit /docs for Swagger UI."}
    return welcome
205
+
206
+ #-----------------------------------------------------------
207
+
208
+
209
  ########## creating a dummy audio file
210
  import torchaudio
211
  import torch