Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import torchaudio
|
|
| 5 |
import os
|
| 6 |
from pydantic import BaseModel
|
| 7 |
from typing import List, Optional
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
# MODEL_DIR = "my_model"
|
|
@@ -140,7 +141,7 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
|
|
| 140 |
|
| 141 |
#_______________generate audios and folder structure_______________________
|
| 142 |
|
| 143 |
-
async def generate_story_audios(story: StoryCreationDTO, base_output: str
|
| 144 |
"""
|
| 145 |
Generates audio files and folders for the entire story
|
| 146 |
"""
|
|
@@ -182,7 +183,7 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str = "sto
|
|
| 182 |
from pydub import AudioSegment
|
| 183 |
import asyncio
|
| 184 |
|
| 185 |
-
async def concat_story_audio(story: StoryCreationDTO, base_output
|
| 186 |
story_dir = Path(base_output) / story.storyId
|
| 187 |
story_dir.mkdir(parents=True, exist_ok=True)
|
| 188 |
|
|
@@ -275,28 +276,10 @@ def root():
|
|
| 275 |
|
| 276 |
#-----------------------------------------------------------
|
| 277 |
|
| 278 |
-
|
| 279 |
-
########## creating a dummy audio file
|
| 280 |
-
import torchaudio
|
| 281 |
-
import torch
|
| 282 |
-
import os
|
| 283 |
-
|
| 284 |
-
OUTPUT_DIR = "outputs"
|
| 285 |
-
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 286 |
-
|
| 287 |
-
dummy_path = os.path.join(OUTPUT_DIR, "dummy.wav")
|
| 288 |
-
|
| 289 |
-
# Generate 1 second of silence at 24kHz
|
| 290 |
-
if not os.path.exists(dummy_path):
|
| 291 |
-
silent = torch.zeros(1, 24000) # 1 channel, 24000 samples
|
| 292 |
-
torchaudio.save(dummy_path, silent, 24000)
|
| 293 |
-
|
| 294 |
-
from pydantic import BaseModel
|
| 295 |
-
|
| 296 |
class TTSResponse(BaseModel):
|
| 297 |
file_name: str
|
| 298 |
duration: float # seconds
|
| 299 |
-
audio_base64: str
|
| 300 |
|
| 301 |
######## Convert your audio to Base64
|
| 302 |
import base64
|
|
@@ -318,32 +301,34 @@ def audio_to_base64(audio_path: str) -> (str, float):
|
|
| 318 |
|
| 319 |
@app.post("/tts/")
|
| 320 |
async def process_story(story: StoryCreationDTO):
|
|
|
|
|
|
|
| 321 |
print(story.storyId)
|
| 322 |
-
|
| 323 |
for cast in story.cast:
|
| 324 |
print(cast.name, cast.voiceReference)
|
| 325 |
-
|
| 326 |
for chapter in story.chapters:
|
| 327 |
for scene in chapter.scenes:
|
| 328 |
for sentence in scene.sentences:
|
| 329 |
print(sentence.speaker, sentence.sentence)
|
| 330 |
|
| 331 |
-
#
|
| 332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
|
| 334 |
# Convert to base64 and get duration
|
| 335 |
-
audio_b64, duration = audio_to_base64(
|
| 336 |
|
| 337 |
response = TTSResponse(
|
| 338 |
-
file_name=
|
| 339 |
duration=duration,
|
| 340 |
audio_base64=audio_b64
|
| 341 |
)
|
| 342 |
|
| 343 |
return response
|
| 344 |
|
| 345 |
-
#return {"status": "Story received"}
|
| 346 |
-
|
| 347 |
|
| 348 |
|
| 349 |
# async def tts_endpoint(
|
|
|
|
| 5 |
import os
|
| 6 |
from pydantic import BaseModel
|
| 7 |
from typing import List, Optional
|
| 8 |
+
from pathlib import Path
|
| 9 |
|
| 10 |
|
| 11 |
# MODEL_DIR = "my_model"
|
|
|
|
| 141 |
|
| 142 |
#_______________generate audios and folder structure_______________________
|
| 143 |
|
| 144 |
+
async def generate_story_audios(story: StoryCreationDTO, base_output: str):
|
| 145 |
"""
|
| 146 |
Generates audio files and folders for the entire story
|
| 147 |
"""
|
|
|
|
| 183 |
from pydub import AudioSegment
|
| 184 |
import asyncio
|
| 185 |
|
| 186 |
+
async def concat_story_audio(story: StoryCreationDTO, base_output: str, final_path: str = None): # full path including filename
|
| 187 |
story_dir = Path(base_output) / story.storyId
|
| 188 |
story_dir.mkdir(parents=True, exist_ok=True)
|
| 189 |
|
|
|
|
| 276 |
|
| 277 |
#-----------------------------------------------------------
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
class TTSResponse(BaseModel):
|
| 280 |
file_name: str
|
| 281 |
duration: float # seconds
|
| 282 |
+
audio_base64: str
|
| 283 |
|
| 284 |
######## Convert your audio to Base64
|
| 285 |
import base64
|
|
|
|
| 301 |
|
| 302 |
@app.post("/tts/")
|
| 303 |
async def process_story(story: StoryCreationDTO):
|
| 304 |
+
|
| 305 |
+
# Optional: print info for debugging
|
| 306 |
print(story.storyId)
|
|
|
|
| 307 |
for cast in story.cast:
|
| 308 |
print(cast.name, cast.voiceReference)
|
|
|
|
| 309 |
for chapter in story.chapters:
|
| 310 |
for scene in chapter.scenes:
|
| 311 |
for sentence in scene.sentences:
|
| 312 |
print(sentence.speaker, sentence.sentence)
|
| 313 |
|
| 314 |
+
# 1️⃣ Generate all sentence audios and folder structure
|
| 315 |
+
await generate_story_audios(story, base_output=OUTPUT_DIR)
|
| 316 |
+
|
| 317 |
+
# 2️⃣ Concatenate all into final story audio
|
| 318 |
+
final_story_path = os.path.join(OUTPUT_DIR, story.storyId, f"{story.storyId}_full.wav")
|
| 319 |
+
final_generated_story_path = await concat_story_audio(story, base_output=OUTPUT_DIR, final_path=final_story_path)
|
| 320 |
|
| 321 |
# Convert to base64 and get duration
|
| 322 |
+
audio_b64, duration = audio_to_base64(final_generated_story_path)
|
| 323 |
|
| 324 |
response = TTSResponse(
|
| 325 |
+
file_name= os.path.basename(final_generated_story_path),
|
| 326 |
duration=duration,
|
| 327 |
audio_base64=audio_b64
|
| 328 |
)
|
| 329 |
|
| 330 |
return response
|
| 331 |
|
|
|
|
|
|
|
| 332 |
|
| 333 |
|
| 334 |
# async def tts_endpoint(
|