Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Community

MariaKaiser committed on Mar 6

Commit

cda4205

verified ·

1 Parent(s): 450c267

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -27

app.py CHANGED Viewed

@@ -53,30 +53,48 @@ model.load_checkpoint(
 model.to(device)
 # --------- Define your models ----------
-class BGM(BaseModel):
-    file: str
-    bgm_volume: float
-class Sentence(BaseModel):
-    sentence_id: int
     speaker: str
-    text: str
-    prosody_ref: str
-class Scene(BaseModel):
-    scene_id: int
-    ambiance: str
-    bgm: BGM
-    sentences: List[Sentence]
-class CastMember(BaseModel):
     name: str
     gender: str
-    voice_ref: str
-class StoryInput(BaseModel):
-    cast: List[CastMember]
-    scenes: List[Scene]
 def tts_arabic(text: str, audio_file: str) -> str:
     gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
@@ -101,17 +119,36 @@ app = FastAPI(title="EGTTS Arabic TTS API")
 def root():
     return {"message": "Welcome! Visit /docs for Swagger UI."}
 @app.post("/tts/")
-async def tts_endpoint(
-    text: str = Form(...),
-    audio_file: UploadFile = File(...)
-):
-    file_path = os.path.join(OUTPUT_DIR, audio_file.filename)
-    with open(file_path, "wb") as f:
-        f.write(await audio_file.read())
-    output_wav = tts_arabic(text, file_path)
-    return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
 if __name__ == "__main__":
     import uvicorn

 model.to(device)
 # --------- Define your models ----------
+class BGMusicDto(BaseModel):
+    musicPath: str
+    emotion: str
+    volume: float
+class SentenceDto(BaseModel):
     speaker: str
+    sentenceId: str
+    sentence: str
+    prosodyReference: str
+    emotion: str
+    intensity: int
+class SceneDto(BaseModel):
+    sceneId: str
+    locationName: str
+    sentences: List[SentenceDto]
+    bgMusic: BGMusicDto
+class ChapterDto(BaseModel):
+    chapterId: str
+    title: SentenceDto
+    scenes: List[SceneDto]
+class CastDto(BaseModel):
     name: str
     gender: str
+    isAdult: bool
+    voiceReference: str
+class StoryCreationDTO(BaseModel):
+    storyId: str
+    chapters: List[ChapterDto]
+    cast: List[CastDto]
+#-----------------------------------------------------------
 def tts_arabic(text: str, audio_file: str) -> str:
     gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
 def root():
     return {"message": "Welcome! Visit /docs for Swagger UI."}
 @app.post("/tts/")
+async def process_story(story: StoryCreationDTO):
+    print(story.storyId)
+    for cast in story.cast:
+        print(cast.name, cast.voiceReference)
+    for chapter in story.chapters:
+        for scene in chapter.scenes:
+            for sentence in scene.sentences:
+                print(sentence.speaker, sentence.sentence)
+    return {"status": "Story received"}
+# async def tts_endpoint(
+#     text: str = Form(...),
+#     audio_file: UploadFile = File(...)
+# ):
+#     file_path = os.path.join(OUTPUT_DIR, audio_file.filename)
+#     with open(file_path, "wb") as f:
+#         f.write(await audio_file.read())
+#     output_wav = tts_arabic(text, file_path)
+#     return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
 if __name__ == "__main__":
     import uvicorn