MariaKaiser committed on
Commit
cda4205
·
verified ·
1 Parent(s): 450c267

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -27
app.py CHANGED
@@ -53,30 +53,48 @@ model.load_checkpoint(
53
  model.to(device)
54
 
55
  # --------- Define your models ----------
56
- class BGM(BaseModel):
57
- file: str
58
- bgm_volume: float
59
 
60
- class Sentence(BaseModel):
61
- sentence_id: int
 
 
 
 
 
62
  speaker: str
63
- text: str
64
- prosody_ref: str
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- class Scene(BaseModel):
67
- scene_id: int
68
- ambiance: str
69
- bgm: BGM
70
- sentences: List[Sentence]
71
 
72
- class CastMember(BaseModel):
 
73
  name: str
74
  gender: str
75
- voice_ref: str
 
 
 
 
 
 
 
76
 
77
- class StoryInput(BaseModel):
78
- cast: List[CastMember]
79
- scenes: List[Scene]
80
 
81
  def tts_arabic(text: str, audio_file: str) -> str:
82
  gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
@@ -101,17 +119,36 @@ app = FastAPI(title="EGTTS Arabic TTS API")
101
  def root():
102
  return {"message": "Welcome! Visit /docs for Swagger UI."}
103
 
 
104
  @app.post("/tts/")
105
- async def tts_endpoint(
106
- text: str = Form(...),
107
- audio_file: UploadFile = File(...)
108
- ):
109
- file_path = os.path.join(OUTPUT_DIR, audio_file.filename)
110
- with open(file_path, "wb") as f:
111
- f.write(await audio_file.read())
112
-
113
- output_wav = tts_arabic(text, file_path)
114
- return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  if __name__ == "__main__":
117
  import uvicorn
 
53
  model.to(device)
54
 
55
  # --------- Define your models ----------
 
 
 
56
 
57
+ class BGMusicDto(BaseModel):
58
+ musicPath: str
59
+ emotion: str
60
+ volume: float
61
+
62
+
63
+ class SentenceDto(BaseModel):
64
  speaker: str
65
+ sentenceId: str
66
+ sentence: str
67
+ prosodyReference: str
68
+ emotion: str
69
+ intensity: int
70
+
71
+
72
+ class SceneDto(BaseModel):
73
+ sceneId: str
74
+ locationName: str
75
+ sentences: List[SentenceDto]
76
+ bgMusic: BGMusicDto
77
+
78
 
79
+ class ChapterDto(BaseModel):
80
+ chapterId: str
81
+ title: SentenceDto
82
+ scenes: List[SceneDto]
 
83
 
84
+
85
+ class CastDto(BaseModel):
86
  name: str
87
  gender: str
88
+ isAdult: bool
89
+ voiceReference: str
90
+
91
+
92
+ class StoryCreationDTO(BaseModel):
93
+ storyId: str
94
+ chapters: List[ChapterDto]
95
+ cast: List[CastDto]
96
 
97
+ #-----------------------------------------------------------
 
 
98
 
99
  def tts_arabic(text: str, audio_file: str) -> str:
100
  gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
 
119
  def root():
120
  return {"message": "Welcome! Visit /docs for Swagger UI."}
121
 
122
+
123
  @app.post("/tts/")
124
+ async def process_story(story: StoryCreationDTO):
125
+ print(story.storyId)
126
+
127
+ for cast in story.cast:
128
+ print(cast.name, cast.voiceReference)
129
+
130
+ for chapter in story.chapters:
131
+ for scene in chapter.scenes:
132
+ for sentence in scene.sentences:
133
+ print(sentence.speaker, sentence.sentence)
134
+
135
+ return {"status": "Story received"}
136
+
137
+
138
+
139
+ # async def tts_endpoint(
140
+ # text: str = Form(...),
141
+ # audio_file: UploadFile = File(...)
142
+ # ):
143
+ # file_path = os.path.join(OUTPUT_DIR, audio_file.filename)
144
+ # with open(file_path, "wb") as f:
145
+ # f.write(await audio_file.read())
146
+
147
+ # output_wav = tts_arabic(text, file_path)
148
+ # return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
149
+
150
+
151
+
152
 
153
  if __name__ == "__main__":
154
  import uvicorn