MariaKaiser committed on
Commit
1f7f064
·
verified ·
1 Parent(s): 43378d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -5
app.py CHANGED
@@ -135,6 +135,31 @@ if not os.path.exists(dummy_path):
135
  silent = torch.zeros(1, 24000) # 1 channel, 24000 samples
136
  torchaudio.save(dummy_path, silent, 24000)
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  @app.post("/tts/")
139
  async def process_story(story: StoryCreationDTO):
140
  print(story.storyId)
@@ -147,12 +172,20 @@ async def process_story(story: StoryCreationDTO):
147
  for sentence in scene.sentences:
148
  print(sentence.speaker, sentence.sentence)
149
 
150
- # return dummy audio
151
- return FileResponse(
152
- dummy_path,
153
- media_type="audio/wav",
154
- filename="output.wav"
 
 
 
 
 
155
  )
 
 
 
156
  #return {"status": "Story received"}
157
 
158
 
 
135
  silent = torch.zeros(1, 24000) # 1 channel, 24000 samples
136
  torchaudio.save(dummy_path, silent, 24000)
137
 
138
+ from pydantic import BaseModel
139
+
140
+ class TTSResponse(BaseModel):
141
+ file_name: str
142
+ duration: float # seconds
143
+ audio_base64: str
144
+
145
+ ######## Convert your audio to Base64
146
+ import base64
147
+ import torchaudio
148
+ import io
149
+
150
+ def audio_to_base64(audio_path: str) -> (str, float):
151
+ # load audio to get duration
152
+ waveform, sr = torchaudio.load(audio_path) # waveform shape: [channels, samples]
153
+ duration = waveform.shape[1] / sr # seconds
154
+
155
+ # read file bytes
156
+ with open(audio_path, "rb") as f:
157
+ audio_bytes = f.read()
158
+
159
+ audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
160
+ return audio_b64, duration
161
+
162
+
163
  @app.post("/tts/")
164
  async def process_story(story: StoryCreationDTO):
165
  print(story.storyId)
 
172
  for sentence in scene.sentences:
173
  print(sentence.speaker, sentence.sentence)
174
 
175
+ # For testing, use your dummy WAV
176
+ dummy_path = os.path.join(OUTPUT_DIR, "dummy.wav")
177
+
178
+ # Convert to base64 and get duration
179
+ audio_b64, duration = audio_to_base64(dummy_path)
180
+
181
+ response = TTSResponse(
182
+ file_name="chapter1_scene2.wav",
183
+ duration=duration,
184
+ audio_base64=audio_b64
185
  )
186
+
187
+ return response
188
+
189
  #return {"status": "Story received"}
190
 
191