Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -135,6 +135,31 @@ if not os.path.exists(dummy_path):
|
|
| 135 |
silent = torch.zeros(1, 24000) # 1 channel, 24000 samples
|
| 136 |
torchaudio.save(dummy_path, silent, 24000)
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
@app.post("/tts/")
|
| 139 |
async def process_story(story: StoryCreationDTO):
|
| 140 |
print(story.storyId)
|
|
@@ -147,12 +172,20 @@ async def process_story(story: StoryCreationDTO):
|
|
| 147 |
for sentence in scene.sentences:
|
| 148 |
print(sentence.speaker, sentence.sentence)
|
| 149 |
|
| 150 |
-
#
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
)
|
|
|
|
|
|
|
|
|
|
| 156 |
#return {"status": "Story received"}
|
| 157 |
|
| 158 |
|
|
|
|
| 135 |
silent = torch.zeros(1, 24000) # 1 channel, 24000 samples
|
| 136 |
torchaudio.save(dummy_path, silent, 24000)
|
| 137 |
|
| 138 |
from pydantic import BaseModel


class TTSResponse(BaseModel):
    """Response payload returned by the /tts/ endpoint.

    Carries one synthesized audio clip, inlined as base64 so it can be
    shipped in a plain JSON body.
    """

    file_name: str     # suggested filename for the generated clip
    duration: float    # clip length in seconds
    audio_base64: str  # base64-encoded bytes of the audio file
######## Convert your audio to Base64
import base64
import torchaudio
import io


def audio_to_base64(audio_path: str) -> tuple[str, float]:
    """Encode an audio file as base64 and measure its duration.

    Parameters
    ----------
    audio_path : str
        Path to an audio file readable by torchaudio (e.g. the dummy WAV).

    Returns
    -------
    tuple[str, float]
        (base64-encoded file bytes as UTF-8 text, duration in seconds).

    Raises
    ------
    FileNotFoundError / RuntimeError
        If the file is missing or torchaudio cannot decode it.
    """
    # Decode only to compute the duration; torchaudio returns a tensor
    # shaped [channels, samples] plus the sample rate.
    waveform, sr = torchaudio.load(audio_path)
    duration = waveform.shape[1] / sr  # samples / samples-per-second

    # Encode the original file bytes (container + codec intact), NOT the
    # decoded tensor — the client receives the file exactly as stored.
    with open(audio_path, "rb") as f:
        audio_bytes = f.read()

    audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
    return audio_b64, duration
|
| 161 |
+
|
| 162 |
+
|
| 163 |
@app.post("/tts/")
|
| 164 |
async def process_story(story: StoryCreationDTO):
|
| 165 |
print(story.storyId)
|
|
|
|
| 172 |
for sentence in scene.sentences:
|
| 173 |
print(sentence.speaker, sentence.sentence)
|
| 174 |
|
| 175 |
+
# For testing, use your dummy WAV
|
| 176 |
+
dummy_path = os.path.join(OUTPUT_DIR, "dummy.wav")
|
| 177 |
+
|
| 178 |
+
# Convert to base64 and get duration
|
| 179 |
+
audio_b64, duration = audio_to_base64(dummy_path)
|
| 180 |
+
|
| 181 |
+
response = TTSResponse(
|
| 182 |
+
file_name="chapter1_scene2.wav",
|
| 183 |
+
duration=duration,
|
| 184 |
+
audio_base64=audio_b64
|
| 185 |
)
|
| 186 |
+
|
| 187 |
+
return response
|
| 188 |
+
|
| 189 |
#return {"status": "Story received"}
|
| 190 |
|
| 191 |
|