Spaces:
Sleeping
Sleeping
Update audio_generator.py
Browse files- audio_generator.py +23 -43
audio_generator.py
CHANGED
|
@@ -1,54 +1,34 @@
|
|
| 1 |
import os
|
| 2 |
import edge_tts
|
| 3 |
import asyncio
|
| 4 |
-
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
"female": {
|
| 9 |
-
"en-US": "en-US-AriaNeural", # Very natural
|
| 10 |
-
"en-GB": "en-GB-LibbyNeural", # British English
|
| 11 |
-
"es-ES": "es-ES-ElviraNeural" # Spanish
|
| 12 |
-
},
|
| 13 |
-
"male": {
|
| 14 |
-
"en-US": "en-US-GuyNeural",
|
| 15 |
-
"en-GB": "en-GB-RyanNeural",
|
| 16 |
-
"es-ES": "es-ES-AlvaroNeural"
|
| 17 |
-
}
|
| 18 |
-
}
|
| 19 |
-
|
| 20 |
-
async def generate_speech(
|
| 21 |
-
text: str,
|
| 22 |
-
output_file: str = "output.mp3",
|
| 23 |
-
voice: Optional[str] = None,
|
| 24 |
-
rate: str = "+0%", # Speed adjustment
|
| 25 |
-
volume: str = "+0%" # Volume adjustment
|
| 26 |
-
) -> str:
|
| 27 |
-
"""
|
| 28 |
-
Generate speech using free EdgeTTS
|
| 29 |
-
Args:
|
| 30 |
-
text: Input text (max 3000 chars)
|
| 31 |
-
output_file: Output path
|
| 32 |
-
voice: Voice code (e.g. 'en-US-AriaNeural')
|
| 33 |
-
rate: Speaking rate adjustment (-50% to +100%)
|
| 34 |
-
volume: Volume adjustment (-50% to +50%)
|
| 35 |
-
"""
|
| 36 |
-
if not voice:
|
| 37 |
-
voice = VOICES["female"]["en-US"] # Default to natural female voice
|
| 38 |
|
|
|
|
| 39 |
try:
|
| 40 |
communicate = edge_tts.Communicate(
|
| 41 |
text=text,
|
| 42 |
-
voice=
|
| 43 |
-
rate=
|
| 44 |
-
volume=
|
| 45 |
)
|
| 46 |
-
await communicate.save(
|
| 47 |
-
return
|
| 48 |
except Exception as e:
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
-
def generate_audio(text: str
|
| 52 |
-
"""
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import edge_tts
|
| 3 |
import asyncio
|
| 4 |
+
import logging
|
| 5 |
+
from datetime import datetime
|
| 6 |
|
| 7 |
+
logging.basicConfig(level=logging.INFO)
|
| 8 |
+
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
async def _generate_speech(text: str, output_path: str) -> str:
    """Synthesize *text* with Edge TTS and save the audio to *output_path*.

    The voice is fixed to ``en-US-AriaNeural`` with unmodified rate and
    volume (``+0%``).

    Args:
        text: Text to convert to speech.
        output_path: Destination path of the generated audio file.

    Returns:
        ``output_path``, once the audio has been written.

    Raises:
        RuntimeError: If creating the communicator or saving the audio
            fails. The underlying exception is attached as ``__cause__``.
    """
    try:
        communicate = edge_tts.Communicate(
            text=text,
            voice="en-US-AriaNeural",
            rate="+0%",
            volume="+0%",
        )
        await communicate.save(output_path)
    except Exception as e:
        # logger.exception keeps the traceback alongside the same message.
        logger.exception(f"Generation failed: {e}")
        # Chain explicitly so callers can inspect the root cause.
        raise RuntimeError(f"Audio generation error: {e}") from e
    return output_path
|
| 23 |
|
| 24 |
+
def generate_audio(text: str) -> str:
    """Main entry point for audio generation.

    Creates the ``tts_outputs`` directory if needed, builds a timestamped
    ``.mp3`` file name inside it, and runs the async speech generator to
    completion.

    Note:
        This function uses ``asyncio.run`` and therefore must not be called
        from a thread that already has a running event loop.

    Args:
        text: Text to convert to speech.

    Returns:
        Path of the generated audio file.

    Raises:
        Exception: Any error from directory creation or speech generation
            is logged and re-raised unchanged.
    """
    try:
        os.makedirs("tts_outputs", exist_ok=True)
        # Microsecond resolution: two calls within the same second would
        # otherwise map to the same path and overwrite each other's output.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        output_path = os.path.join("tts_outputs", f"tts_{timestamp}.mp3")

        return asyncio.run(_generate_speech(text, output_path))
    except Exception as e:
        logger.error(f"Audio generation failed: {str(e)}")
        raise
|