smartdigitalnetworks committed on
Commit
d9e0e01
·
verified ·
1 Parent(s): 1b9b7ff

Upload 4 files

Browse files
Files changed (4) hide show
  1. apppy.py +79 -0
  2. apptts1.py +31 -0
  3. apptts2.py +17 -0
  4. requirements.txt +5 -0
apppy.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import edge_tts
3
+ import asyncio
4
+ import tempfile
5
+ import os
6
+
7
async def get_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Returns:
        A dict mapping "<ShortName> - <Locale> (<Gender>)" to the voice's
        ShortName, one entry per voice returned by ``edge_tts.list_voices()``.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = entry['ShortName']
    return labelled
10
+
11
async def text_to_speech(text, voice, rate, volume, pitch):
    """Synthesize *text* into a temporary MP3 file using Edge TTS.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected with a message instead of being sent to the service.
        voice: Dropdown label; the part before the first " - " is used as
            the voice name passed to ``edge_tts.Communicate``.
        rate: Speech-rate offset, formatted as a signed percentage ("+10%").
        volume: Volume offset, formatted as a signed percentage.
        pitch: Pitch offset, formatted as a signed Hz value ("-5Hz").

    Returns:
        ``(path, None)`` on success, where *path* is the generated ``.mp3``
        temp file, or ``(None, message)`` when text or voice is missing.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # Slider values may arrive as floats (e.g. 10.0); the ``d`` format code
    # only accepts integers, so round to the nearest whole step first.
    rate_str = f"{round(rate):+d}%"
    volume_str = f"{round(volume):+d}%"
    pitch_str = f"{round(pitch):+d}Hz"
    communicate = edge_tts.Communicate(
        text, voice_short_name, rate=rate_str, volume=volume_str, pitch=pitch_str
    )

    # Only reserve a unique path here; the handle is closed before edge_tts
    # writes to it, and delete=False keeps the file for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Synthesis failed or was cancelled: do not leave an orphaned,
        # empty temp file behind. The original error is re-raised.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path, None
26
+
27
async def tts_interface(text, voice, rate, volume, pitch):
    """Gradio callback: run text_to_speech and surface any validation message.

    Returns:
        A 2-tuple for the (audio, markdown) outputs. The first item is the
        audio file path or None. The second item is the result of
        ``gr.Warning(message)`` when text_to_speech reported a message,
        otherwise None.
    """
    audio_path, message = await text_to_speech(text, voice, rate, volume, pitch)
    second_output = gr.Warning(message) if message else None
    return audio_path, second_output
32
+
33
async def create_demo():
    """Build the Gradio interface for the Edge TTS app.

    Awaits get_voices() to populate the voice dropdown, then wires
    tts_interface to a text box, the dropdown, and three sliders
    (rate, volume, pitch).

    Returns:
        The configured ``gr.Interface``; it is not queued or launched here.
    """
    voice_labels = list((await get_voices()).keys())

    # The literal's lines are kept flush-left so the text is reproduced
    # exactly as it appears in the source listing.
    description = """
Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.

🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥

Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
Transform your words into stunning, professional-quality videos in just a few clicks.

✨ Features:
• Convert text to engaging videos with customizable visuals
• Choose from 40+ languages and 300+ voices
• Perfect for creating audiobooks, storytelling, and language learning materials
• Ideal for educators, content creators, and language enthusiasts
"""

    # Input components, in the positional order tts_interface expects.
    text_box = gr.Textbox(label="Input Text", lines=5)
    # The leading empty choice is the default, so no voice is preselected.
    voice_picker = gr.Dropdown(choices=[""] + voice_labels, label="Select Voice", value="")
    rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
    volume_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Volume Adjustment (%)", step=1)
    pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)

    # Output components: the audio player plus a hidden markdown slot that
    # receives the second value returned by tts_interface.
    audio_out = gr.Audio(label="Generated Audio", type="filepath")
    warning_out = gr.Markdown(label="Warning", visible=False)

    return gr.Interface(
        fn=tts_interface,
        inputs=[text_box, voice_picker, rate_slider, volume_slider, pitch_slider],
        outputs=[audio_out, warning_out],
        title="Edge TTS Text-to-Speech",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name=None,
    )
72
+
73
async def main():
    """Create the demo, enable its queue, and launch it with the API page hidden."""
    app = await create_demo()
    # Queue with a default concurrency limit of 5.
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)


# Script entry point: drive the async main() to completion.
if __name__ == "__main__":
    asyncio.run(main())
apptts1.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
+
5
# Text-to-speech pipeline backed by the Microsoft SpeechT5 checkpoint.
# Created once at import time and reused by every request.
synthesizer = pipeline("text-to-speech", model="microsoft/speecht5_tts")

# SpeechT5 also needs a speaker embedding; take one x-vector from the
# CMU ARCTIC x-vector dataset and add a leading dimension with unsqueeze(0).
from datasets import load_dataset

embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
_speaker_xvector = embeddings_dataset[7306]["xvector"]
speaker_embedding = torch.tensor(_speaker_xvector).unsqueeze(0)
13
+
14
def generate_speech(text):
    """Synthesize *text* with the module-level SpeechT5 pipeline.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input
            yields no audio instead of being sent to the model.

    Returns:
        ``None`` when there is nothing to speak, otherwise a
        ``(sampling_rate, audio)`` tuple taken from the pipeline result.
    """
    # Guard blank input up front; a whitespace-only string is truthy and
    # would otherwise reach the model.
    if not text or not text.strip():
        return None
    result = synthesizer(text, forward_params={"speaker_embeddings": speaker_embedding})
    return (result["sampling_rate"], result["audio"])
20
+
21
+ # Build the Interface
22
# Single text box in, single audio player out.
_text_input = gr.Textbox(label="Enter Text", placeholder="Type something to hear it...")
_audio_output = gr.Audio(label="Generated Audio")

demo = gr.Interface(
    fn=generate_speech,
    inputs=_text_input,
    outputs=_audio_output,
    title="2025 Gradio TTS Demo",
    description="A simple text-to-speech demo using Microsoft SpeechT5.",
)

# Launch only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
apptts2.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ # Note: Requires F5-TTS package (pip install f5-tts)
3
+ from f5_tts.infer.utils_infer import infer_process
4
+
5
def tts_f5(text):
    """Placeholder F5-TTS handler: returns a fixed output path.

    NOTE(review): no synthesis happens here. *text* is unused and nothing
    in this function writes "output.wav", so the returned path only works
    if that file already exists. The original author notes that F5-TTS
    usually requires a reference audio for cloning and that official apps
    often use the 'f5-tts_infer-gradio' command directly.

    Returns:
        The string "output.wav".
    """
    # TODO: implement the text -> audio file step before returning.
    return "output.wav"
11
+
12
# Minimal text-in / audio-out interface around tts_f5, using Gradio's
# string shortcuts for the input and output components.
demo = gr.Interface(
    fn=tts_f5,
    inputs="text",
    outputs="audio",
    title="F5-TTS Advanced Demo",
)

# Without an explicit launch, running this file as a script builds the
# interface and exits without serving it. Launch only when run directly,
# so importing the module stays free of side effects.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ edge-tts
2
+ torch
3
+ transformers
4
+ datasets
5
+ gradio-client