triflix committed on
Commit
58646cc
·
verified ·
1 Parent(s): 195efd3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import gradio as gr
4
+ import numpy as np
5
+ import soundfile as sf
6
+ from kokoro import KPipeline
7
+
8
# Directory (relative to the working directory) where generated WAV files
# are written; created at import time if it does not already exist.
AUDIO_DIR = "audio_files"
os.makedirs(AUDIO_DIR, exist_ok=True)

# Module-level Kokoro TTS pipeline shared by every request.
# lang_code='a' selects American English.
pipeline = KPipeline(lang_code='a')
14
+
15
def generate_tts(text, voice="af_sky", speed=1.0):
    """Synthesize speech for *text* and save it as a WAV file.

    Args:
        text: Text to convert to speech. ``None``, empty, or whitespace-only
            input is rejected with a message instead of being synthesized.
        voice: Voice identifier forwarded to the Kokoro pipeline.
        speed: Speed multiplier forwarded to the Kokoro pipeline.

    Returns:
        A ``(url_or_message, filepath)`` pair, matching the two output
        components the click handler is wired to. On success the first item
        is a ``/file=`` URL for the saved WAV and the second is its path
        inside ``AUDIO_DIR``. On failure the first item is a human-readable
        message and the second is ``None``.
    """
    # Every return path must yield two values: the handler has two outputs,
    # and returning a single string makes Gradio reject the result.
    if not text or not text.strip():
        return "Please enter some text to convert to speech.", None

    try:
        # r'$^' cannot match inside non-empty text, so the pipeline does not
        # split the input and the text is synthesized as a whole.
        chunks = [
            audio
            for (_, _, audio) in pipeline(
                text,
                voice=voice,
                speed=speed,
                split_pattern=r'$^',
            )
            if audio is not None
        ]

        # np.concatenate raises on an empty list; report that case clearly.
        if not chunks:
            return "No audio was generated for the given text.", None

        audio_full = np.concatenate(chunks)

        # Short random prefix keeps files from different requests apart.
        session_id = str(uuid.uuid4())[:8]
        filename = f"{session_id}_audio.wav"
        filepath = os.path.join(AUDIO_DIR, filename)

        # Written at 24000 Hz (presumably Kokoro's output rate - confirm).
        sf.write(filepath, audio_full, 24000)

        # NOTE(review): the file-route prefix differs between Gradio major
        # versions - confirm "/file=" matches the installed version.
        audio_url = f"/file={filepath}"

        return audio_url, filepath

    except Exception as e:
        # UI boundary: show the failure in the textbox instead of crashing
        # the request; the audio output is cleared.
        return f"Error generating speech: {str(e)}", None
49
+
50
# Define the Gradio interface: inputs in the left column, results in the
# right column, and a usage guide underneath.
with gr.Blocks() as demo:
    gr.Markdown("# Kokoro Text-to-Speech Generator")

    with gr.Row():
        # Input column: text, voice choice, speed, and the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                placeholder="Type your text here...",
                lines=5
            )
            voice_selector = gr.Dropdown(
                choices=["af_sky", "af_breeze", "af_rays", "af_glow", "af_ember"],
                value="af_sky",
                label="Voice"
            )
            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speech Speed"
            )
            submit_btn = gr.Button("Generate Speech")

        # Output column: audio player plus a textbox for the URL (or an
        # error/validation message returned by generate_tts).
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            url_output = gr.Textbox(label="Audio URL")

    # Set up the event handler. The order of `outputs` must match the order
    # of the values returned by generate_tts: (url/message, file path).
    submit_btn.click(
        fn=generate_tts,
        inputs=[text_input, voice_selector, speed_slider],
        outputs=[url_output, audio_output]
    )

    gr.Markdown("""
    ## How to Use
    1. Enter the text you want to convert to speech in the text box
    2. Select a voice from the dropdown menu
    3. Adjust the speech speed if needed
    4. Click "Generate Speech" to create the audio
    5. The audio will play automatically and a URL will be provided for download
    """)
94
+
95
# Launch the app with file serving capability. AUDIO_DIR is allow-listed
# explicitly so the "/file=" URLs produced by generate_tts can be served;
# without it, whether those files are reachable depends on the Gradio
# version's default file-access rules.
demo.launch(share=True, allowed_paths=[AUDIO_DIR])