heerjtdev committed on
Commit
1e48f34
·
verified ·
1 Parent(s): dff9996

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -37
app.py CHANGED
@@ -1,54 +1,63 @@
 
1
  import gradio as gr
2
- from kokoro import Kokoro
3
- import torch
 
4
 
 
5
 
6
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
7
 
8
- print("🔄 Loading Kokoro model...")
9
- model = Kokoro(device=device)
10
- print("✅ Kokoro loaded!")
 
 
 
11
 
12
 
13
- def generate_audio(text, voice):
14
- if not text.strip():
15
- return None, "Please enter some text."
 
 
16
 
17
  try:
18
- print("🎀 Generating...")
19
- audio = model.tts(text, voice=voice) # numpy float32 array
20
- return (24000, audio), "Success!"
21
- except Exception as e:
22
- return None, f"Error: {str(e)}"
23
 
 
 
 
24
 
25
- voices = [
26
- "af_heart",
27
- "af_bella",
28
- "af_nicole",
29
- "am_adam",
30
- "am_michael",
31
- "bf_emma",
32
- "bm_george"
33
- ]
34
 
 
 
 
 
35
 
36
- with gr.Blocks(title="Kokoro-TTS") as demo:
37
- gr.Markdown("## 🎧 Kokoro Text → Speech")
38
 
39
- with gr.Row():
40
- txt = gr.Textbox(lines=4, label="Input Text")
41
- voice_select = gr.Dropdown(voices, value="af_heart", label="Voice")
42
 
43
- audio_out = gr.Audio(label="Generated Audio")
44
- status = gr.Textbox(label="Status")
 
 
 
45
 
46
- btn = gr.Button("Generate Audio")
 
47
 
48
- btn.click(
49
- fn=generate_audio,
50
- inputs=[txt, voice_select],
51
- outputs=[audio_out, status]
52
- )
53
 
54
- demo.launch()
 
 
1
+ # app.py
2
  import gradio as gr
3
+ import tempfile
4
+ import soundfile as sf
5
+ import numpy as np
6
 
7
+ from kokoro import KPipeline # correct import
8
 
9
+ # Initialize pipeline once on startup.
10
+ # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
11
+ pipeline = KPipeline(lang_code="a") # choose lang_code that matches the voice prefix
12
 
13
+ # Example voices (prefix letter indicates language family)
14
+ VOICES = [
15
+ "af_heart", "af_bella", "af_nicole", # a* = american-ish voices
16
+ "am_adam", "am_michael",
17
+ "bf_emma", "bm_george" # b* = british-ish voices
18
+ ]
19
 
20
 
21
+ def synthesize_to_file(text: str, voice: str = "af_heart"):
22
+ """Run kokoro pipeline and write first generated audio to a temporary wav file."""
23
+ text = (text or "").strip()
24
+ if not text:
25
+ return None, "Please enter text."
26
 
27
  try:
28
+ gen = pipeline(text, voice=voice) # generator yielding (gs, ps, audio)
29
+ # take the first item produced
30
+ item = next(gen, None)
31
+ if item is None:
32
+ return None, "Kokoro returned no audio."
33
 
34
+ gs, ps, audio = item # gs: generation metadata, ps: phonemes, audio: numpy float32
35
+ # Kokoro audio sample rate is 24000
36
+ sr = 24000
37
 
38
+ # Ensure numpy array dtype is float32
39
+ audio = np.asarray(audio, dtype=np.float32)
 
 
 
 
 
 
 
40
 
41
+ # Write to temporary wav file and return its path (Gradio can serve file paths)
42
+ tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
43
+ sf.write(tmp.name, audio, sr, format="WAV")
44
+ return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."
45
 
46
+ except Exception as e:
47
+ return None, f"Error: {e}"
48
 
 
 
 
49
 
50
+ with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
51
+ gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
52
+ with gr.Row():
53
+ txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
54
+ voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")
55
 
56
+ out_audio = gr.Audio(label="Generated audio (wav file)")
57
+ status = gr.Textbox(label="Status", interactive=False)
58
 
59
+ btn = gr.Button("Generate")
60
+ btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])
 
 
 
61
 
62
+ if __name__ == "__main__":
63
+ demo.launch(server_name="0.0.0.0", server_port=7860)