UpCoder committed on
Commit
742fa67
·
verified ·
1 Parent(s): ef438a0

updated the interface...

Browse files
Files changed (1) hide show
  1. app.py +72 -17
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import gradio as gr
2
  import os
 
 
3
  from TTS.utils.synthesizer import Synthesizer
4
  from huggingface_hub import hf_hub_download
5
 
@@ -16,7 +18,7 @@ try:
16
  except Exception as e:
17
  print(f"Error downloading files: {e}")
18
 
19
- # 3. Load the AI (We set use_cuda=False because the free cloud tier doesn't have a GPU!)
20
  print("Loading AI Model...")
21
  synthesizer = Synthesizer(
22
  tts_checkpoint=model_path,
@@ -24,21 +26,74 @@ synthesizer = Synthesizer(
24
  use_cuda=False
25
  )
26
 
27
- # 4. The function that generates the audio
28
- def synthesize_voice(text):
29
- wav = synthesizer.tts(text)
30
- output_file = "output.wav"
31
- synthesizer.save_wav(wav, output_file)
32
- return output_file
33
-
34
- # 5. Build the beautiful User Interface
35
- iface = gr.Interface(
36
- fn=synthesize_voice,
37
- inputs=gr.Textbox(label="Enter Uzbek Text Here", lines=3, placeholder="Salom, bu mening raqamli ovozim..."),
38
- outputs=gr.Audio(label="Generated Audio"),
39
- title="🎙️ Behruz's Digital Voice Clone",
40
- description="Type any Uzbek sentence below to hear it spoken by an AI trained on my real voice! (Note: Generation takes a few seconds on the free tier).",
41
- theme="huggingface"
42
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  iface.launch()
 
1
  import gradio as gr
2
  import os
3
+ import re
4
+ import numpy as np
5
  from TTS.utils.synthesizer import Synthesizer
6
  from huggingface_hub import hf_hub_download
7
 
 
18
  except Exception as e:
19
  print(f"Error downloading files: {e}")
20
 
21
+ # 3. Load the AI
22
  print("Loading AI Model...")
23
  synthesizer = Synthesizer(
24
  tts_checkpoint=model_path,
 
26
  use_cuda=False
27
  )
28
 
29
+ # VITS models typically run at a 22050 Hz sample rate
30
+ SAMPLE_RATE = 22050
31
+
32
def split_into_sentences(text):
    """Split ``text`` into sentences at ``.``, ``!`` or ``?`` boundaries.

    A boundary is any run of whitespace (spaces, tabs, newlines) that
    directly follows a sentence-ending punctuation mark. The punctuation
    stays attached to the sentence it terminates.

    Args:
        text: The paragraph to split. ``None`` is treated as an empty string.

    Returns:
        A list of non-empty sentences with surrounding whitespace removed.
        Empty or whitespace-only input yields an empty list.
    """
    if not text:
        return []

    # ``\s+`` rather than a literal space: pasted paragraphs frequently
    # separate sentences with newlines, which must also count as boundaries.
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return [piece.strip() for piece in pieces if piece.strip()]
36
+
37
def synthesize_voice_stream(text):
    """Synthesize ``text`` one sentence at a time, yielding an audio chunk each.

    This is a generator, so the caller receives the first sentence's audio
    as soon as it is ready instead of waiting for the whole paragraph.

    Args:
        text: The text to speak. ``None``, empty and whitespace-only input
            produce no chunks. Input longer than 2000 characters is truncated.

    Yields:
        ``(sample_rate, samples)`` tuples where ``samples`` is a 1-D
        ``numpy.int16`` array holding the waveform of one sentence.

    Sentences that fail to synthesize are logged with ``print`` and skipped
    so that one bad sentence does not abort the rest of the paragraph.
    """
    # The UI may hand over None for an untouched textbox; treat it as empty.
    if not text or not text.strip():
        return

    # Safety limit: cap the amount of text so a huge paste cannot tie up
    # the (CPU-only) server indefinitely.
    text = text[:2000]

    # Prefer the rate reported by the loaded model; fall back to the
    # module-level constant when the synthesizer does not expose one.
    sample_rate = getattr(synthesizer, "output_sample_rate", None) or SAMPLE_RATE

    for sentence in split_into_sentences(text):
        try:
            wav = synthesizer.tts(sentence)
            samples = np.asarray(wav, dtype=np.float32)
        except Exception as e:
            print(f"Failed to synthesize sentence: {sentence}. Error: {e}")
            continue

        if samples.size == 0:
            continue

        # Clip before scaling: samples slightly outside [-1, 1] would
        # otherwise wrap around in int16 and produce loud clicks.
        pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

        # Yield outside the try block so errors raised by the consumer are
        # not misreported as synthesis failures.
        yield (sample_rate, pcm)
61
+
62
+ # 4. Build the Professional UI layout
63
# Build the UI: an intro banner, a text input with a generate button on the
# left, a streaming audio player on the right, and clickable example sentences.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as iface:
    gr.Markdown(
        """
        # 🎙️ Behruz's Digital Voice Clone (V3)
        Welcome to my AI voice generator! This model was trained locally on my real voice using deep learning.

        💡 **Pro Tip:** You can paste a whole paragraph! The AI will smartly split it into sentences and stream the audio to you in real-time without crashing.
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Enter Uzbek Text Here (Max 2000 chars)",
                lines=6,
                placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman..."
            )
            generate_btn = gr.Button("🚀 Generate Audio Stream", variant="primary")

        with gr.Column(scale=1):
            # streaming=True is required for a generator-backed output:
            # without it every yielded chunk REPLACES the previous clip, so
            # earlier sentences are cut off instead of being played in order.
            # autoplay=True starts playback as soon as the first chunk arrives.
            audio_output = gr.Audio(
                label="Live Audio Stream",
                streaming=True,
                autoplay=True,
            )

    # Quick-click examples so visitors do not have to think of what to type.
    gr.Examples(
        examples=[
            "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
            "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
            "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi."
        ],
        inputs=text_input
    )

    # Connect the button to the sentence-by-sentence streaming generator.
    generate_btn.click(fn=synthesize_voice_stream, inputs=text_input, outputs=audio_output)

iface.launch()