Spaces:

UpCoder
/

Behruz-Voice-AI

Sleeping

App Files Files Community

UpCoder committed 11 days ago

Commit

742fa67

verified ·

1 Parent(s): ef438a0

updated the interface...

Browse files

Files changed (1) hide show

app.py +72 -17

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import gradio as gr
 import os
 from TTS.utils.synthesizer import Synthesizer
 from huggingface_hub import hf_hub_download
@@ -16,7 +18,7 @@ try:
 except Exception as e:
     print(f"Error downloading files: {e}")
-# 3. Load the AI (We set use_cuda=False because the free cloud tier doesn't have a GPU!)
 print("Loading AI Model...")
 synthesizer = Synthesizer(
     tts_checkpoint=model_path,
@@ -24,21 +26,74 @@ synthesizer = Synthesizer(
     use_cuda=False
 )
-# 4. The function that generates the audio
-def synthesize_voice(text):
-    wav = synthesizer.tts(text)
-    output_file = "output.wav"
-    synthesizer.save_wav(wav, output_file)
-    return output_file
-# 5. Build the beautiful User Interface
-iface = gr.Interface(
-    fn=synthesize_voice,
-    inputs=gr.Textbox(label="Enter Uzbek Text Here", lines=3, placeholder="Salom, bu mening raqamli ovozim..."),
-    outputs=gr.Audio(label="Generated Audio"),
-    title="🎙️ Behruz's Digital Voice Clone",
-    description="Type any Uzbek sentence below to hear it spoken by an AI trained on my real voice! (Note: Generation takes a few seconds on the free tier).",
-    theme="huggingface"
-)
 iface.launch()

 import gradio as gr
 import os
+import re
+import numpy as np
 from TTS.utils.synthesizer import Synthesizer
 from huggingface_hub import hf_hub_download
 except Exception as e:
     print(f"Error downloading files: {e}")
+# 3. Load the AI
 print("Loading AI Model...")
 synthesizer = Synthesizer(
     tts_checkpoint=model_path,
     use_cuda=False
 )
+# VITS models typically run at a 22050 Hz sample rate
+SAMPLE_RATE = 22050
def split_into_sentences(text):
    """Split *text* into sentences at ``.``, ``!`` or ``?`` boundaries.

    A boundary is sentence-ending punctuation followed by any run of
    whitespace (spaces, tabs or line breaks), so pasted multi-line
    paragraphs are split the same way as single-line ones. The punctuation
    stays attached to the sentence it ends.

    Args:
        text: The raw user input. ``None`` and empty/blank strings are
            accepted.

    Returns:
        A list of non-empty sentences with surrounding whitespace removed;
        an empty list when there is nothing to speak.
    """
    if not text:
        return []
    # The look-behind keeps the punctuation on the left-hand piece; ``\s+``
    # (rather than a literal space) also treats newlines as separators.
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return [piece.strip() for piece in pieces if piece.strip()]
def synthesize_voice_stream(text):
    """Synthesize *text* one sentence at a time, yielding an audio chunk each.

    The input is capped at 2000 characters, split with
    ``split_into_sentences`` and every sentence is passed to the module-level
    ``synthesizer``. A sentence that fails to synthesize is logged and
    skipped so that one bad sentence does not abort the rest.

    Args:
        text: The text to speak. ``None`` or blank input produces no audio.

    Yields:
        ``(sample_rate, samples)`` tuples where ``samples`` is an int16
        numpy array holding the audio for one sentence.
    """
    # A cleared textbox may arrive as None rather than ""; both mean
    # "nothing to say".
    if not text or not text.strip():
        return

    # Hard limit so a huge paste cannot tie up the CPU-only server.
    text = text[:2000]

    # Use the rate reported by the loaded model when it exposes one, and
    # fall back to the module-level SAMPLE_RATE constant otherwise.
    sample_rate = getattr(synthesizer, "output_sample_rate", None) or SAMPLE_RATE

    for sentence in split_into_sentences(text):
        try:
            wav = synthesizer.tts(sentence)
            samples = np.asarray(wav, dtype=np.float32)
            # Clip before scaling: values outside [-1, 1] would otherwise
            # wrap around when cast to int16 and produce loud clicks.
            pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
        except Exception as e:
            # Best-effort: report the failure and move on to the next sentence.
            print(f"Failed to synthesize sentence: {sentence}. Error: {e}")
            continue
        if pcm.size == 0:
            # Nothing audible was produced for this sentence.
            continue
        yield (sample_rate, pcm)
# 4. Build the UI layout: text input and button on the left, audio player on
#    the right, with a few clickable example sentences underneath.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as iface:
    gr.Markdown(
        """
        # 🎙️ Behruz's Digital Voice Clone (V3)
        Welcome to my AI voice generator! This model was trained locally on my real voice using deep learning.
        💡 **Pro Tip:** You can paste a whole paragraph! The AI will smartly split it into sentences and stream the audio to you in real-time without crashing.
        """
    )
    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Enter Uzbek Text Here (Max 2000 chars)",
                lines=6,
                placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman..."
            )
            generate_btn = gr.Button("🚀 Generate Audio Stream", variant="primary")
        with gr.Column(scale=1):
            # streaming=True makes the player append every chunk yielded by
            # the handler to one continuous stream; without it each yielded
            # sentence would replace the previous one. autoplay=True starts
            # playback as soon as the first chunk arrives.
            audio_output = gr.Audio(label="Live Audio Stream", streaming=True, autoplay=True)
    # Quick-click examples so visitors do not have to think of what to type.
    gr.Examples(
        examples=[
            "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
            "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
            "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi."
        ],
        inputs=text_input
    )
    # Connect the button to the generator handler; each yielded chunk is sent
    # to the audio component.
    generate_btn.click(fn=synthesize_voice_stream, inputs=text_input, outputs=audio_output)
iface.launch()