Spaces:

UpCoder
/

Behruz-Voice-AI

Sleeping

App Files Community

UpCoder committed 10 days ago

Commit

01a6ec3

verified ·

1 Parent(s): 742fa67

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -32

app.py CHANGED Viewed

@@ -5,95 +5,105 @@ import numpy as np
 from TTS.utils.synthesizer import Synthesizer
 from huggingface_hub import hf_hub_download
-# 1. Grab the secret key you hid in the settings
 hf_token = os.environ.get("HF_TOKEN")
-# 2. Quietly download the brain from your private vault
 repo_id = "UpCoder/behruz-vits-v3-private"
 try:
-    print("Downloading model files...")
     model_path = hf_hub_download(repo_id=repo_id, filename="checkpoint_43000.pth", token=hf_token)
     config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=hf_token)
 except Exception as e:
-    print(f"Error downloading files: {e}")
-# 3. Load the AI
-print("Loading AI Model...")
 synthesizer = Synthesizer(
     tts_checkpoint=model_path,
     tts_config_path=config_path,
     use_cuda=False
 )
-# VITS models typically run at a 22050 Hz sample rate
 SAMPLE_RATE = 22050
 def split_into_sentences(text):
-    # This regex smartly splits paragraphs by punctuation (. ! ?) but keeps the words intact
     sentences = re.split(r'(?<=[.!?]) +', text.strip())
     return [s for s in sentences if s.strip()]
-def synthesize_voice_stream(text):
     if not text.strip():
         return None
-    # Safety feature: Hard limit of 2000 characters so users don't paste an entire Harry Potter book and crash your free server!
     if len(text) > 2000:
         text = text[:2000]
     sentences = split_into_sentences(text)
-    for sentence in sentences:
         try:
-            # Generate the raw audio math for just this one sentence
             wav = synthesizer.tts(sentence)
-            # Convert the raw math into a standard audio waveform array
-            wav_array = np.array(wav)
-            wav_int16 = (wav_array * 32767).astype(np.int16)
-            # YIELD instead of RETURN. This streams the audio chunk straight to the user's speakers instantly!
-            yield (SAMPLE_RATE, wav_int16)
         except Exception as e:
-            print(f"Failed to synthesize sentence: {sentence}. Error: {e}")
             continue
-# 4. Build the Professional UI layout
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as iface:
     gr.Markdown(
         """
-        # 🎙️ Behruz's Digital Voice Clone (V3)
-        Welcome to my AI voice generator! This model was trained locally on my real voice using deep learning.
-        💡 **Pro Tip:** You can paste a whole paragraph! The AI will smartly split it into sentences and stream the audio to you in real-time without crashing.
         """
     )
     with gr.Row():
         with gr.Column(scale=2):
             text_input = gr.Textbox(
-                label="Enter Uzbek Text Here (Max 2000 chars)",
                 lines=6,
                 placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman..."
             )
-            generate_btn = gr.Button("🚀 Generate Audio Stream", variant="primary")
         with gr.Column(scale=1):
-            # autoplay=True means as soon as the first chunk arrives, it starts speaking!
-            audio_output = gr.Audio(label="Live Audio Stream", autoplay=True)
-    # Add quick-click examples so your friends don't have to think of what to type
     gr.Examples(
         examples=[
             "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
             "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
             "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi."
         ],
-        inputs=text_input
     )
-    # Connect the button to the streaming function
-    generate_btn.click(fn=synthesize_voice_stream, inputs=text_input, outputs=audio_output)
 iface.launch()

 from TTS.utils.synthesizer import Synthesizer
 from huggingface_hub import hf_hub_download
+# 1. Maxfiy kalitni olish
 hf_token = os.environ.get("HF_TOKEN")
+# 2. Modelni maxfiy ombordan yuklab olish
 repo_id = "UpCoder/behruz-vits-v3-private"
 try:
+    print("Model fayllari yuklanmoqda...")
     model_path = hf_hub_download(repo_id=repo_id, filename="checkpoint_43000.pth", token=hf_token)
     config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=hf_token)
 except Exception as e:
+    print(f"Fayllarni yuklashda xatolik: {e}")
+# 3. Sun'iy intellektni ishga tushirish
+print("Sun'iy intellekt ishga tushmoqda...")
 synthesizer = Synthesizer(
     tts_checkpoint=model_path,
     tts_config_path=config_path,
     use_cuda=False
 )
+# VITS uchun standart chastota (22050 Hz)
 SAMPLE_RATE = 22050
 def split_into_sentences(text):
     sentences = re.split(r'(?<=[.!?]) +', text.strip())
     return [s for s in sentences if s.strip()]
+def synthesize_full_audio(text):
     if not text.strip():
         return None
+    # Xavfsizlik: Server qotib qolmasligi uchun matnni 2000 belgi bilan cheklaymiz
     if len(text) > 2000:
         text = text[:2000]
     sentences = split_into_sentences(text)
+    all_wavs = []
+    # Jumlalar orasida tabiiy nafas olish uchun 0.25 soniyalik sukut
+    silence = np.zeros(int(SAMPLE_RATE * 0.25))
+    for i, sentence in enumerate(sentences):
         try:
             wav = synthesizer.tts(sentence)
+            all_wavs.append(np.array(wav))
+            # Oxirgi jumladan tashqari hammadan keyin sukut qo'shamiz
+            if i < len(sentences) - 1:
+                all_wavs.append(silence)
         except Exception as e:
+            print(f"Jumlani o'qishda xatolik: {sentence}. Xato: {e}")
             continue
+    if not all_wavs:
+        return None
+    # Barcha audio parchalarni bitta butun faylga birlashtirish
+    final_wav = np.concatenate(all_wavs)
+    final_wav_int16 = (final_wav * 32767).astype(np.int16)
+    return (SAMPLE_RATE, final_wav_int16)
+# 4. Professional va O'zbekcha Interfeys (UI) yaratish
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", secondary_hue="teal")) as iface:
     gr.Markdown(
         """
+        <div style="text-align: center;">
+            <h1>🎙️ Behruzning Raqamli Ovozli Kloni (V3)</h1>
+            <p><strong>Mening sun'iy intellekt ovoz generatorimga xush kelibsiz!</strong> Ushbu model o'zimning haqiqiy ovozim asosida neyrotarmoqlar yordamida o'qitildi.</p>
+        </div>
+        💡 **Foydali maslahat:** Katta matnlarni (masalan, butun bir xatboshini) bemalol kiritishingiz mumkin! Dastur uni avtomat ravishda jumlalarga bo'lib, xatosiz o'qib beradi va bitta tayyor audio fayl qilib taqdim etadi.
         """
     )
     with gr.Row():
         with gr.Column(scale=2):
             text_input = gr.Textbox(
+                label="O'zbekcha matnni bu yerga kiriting (Maksimum 2000 belgi)",
                 lines=6,
                 placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman..."
             )
+            generate_btn = gr.Button("🚀 Ovozga Aylantirish", variant="primary")
         with gr.Column(scale=1):
+            audio_output = gr.Audio(label="🎧 Tayyor Audio Fayl")
     gr.Examples(
         examples=[
             "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
             "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
             "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi."
         ],
+        inputs=text_input,
+        label="Namuna jumlalar (birini tanlang)"
     )
+    generate_btn.click(fn=synthesize_full_audio, inputs=text_input, outputs=audio_output)
 iface.launch()