UpCoder committed on
Commit
01a6ec3
·
verified ·
1 Parent(s): 742fa67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -32
app.py CHANGED
@@ -5,95 +5,105 @@ import numpy as np
5
  from TTS.utils.synthesizer import Synthesizer
6
  from huggingface_hub import hf_hub_download
7
 
8
# 1. Read the Hugging Face access token from the environment
#    (None when HF_TOKEN is not set).
hf_token = os.environ.get("HF_TOKEN")

# 2. Download the model checkpoint and its config from the private repository.
repo_id = "UpCoder/behruz-vits-v3-private"

try:
    print("Downloading model files...")
    model_path = hf_hub_download(repo_id=repo_id, filename="checkpoint_43000.pth", token=hf_token)
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=hf_token)
except Exception as e:
    print(f"Error downloading files: {e}")
    # Re-raise: without both paths the Synthesizer below cannot be built, and
    # carrying on would only fail a few lines later with an unrelated-looking
    # NameError on model_path/config_path.
    raise

# 3. Load the TTS model on CPU.
print("Loading AI Model...")
synthesizer = Synthesizer(
    tts_checkpoint=model_path,
    tts_config_path=config_path,
    use_cuda=False,
)

# Sample rate reported alongside every generated waveform.
# VITS models typically run at 22050 Hz.
SAMPLE_RATE = 22050
31
 
def split_into_sentences(text):
    """Split *text* into sentences.

    A boundary is any run of whitespace (spaces, tabs, newlines) that directly
    follows '.', '!' or '?'. The punctuation stays attached to the sentence it
    ends. Surrounding whitespace of the whole input is ignored, and empty or
    whitespace-only pieces are dropped.

    Args:
        text: The raw input string.

    Returns:
        A list of non-empty sentence strings, in their original order.
    """
    # The lookbehind keeps the terminator in the left-hand piece; `\s+` (rather
    # than a literal space) also splits pasted multi-line paragraphs.
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return [piece for piece in pieces if piece.strip()]
36
 
37
def synthesize_voice_stream(text):
    """Synthesize *text* sentence by sentence, yielding one audio chunk each.

    The input is truncated to 2000 characters, split with
    ``split_into_sentences`` and each sentence is passed to
    ``synthesizer.tts``. A sentence that fails to synthesize is logged and
    skipped so the remaining sentences are still produced.

    Args:
        text: The text to speak. ``None``, empty or whitespace-only input
            yields nothing.

    Yields:
        ``(SAMPLE_RATE, samples)`` tuples where ``samples`` is an ``int16``
        NumPy array for one sentence.
    """
    if not text or not text.strip():
        return

    # Hard cap on input size so a huge paste cannot tie up the server.
    text = text[:2000]

    for sentence in split_into_sentences(text):
        try:
            wav_array = np.array(synthesizer.tts(sentence))
            # Assumes the model returns float samples nominally in [-1, 1]
            # (TODO confirm). Clip first so an overshooting sample saturates
            # instead of wrapping around when cast to int16.
            wav_int16 = (np.clip(wav_array, -1.0, 1.0) * 32767).astype(np.int16)
        except Exception as e:
            print(f"Failed to synthesize sentence: {sentence}. Error: {e}")
            continue
        else:
            yield (SAMPLE_RATE, wav_int16)
 
 
 
 
 
 
 
 
 
61
 
62
# 4. Build the UI layout.
# NOTE(review): nesting below was reconstructed from a rendered diff that lost
# indentation — confirm component placement against the real app.py.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as iface:
    gr.Markdown(
        """
        # 🎙️ Behruz's Digital Voice Clone (V3)
        Welcome to my AI voice generator! This model was trained locally on my real voice using deep learning.

        💡 **Pro Tip:** You can paste a whole paragraph! The AI will smartly split it into sentences and stream the audio to you in real-time without crashing.
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Enter Uzbek Text Here (Max 2000 chars)",
                lines=6,
                placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman..."
            )
            generate_btn = gr.Button("🚀 Generate Audio Stream", variant="primary")

        with gr.Column(scale=1):
            # autoplay=True starts playback as soon as audio arrives.
            audio_output = gr.Audio(label="Live Audio Stream", autoplay=True)

    # Clickable example sentences that fill the text box.
    gr.Examples(
        examples=[
            "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
            "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
            "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi."
        ],
        inputs=text_input
    )

    # Run the streaming synthesizer when the button is clicked.
    generate_btn.click(fn=synthesize_voice_stream, inputs=text_input, outputs=audio_output)

iface.launch()
 
5
  from TTS.utils.synthesizer import Synthesizer
6
  from huggingface_hub import hf_hub_download
7
 
8
# 1. Read the Hugging Face access token from the environment
#    (None when HF_TOKEN is not set).
hf_token = os.environ.get("HF_TOKEN")

# 2. Download the model checkpoint and its config from the private repository.
repo_id = "UpCoder/behruz-vits-v3-private"

try:
    print("Model fayllari yuklanmoqda...")
    model_path = hf_hub_download(repo_id=repo_id, filename="checkpoint_43000.pth", token=hf_token)
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=hf_token)
except Exception as e:
    print(f"Fayllarni yuklashda xatolik: {e}")
    # Re-raise: without both paths the Synthesizer below cannot be built, and
    # carrying on would only fail a few lines later with an unrelated-looking
    # NameError on model_path/config_path.
    raise

# 3. Load the TTS model on CPU.
print("Sun'iy intellekt ishga tushmoqda...")
synthesizer = Synthesizer(
    tts_checkpoint=model_path,
    tts_config_path=config_path,
    use_cuda=False,
)

# Sample rate reported alongside the generated waveform
# (the standard rate for VITS, 22050 Hz).
SAMPLE_RATE = 22050
31
 
def split_into_sentences(text):
    """Split *text* into sentences.

    A boundary is any run of whitespace (spaces, tabs, newlines) that directly
    follows '.', '!' or '?'. The punctuation stays attached to the sentence it
    ends. Surrounding whitespace of the whole input is ignored, and empty or
    whitespace-only pieces are dropped.

    Args:
        text: The raw input string.

    Returns:
        A list of non-empty sentence strings, in their original order.
    """
    # The lookbehind keeps the terminator in the left-hand piece; `\s+` (rather
    # than a literal space) also splits pasted multi-line paragraphs.
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return [piece for piece in pieces if piece.strip()]
35
 
36
def synthesize_full_audio(text):
    """Synthesize *text* into a single waveform.

    The input is truncated to 2000 characters, split with
    ``split_into_sentences`` and each sentence is passed to
    ``synthesizer.tts``. The per-sentence waveforms are joined with a
    0.25-second pause between them. A sentence that fails to synthesize is
    logged and skipped.

    Args:
        text: The text to speak.

    Returns:
        ``(SAMPLE_RATE, samples)`` with ``samples`` an ``int16`` NumPy array,
        or ``None`` when the input is ``None``/empty/whitespace-only or no
        sentence could be synthesized.
    """
    if not text or not text.strip():
        return None

    # Hard cap on input size so a huge paste cannot tie up the server.
    text = text[:2000]

    # 0.25 s of silence, inserted between sentences as a natural pause.
    silence = np.zeros(int(SAMPLE_RATE * 0.25))

    chunks = []
    for sentence in split_into_sentences(text):
        try:
            wav = np.array(synthesizer.tts(sentence))
        except Exception as e:
            print(f"Jumlani o'qishda xatolik: {sentence}. Xato: {e}")
            continue
        # Add the pause only *between* successfully synthesized sentences, so
        # a failure on the last sentence(s) never leaves trailing silence.
        if chunks:
            chunks.append(silence)
        chunks.append(wav)

    if not chunks:
        return None

    # Merge everything into one waveform. Assumes float samples nominally in
    # [-1, 1] (TODO confirm); clip so an overshooting sample saturates instead
    # of wrapping around when cast to int16.
    final_wav = np.clip(np.concatenate(chunks), -1.0, 1.0)
    final_wav_int16 = (final_wav * 32767).astype(np.int16)

    return (SAMPLE_RATE, final_wav_int16)
71
 
72
# 4. Build the Uzbek-language UI.
# NOTE(review): nesting below was reconstructed from a rendered diff that lost
# indentation — confirm component placement against the real app.py.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", secondary_hue="teal")) as iface:
    gr.Markdown(
        """
        <div style="text-align: center;">
        <h1>🎙️ Behruzning Raqamli Ovozli Kloni (V3)</h1>
        <p><strong>Mening sun'iy intellekt ovoz generatorimga xush kelibsiz!</strong> Ushbu model o'zimning haqiqiy ovozim asosida neyrotarmoqlar yordamida o'qitildi.</p>
        </div>

        💡 **Foydali maslahat:** Katta matnlarni (masalan, butun bir xatboshini) bemalol kiritishingiz mumkin! Dastur uni avtomat ravishda jumlalarga bo'lib, xatosiz o'qib beradi va bitta tayyor audio fayl qilib taqdim etadi.
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="O'zbekcha matnni bu yerga kiriting (Maksimum 2000 belgi)",
                lines=6,
                placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman..."
            )
            generate_btn = gr.Button("🚀 Ovozga Aylantirish", variant="primary")

        with gr.Column(scale=1):
            audio_output = gr.Audio(label="🎧 Tayyor Audio Fayl")

    # Example sentences wired to the text box.
    gr.Examples(
        examples=[
            "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
            "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
            "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi."
        ],
        inputs=text_input,
        label="Namuna jumlalar (birini tanlang)"
    )

    # Run the synthesizer when the button is clicked.
    generate_btn.click(fn=synthesize_full_audio, inputs=text_input, outputs=audio_output)

iface.launch()