Rezfars committed on
Commit
2a11e16
·
verified ·
1 Parent(s): 6b4ac8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -73
app.py CHANGED
@@ -1,93 +1,75 @@
1
- import os
2
  import tempfile
3
- import numpy as np
4
- from TTS.utils.download import download_url
5
- from TTS.utils.synthesizer import Synthesizer
6
  from pydub import AudioSegment
 
7
  import gradio as gr
8
 
9
- # فقط دو مدل با کیفیت بالا
10
- MODEL_INFO = {
11
- "VITS Male": ["best_model_65633.pth", "config-0.json", "https://huggingface.co/Kamtera/persian-tts-male-vits/resolve/main/"],
12
- "VITS Female (Best)": ["checkpoint_48000.pth", "config-2.json", "https://huggingface.co/Kamtera/persian-tts-female-vits/resolve/main/"]
 
 
13
  }
14
 
15
- MAX_TXT_LEN = 800
16
- synthesizers = {} # برای lazy loading
17
-
18
- # دانلود مدل‌ها اگر موجود نیستند
19
- for model_name, (model_file, config_file, url) in MODEL_INFO.items():
20
- if not os.path.exists(model_name):
21
- os.makedirs(model_name)
22
- download_url(url + model_file, model_name, "best_model.pth")
23
- download_url(url + config_file, model_name, "config.json")
24
-
25
- # تابع lazy load
26
- def get_synthesizer(model_name):
27
- if model_name not in synthesizers:
28
- synthesizers[model_name] = Synthesizer(
29
- model_name + "/best_model.pth",
30
- model_name + "/config.json"
31
- )
32
- return synthesizers[model_name]
33
-
34
- # تبدیل numpy به AudioSegment
35
- def numpy_to_audiosegment(wav: np.ndarray, sample_rate: int):
36
- if wav.dtype != np.float32:
37
- wav = wav.astype(np.float32) / np.max(np.abs(wav))
38
- audio_int16 = (wav * 32767).astype(np.int16)
39
- return AudioSegment(
40
- audio_int16.tobytes(),
41
- frame_rate=sample_rate,
42
- sample_width=2,
43
- channels=1
44
- )
45
-
46
- # تابع TTS دیالوگ
47
- def tts_dialogue(texts: str):
48
- lines = texts.strip().split("\n")
49
  audio_segments = []
50
 
51
  for line in lines:
52
  if ':' not in line:
53
  continue
54
  speaker, text = line.split(":", 1)
55
- text = text.strip()[:MAX_TXT_LEN]
56
 
57
- # انتخاب مدل بر اساس گوینده
58
- if "مرد" in speaker:
59
- model_name = "VITS Male"
60
  else:
61
- model_name = "VITS Female (Best)"
62
 
63
- synthesizer = get_synthesizer(model_name)
64
- wav, sr = synthesizer.tts(text)
65
- segment = numpy_to_audiosegment(wav, sr)
66
- audio_segments.append(segment)
67
 
68
- if not audio_segments:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  return None
70
 
71
- # چسباندن با crossfade کوتاه
72
- final_audio = audio_segments[0]
73
- for segment in audio_segments[1:]:
74
- final_audio = final_audio.append(segment, crossfade=50)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # ذخیره نهایی
77
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
78
- final_audio.export(fp.name, format="wav")
79
- return fp.name
80
 
81
- # Gradio interface
82
- iface = gr.Interface(
83
- fn=tts_dialogue,
84
- inputs=gr.Textbox(
85
- label="Enter Dialogue (use 'مرد:' and 'زن:' as prefixes)",
86
- lines=10,
87
- placeholder="مرد: سلام\nزن: سلام، خوبی؟"
88
- ),
89
- outputs=gr.Audio(label="Generated Dialogue", type='filepath'),
90
- title="🗣️ Persian TTS Dialogue 🗣️",
91
- description="Convert a Persian dialogue between two speakers into speech.",
92
- )
93
- iface.launch(share=False)
 
 
import asyncio
import os
import tempfile

import edge_tts
import gradio as gr
from pydub import AudioSegment
6
 
7
+ # بهترین مدل های TTS فارسی (با کیفیت بالا)
8
+ language_dict = {
9
+ "Persian": {
10
+ "Dilara (Female)": "fa-IR-DilaraNeural", # بهترین مدل زنانه
11
+ "Farid (Male)": "fa-IR-FaridNeural" # بهترین مدل مردانه
12
+ }
13
  }
14
 
15
+ # تابع async برای تولید گفتار
16
+ async def tts_dialogue_persian(dialogue_text):
17
+ lines = dialogue_text.strip().split("\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  audio_segments = []
19
 
20
  for line in lines:
21
  if ':' not in line:
22
  continue
23
  speaker, text = line.split(":", 1)
24
+ text = text.strip()
25
 
26
+ # انتخاب بهترین مدل براساس پیشوند
27
+ if "زن" in speaker:
28
+ voice = language_dict["Persian"]["Dilara (Female)"]
29
  else:
30
+ voice = language_dict["Persian"]["Farid (Male)"]
31
 
32
+ communicate = edge_tts.Communicate(text, voice)
 
 
 
33
 
34
+ # ذخیره موقت و تبدیل به AudioSegment
35
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
36
+ tmp_path = tmp_file.name
37
+ await communicate.save(tmp_path)
38
+ segment = AudioSegment.from_file(tmp_path)
39
+ audio_segments.append(segment)
40
+
41
+ # ترکیب تمام قطعات صوتی
42
+ if audio_segments:
43
+ final_audio = sum(audio_segments)
44
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
45
+ final_path = tmp_file.name
46
+ final_audio.export(final_path, format="mp3")
47
+ return final_path
48
+ else:
49
  return None
50
 
51
+ # Wrapper برای استفاده در Gradio
52
+ def tts_dialogue_wrapper(dialogue_text):
53
+ return asyncio.run(tts_dialogue_persian(dialogue_text))
54
+
55
+ # رابط Gradio
56
+ with gr.Blocks(title="Persian TTS Dialogue") as demo:
57
+ gr.HTML("<center><h1>Persian TTS Dialogue (Edge TTS)</h1></center>")
58
+ gr.Markdown("Use 'زن:' and 'مرد:' as prefixes for lines to select voice.")
59
+
60
+ with gr.Row():
61
+ with gr.Column():
62
+ input_text = gr.Textbox(
63
+ lines=10,
64
+ label="Input Dialogue",
65
+ placeholder="مرد: سلام\nزن: سلام، خوبی؟"
66
+ )
67
+ run_btn = gr.Button(value="Generate Audio", variant="primary")
68
+
69
+ with gr.Column():
70
+ output_audio = gr.Audio(type="filepath", label="Generated Dialogue")
71
 
72
+ run_btn.click(tts_dialogue_wrapper, inputs=[input_text], outputs=[output_audio])
 
 
 
73
 
74
+ if __name__ == "__main__":
75
+ demo.queue().launch(share=True)