Spaces:
Sleeping
Sleeping
| """ | |
| UI Components for F5-TTS Thai WebUI | |
| จัดการ Gradio UI components และการสร้าง interface | |
| """ | |
| import gradio as gr | |
| from f5_tts.config import ( | |
| MODEL_CHOICES, | |
| DEFAULT_TTS_SETTINGS, | |
| EXAMPLES, | |
| TIPS_TEXT, | |
| MULTISPEECH_EXAMPLE_TEXT, | |
| MULTISPEECH_PLACEHOLDER, | |
| WHISPER_MODELS, | |
| WHISPER_COMPUTE_TYPES, | |
| WHISPER_LANGUAGES, | |
| MAX_SPEECH_TYPES, | |
| MAX_SEGMENTS | |
| ) | |
| class UIComponents: | |
| """จัดการ Gradio UI Components""" | |
| def __init__(self): | |
| self.speech_type_count = 1 | |
| self.setup_speech_type_components() | |
| def setup_speech_type_components(self): | |
| """สร้าง speech type components""" | |
| self.speech_type_rows = [] | |
| self.speech_type_names = [] | |
| self.speech_type_audios = [] | |
| self.speech_type_ref_texts = [] | |
| self.speech_type_delete_btns = [] | |
| self.speech_type_insert_btns = [] | |
    def create_model_selection_section(self):
        """Build the model selection controls.

        Creates a radio selector over ``MODEL_CHOICES``, a textbox for a custom
        model location (created hidden), a status textbox and a load button.

        NOTE(review): this block was recovered from a copy that had lost its
        indentation; all four components are assumed to live inside the
        ``gr.Row`` -- confirm against the rendered layout.

        Returns:
            The tuple ``(model_select, model_custom, model_status,
            load_custom_btn)``.
        """
        with gr.Row():
            model_select = gr.Radio(
                label="โมเดล",
                choices=MODEL_CHOICES,
                value="Default",
                interactive=True,
            )
            # Starts hidden (visible=False); nothing in this method reveals it.
            model_custom = gr.Textbox(
                label="ตำแหน่งโมเดลแบบกำหนดเอง",
                value="hf://VIZINTZOR/F5-TTS-THAI/model_650000.pt",
                visible=False,
                interactive=True
            )
            model_status = gr.Textbox(label="สถานะโมเดล", value="")
            load_custom_btn = gr.Button("โหลด", variant="primary")
        return model_select, model_custom, model_status, load_custom_btn
    def create_tts_tab(self, infer_tts_fn=None):
        """Build the Text To Speech tab.

        Lays out the reference text/audio inputs, the text to generate, a
        settings accordion (defaults read from ``DEFAULT_TTS_SETTINGS``), the
        output widgets, an examples table and the tips text.

        NOTE(review): this block was recovered from a copy that had lost its
        indentation. The nesting below (inputs and settings in the first
        column, outputs in the second, examples and tips under the row) is a
        reconstruction -- confirm against the rendered layout.

        Args:
            infer_tts_fn: Callable handed to ``gr.Examples`` as ``fn``;
                defaults to ``None``.

        Returns:
            A dict with three sub-dicts of Gradio components:
            ``'inputs'``, ``'outputs'`` and ``'controls'``.
        """
        with gr.Row():
            # Left column: reference inputs, generation text and settings.
            with gr.Column():
                ref_text = gr.Textbox(
                    label="ข้อความต้นฉบับ",
                    lines=1,
                    info="แนะนำให้ใช้เสียงที่มีความยาวไม่เกิน 5-10 วินาที"
                )
                ref_audio = gr.Audio(label="เสียงต้นฉบับ", type="filepath")
                gen_text = gr.Textbox(label="ข้อความที่จะสร้าง", lines=4)
                generate_btn = gr.Button("สร้าง", variant="primary")
                # Generation settings; every default comes from DEFAULT_TTS_SETTINGS.
                with gr.Accordion(label="ตั้งค่า"):
                    remove_silence = gr.Checkbox(
                        label="Remove Silence",
                        value=DEFAULT_TTS_SETTINGS["remove_silence"]
                    )
                    speed = gr.Slider(
                        label="ความเร็ว",
                        value=DEFAULT_TTS_SETTINGS["speed"],
                        minimum=0.3, maximum=1.5, step=0.1
                    )
                    cross_fade_duration = gr.Slider(
                        label="Cross Fade Duration",
                        value=DEFAULT_TTS_SETTINGS["cross_fade_duration"],
                        minimum=0, maximum=1, step=0.05
                    )
                    nfe_step = gr.Slider(
                        label="NFE Step",
                        value=DEFAULT_TTS_SETTINGS["nfe_step"],
                        minimum=7, maximum=64, step=1,
                        info="ยิ่งค่ามากยิ่งมีคุณภาพสูง แต่อาจจะช้าลง"
                    )
                    cfg_strength = gr.Slider(
                        label="CFG Strength",
                        value=DEFAULT_TTS_SETTINGS["cfg_strength"],
                        minimum=1, maximum=4, step=0.1
                    )
                    max_chars = gr.Number(
                        label="ตัวอักษรสูงสุดต่อส่วน",
                        minimum=50, maximum=1000,
                        value=DEFAULT_TTS_SETTINGS["max_chars"],
                        info="จำนวนตัวอักษรสูงสุดที่ใช้ในการแบ่งส่วน สำหรับข้อความยาวๆ"
                    )
                    # The UI hint text states that -1 means "random seed".
                    seed = gr.Number(
                        label="Seed",
                        value=DEFAULT_TTS_SETTINGS["seed"],
                        precision=0,
                        info="-1 = สุ่ม Seed"
                    )
                    no_ref_audio = gr.Checkbox(
                        label="เสียงดั้งเดิม",
                        value=DEFAULT_TTS_SETTINGS["no_ref_audio"],
                        info="ใช้เสียงที่ไม่ผ่านการโคลนเสียงจากโมเดล"
                    )
            # Right column: generated audio, the seed field and the spectrogram.
            with gr.Column():
                output_audio = gr.Audio(label="เสียงที่สร้าง", type="filepath")
                seed_output = gr.Textbox(label="Seed", interactive=False)
                output_spectrogram = gr.Image(label="Spectrogram")
        # Examples
        # Each example fills ref_audio, ref_text and gen_text; caching is off.
        examples = gr.Examples(
            examples=EXAMPLES,
            inputs=[ref_audio, ref_text, gen_text],
            fn=infer_tts_fn,
            outputs=[output_audio, output_spectrogram, ref_text, seed_output],
            cache_examples=False,
            label="ตัวอย่าง"
        )
        # Tips
        # These two handles are not returned; the components are created only
        # so that they render.
        tips = gr.Markdown("# คำแนะนำ")
        tips_content = gr.Markdown(TIPS_TEXT)
        return {
            'inputs': {
                'ref_audio': ref_audio,
                'ref_text': ref_text,
                'gen_text': gen_text,
                'remove_silence': remove_silence,
                'cross_fade_duration': cross_fade_duration,
                'nfe_step': nfe_step,
                'speed': speed,
                'cfg_strength': cfg_strength,
                'max_chars': max_chars,
                'seed': seed,
                'no_ref_audio': no_ref_audio
            },
            'outputs': {
                'output_audio': output_audio,
                'seed_output': seed_output,
                'spectrogram': output_spectrogram
            },
            'controls': {
                'generate_btn': generate_btn,
                'examples': examples
            }
        }
| def create_multispeech_tab(self): | |
| """สร้าง Multi Speech tab""" | |
| gr.Markdown(MULTISPEECH_EXAMPLE_TEXT) | |
| gr.Markdown("""อัปโหลดคลิปเสียงที่แตกต่างกันสำหรับแต่ละประเภทคำพูด โดยประเภทคำพูดแรกเป็นประเภทที่จำเป็นต้องมี คุณสามารถเพิ่มประเภทคำพูดเพิ่มเติมได้โดยคลิกปุ่ม "เพิ่มประเภทคำพูด".""") | |
| # Regular speech type (mandatory) | |
| with gr.Row() as regular_row: | |
| with gr.Column(): | |
| regular_name = gr.Textbox(value="ปกติ", label="ลักษณะอารมณ์/ชื่อผู้พูด") | |
| regular_insert = gr.Button("เพิ่มตัวกำกับ", variant="secondary") | |
| regular_audio = gr.Audio(label="เสียงต้นแบบ", type="filepath") | |
| regular_ref_text = gr.Textbox(label="ข้อความต้นฉบับ", lines=2) | |
| # Initialize lists | |
| speech_type_rows = [regular_row] | |
| speech_type_names = [regular_name] | |
| speech_type_audios = [regular_audio] | |
| speech_type_ref_texts = [regular_ref_text] | |
| speech_type_delete_btns = [None] | |
| speech_type_insert_btns = [regular_insert] | |
| # Additional speech types | |
| for i in range(MAX_SPEECH_TYPES - 1): | |
| with gr.Row(visible=False) as row: | |
| with gr.Column(): | |
| name_input = gr.Textbox(label="ลักษณะอารมณ์/ชื่อผู้พูด") | |
| delete_btn = gr.Button("ลบ", variant="secondary") | |
| insert_btn = gr.Button("เพิ่มตัวกำกับ", variant="secondary") | |
| audio_input = gr.Audio(label="เสียงตัวอย่าง", type="filepath") | |
| ref_text_input = gr.Textbox(label="ข้อความต้นฉบับ", lines=2) | |
| speech_type_rows.append(row) | |
| speech_type_names.append(name_input) | |
| speech_type_audios.append(audio_input) | |
| speech_type_ref_texts.append(ref_text_input) | |
| speech_type_delete_btns.append(delete_btn) | |
| speech_type_insert_btns.append(insert_btn) | |
| add_speech_type_btn = gr.Button("เพิ่มประเภทคำพูด", variant="secondary") | |
| # Text input | |
| gen_text_input_multistyle = gr.Textbox( | |
| label="ข้อความ", | |
| lines=10, | |
| placeholder=MULTISPEECH_PLACEHOLDER, | |
| ) | |
| # Settings | |
| with gr.Accordion("ตั้งค่า", open=False): | |
| remove_silence_multistyle = gr.Checkbox(label="Remove Silences", value=True) | |
| ms_cross_fade_duration = gr.Slider(label="Cross Fade Duration", value="0.15", minimum=0, maximum=1, step=0.05) | |
| ms_nfe_step = gr.Slider(label="NFE Step", value=32, minimum=16, maximum=64, step=8, info="ยิ่งค่ามากยิ่งมีคุณภาพสูง แต่จะช้าลง") | |
| # Generate button | |
| generate_multistyle_btn = gr.Button("สร้าง", variant="primary") | |
| # Output | |
| audio_output_multistyle = gr.Audio(label="เสียงที่สร้าง") | |
| download_btn_multistyle = gr.DownloadButton(label="ดาวน์โหลด", value=None, variant="secondary") | |
| # State components | |
| segments_state = gr.State([]) | |
| sr_state = gr.State(24000) | |
| # Segment editing components | |
| with gr.Accordion("ปรับแต่ง", open=False): | |
| segment_players = [] | |
| segment_text_inputs = [] | |
| segment_silence_inputs = [] | |
| segment_regen_btns = [] | |
| for i in range(MAX_SEGMENTS): | |
| player = gr.Audio(label=f"เสียง Segment {i+1}", visible=False) | |
| text_input = gr.Textbox(label=f"แก้ไขข้อความ Segment {i+1}", visible=False) | |
| silence_input = gr.Number(label=f"เพิ่มเสียงเงียบ (ms) Segment {i+1}", value=0, visible=False) | |
| regen_btn = gr.Button(f"Regenerate Segment {i+1}", visible=False, variant="secondary") | |
| segment_players.append(player) | |
| segment_text_inputs.append(text_input) | |
| segment_silence_inputs.append(silence_input) | |
| segment_regen_btns.append(regen_btn) | |
| update_silence_btn = gr.Button("อัปเดต Silence ทั้งหมด", variant="secondary") | |
| # Store components for access | |
| self.speech_type_rows = speech_type_rows | |
| self.speech_type_names = speech_type_names | |
| self.speech_type_audios = speech_type_audios | |
| self.speech_type_ref_texts = speech_type_ref_texts | |
| self.speech_type_delete_btns = speech_type_delete_btns | |
| self.speech_type_insert_btns = speech_type_insert_btns | |
| return { | |
| 'inputs': { | |
| 'gen_text': gen_text_input_multistyle, | |
| 'cross_fade_duration': ms_cross_fade_duration, | |
| 'nfe_step': ms_nfe_step, | |
| 'remove_silence': remove_silence_multistyle, | |
| 'speech_type_names': speech_type_names, | |
| 'speech_type_audios': speech_type_audios, | |
| 'speech_type_ref_texts': speech_type_ref_texts, | |
| 'segment_silence_inputs': segment_silence_inputs, | |
| 'segment_text_inputs': segment_text_inputs | |
| }, | |
| 'outputs': { | |
| 'audio_output': audio_output_multistyle, | |
| 'download_btn': download_btn_multistyle, | |
| 'segment_players': segment_players, | |
| 'segment_text_inputs': segment_text_inputs, | |
| 'segment_silence_inputs': segment_silence_inputs, | |
| 'segment_regen_btns': segment_regen_btns | |
| }, | |
| 'controls': { | |
| 'add_speech_type_btn': add_speech_type_btn, | |
| 'generate_btn': generate_multistyle_btn, | |
| 'update_silence_btn': update_silence_btn, | |
| 'speech_type_rows': speech_type_rows | |
| }, | |
| 'state': { | |
| 'segments_state': segments_state, | |
| 'sr_state': sr_state | |
| } | |
| } | |
    def create_stt_tab(self):
        """Build the Speech to Text tab.

        Lays out an audio input, a "translate" checkbox, the transcribe button,
        a settings accordion (model, compute type, source and target language)
        and a textbox for the resulting text.

        NOTE(review): this block was recovered from a copy that had lost its
        indentation. The nesting below (inputs and settings in the first
        column, the result textbox in the second) is a reconstruction --
        confirm against the rendered layout.

        Returns:
            A dict with three sub-dicts of Gradio components:
            ``'inputs'``, ``'outputs'`` and ``'controls'``.
        """
        gr.Markdown("เปลี่ยนเสียงพูดเป็นข้อความด้วย โมเดล [Whisper](https://github.com/openai/whisper) โดยใช้ [faster-whisper](https://github.com/SYSTRAN/faster-whisper)")
        with gr.Row():
            # Left column: audio input, translate toggle, action button, settings.
            with gr.Column():
                ref_audio_input = gr.Audio(label="เสียงต้นฉบับ", type="filepath")
                is_translate = gr.Checkbox(label="แปลภาษา")
                generate_btn_stt = gr.Button("ถอดข้อความ", variant="primary")
                # Choices come from the imported WHISPER_* config values;
                # the initial selections are hard-coded here.
                with gr.Accordion(label="ตั้งค่า", open=False):
                    model_wp = gr.Dropdown(
                        label="Model",
                        choices=WHISPER_MODELS,
                        value="large-v2"
                    )
                    compute_type = gr.Dropdown(
                        label="Compute Type",
                        choices=WHISPER_COMPUTE_TYPES,
                        value="float16"
                    )
                    source_lg = gr.Dropdown(
                        label="ภาษาต้นฉบับ",
                        choices=WHISPER_LANGUAGES["source"],
                        value="Auto"
                    )
                    target_lg = gr.Dropdown(
                        label="ภาษาที่แปล",
                        choices=WHISPER_LANGUAGES["target"],
                        value="th"
                    )
            # Right column: the text result, with a copy button.
            with gr.Column():
                output_ref_text = gr.Textbox(
                    label="ข้อความต้นฉบับ",
                    lines=3,
                    show_copy_button=True
                )
        return {
            'inputs': {
                'ref_audio_input': ref_audio_input,
                'is_translate': is_translate,
                'model_wp': model_wp,
                'compute_type': compute_type,
                'source_lg': source_lg,
                'target_lg': target_lg
            },
            'outputs': {
                'output_ref_text': output_ref_text
            },
            'controls': {
                'generate_btn_stt': generate_btn_stt
            }
        }
| def add_speech_type_fn(self): | |
| """เพิ่ม speech type""" | |
| row_updates = [gr.update() for _ in range(MAX_SPEECH_TYPES)] | |
| if self.speech_type_count < MAX_SPEECH_TYPES: | |
| row_updates[self.speech_type_count] = gr.update(visible=True) | |
| self.speech_type_count += 1 | |
| else: | |
| gr.Warning("ครบจำนวนสูงสุดของประเภทคำพูดแล้ว กรุณาเริ่มแอปใหม่") | |
| return row_updates | |
# NOTE(review): defined without ``self`` and with no decorator visible, so it
# is treated as a module-level function -- confirm its placement.
def delete_speech_type_fn():
    """Return the updates for removing a speech type.

    Returns:
        A 4-tuple: a ``gr.update`` that hides a component, followed by three
        ``None`` values (presumably clearing the row's name, audio and
        reference text -- verify against the event wiring).
    """
    hide_update = gr.update(visible=False)
    return (hide_update, None, None, None)
# NOTE(review): defined without ``self`` and with no decorator visible, so it
# is treated as a module-level function -- confirm its placement.
def make_insert_speech_type_fn(index):
    """Create a callback that appends a ``{speech type}`` marker to a text.

    Args:
        index: Accepted for the caller's convenience; the returned callback
            does not use it.

    Returns:
        A function ``(current_text, speech_type_name) -> str`` that returns
        ``current_text`` followed by ``{speech_type_name}`` and one trailing
        space. A falsy ``current_text`` is treated as ``""`` and a falsy
        ``speech_type_name`` as the literal ``"None"``.
    """
    def insert_speech_type_fn(current_text, speech_type_name):
        label = speech_type_name if speech_type_name else "None"
        existing = current_text if current_text else ""
        # Doubled braces emit literal "{" and "}" around the label.
        marker = "{{{}}} ".format(label)
        return existing + marker

    return insert_speech_type_fn