"""F5-TTS Thai WebUI - refactored version.

A restructured version of the web UI, organized so that it is tidier and
easier to maintain.
"""

import argparse
import os
import sys
import traceback

import gradio as gr

# Add the src directory (the parent of this file's directory) to the Python
# path so that the f5_tts imports below resolve when run as a script.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_dir = os.path.dirname(current_dir)
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

from f5_tts.model_manager import ModelManager
from f5_tts.tts_processor import TTSProcessor, SpeechToTextProcessor
from f5_tts.multi_speech_processor import MultiSpeechProcessor
from f5_tts.ui_components import UIComponents
from f5_tts.config import MAX_SPEECH_TYPES


class F5TTSWebUI:
    """Main web UI application for F5-TTS Thai.

    Owns the model manager, the processors and the UI component factory, and
    wires the Gradio components to the processor callbacks.
    """

    def __init__(self):
        """Create the model manager, the processors and the UI factory."""
        self.model_manager = ModelManager()
        self.tts_processor = TTSProcessor(self.model_manager)
        self.stt_processor = SpeechToTextProcessor()
        self.multi_speech_processor = MultiSpeechProcessor(self.model_manager)
        self.ui_components = UIComponents()

    def create_gradio_interface(self):
        """Build the Gradio interface.

        Returns:
            The ``gr.Blocks`` app containing the model selection section and
            the "Multi Speech" tab.
        """
        with gr.Blocks(title="F5-TTS ไทย", theme=gr.themes.Ocean()) as demo:
            gr.Markdown("# F5-TTS ภาษาไทย")
            gr.Markdown("สร้างคำพูดจากข้อความ ด้วย Zero-shot TTS หรือ เสียงต้นฉบับ ภาษาไทย.")

            # Model selection section
            model_select, model_custom, model_status, load_custom_btn = (
                self.ui_components.create_model_selection_section()
            )

            # Setup model selection events
            self._setup_model_selection_events(
                model_select, model_custom, model_status, load_custom_btn
            )

            # Create tabs
            # NOTE: the "Text To Speech" and "Speech to Text" tabs are
            # currently disabled; their builder methods are still defined.
            #with gr.Tab(label="Text To Speech"):
            #    self._create_tts_tab()

            with gr.Tab(label="Multi Speech"):
                self._create_multispeech_tab()

            #with gr.Tab(label="Speech to Text"):
            #    self._create_stt_tab()

        return demo

    def _setup_model_selection_events(self, model_select, model_custom, model_status, load_custom_btn):
        """Wire the model selection events.

        Args:
            model_select: Component whose value selects the model.
            model_custom: Component for the custom model input; it is the
                output of the visibility callback.
            model_status: Component that receives the result of loading.
            load_custom_btn: Button that triggers loading the chosen model.
        """
        # Model selection change event
        model_select.change(
            fn=self.model_manager.update_custom_model_visibility,
            inputs=model_select,
            outputs=model_custom,
        )

        # Load custom model button
        load_custom_btn.click(
            fn=self.model_manager.load_model_by_choice,
            inputs=[model_select, model_custom],
            outputs=model_status,
        )

    def _create_tts_tab(self):
        """Create the Text To Speech tab and wire its generate button."""
        tts_components = self.ui_components.create_tts_tab(self.tts_processor.infer_tts)
        tts_inputs = tts_components['inputs']
        tts_outputs = tts_components['outputs']

        # Setup TTS generation
        tts_components['controls']['generate_btn'].click(
            fn=self.tts_processor.infer_tts,
            inputs=[
                tts_inputs['ref_audio'],
                tts_inputs['ref_text'],
                tts_inputs['gen_text'],
                tts_inputs['remove_silence'],
                tts_inputs['cross_fade_duration'],
                tts_inputs['nfe_step'],
                tts_inputs['speed'],
                tts_inputs['cfg_strength'],
                tts_inputs['max_chars'],
                tts_inputs['seed'],
                tts_inputs['no_ref_audio'],
            ],
            outputs=[
                tts_outputs['output_audio'],
                tts_outputs['spectrogram'],
                # The reference text input is also an output of generation.
                tts_inputs['ref_text'],
                tts_outputs['seed_output'],
            ],
        )

    def _create_multispeech_tab(self):
        """Create the Multi Speech tab and wire all of its events."""
        ms_components = self.ui_components.create_multispeech_tab()

        # Setup speech type management
        self._setup_speech_type_events(ms_components)

        # Setup multispeech generation
        self._setup_multispeech_generation(ms_components)

        # Setup segment editing
        self._setup_segment_editing(ms_components)

    def _setup_speech_type_events(self, ms_components):
        """Wire the events for speech type management.

        Args:
            ms_components: Nested dict of Multi Speech components, as returned
                by ``UIComponents.create_multispeech_tab``.
        """
        ui = self.ui_components
        gen_text = ms_components['inputs']['gen_text']

        # Add speech type button
        ms_components['controls']['add_speech_type_btn'].click(
            fn=ui.add_speech_type_fn,
            outputs=ms_components['controls']['speech_type_rows'],
        )

        # Delete speech type buttons. Iteration starts at index 1 and skips
        # None entries (presumably index 0 is the fixed default speech type
        # with no delete button - confirm against UIComponents).
        delete_btns = ui.speech_type_delete_btns
        for i in range(1, len(delete_btns)):
            delete_btn = delete_btns[i]
            if delete_btn is None:
                continue
            delete_btn.click(
                fn=ui.delete_speech_type_fn,
                outputs=[
                    ui.speech_type_rows[i],
                    ui.speech_type_names[i],
                    ui.speech_type_audios[i],
                    ui.speech_type_ref_texts[i],
                ],
            )

        # Insert speech type buttons: each gets its own callback built for
        # its index, and rewrites the generation text box.
        for i, insert_btn in enumerate(ui.speech_type_insert_btns):
            insert_fn = ui.make_insert_speech_type_fn(i)
            insert_btn.click(
                fn=insert_fn,
                inputs=[gen_text, ui.speech_type_names[i]],
                outputs=gen_text,
            )

        # Validation for generate button
        gen_text.change(
            fn=self.multi_speech_processor.validate_speech_types,
            inputs=[gen_text] + ms_components['inputs']['speech_type_names'],
            outputs=ms_components['controls']['generate_btn'],
        )

    def _setup_multispeech_generation(self, ms_components):
        """Wire the generate button of the Multi Speech tab.

        The input order must match what ``_wrap_multispeech_generation``
        unpacks: three leading scalars, then the names, audios and reference
        texts, then the remove-silence flag, then the per-segment silence
        inputs.

        Args:
            ms_components: Nested dict of Multi Speech components.
        """
        ms_inputs = ms_components['inputs']
        ms_outputs = ms_components['outputs']
        ms_state = ms_components['state']

        # Prepare inputs for generation
        generation_inputs = [
            ms_inputs['gen_text'],
            ms_inputs['cross_fade_duration'],
            ms_inputs['nfe_step'],
        ] + (
            ms_inputs['speech_type_names']
            + ms_inputs['speech_type_audios']
            + ms_inputs['speech_type_ref_texts']
            + [ms_inputs['remove_silence']]
            + ms_inputs['segment_silence_inputs']
        )

        # Prepare outputs for generation
        generation_outputs = [
            ms_outputs['audio_output'],
            ms_outputs['download_btn'],
        ] + (
            ms_outputs['segment_players']
            + ms_outputs['segment_text_inputs']
            + ms_outputs['segment_silence_inputs']
            + ms_outputs['segment_regen_btns']
            + [ms_state['segments_state'], ms_state['sr_state']]
        )

        # Generate button click
        ms_components['controls']['generate_btn'].click(
            fn=self._wrap_multispeech_generation,
            inputs=generation_inputs,
            outputs=generation_outputs,
        )

    def _wrap_multispeech_generation(self, gen_text, cross_fade_duration, nfe_step, *args):
        """Split the flat Gradio argument list and run multi-style generation.

        Args:
            gen_text: Value of the generation text input.
            cross_fade_duration: Value of the cross-fade duration input.
            nfe_step: Value of the NFE step input.
            *args: The first ``MAX_SPEECH_TYPES * 3`` values are the speech
                type data, the next value is the remove-silence flag, and the
                remainder are the segment silence inputs.

        Returns:
            Whatever ``generate_multistyle_speech`` returns.
        """
        speech_types_end = MAX_SPEECH_TYPES * 3
        speech_types_data = args[:speech_types_end]
        remove_silence = args[speech_types_end]
        silence_inputs = args[speech_types_end + 1:]

        return self.multi_speech_processor.generate_multistyle_speech(
            gen_text, cross_fade_duration, nfe_step,
            speech_types_data, remove_silence, silence_inputs
        )

    @staticmethod
    def _segment_editing_outputs(ms_components):
        """Return a fresh list of the outputs shared by the segment editing events.

        Order: segment players, segment text inputs, segment silence inputs,
        segment regenerate buttons, then the audio output, download button,
        segments state and sample-rate state.
        """
        ms_outputs = ms_components['outputs']
        ms_state = ms_components['state']
        return (
            ms_outputs['segment_players']
            + ms_outputs['segment_text_inputs']
            + ms_outputs['segment_silence_inputs']
            + ms_outputs['segment_regen_btns']
            + [
                ms_outputs['audio_output'],
                ms_outputs['download_btn'],
                ms_state['segments_state'],
                ms_state['sr_state'],
            ]
        )

    def _setup_segment_editing(self, ms_components):
        """Wire the segment editing events.

        Covers the "update silence" button and one regenerate button per
        segment. Both kinds of listener write to the same set of outputs.

        Args:
            ms_components: Nested dict of Multi Speech components.
        """
        ms_inputs = ms_components['inputs']
        ms_outputs = ms_components['outputs']
        ms_state = ms_components['state']

        # Update silence button
        ms_components['controls']['update_silence_btn'].click(
            fn=self.multi_speech_processor.update_silence_all,
            inputs=ms_inputs['segment_silence_inputs'] + [
                ms_state['segments_state'],
                ms_state['sr_state'],
            ],
            outputs=self._segment_editing_outputs(ms_components),
        )

        # Regenerate segment buttons
        for i, btn in enumerate(ms_outputs['segment_regen_btns']):
            btn.click(
                fn=self._wrap_regenerate_segment,
                inputs=[
                    # Constant state carrying this button's segment index.
                    gr.State(i),
                    ms_outputs['segment_text_inputs'][i],
                    ms_outputs['segment_silence_inputs'][i],
                    ms_state['segments_state'],
                    ms_inputs['cross_fade_duration'],
                    ms_inputs['nfe_step'],
                ],
                outputs=self._segment_editing_outputs(ms_components),
            )

    def _wrap_regenerate_segment(self, idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step):
        """Forward a regenerate request to the multi-speech processor.

        Args:
            idx: Index of the segment whose regenerate button was clicked.
            new_text: Value of that segment's text input.
            silence_ms: Value of that segment's silence input.
            segments: Value of the segments state.
            cross_fade_duration: Value of the cross-fade duration input.
            nfe_step: Value of the NFE step input.

        Returns:
            Whatever ``regenerate_segment`` returns.
        """
        return self.multi_speech_processor.regenerate_segment(
            idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step
        )

    def _create_stt_tab(self):
        """Create the Speech to Text tab and wire its generate button."""
        stt_components = self.ui_components.create_stt_tab()
        stt_inputs = stt_components['inputs']

        # Setup STT generation
        stt_components['controls']['generate_btn_stt'].click(
            fn=self.stt_processor.transcribe_text,
            inputs=[
                stt_inputs['ref_audio_input'],
                stt_inputs['is_translate'],
                stt_inputs['model_wp'],
                stt_inputs['compute_type'],
                stt_inputs['target_lg'],
                stt_inputs['source_lg'],
            ],
            outputs=stt_components['outputs']['output_ref_text'],
        )


def main():
    """Parse the command line, build the web UI and launch it.

    Returns:
        0 when ``demo.launch`` returns normally, 1 when an ``Exception`` was
        raised during start-up or launch (the error and its traceback are
        printed first).
    """
    try:
        parser = argparse.ArgumentParser(description="F5-TTS Thai WebUI - Refactored")
        parser.add_argument("--share", action="store_true", help="Share the app")
        args = parser.parse_args()

        print("กำลังเริ่มต้น F5-TTS Thai WebUI...")

        app = F5TTSWebUI()
        demo = app.create_gradio_interface()

        print("WebUI พร้อมใช้งาน!")
        demo.launch(inbrowser=True, share=args.share)
    except Exception as e:
        print(f"เกิดข้อผิดพลาด: {e}")
        traceback.print_exc()
        # Report the failure to the caller instead of exiting with status 0.
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())