Spaces:
Sleeping
Sleeping
| """ | |
| F5-TTS Thai WebUI - Refactored Version | |
| เวอร์ชันที่ปรับปรุงโครงสร้างใหม่ให้มีระเบียบและง่ายต่อการดูแลรักษา | |
| """ | |
| import argparse | |
| import sys | |
| import os | |
| import gradio as gr | |
# Put the parent of this file's directory at the front of the import search
# path (only if it is not already listed) before the project imports below.
current_dir = os.path.split(os.path.abspath(__file__))[0]
src_dir = os.path.split(current_dir)[0]
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)
| from f5_tts.model_manager import ModelManager | |
| from f5_tts.tts_processor import TTSProcessor, SpeechToTextProcessor | |
| from f5_tts.multi_speech_processor import MultiSpeechProcessor | |
| from f5_tts.ui_components import UIComponents | |
| from f5_tts.config import MAX_SPEECH_TYPES | |
class F5TTSWebUI:
    """Main Web UI application for F5-TTS Thai.

    Owns the model manager, the TTS / speech-to-text / multi-speech
    processors and the UI component factory, and wires Gradio events
    between the components and the processors.
    """

    def __init__(self):
        """Create the model manager, the processors and the UI factory."""
        self.model_manager = ModelManager()
        self.tts_processor = TTSProcessor(self.model_manager)
        self.stt_processor = SpeechToTextProcessor()
        self.multi_speech_processor = MultiSpeechProcessor(self.model_manager)
        self.ui_components = UIComponents()

    def create_gradio_interface(self):
        """Build the Gradio interface.

        Returns:
            The ``gr.Blocks`` application containing the model selection
            section and the "Multi Speech" tab.
        """
        with gr.Blocks(title="F5-TTS ไทย", theme=gr.themes.Ocean()) as demo:
            gr.Markdown("# F5-TTS ภาษาไทย")
            gr.Markdown("สร้างคำพูดจากข้อความ ด้วย Zero-shot TTS หรือ เสียงต้นฉบับ ภาษาไทย.")

            # Model selection section and its events.
            (
                model_select,
                model_custom,
                model_status,
                load_custom_btn,
            ) = self.ui_components.create_model_selection_section()
            self._setup_model_selection_events(
                model_select, model_custom, model_status, load_custom_btn
            )

            # NOTE: the "Text To Speech" and "Speech to Text" tabs are
            # currently disabled; `_create_tts_tab` and `_create_stt_tab`
            # are kept so the tabs can be re-added here.
            with gr.Tab(label="Multi Speech"):
                self._create_multispeech_tab()

        return demo

    def _setup_model_selection_events(self, model_select, model_custom, model_status, load_custom_btn):
        """Wire the model selection widgets to the model manager.

        Args:
            model_select: Component whose ``change`` event calls
                ``update_custom_model_visibility``; also an input of the
                load action.
            model_custom: Component updated by the visibility callback; also
                an input of the load action.
            model_status: Component receiving the result of
                ``load_model_by_choice``.
            load_custom_btn: Button whose click triggers the load action.
        """
        manager = self.model_manager

        # Changing the selection updates the custom-model component.
        model_select.change(
            fn=manager.update_custom_model_visibility,
            inputs=model_select,
            outputs=model_custom,
        )

        # Clicking the button loads the chosen model and reports the status.
        load_custom_btn.click(
            fn=manager.load_model_by_choice,
            inputs=[model_select, model_custom],
            outputs=model_status,
        )

    def _create_tts_tab(self):
        """Build the Text To Speech tab and wire its generate button.

        The reference-text component is both an input and an output of the
        inference call.
        """
        infer = self.tts_processor.infer_tts
        tts_components = self.ui_components.create_tts_tab(infer)
        inputs = tts_components['inputs']
        outputs = tts_components['outputs']

        # Order matters: Gradio passes the values positionally to `infer`.
        input_keys = (
            'ref_audio',
            'ref_text',
            'gen_text',
            'remove_silence',
            'cross_fade_duration',
            'nfe_step',
            'speed',
            'cfg_strength',
            'max_chars',
            'seed',
            'no_ref_audio',
        )

        tts_components['controls']['generate_btn'].click(
            fn=infer,
            inputs=[inputs[key] for key in input_keys],
            outputs=[
                outputs['output_audio'],
                outputs['spectrogram'],
                inputs['ref_text'],
                outputs['seed_output'],
            ],
        )

    def _create_multispeech_tab(self):
        """Build the Multi Speech tab and wire all of its events."""
        ms_components = self.ui_components.create_multispeech_tab()

        self._setup_speech_type_events(ms_components)      # speech type management
        self._setup_multispeech_generation(ms_components)  # generate button
        self._setup_segment_editing(ms_components)         # per-segment editing

    def _setup_speech_type_events(self, ms_components):
        """Wire the add / delete / insert speech-type buttons and validation.

        Args:
            ms_components: Nested dict returned by
                ``UIComponents.create_multispeech_tab``; this method reads
                its ``'controls'`` and ``'inputs'`` groups.
        """
        ui = self.ui_components
        controls = ms_components['controls']
        inputs = ms_components['inputs']
        gen_text = inputs['gen_text']

        # Add speech type button.
        controls['add_speech_type_btn'].click(
            fn=ui.add_speech_type_fn,
            outputs=controls['speech_type_rows'],
        )

        # Delete speech type buttons. Index 0 is never wired, and slots
        # holding ``None`` are skipped.
        for i, delete_btn in enumerate(ui.speech_type_delete_btns):
            if i == 0 or delete_btn is None:
                continue
            delete_btn.click(
                fn=ui.delete_speech_type_fn,
                outputs=[
                    ui.speech_type_rows[i],
                    ui.speech_type_names[i],
                    ui.speech_type_audios[i],
                    ui.speech_type_ref_texts[i],
                ],
            )

        # Insert speech type buttons: each gets its own callback built for
        # its index and writes back into the generation text box.
        for i, insert_btn in enumerate(ui.speech_type_insert_btns):
            insert_btn.click(
                fn=ui.make_insert_speech_type_fn(i),
                inputs=[gen_text, ui.speech_type_names[i]],
                outputs=gen_text,
            )

        # Re-validate whenever the generation text changes; the result is
        # applied to the generate button.
        gen_text.change(
            fn=self.multi_speech_processor.validate_speech_types,
            inputs=[gen_text] + inputs['speech_type_names'],
            outputs=controls['generate_btn'],
        )

    def _setup_multispeech_generation(self, ms_components):
        """Wire the generate button of the Multi Speech tab.

        The input order defined here is the positional layout that
        ``_wrap_multispeech_generation`` unpacks.

        Args:
            ms_components: Nested dict with ``'inputs'``, ``'outputs'``,
                ``'controls'`` and ``'state'`` groups.
        """
        inputs = ms_components['inputs']
        outputs = ms_components['outputs']
        state = ms_components['state']

        generation_inputs = (
            [inputs['gen_text'], inputs['cross_fade_duration'], inputs['nfe_step']]
            + inputs['speech_type_names']
            + inputs['speech_type_audios']
            + inputs['speech_type_ref_texts']
            + [inputs['remove_silence']]
            + inputs['segment_silence_inputs']
        )

        generation_outputs = (
            [outputs['audio_output'], outputs['download_btn']]
            + outputs['segment_players']
            + outputs['segment_text_inputs']
            + outputs['segment_silence_inputs']
            + outputs['segment_regen_btns']
            + [state['segments_state'], state['sr_state']]
        )

        ms_components['controls']['generate_btn'].click(
            fn=self._wrap_multispeech_generation,
            inputs=generation_inputs,
            outputs=generation_outputs,
        )

    def _wrap_multispeech_generation(self, gen_text, cross_fade_duration, nfe_step, *args):
        """Split the flat Gradio argument list and run multi-style generation.

        Args:
            gen_text: Text to synthesize.
            cross_fade_duration: Forwarded unchanged to the processor.
            nfe_step: Forwarded unchanged to the processor.
            *args: The first ``MAX_SPEECH_TYPES * 3`` values are the speech
                type data, the next value is the remove-silence flag, and
                everything after it is the per-segment silence inputs.

        Returns:
            Whatever ``generate_multistyle_speech`` returns.
        """
        split_at = MAX_SPEECH_TYPES * 3
        speech_types_data = args[:split_at]
        remove_silence = args[split_at]
        silence_inputs = args[split_at + 1:]

        return self.multi_speech_processor.generate_multistyle_speech(
            gen_text,
            cross_fade_duration,
            nfe_step,
            speech_types_data,
            remove_silence,
            silence_inputs,
        )

    @staticmethod
    def _segment_editing_outputs(ms_components):
        """Return a fresh output list shared by the segment editing events."""
        outputs = ms_components['outputs']
        state = ms_components['state']
        return (
            outputs['segment_players']
            + outputs['segment_text_inputs']
            + outputs['segment_silence_inputs']
            + outputs['segment_regen_btns']
            + [
                outputs['audio_output'],
                outputs['download_btn'],
                state['segments_state'],
                state['sr_state'],
            ]
        )

    def _setup_segment_editing(self, ms_components):
        """Wire the update-silence button and the per-segment regenerate buttons.

        Both kinds of event write to the same outputs: segment players,
        segment text inputs, segment silence inputs, regenerate buttons,
        the combined audio, the download button and the two state holders.

        Args:
            ms_components: Nested dict with ``'inputs'``, ``'outputs'``,
                ``'controls'`` and ``'state'`` groups.
        """
        inputs = ms_components['inputs']
        outputs = ms_components['outputs']
        state = ms_components['state']

        # Update silence button.
        ms_components['controls']['update_silence_btn'].click(
            fn=self.multi_speech_processor.update_silence_all,
            inputs=inputs['segment_silence_inputs'] + [
                state['segments_state'],
                state['sr_state'],
            ],
            outputs=self._segment_editing_outputs(ms_components),
        )

        # Regenerate segment buttons: the segment index is passed to the
        # callback through a dedicated ``gr.State`` holding ``i``.
        for i, regen_btn in enumerate(outputs['segment_regen_btns']):
            regen_btn.click(
                fn=self._wrap_regenerate_segment,
                inputs=[
                    gr.State(i),
                    outputs['segment_text_inputs'][i],
                    outputs['segment_silence_inputs'][i],
                    state['segments_state'],
                    inputs['cross_fade_duration'],
                    inputs['nfe_step'],
                ],
                outputs=self._segment_editing_outputs(ms_components),
            )

    def _wrap_regenerate_segment(self, idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step):
        """Forward a regenerate request for one segment to the processor.

        All arguments are passed through unchanged, in the same order, to
        ``regenerate_segment``; its return value is returned as is.
        """
        return self.multi_speech_processor.regenerate_segment(
            idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step
        )

    def _create_stt_tab(self):
        """Build the Speech to Text tab and wire its generate button."""
        stt_components = self.ui_components.create_stt_tab()
        inputs = stt_components['inputs']

        # Order matters: Gradio passes the values positionally to
        # `transcribe_text`.
        input_keys = (
            'ref_audio_input',
            'is_translate',
            'model_wp',
            'compute_type',
            'target_lg',
            'source_lg',
        )

        stt_components['controls']['generate_btn_stt'].click(
            fn=self.stt_processor.transcribe_text,
            inputs=[inputs[key] for key in input_keys],
            outputs=stt_components['outputs']['output_ref_text'],
        )
def main():
    """Parse the command line, build the web UI and launch it.

    Accepts a single ``--share`` flag, which is forwarded to
    ``demo.launch``. Any ``Exception`` raised while parsing, building or
    launching is reported with a message and its traceback instead of
    propagating to the caller.
    """
    try:
        cli = argparse.ArgumentParser(description="F5-TTS Thai WebUI - Refactored")
        cli.add_argument("--share", action="store_true", help="Share the app")
        options = cli.parse_args()

        print("กำลังเริ่มต้น F5-TTS Thai WebUI...")
        demo = F5TTSWebUI().create_gradio_interface()
        print("WebUI พร้อมใช้งาน!")

        demo.launch(inbrowser=True, share=options.share)
    except Exception as e:
        # Top-level boundary: report the failure with its full traceback.
        import traceback

        print(f"เกิดข้อผิดพลาด: {e}")
        traceback.print_exc()


# Start the application only when this file is executed as a script.
if __name__ == "__main__":
    main()