File size: 12,898 Bytes
106478e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
"""

F5-TTS Thai WebUI - Refactored Version

เวอร์ชันที่ปรับปรุงโครงสร้างใหม่ให้มีระเบียบและง่ายต่อการดูแลรักษา

"""

import argparse
import sys
import os
import gradio as gr

# Make the parent of this file's directory importable: it is inserted at the
# front of sys.path unless it is already listed there. This sits ahead of the
# `f5_tts.*` imports below, presumably so that they resolve when this file is
# run directly as a script (TODO confirm against the package layout).
current_dir = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
src_dir = os.path.dirname(current_dir)  # parent of that directory
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

from f5_tts.model_manager import ModelManager
from f5_tts.tts_processor import TTSProcessor, SpeechToTextProcessor
from f5_tts.multi_speech_processor import MultiSpeechProcessor
from f5_tts.ui_components import UIComponents
from f5_tts.config import MAX_SPEECH_TYPES


class F5TTSWebUI:
    """Main web UI application for F5-TTS Thai.

    Holds the model manager, the processors that use it, and the UI component
    factory, and wires their callbacks into a single Gradio Blocks app.
    """

    def __init__(self):
        """Create the model manager, the processors and the UI component factory."""
        manager = ModelManager()
        self.model_manager = manager
        self.tts_processor = TTSProcessor(manager)
        self.stt_processor = SpeechToTextProcessor()
        self.multi_speech_processor = MultiSpeechProcessor(manager)
        self.ui_components = UIComponents()

    def create_gradio_interface(self):
        """Build the Gradio Blocks app and return it.

        Only the "Multi Speech" tab is mounted. The builders for the
        "Text To Speech" and "Speech to Text" tabs still exist on this class,
        but the code that mounts them is commented out below.
        """
        with gr.Blocks(title="F5-TTS ไทย", theme=gr.themes.Ocean()) as demo:
            gr.Markdown("# F5-TTS ภาษาไทย")
            gr.Markdown("สร้างคำพูดจากข้อความ ด้วย Zero-shot TTS หรือ เสียงต้นฉบับ ภาษาไทย.")

            # Model selection section, followed by its event wiring.
            select, custom, status, load_btn = (
                self.ui_components.create_model_selection_section()
            )
            self._setup_model_selection_events(select, custom, status, load_btn)

            # Disabled tab:
            # with gr.Tab(label="Text To Speech"):
            #     self._create_tts_tab()

            with gr.Tab(label="Multi Speech"):
                self._create_multispeech_tab()

            # Disabled tab:
            # with gr.Tab(label="Speech to Text"):
            #     self._create_stt_tab()

        return demo

    def _setup_model_selection_events(self, model_select, model_custom, model_status, load_custom_btn):
        """Attach the model-selection callbacks.

        Args:
            model_select: component whose ``change`` event triggers
                ``ModelManager.update_custom_model_visibility``.
            model_custom: component updated by that ``change`` callback and
                passed, with ``model_select``, to the load callback.
            model_status: component that receives the result of
                ``ModelManager.load_model_by_choice``.
            load_custom_btn: button whose ``click`` event triggers the load.
        """
        manager = self.model_manager

        # React to a change of the selected model.
        on_choice_changed = manager.update_custom_model_visibility
        model_select.change(
            fn=on_choice_changed,
            inputs=model_select,
            outputs=model_custom,
        )

        # Load the chosen model when the button is pressed.
        load_selected = manager.load_model_by_choice
        load_custom_btn.click(
            fn=load_selected,
            inputs=[model_select, model_custom],
            outputs=model_status,
        )

    def _create_tts_tab(self):
        """Build the Text To Speech tab and wire its generate button."""
        infer = self.tts_processor.infer_tts
        tab = self.ui_components.create_tts_tab(infer)

        fields = tab['inputs']
        results = tab['outputs']

        # Order of the components handed to ``infer_tts`` on click.
        input_order = (
            'ref_audio',
            'ref_text',
            'gen_text',
            'remove_silence',
            'cross_fade_duration',
            'nfe_step',
            'speed',
            'cfg_strength',
            'max_chars',
            'seed',
            'no_ref_audio',
        )

        tab['controls']['generate_btn'].click(
            fn=infer,
            inputs=[fields[key] for key in input_order],
            outputs=[
                results['output_audio'],
                results['spectrogram'],
                # The reference-text input is also an output of the callback.
                fields['ref_text'],
                results['seed_output'],
            ],
        )

    def _create_multispeech_tab(self):
        """Build the Multi Speech tab and register all of its event handlers."""
        components = self.ui_components.create_multispeech_tab()

        # Speech type management, then generation, then segment editing.
        wiring_steps = (
            self._setup_speech_type_events,
            self._setup_multispeech_generation,
            self._setup_segment_editing,
        )
        for wire in wiring_steps:
            wire(components)

    def _setup_speech_type_events(self, ms_components):
        """Wire the add / delete / insert speech-type buttons and text validation.

        Args:
            ms_components: nested dict returned by
                ``UIComponents.create_multispeech_tab``; the ``'controls'`` and
                ``'inputs'`` groups are read here.
        """
        ui = self.ui_components
        controls = ms_components['controls']

        # "Add speech type" button.
        controls['add_speech_type_btn'].click(
            fn=ui.add_speech_type_fn,
            outputs=controls['speech_type_rows'],
        )

        # Delete buttons. Slot 0 is never wired (presumably the first speech
        # type cannot be deleted - confirm in UIComponents); empty slots are
        # skipped as well.
        for slot, delete_btn in enumerate(ui.speech_type_delete_btns):
            if slot == 0 or delete_btn is None:
                continue
            delete_btn.click(
                fn=ui.delete_speech_type_fn,
                outputs=[
                    ui.speech_type_rows[slot],
                    ui.speech_type_names[slot],
                    ui.speech_type_audios[slot],
                    ui.speech_type_ref_texts[slot],
                ],
            )

        inputs = ms_components['inputs']
        gen_text = inputs['gen_text']

        # Insert buttons: each gets its own callback built for its slot index.
        for slot, insert_btn in enumerate(ui.speech_type_insert_btns):
            insert_btn.click(
                fn=ui.make_insert_speech_type_fn(slot),
                inputs=[gen_text, ui.speech_type_names[slot]],
                outputs=gen_text,
            )

        # Re-validate whenever the text changes; the result goes to the
        # generate button.
        gen_text.change(
            fn=self.multi_speech_processor.validate_speech_types,
            inputs=[gen_text] + inputs['speech_type_names'],
            outputs=controls['generate_btn'],
        )

    def _setup_multispeech_generation(self, ms_components):
        """Wire the Multi Speech generate button.

        The flat input list is laid out as: text, cross-fade duration, NFE
        step, all speech-type names, all speech-type audios, all speech-type
        reference texts, the remove-silence component, then the per-segment
        silence inputs. ``_wrap_multispeech_generation`` splits it back apart.

        Args:
            ms_components: nested dict returned by
                ``UIComponents.create_multispeech_tab``.
        """
        inputs = ms_components['inputs']
        outputs = ms_components['outputs']
        state = ms_components['state']

        # Components passed to the generation callback.
        generation_inputs = (
            [inputs['gen_text'], inputs['cross_fade_duration'], inputs['nfe_step']]
            + inputs['speech_type_names']
            + inputs['speech_type_audios']
            + inputs['speech_type_ref_texts']
            + [inputs['remove_silence']]
            + inputs['segment_silence_inputs']
        )

        # Components updated from the callback's return value.
        generation_outputs = (
            [outputs['audio_output'], outputs['download_btn']]
            + outputs['segment_players']
            + outputs['segment_text_inputs']
            + outputs['segment_silence_inputs']
            + outputs['segment_regen_btns']
            + [state['segments_state'], state['sr_state']]
        )

        ms_components['controls']['generate_btn'].click(
            fn=self._wrap_multispeech_generation,
            inputs=generation_inputs,
            outputs=generation_outputs,
        )

    def _wrap_multispeech_generation(self, gen_text, cross_fade_duration, nfe_step, *args):
        """Split the flat callback arguments and run multi-style generation.

        Args:
            gen_text: first positional value from the generate click.
            cross_fade_duration: second positional value.
            nfe_step: third positional value.
            *args: the first ``MAX_SPEECH_TYPES * 3`` values are passed on as
                the speech-type data, the next one as ``remove_silence``, and
                everything after it as the silence inputs.

        Returns:
            Whatever ``MultiSpeechProcessor.generate_multistyle_speech`` returns.
        """
        boundary = MAX_SPEECH_TYPES * 3
        speech_types_data, remove_silence, silence_inputs = (
            args[:boundary],
            args[boundary],
            args[boundary + 1:],
        )

        generate = self.multi_speech_processor.generate_multistyle_speech
        return generate(
            gen_text,
            cross_fade_duration,
            nfe_step,
            speech_types_data,
            remove_silence,
            silence_inputs,
        )

    def _setup_segment_editing(self, ms_components):
        """Wire the "update silence" button and the per-segment regenerate buttons.

        Args:
            ms_components: nested dict returned by
                ``UIComponents.create_multispeech_tab``.
        """
        inputs = ms_components['inputs']
        outputs = ms_components['outputs']
        state = ms_components['state']

        def segment_outputs():
            """Return a new list of the components both kinds of handler update."""
            return (
                outputs['segment_players']
                + outputs['segment_text_inputs']
                + outputs['segment_silence_inputs']
                + outputs['segment_regen_btns']
                + [
                    outputs['audio_output'],
                    outputs['download_btn'],
                    state['segments_state'],
                    state['sr_state'],
                ]
            )

        # "Update silence" button.
        ms_components['controls']['update_silence_btn'].click(
            fn=self.multi_speech_processor.update_silence_all,
            inputs=inputs['segment_silence_inputs'] + [
                state['segments_state'],
                state['sr_state'],
            ],
            outputs=segment_outputs(),
        )

        # One regenerate button per segment; the segment's position is passed
        # to the callback through a dedicated gr.State component.
        for position, regen_btn in enumerate(outputs['segment_regen_btns']):
            regen_btn.click(
                fn=self._wrap_regenerate_segment,
                inputs=[
                    gr.State(position),
                    outputs['segment_text_inputs'][position],
                    outputs['segment_silence_inputs'][position],
                    state['segments_state'],
                    inputs['cross_fade_duration'],
                    inputs['nfe_step'],
                ],
                outputs=segment_outputs(),
            )

    def _wrap_regenerate_segment(self, idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step):
        """Forward all arguments unchanged to ``MultiSpeechProcessor.regenerate_segment``.

        Returns:
            Whatever ``regenerate_segment`` returns.
        """
        regenerate = self.multi_speech_processor.regenerate_segment
        return regenerate(idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step)

    def _create_stt_tab(self):
        """Build the Speech to Text tab and wire its generate button."""
        tab = self.ui_components.create_stt_tab()
        fields = tab['inputs']

        # Order of the components handed to ``transcribe_text`` on click.
        input_order = (
            'ref_audio_input',
            'is_translate',
            'model_wp',
            'compute_type',
            'target_lg',
            'source_lg',
        )

        tab['controls']['generate_btn_stt'].click(
            fn=self.stt_processor.transcribe_text,
            inputs=[fields[key] for key in input_order],
            outputs=tab['outputs']['output_ref_text'],
        )


def main():
    """Parse the command line, build the web UI and serve it.

    Command-line options:
        --share: forwarded to ``demo.launch(share=...)``.

    Any ``Exception`` raised while parsing arguments, building the app or
    launching it is reported on stdout together with its traceback.
    ``SystemExit`` raised by argparse (e.g. for ``--help`` or a bad option) is
    not an ``Exception`` and propagates unchanged.

    Returns:
        int: ``0`` when ``demo.launch`` returns normally, ``1`` when startup
        failed. The value is meant to be used as the process exit status so
        that shells and supervisors can detect a failed start.
    """
    try:
        parser = argparse.ArgumentParser(description="F5-TTS Thai WebUI - Refactored")
        parser.add_argument("--share", action="store_true", help="Share the app")
        args = parser.parse_args()

        print("กำลังเริ่มต้น F5-TTS Thai WebUI...")
        app = F5TTSWebUI()
        demo = app.create_gradio_interface()
        print("WebUI พร้อมใช้งาน!")
        demo.launch(inbrowser=True, share=args.share)
    except Exception as e:
        print(f"เกิดข้อผิดพลาด: {e}")
        import traceback
        traceback.print_exc()
        # Report the failure instead of letting the process exit with status 0.
        return 1
    return 0


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status.
    sys.exit(main())