# F5-TTS-THAI / src/f5_tts/f5_tts_webui.py
# Uploaded by pythonlearnreal with huggingface_hub ("Upload folder using huggingface_hub")
# Commit 106478e (verified)
"""
F5-TTS Thai WebUI - Refactored Version
เวอร์ชันที่ปรับปรุงโครงสร้างใหม่ให้มีระเบียบและง่ายต่อการดูแลรักษา
"""
import argparse
import sys
import os
import gradio as gr
# Put the parent of this file's directory at the front of sys.path (only
# once) so the absolute ``f5_tts.*`` imports below resolve when this file is
# executed directly as a script.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_dir = os.path.dirname(current_dir)
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)
from f5_tts.model_manager import ModelManager
from f5_tts.tts_processor import TTSProcessor, SpeechToTextProcessor
from f5_tts.multi_speech_processor import MultiSpeechProcessor
from f5_tts.ui_components import UIComponents
from f5_tts.config import MAX_SPEECH_TYPES
class F5TTSWebUI:
    """Main Web UI application for F5-TTS Thai.

    Holds the model manager, the TTS / STT / multi-speech processors and the
    UI component factory, and connects their callbacks to Gradio events.
    """

    def __init__(self):
        """Instantiate the model manager, the processors and the UI factory."""
        manager = ModelManager()
        self.model_manager = manager
        self.tts_processor = TTSProcessor(manager)
        self.stt_processor = SpeechToTextProcessor()
        self.multi_speech_processor = MultiSpeechProcessor(manager)
        self.ui_components = UIComponents()

    def create_gradio_interface(self):
        """Build the Gradio interface.

        Returns:
            The ``gr.Blocks`` application object created here.
        """
        with gr.Blocks(title="F5-TTS ไทย", theme=gr.themes.Ocean()) as demo:
            gr.Markdown("# F5-TTS ภาษาไทย")
            gr.Markdown("สร้างคำพูดจากข้อความ ด้วย Zero-shot TTS หรือ เสียงต้นฉบับ ภาษาไทย.")

            # Model selection section, followed by its event wiring.
            selection_widgets = self.ui_components.create_model_selection_section()
            model_select, model_custom, model_status, load_custom_btn = selection_widgets
            self._setup_model_selection_events(
                model_select, model_custom, model_status, load_custom_btn
            )

            # Tabs. Only "Multi Speech" is enabled; the other two tabs are
            # kept here, commented out, exactly as they were wired before.
            # with gr.Tab(label="Text To Speech"):
            #     self._create_tts_tab()
            with gr.Tab(label="Multi Speech"):
                self._create_multispeech_tab()
            # with gr.Tab(label="Speech to Text"):
            #     self._create_stt_tab()

        return demo

    def _setup_model_selection_events(self, model_select, model_custom, model_status, load_custom_btn):
        """Register the events of the model selection section.

        Args:
            model_select: Component whose ``change`` event is observed; also
                the first input of the load callback.
            model_custom: Output of the visibility callback and second input
                of the load callback.
            model_status: Output of the load callback.
            load_custom_btn: Component whose ``click`` event loads the model.
        """
        manager = self.model_manager

        # Selection changed -> update the custom-model component.
        model_select.change(
            fn=manager.update_custom_model_visibility,
            inputs=model_select,
            outputs=model_custom,
        )

        # Load button pressed -> load the chosen model and report the status.
        load_custom_btn.click(
            fn=manager.load_model_by_choice,
            inputs=[model_select, model_custom],
            outputs=model_status,
        )

    def _create_tts_tab(self):
        """Create the Text To Speech tab and wire its generate button."""
        infer = self.tts_processor.infer_tts
        tts_components = self.ui_components.create_tts_tab(infer)

        tts_inputs = tts_components['inputs']
        tts_outputs = tts_components['outputs']

        # Order matters: it is the positional argument order of ``infer_tts``.
        input_keys = (
            'ref_audio',
            'ref_text',
            'gen_text',
            'remove_silence',
            'cross_fade_duration',
            'nfe_step',
            'speed',
            'cfg_strength',
            'max_chars',
            'seed',
            'no_ref_audio',
        )
        click_inputs = [tts_inputs[key] for key in input_keys]

        # The third output is the reference-text *input* component, so the
        # callback can write a value back into it.
        click_outputs = [
            tts_outputs['output_audio'],
            tts_outputs['spectrogram'],
            tts_inputs['ref_text'],
            tts_outputs['seed_output'],
        ]

        tts_components['controls']['generate_btn'].click(
            fn=infer,
            inputs=click_inputs,
            outputs=click_outputs,
        )

    def _create_multispeech_tab(self):
        """Create the Multi Speech tab and register all of its events."""
        ms_components = self.ui_components.create_multispeech_tab()

        # Speech type management, then generation, then segment editing.
        self._setup_speech_type_events(ms_components)
        self._setup_multispeech_generation(ms_components)
        self._setup_segment_editing(ms_components)

    def _setup_speech_type_events(self, ms_components):
        """Register the events used to manage speech types.

        Args:
            ms_components: Nested dict returned by
                ``UIComponents.create_multispeech_tab`` with the ``inputs``
                and ``controls`` groups read here.
        """
        ui = self.ui_components
        controls = ms_components['controls']
        gen_text = ms_components['inputs']['gen_text']

        # "Add speech type" button.
        controls['add_speech_type_btn'].click(
            fn=ui.add_speech_type_fn,
            outputs=controls['speech_type_rows'],
        )

        # "Delete" buttons: index 0 is never wired, and empty slots are skipped.
        for i, delete_btn in enumerate(ui.speech_type_delete_btns):
            if i == 0 or delete_btn is None:
                continue
            delete_btn.click(
                fn=ui.delete_speech_type_fn,
                outputs=[
                    ui.speech_type_rows[i],
                    ui.speech_type_names[i],
                    ui.speech_type_audios[i],
                    ui.speech_type_ref_texts[i],
                ],
            )

        # "Insert" buttons: each one gets its own callback built for its index.
        for i, insert_btn in enumerate(ui.speech_type_insert_btns):
            insert_btn.click(
                fn=ui.make_insert_speech_type_fn(i),
                inputs=[gen_text, ui.speech_type_names[i]],
                outputs=gen_text,
            )

        # Re-validate whenever the text changes; the result targets the
        # generate button.
        gen_text.change(
            fn=self.multi_speech_processor.validate_speech_types,
            inputs=[gen_text, *ms_components['inputs']['speech_type_names']],
            outputs=controls['generate_btn'],
        )

    def _setup_multispeech_generation(self, ms_components):
        """Wire the generate button of the Multi Speech tab.

        Args:
            ms_components: Nested dict with the ``inputs``, ``outputs``,
                ``state`` and ``controls`` groups read here.
        """
        ms_inputs = ms_components['inputs']
        ms_outputs = ms_components['outputs']
        ms_state = ms_components['state']

        # Three fixed inputs, then the flattened speech-type fields, the
        # remove-silence flag and the per-segment silence inputs. This is the
        # layout ``_wrap_multispeech_generation`` slices apart again.
        generation_inputs = [
            ms_inputs['gen_text'],
            ms_inputs['cross_fade_duration'],
            ms_inputs['nfe_step'],
            *ms_inputs['speech_type_names'],
            *ms_inputs['speech_type_audios'],
            *ms_inputs['speech_type_ref_texts'],
            ms_inputs['remove_silence'],
            *ms_inputs['segment_silence_inputs'],
        ]

        # Combined audio and download first, then the per-segment widgets,
        # then the two state holders.
        generation_outputs = [
            ms_outputs['audio_output'],
            ms_outputs['download_btn'],
            *ms_outputs['segment_players'],
            *ms_outputs['segment_text_inputs'],
            *ms_outputs['segment_silence_inputs'],
            *ms_outputs['segment_regen_btns'],
            ms_state['segments_state'],
            ms_state['sr_state'],
        ]

        ms_components['controls']['generate_btn'].click(
            fn=self._wrap_multispeech_generation,
            inputs=generation_inputs,
            outputs=generation_outputs,
        )

    def _wrap_multispeech_generation(self, gen_text, cross_fade_duration, nfe_step, *args):
        """Split the flat Gradio argument list and run multi-style generation.

        Args:
            gen_text: Value of the generation text input.
            cross_fade_duration: Value of the cross-fade duration input.
            nfe_step: Value of the NFE step input.
            *args: The first ``MAX_SPEECH_TYPES * 3`` values are the
                speech-type fields, the next value is the remove-silence
                flag, and everything after it is the silence inputs.

        Returns:
            Whatever ``generate_multistyle_speech`` returns.
        """
        boundary = MAX_SPEECH_TYPES * 3
        speech_types_data = args[:boundary]
        remove_silence = args[boundary]
        silence_inputs = args[boundary + 1:]

        generate = self.multi_speech_processor.generate_multistyle_speech
        return generate(
            gen_text,
            cross_fade_duration,
            nfe_step,
            speech_types_data,
            remove_silence,
            silence_inputs,
        )

    @staticmethod
    def _segment_editing_outputs(ms_components):
        """Return a fresh output list shared by the segment-editing events."""
        ms_outputs = ms_components['outputs']
        ms_state = ms_components['state']
        return [
            *ms_outputs['segment_players'],
            *ms_outputs['segment_text_inputs'],
            *ms_outputs['segment_silence_inputs'],
            *ms_outputs['segment_regen_btns'],
            ms_outputs['audio_output'],
            ms_outputs['download_btn'],
            ms_state['segments_state'],
            ms_state['sr_state'],
        ]

    def _setup_segment_editing(self, ms_components):
        """Register the events used to edit generated segments.

        Args:
            ms_components: Nested dict with the ``inputs``, ``outputs``,
                ``state`` and ``controls`` groups read here.
        """
        ms_inputs = ms_components['inputs']
        ms_outputs = ms_components['outputs']
        ms_state = ms_components['state']

        # "Update silence" button: all silence inputs plus the two states.
        ms_components['controls']['update_silence_btn'].click(
            fn=self.multi_speech_processor.update_silence_all,
            inputs=[
                *ms_inputs['segment_silence_inputs'],
                ms_state['segments_state'],
                ms_state['sr_state'],
            ],
            outputs=self._segment_editing_outputs(ms_components),
        )

        # One "regenerate" button per segment slot. The slot index is passed
        # to the callback through a constant ``gr.State``.
        for i, regen_btn in enumerate(ms_outputs['segment_regen_btns']):
            regen_btn.click(
                fn=self._wrap_regenerate_segment,
                inputs=[
                    gr.State(i),
                    ms_outputs['segment_text_inputs'][i],
                    ms_outputs['segment_silence_inputs'][i],
                    ms_state['segments_state'],
                    ms_inputs['cross_fade_duration'],
                    ms_inputs['nfe_step'],
                ],
                outputs=self._segment_editing_outputs(ms_components),
            )

    def _wrap_regenerate_segment(self, idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step):
        """Forward a regenerate request to the multi-speech processor.

        All arguments are passed through positionally and unchanged.

        Returns:
            Whatever ``regenerate_segment`` returns.
        """
        regenerate = self.multi_speech_processor.regenerate_segment
        return regenerate(
            idx, new_text, silence_ms, segments, cross_fade_duration, nfe_step
        )

    def _create_stt_tab(self):
        """Create the Speech to Text tab and wire its generate button."""
        stt_components = self.ui_components.create_stt_tab()
        stt_inputs = stt_components['inputs']

        # Order matters: it is the positional argument order of
        # ``transcribe_text``.
        input_keys = (
            'ref_audio_input',
            'is_translate',
            'model_wp',
            'compute_type',
            'target_lg',
            'source_lg',
        )

        stt_components['controls']['generate_btn_stt'].click(
            fn=self.stt_processor.transcribe_text,
            inputs=[stt_inputs[key] for key in input_keys],
            outputs=stt_components['outputs']['output_ref_text'],
        )
def main(argv=None):
    """Run the F5-TTS Thai WebUI from the command line.

    Parses the command-line options, builds the Gradio interface and launches
    it. Any ``Exception`` raised along the way is printed together with its
    traceback and reported through the return value, so the process no longer
    exits with status 0 after a failed start.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``None`` (the default) makes argparse read ``sys.argv``.

    Returns:
        0 when ``demo.launch`` returns normally, 1 when an exception was
        caught.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments;
            it is not an ``Exception`` subclass and is deliberately not
            caught here.
    """
    try:
        parser = argparse.ArgumentParser(description="F5-TTS Thai WebUI - Refactored")
        parser.add_argument("--share", action="store_true", help="Share the app")
        args = parser.parse_args(argv)

        # "Starting F5-TTS Thai WebUI..."
        print("กำลังเริ่มต้น F5-TTS Thai WebUI...")
        app = F5TTSWebUI()
        demo = app.create_gradio_interface()

        # "WebUI is ready!"
        print("WebUI พร้อมใช้งาน!")
        demo.launch(inbrowser=True, share=args.share)
    except Exception as e:
        # Top-level boundary: report the error ("An error occurred: ...")
        # with its traceback, then signal the failure to the caller.
        print(f"เกิดข้อผิดพลาด: {e}")
        import traceback
        traceback.print_exc()
        return 1
    return 0


if __name__ == "__main__":
    # Propagate the result as the process exit status.
    sys.exit(main())