File size: 2,498 Bytes
c441d2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from PySide6.QtCore import Signal, QThread

from ..Utils.Shared import context
from ..Internal import load_character, set_reference_audio
from ..Core.Inference import tts_client
from ..ModelManager import model_manager


class InferenceWorker(QThread):
    """执行推理任务的 Worker"""
    finished = Signal(bool, str, object)  # success, message, data

    def __init__(self, request_data: dict, mode: str):
        super().__init__()
        self.req: dict = request_data
        self.mode: str = mode

    def run(self) -> None:
        try:
            if self.mode == 'load_character':
                load_character(
                    character_name=self.req['character_name'],
                    onnx_model_dir=self.req['onnx_model_dir'],
                    language=self.req['language'],
                )
                self.finished.emit(True, "导入角色完成", None)

            elif self.mode == 'set_reference_audio':
                set_reference_audio(
                    character_name=self.req['character_name'],
                    audio_path=self.req['audio_path'],
                    audio_text=self.req['audio_text'],
                    language=self.req['language'],
                )
                self.finished.emit(True, "设置参考音频完成", None)

            elif self.mode == 'tts':
                gsv_model = model_manager.get(self.req['character_name'])
                tts_client.stop_event.clear()
                audio_chunk = tts_client.tts(
                    text=self.req['text'],
                    prompt_audio=context.current_prompt_audio,
                    encoder=gsv_model.T2S_ENCODER,
                    first_stage_decoder=gsv_model.T2S_FIRST_STAGE_DECODER,
                    stage_decoder=gsv_model.T2S_STAGE_DECODER,
                    vocoder=gsv_model.VITS,
                    prompt_encoder=gsv_model.PROMPT_ENCODER,
                    language=gsv_model.LANGUAGE,
                )
                audio_chunk = audio_chunk.squeeze()
                try:
                    return_data = {
                        "sample_rate": 32000,
                        "audio_list": [audio_chunk],
                    }
                    self.finished.emit(True, "推理完成", return_data)
                except Exception as e:
                    self.finished.emit(False, f"数据解析失败: {e}", None)

        except Exception as e:
            self.finished.emit(False, f"请求异常: {str(e)}", None)