Spaces:
Running
Running
antigravity
commited on
Commit
·
c441d2c
1
Parent(s):
620bb7c
sync all fixes: prompt leakage, cross-lang, ref_cache update, and file wait logic
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +62 -9
- genie_tts/Audio/Audio.py +51 -51
- genie_tts/Audio/__pycache__/Audio.cpython-311.pyc +0 -0
- genie_tts/Audio/__pycache__/ReferenceAudio.cpython-311.pyc +0 -0
- genie_tts/Audio/__pycache__/__init__.cpython-311.pyc +0 -0
- genie_tts/Converter/Converter.py +11 -11
- genie_tts/Converter/__pycache__/Converter.cpython-311.pyc +0 -0
- genie_tts/Converter/__pycache__/__init__.cpython-311.pyc +0 -0
- genie_tts/Converter/__pycache__/load_state_dict.cpython-311.pyc +0 -0
- genie_tts/Converter/__pycache__/utils.cpython-311.pyc +0 -0
- genie_tts/Converter/load_state_dict.py +26 -26
- genie_tts/Converter/v2/Converter.py +146 -146
- genie_tts/Converter/v2/EncoderConverter.py +106 -106
- genie_tts/Converter/v2/T2SConverter.py +125 -125
- genie_tts/Converter/v2/VITSConverter.py +129 -129
- genie_tts/Converter/v2/__pycache__/Converter.cpython-311.pyc +0 -0
- genie_tts/Converter/v2/__pycache__/EncoderConverter.cpython-311.pyc +0 -0
- genie_tts/Converter/v2/__pycache__/T2SConverter.cpython-311.pyc +0 -0
- genie_tts/Converter/v2/__pycache__/VITSConverter.cpython-311.pyc +0 -0
- genie_tts/Converter/v2/__pycache__/__init__.cpython-311.pyc +0 -0
- genie_tts/Converter/v2ProPlus/Converter.py +89 -89
- genie_tts/Converter/v2ProPlus/PromptEncoderConverter.py +128 -128
- genie_tts/Converter/v2ProPlus/__pycache__/Converter.cpython-311.pyc +0 -0
- genie_tts/Converter/v2ProPlus/__pycache__/PromptEncoderConverter.cpython-311.pyc +0 -0
- genie_tts/Core/Resources.py +76 -76
- genie_tts/Core/__pycache__/Inference.cpython-311.pyc +0 -0
- genie_tts/Core/__pycache__/Resources.cpython-311.pyc +0 -0
- genie_tts/Core/__pycache__/TTSPlayer.cpython-311.pyc +0 -0
- genie_tts/Core/__pycache__/__init__.cpython-311.pyc +0 -0
- genie_tts/Data/v2/Keys/t2s_onnx_keys.txt +291 -291
- genie_tts/Data/v2/Keys/vits_onnx_keys.txt +668 -668
- genie_tts/Data/v2ProPlus/Keys/prompt_encoder_weights.txt +23 -23
- genie_tts/Data/v2ProPlus/Keys/vits_weights.txt +650 -650
- genie_tts/G2P/Chinese/CorrectPronunciation.py +50 -50
- genie_tts/G2P/Chinese/Erhua.py +49 -49
- genie_tts/G2P/Chinese/Normalization/__pycache__/__init__.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/Normalization/__pycache__/char_convert.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/Normalization/__pycache__/chronology.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/Normalization/__pycache__/constants.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/Normalization/__pycache__/num.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/Normalization/__pycache__/phonecode.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/Normalization/__pycache__/quantifier.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/Normalization/__pycache__/text_normlization.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/ToneSandhi.py +354 -354
- genie_tts/G2P/Chinese/__pycache__/ChineseG2P.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/__pycache__/CorrectPronunciation.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/__pycache__/Erhua.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/__pycache__/ToneSandhi.cpython-311.pyc +0 -0
- genie_tts/G2P/Chinese/__pycache__/__init__.cpython-311.pyc +0 -0
- genie_tts/G2P/English/EnglishG2P.py +296 -296
app.py
CHANGED
|
@@ -66,6 +66,30 @@ async def load_model(character_name: str = Form(...), model_path: str = Form(...
|
|
| 66 |
try:
|
| 67 |
print(f"📦 Loading character: {character_name} from {full_path}")
|
| 68 |
genie_tts.load_character(character_name, full_path, language)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
return {"status": "success", "message": f"Character '{character_name}' loaded."}
|
| 70 |
except Exception as e:
|
| 71 |
raise HTTPException(status_code=500, detail=str(e))
|
|
@@ -76,12 +100,21 @@ async def upload_and_tts(
|
|
| 76 |
prompt_text: str = Form(...),
|
| 77 |
text: str = Form(...),
|
| 78 |
language: str = Form("zh"),
|
|
|
|
| 79 |
file: UploadFile = File(...)
|
| 80 |
):
|
| 81 |
"""
|
| 82 |
上传临时参考音频并生成语音
|
| 83 |
"""
|
| 84 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
ts = int(time.time() * 1000)
|
| 86 |
save_path = f"/tmp/ref_{ts}.wav"
|
| 87 |
os.makedirs("/tmp", exist_ok=True)
|
|
@@ -89,23 +122,37 @@ async def upload_and_tts(
|
|
| 89 |
with open(save_path, "wb") as buffer:
|
| 90 |
shutil.copyfileobj(file.file, buffer)
|
| 91 |
|
| 92 |
-
print(f"🔥 [Custom] Using temp audio
|
| 93 |
genie_tts.set_reference_audio(character_name, save_path, prompt_text, language)
|
| 94 |
|
| 95 |
out_path = f"/tmp/out_{ts}.wav"
|
| 96 |
-
|
|
|
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
def iterfile():
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
return StreamingResponse(iterfile(), media_type="audio/wav")
|
| 107 |
except Exception as e:
|
| 108 |
-
print(f"❌ Error in upload/tts: {e}")
|
| 109 |
raise HTTPException(status_code=500, detail=str(e))
|
| 110 |
|
| 111 |
@app.post("/tts")
|
|
@@ -138,6 +185,12 @@ async def dynamic_tts(
|
|
| 138 |
out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
|
| 139 |
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang)
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
return StreamingResponse(open(out_path, "rb"), media_type="audio/wav")
|
| 142 |
except Exception as e:
|
| 143 |
print(f"❌ Error: {e}")
|
|
|
|
| 66 |
try:
|
| 67 |
print(f"📦 Loading character: {character_name} from {full_path}")
|
| 68 |
genie_tts.load_character(character_name, full_path, language)
|
| 69 |
+
|
| 70 |
+
# 自动探测参考音频配置
|
| 71 |
+
prompt_json_path = os.path.join(full_path, "prompt_wav.json")
|
| 72 |
+
ref_wav_path = os.path.join(full_path, "ref.wav")
|
| 73 |
+
|
| 74 |
+
if os.path.exists(prompt_json_path):
|
| 75 |
+
import json
|
| 76 |
+
with open(prompt_json_path, "r", encoding="utf-8") as f:
|
| 77 |
+
data = json.load(f)
|
| 78 |
+
config = data.get("default", {})
|
| 79 |
+
REF_CACHE[character_name] = {
|
| 80 |
+
"path": os.path.join(full_path, config.get("wav_path", "ref.wav")),
|
| 81 |
+
"text": config.get("prompt_text", ""),
|
| 82 |
+
"lang": config.get("prompt_lang", language)
|
| 83 |
+
}
|
| 84 |
+
print(f"📖 Loaded ref info from JSON for {character_name}")
|
| 85 |
+
elif os.path.exists(ref_wav_path):
|
| 86 |
+
REF_CACHE[character_name] = {
|
| 87 |
+
"path": ref_wav_path,
|
| 88 |
+
"text": "",
|
| 89 |
+
"lang": language
|
| 90 |
+
}
|
| 91 |
+
print(f"🎵 Found ref.wav for {character_name}")
|
| 92 |
+
|
| 93 |
return {"status": "success", "message": f"Character '{character_name}' loaded."}
|
| 94 |
except Exception as e:
|
| 95 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
| 100 |
prompt_text: str = Form(...),
|
| 101 |
text: str = Form(...),
|
| 102 |
language: str = Form("zh"),
|
| 103 |
+
text_lang: str = Form(None),
|
| 104 |
file: UploadFile = File(...)
|
| 105 |
):
|
| 106 |
"""
|
| 107 |
上传临时参考音频并生成语音
|
| 108 |
"""
|
| 109 |
try:
|
| 110 |
+
# 🟢 确保模型已加载
|
| 111 |
+
if not genie_tts.model_manager.get(character_name):
|
| 112 |
+
print(f"⚠️ Character {character_name} not loaded, trying to load...")
|
| 113 |
+
char_path = os.path.join(MODELS_ROOT, character_name.lower())
|
| 114 |
+
if not os.path.exists(char_path):
|
| 115 |
+
char_path = os.path.join(MODELS_ROOT, "mzm") # 兜底逻辑
|
| 116 |
+
genie_tts.load_character(character_name, char_path, language)
|
| 117 |
+
|
| 118 |
ts = int(time.time() * 1000)
|
| 119 |
save_path = f"/tmp/ref_{ts}.wav"
|
| 120 |
os.makedirs("/tmp", exist_ok=True)
|
|
|
|
| 122 |
with open(save_path, "wb") as buffer:
|
| 123 |
shutil.copyfileobj(file.file, buffer)
|
| 124 |
|
| 125 |
+
print(f"🔥 [Custom] Using temp audio: {save_path}")
|
| 126 |
genie_tts.set_reference_audio(character_name, save_path, prompt_text, language)
|
| 127 |
|
| 128 |
out_path = f"/tmp/out_{ts}.wav"
|
| 129 |
+
# 🟢 执行 TTS
|
| 130 |
+
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang)
|
| 131 |
|
| 132 |
+
# 🟢 关键:强制等待文件出现(最多等5秒)
|
| 133 |
+
wait_time = 0
|
| 134 |
+
while not os.path.exists(out_path) and wait_time < 50:
|
| 135 |
+
time.sleep(0.1)
|
| 136 |
+
wait_time += 1
|
| 137 |
+
|
| 138 |
+
if not os.path.exists(out_path):
|
| 139 |
+
raise HTTPException(status_code=500, detail="Audio file generation timed out or failed.")
|
| 140 |
+
|
| 141 |
def iterfile():
|
| 142 |
+
try:
|
| 143 |
+
with open(out_path, "rb") as f:
|
| 144 |
+
yield from f
|
| 145 |
+
finally:
|
| 146 |
+
# 给一点延迟确保读取完毕后再删除
|
| 147 |
+
time.sleep(1)
|
| 148 |
+
try:
|
| 149 |
+
if os.path.exists(save_path): os.remove(save_path)
|
| 150 |
+
if os.path.exists(out_path): os.remove(out_path)
|
| 151 |
+
except: pass
|
| 152 |
|
| 153 |
return StreamingResponse(iterfile(), media_type="audio/wav")
|
| 154 |
except Exception as e:
|
| 155 |
+
print(f"❌ Error in upload/tts: {str(e)}")
|
| 156 |
raise HTTPException(status_code=500, detail=str(e))
|
| 157 |
|
| 158 |
@app.post("/tts")
|
|
|
|
| 185 |
out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
|
| 186 |
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang)
|
| 187 |
|
| 188 |
+
# 🟢 同样增加文件等待
|
| 189 |
+
wait_time = 0
|
| 190 |
+
while not os.path.exists(out_path) and wait_time < 50:
|
| 191 |
+
time.sleep(0.1)
|
| 192 |
+
wait_time += 1
|
| 193 |
+
|
| 194 |
return StreamingResponse(open(out_path, "rb"), media_type="audio/wav")
|
| 195 |
except Exception as e:
|
| 196 |
print(f"❌ Error: {e}")
|
genie_tts/Audio/Audio.py
CHANGED
|
@@ -1,51 +1,51 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import soundfile as sf
|
| 3 |
-
import soxr
|
| 4 |
-
import numpy as np
|
| 5 |
-
import logging
|
| 6 |
-
from typing import Optional
|
| 7 |
-
|
| 8 |
-
logger = logging.getLogger(__name__)
|
| 9 |
-
|
| 10 |
-
# 音频时长建议范围 (秒)
|
| 11 |
-
MIN_DURATION_S = 3
|
| 12 |
-
MAX_DURATION_S = 10
|
| 13 |
-
# 在音频末尾追加的静音时长 (秒)
|
| 14 |
-
SILENCE_TO_APPEND_S = 0.3
|
| 15 |
-
# 模型期望的目标采样率
|
| 16 |
-
TARGET_SAMPLING_RATE = 16000
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def load_audio(
|
| 20 |
-
audio_path: str,
|
| 21 |
-
target_sampling_rate: int = TARGET_SAMPLING_RATE
|
| 22 |
-
) -> Optional[np.ndarray]:
|
| 23 |
-
try:
|
| 24 |
-
wav, original_sr = sf.read(audio_path, dtype='float32')
|
| 25 |
-
if wav.ndim > 1:
|
| 26 |
-
wav = np.mean(wav, axis=1) # 多声道转单声道。
|
| 27 |
-
if original_sr != target_sampling_rate:
|
| 28 |
-
wav = soxr.resample(wav, original_sr, target_sampling_rate, quality='hq') # 重采样。
|
| 29 |
-
|
| 30 |
-
except Exception as e:
|
| 31 |
-
logger.error(f"Failed to load reference audio: {audio_path}. Error: {e}")
|
| 32 |
-
return None
|
| 33 |
-
|
| 34 |
-
# 检查音频长度是否在建议范围之外
|
| 35 |
-
min_samples = int(MIN_DURATION_S * target_sampling_rate)
|
| 36 |
-
max_samples = int(MAX_DURATION_S * target_sampling_rate)
|
| 37 |
-
if not (min_samples <= wav.shape[0] <= max_samples):
|
| 38 |
-
duration = len(wav) / target_sampling_rate
|
| 39 |
-
logger.warning(
|
| 40 |
-
f"The reference audio '{os.path.basename(audio_path)}' has a duration of {duration:.2f} seconds, "
|
| 41 |
-
f"which is outside the recommended range of {MIN_DURATION_S} to {MAX_DURATION_S} seconds!"
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
# 创建并拼接静音
|
| 45 |
-
silence_samples = int(SILENCE_TO_APPEND_S * target_sampling_rate)
|
| 46 |
-
silence_array = np.zeros(silence_samples, dtype=np.float32)
|
| 47 |
-
wav_processed = np.concatenate([wav, silence_array])
|
| 48 |
-
|
| 49 |
-
# 为模型输入增加批次维度
|
| 50 |
-
# wav_processed = np.expand_dims(wav_processed, axis=0)
|
| 51 |
-
return wav_processed
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import soundfile as sf
|
| 3 |
+
import soxr
|
| 4 |
+
import numpy as np
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Optional
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
# 音频时长建议范围 (秒)
|
| 11 |
+
MIN_DURATION_S = 3
|
| 12 |
+
MAX_DURATION_S = 10
|
| 13 |
+
# 在音频末尾追加的静音时长 (秒)
|
| 14 |
+
SILENCE_TO_APPEND_S = 0.3
|
| 15 |
+
# 模型期望的目标采样率
|
| 16 |
+
TARGET_SAMPLING_RATE = 16000
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def load_audio(
|
| 20 |
+
audio_path: str,
|
| 21 |
+
target_sampling_rate: int = TARGET_SAMPLING_RATE
|
| 22 |
+
) -> Optional[np.ndarray]:
|
| 23 |
+
try:
|
| 24 |
+
wav, original_sr = sf.read(audio_path, dtype='float32')
|
| 25 |
+
if wav.ndim > 1:
|
| 26 |
+
wav = np.mean(wav, axis=1) # 多声道转单声道。
|
| 27 |
+
if original_sr != target_sampling_rate:
|
| 28 |
+
wav = soxr.resample(wav, original_sr, target_sampling_rate, quality='hq') # 重采样。
|
| 29 |
+
|
| 30 |
+
except Exception as e:
|
| 31 |
+
logger.error(f"Failed to load reference audio: {audio_path}. Error: {e}")
|
| 32 |
+
return None
|
| 33 |
+
|
| 34 |
+
# 检查音频长度是否在建议范围之外
|
| 35 |
+
min_samples = int(MIN_DURATION_S * target_sampling_rate)
|
| 36 |
+
max_samples = int(MAX_DURATION_S * target_sampling_rate)
|
| 37 |
+
if not (min_samples <= wav.shape[0] <= max_samples):
|
| 38 |
+
duration = len(wav) / target_sampling_rate
|
| 39 |
+
logger.warning(
|
| 40 |
+
f"The reference audio '{os.path.basename(audio_path)}' has a duration of {duration:.2f} seconds, "
|
| 41 |
+
f"which is outside the recommended range of {MIN_DURATION_S} to {MAX_DURATION_S} seconds!"
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# 创建并拼接静音
|
| 45 |
+
silence_samples = int(SILENCE_TO_APPEND_S * target_sampling_rate)
|
| 46 |
+
silence_array = np.zeros(silence_samples, dtype=np.float32)
|
| 47 |
+
wav_processed = np.concatenate([wav, silence_array])
|
| 48 |
+
|
| 49 |
+
# 为模型输入增加批次维度
|
| 50 |
+
# wav_processed = np.expand_dims(wav_processed, axis=0)
|
| 51 |
+
return wav_processed
|
genie_tts/Audio/__pycache__/Audio.cpython-311.pyc
ADDED
|
Binary file (2.61 kB). View file
|
|
|
genie_tts/Audio/__pycache__/ReferenceAudio.cpython-311.pyc
ADDED
|
Binary file (4.63 kB). View file
|
|
|
genie_tts/Audio/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (184 Bytes). View file
|
|
|
genie_tts/Converter/Converter.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
-
from .v2.Converter import convert as convert_v2
|
| 2 |
-
from .v2ProPlus.Converter import convert as convert_v2pp
|
| 3 |
-
|
| 4 |
-
import os
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
def convert(torch_ckpt_path: str, torch_pth_path: str, output_dir: str) -> None:
|
| 8 |
-
if os.path.getsize(torch_pth_path) > 150 * 1024 * 1024: # 大于 150 MB
|
| 9 |
-
convert_v2pp(torch_ckpt_path, torch_pth_path, output_dir)
|
| 10 |
-
else:
|
| 11 |
-
convert_v2(torch_ckpt_path, torch_pth_path, output_dir)
|
|
|
|
| 1 |
+
from .v2.Converter import convert as convert_v2
|
| 2 |
+
from .v2ProPlus.Converter import convert as convert_v2pp
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def convert(torch_ckpt_path: str, torch_pth_path: str, output_dir: str) -> None:
|
| 8 |
+
if os.path.getsize(torch_pth_path) > 150 * 1024 * 1024: # 大于 150 MB
|
| 9 |
+
convert_v2pp(torch_ckpt_path, torch_pth_path, output_dir)
|
| 10 |
+
else:
|
| 11 |
+
convert_v2(torch_ckpt_path, torch_pth_path, output_dir)
|
genie_tts/Converter/__pycache__/Converter.cpython-311.pyc
ADDED
|
Binary file (838 Bytes). View file
|
|
|
genie_tts/Converter/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (188 Bytes). View file
|
|
|
genie_tts/Converter/__pycache__/load_state_dict.cpython-311.pyc
ADDED
|
Binary file (1.56 kB). View file
|
|
|
genie_tts/Converter/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (2.33 kB). View file
|
|
|
genie_tts/Converter/load_state_dict.py
CHANGED
|
@@ -1,26 +1,26 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
sys.path.append(os.path.dirname(__file__))
|
| 5 |
-
|
| 6 |
-
import torch
|
| 7 |
-
from io import BytesIO
|
| 8 |
-
import utils
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def load_sovits_model(pth_path: str, device: str = 'cpu'):
|
| 12 |
-
f = open(pth_path, "rb")
|
| 13 |
-
meta = f.read(2)
|
| 14 |
-
if meta != b"PK":
|
| 15 |
-
# noinspection PyTypeChecker
|
| 16 |
-
data = b"PK" + f.read()
|
| 17 |
-
bio = BytesIO()
|
| 18 |
-
# noinspection PyTypeChecker
|
| 19 |
-
bio.write(data)
|
| 20 |
-
bio.seek(0)
|
| 21 |
-
return torch.load(bio, map_location=device, weights_only=False)
|
| 22 |
-
return torch.load(pth_path, map_location=device, weights_only=False)
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
def load_gpt_model(ckpt_path: str, device: str = 'cpu'):
|
| 26 |
-
return torch.load(ckpt_path, map_location=device, weights_only=True)
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
sys.path.append(os.path.dirname(__file__))
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from io import BytesIO
|
| 8 |
+
import utils
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def load_sovits_model(pth_path: str, device: str = 'cpu'):
|
| 12 |
+
f = open(pth_path, "rb")
|
| 13 |
+
meta = f.read(2)
|
| 14 |
+
if meta != b"PK":
|
| 15 |
+
# noinspection PyTypeChecker
|
| 16 |
+
data = b"PK" + f.read()
|
| 17 |
+
bio = BytesIO()
|
| 18 |
+
# noinspection PyTypeChecker
|
| 19 |
+
bio.write(data)
|
| 20 |
+
bio.seek(0)
|
| 21 |
+
return torch.load(bio, map_location=device, weights_only=False)
|
| 22 |
+
return torch.load(pth_path, map_location=device, weights_only=False)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def load_gpt_model(ckpt_path: str, device: str = 'cpu'):
|
| 26 |
+
return torch.load(ckpt_path, map_location=device, weights_only=True)
|
genie_tts/Converter/v2/Converter.py
CHANGED
|
@@ -1,146 +1,146 @@
|
|
| 1 |
-
from .VITSConverter import VITSConverter
|
| 2 |
-
from .T2SConverter import T2SModelConverter
|
| 3 |
-
from .EncoderConverter import EncoderConverter
|
| 4 |
-
from ...Utils.Constants import PACKAGE_NAME
|
| 5 |
-
|
| 6 |
-
import logging
|
| 7 |
-
from typing import Optional, Tuple
|
| 8 |
-
import re
|
| 9 |
-
import os
|
| 10 |
-
import shutil
|
| 11 |
-
import traceback
|
| 12 |
-
import importlib.resources
|
| 13 |
-
import contextlib
|
| 14 |
-
|
| 15 |
-
logger = logging.getLogger()
|
| 16 |
-
|
| 17 |
-
CACHE_DIR = os.path.join(os.getcwd(), "Cache")
|
| 18 |
-
ENCODER_RESOURCE_PATH = "Data/v2/Models/t2s_encoder_fp32.onnx"
|
| 19 |
-
STAGE_DECODER_RESOURCE_PATH = "Data/v2/Models/t2s_stage_decoder_fp32.onnx"
|
| 20 |
-
FIRST_STAGE_DECODER_RESOURCE_PATH = "Data/v2/Models/t2s_first_stage_decoder_fp32.onnx"
|
| 21 |
-
VITS_RESOURCE_PATH = "Data/v2/Models/vits_fp32.onnx"
|
| 22 |
-
T2S_KEYS_RESOURCE_PATH = "Data/v2/Keys/t2s_onnx_keys.txt"
|
| 23 |
-
VITS_KEYS_RESOURCE_PATH = "Data/v2/Keys/vits_onnx_keys.txt"
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
def find_ckpt_and_pth(directory: str) -> Tuple[Optional[str], Optional[str]]:
|
| 27 |
-
"""
|
| 28 |
-
在 directory(不递归子目录)里查找:
|
| 29 |
-
- .ckpt:从所有 .ckpt 文件名中搜索 'e{正整数}' 作为 epoch(找不到则视为 e0),
|
| 30 |
-
选择 epoch 最大的那个文件(若无则为 None)
|
| 31 |
-
- .pth :从所有 .pth 文件名中搜索 'e{正整数}' 作为 epoch(找不到则视为 e0),
|
| 32 |
-
选择 epoch 最大的那个文件(若无则为 None)
|
| 33 |
-
若出现相同 epoch,选修改时间较新的文件以打破平手。
|
| 34 |
-
"""
|
| 35 |
-
best_ckpt_path: Optional[str] = None
|
| 36 |
-
best_ckpt_epoch: int = -1
|
| 37 |
-
|
| 38 |
-
best_pth_path: Optional[str] = None
|
| 39 |
-
best_pth_epoch: int = -1
|
| 40 |
-
|
| 41 |
-
for filename in os.listdir(directory):
|
| 42 |
-
full_path = os.path.join(directory, filename)
|
| 43 |
-
|
| 44 |
-
if not os.path.isfile(full_path):
|
| 45 |
-
continue
|
| 46 |
-
|
| 47 |
-
# 提取 epoch
|
| 48 |
-
m = re.search(r"e(\d+)", filename, flags=re.IGNORECASE)
|
| 49 |
-
epoch = int(m.group(1)) if m else 0
|
| 50 |
-
|
| 51 |
-
# .ckpt 文件处理
|
| 52 |
-
if filename.lower().endswith(".ckpt"):
|
| 53 |
-
if (
|
| 54 |
-
epoch > best_ckpt_epoch
|
| 55 |
-
or (
|
| 56 |
-
epoch == best_ckpt_epoch
|
| 57 |
-
and best_ckpt_path is not None
|
| 58 |
-
and os.path.getmtime(full_path) > os.path.getmtime(best_ckpt_path)
|
| 59 |
-
)
|
| 60 |
-
):
|
| 61 |
-
best_ckpt_epoch = epoch
|
| 62 |
-
best_ckpt_path = full_path
|
| 63 |
-
|
| 64 |
-
# .pth 文件处理
|
| 65 |
-
elif filename.lower().endswith(".pth"):
|
| 66 |
-
if (
|
| 67 |
-
epoch > best_pth_epoch
|
| 68 |
-
or (
|
| 69 |
-
epoch == best_pth_epoch
|
| 70 |
-
and best_pth_path is not None
|
| 71 |
-
and os.path.getmtime(full_path) > os.path.getmtime(best_pth_path)
|
| 72 |
-
)
|
| 73 |
-
):
|
| 74 |
-
best_pth_epoch = epoch
|
| 75 |
-
best_pth_path = full_path
|
| 76 |
-
|
| 77 |
-
return best_ckpt_path, best_pth_path
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
def remove_folder(folder: str) -> None:
|
| 81 |
-
try:
|
| 82 |
-
if os.path.exists(folder):
|
| 83 |
-
shutil.rmtree(folder)
|
| 84 |
-
logger.info(f"🧹 Folder cleaned: {folder}")
|
| 85 |
-
except Exception as e:
|
| 86 |
-
logger.error(f"❌ Failed to clean folder {folder}: {e}")
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
def convert(torch_ckpt_path: str,
|
| 90 |
-
torch_pth_path: str,
|
| 91 |
-
output_dir: str):
|
| 92 |
-
# 确保缓存和输出目录存在
|
| 93 |
-
os.makedirs(CACHE_DIR, exist_ok=True)
|
| 94 |
-
os.makedirs(output_dir, exist_ok=True)
|
| 95 |
-
|
| 96 |
-
if len(os.listdir(output_dir)) > 0:
|
| 97 |
-
logger.warning(f"The output directory {output_dir} is not empty!")
|
| 98 |
-
|
| 99 |
-
with contextlib.ExitStack() as stack:
|
| 100 |
-
files = importlib.resources.files(PACKAGE_NAME)
|
| 101 |
-
|
| 102 |
-
def enter(p):
|
| 103 |
-
return stack.enter_context(importlib.resources.as_file(files.joinpath(p)))
|
| 104 |
-
|
| 105 |
-
encoder_onnx_path = enter(ENCODER_RESOURCE_PATH)
|
| 106 |
-
stage_decoder_path = enter(STAGE_DECODER_RESOURCE_PATH)
|
| 107 |
-
first_stage_decoder_path = enter(FIRST_STAGE_DECODER_RESOURCE_PATH)
|
| 108 |
-
vits_onnx_path = enter(VITS_RESOURCE_PATH)
|
| 109 |
-
t2s_keys_path = enter(T2S_KEYS_RESOURCE_PATH)
|
| 110 |
-
vits_keys_path = enter(VITS_KEYS_RESOURCE_PATH)
|
| 111 |
-
|
| 112 |
-
converter_1 = T2SModelConverter(
|
| 113 |
-
torch_ckpt_path=torch_ckpt_path,
|
| 114 |
-
stage_decoder_onnx_path=str(stage_decoder_path),
|
| 115 |
-
first_stage_decoder_onnx_path=str(first_stage_decoder_path),
|
| 116 |
-
key_list_file=str(t2s_keys_path),
|
| 117 |
-
output_dir=output_dir,
|
| 118 |
-
cache_dir=CACHE_DIR,
|
| 119 |
-
)
|
| 120 |
-
converter_2 = VITSConverter(
|
| 121 |
-
torch_pth_path=torch_pth_path,
|
| 122 |
-
vits_onnx_path=str(vits_onnx_path),
|
| 123 |
-
key_list_file=str(vits_keys_path),
|
| 124 |
-
output_dir=output_dir,
|
| 125 |
-
cache_dir=CACHE_DIR,
|
| 126 |
-
)
|
| 127 |
-
converter_3 = EncoderConverter(
|
| 128 |
-
ckpt_path=torch_ckpt_path,
|
| 129 |
-
pth_path=torch_pth_path,
|
| 130 |
-
onnx_input_path=str(encoder_onnx_path),
|
| 131 |
-
output_dir=output_dir,
|
| 132 |
-
)
|
| 133 |
-
|
| 134 |
-
try:
|
| 135 |
-
converter_1.run_full_process()
|
| 136 |
-
converter_2.run_full_process()
|
| 137 |
-
converter_3.run_full_process()
|
| 138 |
-
logger.info(f"🎉 Conversion successful! Saved to: {os.path.abspath(output_dir)}\n"
|
| 139 |
-
f"- Model Type: V2")
|
| 140 |
-
except Exception:
|
| 141 |
-
logger.error(f"❌ A critical error occurred during the conversion process")
|
| 142 |
-
logger.error(traceback.format_exc())
|
| 143 |
-
remove_folder(output_dir) # 只在失败时清理输出目录
|
| 144 |
-
finally:
|
| 145 |
-
# 无论成功还是失败,都尝试清理缓存目录
|
| 146 |
-
remove_folder(CACHE_DIR)
|
|
|
|
| 1 |
+
from .VITSConverter import VITSConverter
|
| 2 |
+
from .T2SConverter import T2SModelConverter
|
| 3 |
+
from .EncoderConverter import EncoderConverter
|
| 4 |
+
from ...Utils.Constants import PACKAGE_NAME
|
| 5 |
+
|
| 6 |
+
import logging
|
| 7 |
+
from typing import Optional, Tuple
|
| 8 |
+
import re
|
| 9 |
+
import os
|
| 10 |
+
import shutil
|
| 11 |
+
import traceback
|
| 12 |
+
import importlib.resources
|
| 13 |
+
import contextlib
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger()
|
| 16 |
+
|
| 17 |
+
CACHE_DIR = os.path.join(os.getcwd(), "Cache")
|
| 18 |
+
ENCODER_RESOURCE_PATH = "Data/v2/Models/t2s_encoder_fp32.onnx"
|
| 19 |
+
STAGE_DECODER_RESOURCE_PATH = "Data/v2/Models/t2s_stage_decoder_fp32.onnx"
|
| 20 |
+
FIRST_STAGE_DECODER_RESOURCE_PATH = "Data/v2/Models/t2s_first_stage_decoder_fp32.onnx"
|
| 21 |
+
VITS_RESOURCE_PATH = "Data/v2/Models/vits_fp32.onnx"
|
| 22 |
+
T2S_KEYS_RESOURCE_PATH = "Data/v2/Keys/t2s_onnx_keys.txt"
|
| 23 |
+
VITS_KEYS_RESOURCE_PATH = "Data/v2/Keys/vits_onnx_keys.txt"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def find_ckpt_and_pth(directory: str) -> Tuple[Optional[str], Optional[str]]:
|
| 27 |
+
"""
|
| 28 |
+
在 directory(不递归子目录)里查找:
|
| 29 |
+
- .ckpt:从所有 .ckpt 文件名中搜索 'e{正整数}' 作为 epoch(找不到则视为 e0),
|
| 30 |
+
选择 epoch 最大的那个文件(若无则为 None)
|
| 31 |
+
- .pth :从所有 .pth 文件名中搜索 'e{正整数}' 作为 epoch(找不到则视为 e0),
|
| 32 |
+
选择 epoch 最大的那个文件(若无则为 None)
|
| 33 |
+
若出现相同 epoch,选修改时间较新的文件以打破平手。
|
| 34 |
+
"""
|
| 35 |
+
best_ckpt_path: Optional[str] = None
|
| 36 |
+
best_ckpt_epoch: int = -1
|
| 37 |
+
|
| 38 |
+
best_pth_path: Optional[str] = None
|
| 39 |
+
best_pth_epoch: int = -1
|
| 40 |
+
|
| 41 |
+
for filename in os.listdir(directory):
|
| 42 |
+
full_path = os.path.join(directory, filename)
|
| 43 |
+
|
| 44 |
+
if not os.path.isfile(full_path):
|
| 45 |
+
continue
|
| 46 |
+
|
| 47 |
+
# 提取 epoch
|
| 48 |
+
m = re.search(r"e(\d+)", filename, flags=re.IGNORECASE)
|
| 49 |
+
epoch = int(m.group(1)) if m else 0
|
| 50 |
+
|
| 51 |
+
# .ckpt 文件处理
|
| 52 |
+
if filename.lower().endswith(".ckpt"):
|
| 53 |
+
if (
|
| 54 |
+
epoch > best_ckpt_epoch
|
| 55 |
+
or (
|
| 56 |
+
epoch == best_ckpt_epoch
|
| 57 |
+
and best_ckpt_path is not None
|
| 58 |
+
and os.path.getmtime(full_path) > os.path.getmtime(best_ckpt_path)
|
| 59 |
+
)
|
| 60 |
+
):
|
| 61 |
+
best_ckpt_epoch = epoch
|
| 62 |
+
best_ckpt_path = full_path
|
| 63 |
+
|
| 64 |
+
# .pth 文件处理
|
| 65 |
+
elif filename.lower().endswith(".pth"):
|
| 66 |
+
if (
|
| 67 |
+
epoch > best_pth_epoch
|
| 68 |
+
or (
|
| 69 |
+
epoch == best_pth_epoch
|
| 70 |
+
and best_pth_path is not None
|
| 71 |
+
and os.path.getmtime(full_path) > os.path.getmtime(best_pth_path)
|
| 72 |
+
)
|
| 73 |
+
):
|
| 74 |
+
best_pth_epoch = epoch
|
| 75 |
+
best_pth_path = full_path
|
| 76 |
+
|
| 77 |
+
return best_ckpt_path, best_pth_path
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def remove_folder(folder: str) -> None:
|
| 81 |
+
try:
|
| 82 |
+
if os.path.exists(folder):
|
| 83 |
+
shutil.rmtree(folder)
|
| 84 |
+
logger.info(f"🧹 Folder cleaned: {folder}")
|
| 85 |
+
except Exception as e:
|
| 86 |
+
logger.error(f"❌ Failed to clean folder {folder}: {e}")
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def convert(torch_ckpt_path: str,
|
| 90 |
+
torch_pth_path: str,
|
| 91 |
+
output_dir: str):
|
| 92 |
+
# 确保缓存和输出目录存在
|
| 93 |
+
os.makedirs(CACHE_DIR, exist_ok=True)
|
| 94 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 95 |
+
|
| 96 |
+
if len(os.listdir(output_dir)) > 0:
|
| 97 |
+
logger.warning(f"The output directory {output_dir} is not empty!")
|
| 98 |
+
|
| 99 |
+
with contextlib.ExitStack() as stack:
|
| 100 |
+
files = importlib.resources.files(PACKAGE_NAME)
|
| 101 |
+
|
| 102 |
+
def enter(p):
|
| 103 |
+
return stack.enter_context(importlib.resources.as_file(files.joinpath(p)))
|
| 104 |
+
|
| 105 |
+
encoder_onnx_path = enter(ENCODER_RESOURCE_PATH)
|
| 106 |
+
stage_decoder_path = enter(STAGE_DECODER_RESOURCE_PATH)
|
| 107 |
+
first_stage_decoder_path = enter(FIRST_STAGE_DECODER_RESOURCE_PATH)
|
| 108 |
+
vits_onnx_path = enter(VITS_RESOURCE_PATH)
|
| 109 |
+
t2s_keys_path = enter(T2S_KEYS_RESOURCE_PATH)
|
| 110 |
+
vits_keys_path = enter(VITS_KEYS_RESOURCE_PATH)
|
| 111 |
+
|
| 112 |
+
converter_1 = T2SModelConverter(
|
| 113 |
+
torch_ckpt_path=torch_ckpt_path,
|
| 114 |
+
stage_decoder_onnx_path=str(stage_decoder_path),
|
| 115 |
+
first_stage_decoder_onnx_path=str(first_stage_decoder_path),
|
| 116 |
+
key_list_file=str(t2s_keys_path),
|
| 117 |
+
output_dir=output_dir,
|
| 118 |
+
cache_dir=CACHE_DIR,
|
| 119 |
+
)
|
| 120 |
+
converter_2 = VITSConverter(
|
| 121 |
+
torch_pth_path=torch_pth_path,
|
| 122 |
+
vits_onnx_path=str(vits_onnx_path),
|
| 123 |
+
key_list_file=str(vits_keys_path),
|
| 124 |
+
output_dir=output_dir,
|
| 125 |
+
cache_dir=CACHE_DIR,
|
| 126 |
+
)
|
| 127 |
+
converter_3 = EncoderConverter(
|
| 128 |
+
ckpt_path=torch_ckpt_path,
|
| 129 |
+
pth_path=torch_pth_path,
|
| 130 |
+
onnx_input_path=str(encoder_onnx_path),
|
| 131 |
+
output_dir=output_dir,
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
try:
|
| 135 |
+
converter_1.run_full_process()
|
| 136 |
+
converter_2.run_full_process()
|
| 137 |
+
converter_3.run_full_process()
|
| 138 |
+
logger.info(f"🎉 Conversion successful! Saved to: {os.path.abspath(output_dir)}\n"
|
| 139 |
+
f"- Model Type: V2")
|
| 140 |
+
except Exception:
|
| 141 |
+
logger.error(f"❌ A critical error occurred during the conversion process")
|
| 142 |
+
logger.error(traceback.format_exc())
|
| 143 |
+
remove_folder(output_dir) # 只在失败时清理输出目录
|
| 144 |
+
finally:
|
| 145 |
+
# 无论成功还是失败,都尝试清理缓存目录
|
| 146 |
+
remove_folder(CACHE_DIR)
|
genie_tts/Converter/v2/EncoderConverter.py
CHANGED
|
@@ -1,106 +1,106 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import onnx
|
| 3 |
-
import os
|
| 4 |
-
|
| 5 |
-
from ..load_state_dict import load_gpt_model, load_sovits_model
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class EncoderConverter:
|
| 9 |
-
"""
|
| 10 |
-
一个转换器,用于为 t2s_encoder 模型创建:
|
| 11 |
-
1. 一个从 .ckpt 和 .pth 文件中合并而来的全精度 (fp32) .bin 权重文件。
|
| 12 |
-
2. 一个链接到该 .bin 文件的 ONNX 模型。
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
def __init__(self,
|
| 16 |
-
ckpt_path: str,
|
| 17 |
-
pth_path: str,
|
| 18 |
-
onnx_input_path: str,
|
| 19 |
-
output_dir: str,
|
| 20 |
-
):
|
| 21 |
-
self.ckpt_path: str = ckpt_path
|
| 22 |
-
self.pth_path: str = pth_path
|
| 23 |
-
self.onnx_input_path: str = onnx_input_path
|
| 24 |
-
self.output_dir: str = output_dir
|
| 25 |
-
|
| 26 |
-
# 定义最终输出文件的路径
|
| 27 |
-
self.output_bin_path: str = os.path.join(self.output_dir, "t2s_encoder_fp32.bin")
|
| 28 |
-
self.output_onnx_path: str = os.path.join(self.output_dir, "t2s_encoder_fp32.onnx")
|
| 29 |
-
|
| 30 |
-
# 确保输出目录存在
|
| 31 |
-
os.makedirs(self.output_dir, exist_ok=True)
|
| 32 |
-
|
| 33 |
-
# 检查所有输入文件是否存在
|
| 34 |
-
for path in [self.ckpt_path, self.pth_path, self.onnx_input_path]:
|
| 35 |
-
if not os.path.exists(path):
|
| 36 |
-
raise FileNotFoundError(f"Error: Input file not found! Path: {path}")
|
| 37 |
-
|
| 38 |
-
def run_full_process(self):
|
| 39 |
-
# 1. 定义固定的 ONNX 权重键列表 (此顺序决定了 .bin 文件的布局)
|
| 40 |
-
onnx_keys = [
|
| 41 |
-
"encoder.ar_text_embedding.word_embeddings.weight",
|
| 42 |
-
"encoder.bert_proj.weight",
|
| 43 |
-
"encoder.bert_proj.bias",
|
| 44 |
-
"encoder.ar_text_position.alpha",
|
| 45 |
-
"vits.ssl_proj.weight",
|
| 46 |
-
"vits.ssl_proj.bias",
|
| 47 |
-
"vits.quantizer.vq.layers.0._codebook.embed"
|
| 48 |
-
]
|
| 49 |
-
|
| 50 |
-
# 2. 加载所有必要的模型和权重
|
| 51 |
-
ckpt_state_dict = load_gpt_model(self.ckpt_path)['weight']
|
| 52 |
-
pth_state_dict = load_sovits_model(self.pth_path)['weight']
|
| 53 |
-
model = onnx.load(self.onnx_input_path, load_external_data=False)
|
| 54 |
-
initializer_map = {init.name: init for init in model.graph.initializer}
|
| 55 |
-
current_offset = 0
|
| 56 |
-
bin_filename = os.path.basename(self.output_bin_path)
|
| 57 |
-
|
| 58 |
-
# 3. 生成 .bin 文件并同步修改 ONNX 模型
|
| 59 |
-
with open(self.output_bin_path, 'wb') as f_bin:
|
| 60 |
-
for onnx_key in onnx_keys:
|
| 61 |
-
source_key = ""
|
| 62 |
-
source_dict = None
|
| 63 |
-
|
| 64 |
-
if onnx_key.startswith("encoder."):
|
| 65 |
-
source_key = "model." + onnx_key[len("encoder."):]
|
| 66 |
-
source_dict = ckpt_state_dict
|
| 67 |
-
elif onnx_key.startswith("vits."):
|
| 68 |
-
source_key = onnx_key[len("vits."):]
|
| 69 |
-
source_dict = pth_state_dict
|
| 70 |
-
|
| 71 |
-
if source_dict is None:
|
| 72 |
-
raise ValueError(
|
| 73 |
-
f"❌ Critical error: Unable to determine the weight source for ONNX key '{onnx_key}'.")
|
| 74 |
-
# 从源文件中提取张量
|
| 75 |
-
tensor = source_dict.get(source_key)
|
| 76 |
-
if tensor is None:
|
| 77 |
-
raise ValueError(
|
| 78 |
-
f"❌ Critical error: Key '{source_key}' (corresponding to ONNX key '{onnx_key}') not found in the source file.")
|
| 79 |
-
|
| 80 |
-
# 转换为 fp32 numpy 数组并获取字节
|
| 81 |
-
numpy_array_fp32 = tensor.to(torch.float32).cpu().numpy()
|
| 82 |
-
tensor_bytes = numpy_array_fp32.tobytes()
|
| 83 |
-
tensor_length = len(tensor_bytes)
|
| 84 |
-
f_bin.write(tensor_bytes)
|
| 85 |
-
|
| 86 |
-
# 在 ONNX 模型中找到对应的 initializer 并修改它
|
| 87 |
-
if onnx_key in initializer_map:
|
| 88 |
-
tensor_proto = initializer_map[onnx_key]
|
| 89 |
-
|
| 90 |
-
tensor_proto.ClearField('raw_data')
|
| 91 |
-
tensor_proto.data_location = onnx.TensorProto.EXTERNAL
|
| 92 |
-
del tensor_proto.external_data[:]
|
| 93 |
-
|
| 94 |
-
keys_to_set = ["location", "offset", "length"]
|
| 95 |
-
values_to_set = [bin_filename, str(current_offset), str(tensor_length)]
|
| 96 |
-
|
| 97 |
-
for k, v in zip(keys_to_set, values_to_set):
|
| 98 |
-
entry = tensor_proto.external_data.add()
|
| 99 |
-
entry.key = k
|
| 100 |
-
entry.value = v
|
| 101 |
-
|
| 102 |
-
# 更新下一个权重的偏移量
|
| 103 |
-
current_offset += tensor_length
|
| 104 |
-
|
| 105 |
-
# 4. 保存修改后的 ONNX 模型
|
| 106 |
-
onnx.save(model, self.output_onnx_path)
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import onnx
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
from ..load_state_dict import load_gpt_model, load_sovits_model
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class EncoderConverter:
|
| 9 |
+
"""
|
| 10 |
+
一个转换器,用于为 t2s_encoder 模型创建:
|
| 11 |
+
1. 一个从 .ckpt 和 .pth 文件中合并而来的全精度 (fp32) .bin 权重文件。
|
| 12 |
+
2. 一个链接到该 .bin 文件的 ONNX 模型。
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
def __init__(self,
|
| 16 |
+
ckpt_path: str,
|
| 17 |
+
pth_path: str,
|
| 18 |
+
onnx_input_path: str,
|
| 19 |
+
output_dir: str,
|
| 20 |
+
):
|
| 21 |
+
self.ckpt_path: str = ckpt_path
|
| 22 |
+
self.pth_path: str = pth_path
|
| 23 |
+
self.onnx_input_path: str = onnx_input_path
|
| 24 |
+
self.output_dir: str = output_dir
|
| 25 |
+
|
| 26 |
+
# 定义最终输出文件的路径
|
| 27 |
+
self.output_bin_path: str = os.path.join(self.output_dir, "t2s_encoder_fp32.bin")
|
| 28 |
+
self.output_onnx_path: str = os.path.join(self.output_dir, "t2s_encoder_fp32.onnx")
|
| 29 |
+
|
| 30 |
+
# 确保输出目录存在
|
| 31 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
| 32 |
+
|
| 33 |
+
# 检查所有输入文件是否存在
|
| 34 |
+
for path in [self.ckpt_path, self.pth_path, self.onnx_input_path]:
|
| 35 |
+
if not os.path.exists(path):
|
| 36 |
+
raise FileNotFoundError(f"Error: Input file not found! Path: {path}")
|
| 37 |
+
|
| 38 |
+
def run_full_process(self):
|
| 39 |
+
# 1. 定义固定的 ONNX 权重键列表 (此顺序决定了 .bin 文件的布局)
|
| 40 |
+
onnx_keys = [
|
| 41 |
+
"encoder.ar_text_embedding.word_embeddings.weight",
|
| 42 |
+
"encoder.bert_proj.weight",
|
| 43 |
+
"encoder.bert_proj.bias",
|
| 44 |
+
"encoder.ar_text_position.alpha",
|
| 45 |
+
"vits.ssl_proj.weight",
|
| 46 |
+
"vits.ssl_proj.bias",
|
| 47 |
+
"vits.quantizer.vq.layers.0._codebook.embed"
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
# 2. 加载所有必要的模型和权重
|
| 51 |
+
ckpt_state_dict = load_gpt_model(self.ckpt_path)['weight']
|
| 52 |
+
pth_state_dict = load_sovits_model(self.pth_path)['weight']
|
| 53 |
+
model = onnx.load(self.onnx_input_path, load_external_data=False)
|
| 54 |
+
initializer_map = {init.name: init for init in model.graph.initializer}
|
| 55 |
+
current_offset = 0
|
| 56 |
+
bin_filename = os.path.basename(self.output_bin_path)
|
| 57 |
+
|
| 58 |
+
# 3. 生成 .bin 文件并同步修改 ONNX 模型
|
| 59 |
+
with open(self.output_bin_path, 'wb') as f_bin:
|
| 60 |
+
for onnx_key in onnx_keys:
|
| 61 |
+
source_key = ""
|
| 62 |
+
source_dict = None
|
| 63 |
+
|
| 64 |
+
if onnx_key.startswith("encoder."):
|
| 65 |
+
source_key = "model." + onnx_key[len("encoder."):]
|
| 66 |
+
source_dict = ckpt_state_dict
|
| 67 |
+
elif onnx_key.startswith("vits."):
|
| 68 |
+
source_key = onnx_key[len("vits."):]
|
| 69 |
+
source_dict = pth_state_dict
|
| 70 |
+
|
| 71 |
+
if source_dict is None:
|
| 72 |
+
raise ValueError(
|
| 73 |
+
f"❌ Critical error: Unable to determine the weight source for ONNX key '{onnx_key}'.")
|
| 74 |
+
# 从源文件中提取张量
|
| 75 |
+
tensor = source_dict.get(source_key)
|
| 76 |
+
if tensor is None:
|
| 77 |
+
raise ValueError(
|
| 78 |
+
f"❌ Critical error: Key '{source_key}' (corresponding to ONNX key '{onnx_key}') not found in the source file.")
|
| 79 |
+
|
| 80 |
+
# 转换为 fp32 numpy 数组并获取字节
|
| 81 |
+
numpy_array_fp32 = tensor.to(torch.float32).cpu().numpy()
|
| 82 |
+
tensor_bytes = numpy_array_fp32.tobytes()
|
| 83 |
+
tensor_length = len(tensor_bytes)
|
| 84 |
+
f_bin.write(tensor_bytes)
|
| 85 |
+
|
| 86 |
+
# 在 ONNX 模型中找到对应的 initializer 并修改它
|
| 87 |
+
if onnx_key in initializer_map:
|
| 88 |
+
tensor_proto = initializer_map[onnx_key]
|
| 89 |
+
|
| 90 |
+
tensor_proto.ClearField('raw_data')
|
| 91 |
+
tensor_proto.data_location = onnx.TensorProto.EXTERNAL
|
| 92 |
+
del tensor_proto.external_data[:]
|
| 93 |
+
|
| 94 |
+
keys_to_set = ["location", "offset", "length"]
|
| 95 |
+
values_to_set = [bin_filename, str(current_offset), str(tensor_length)]
|
| 96 |
+
|
| 97 |
+
for k, v in zip(keys_to_set, values_to_set):
|
| 98 |
+
entry = tensor_proto.external_data.add()
|
| 99 |
+
entry.key = k
|
| 100 |
+
entry.value = v
|
| 101 |
+
|
| 102 |
+
# 更新下一个权重的偏移量
|
| 103 |
+
current_offset += tensor_length
|
| 104 |
+
|
| 105 |
+
# 4. 保存修改后的 ONNX 模型
|
| 106 |
+
onnx.save(model, self.output_onnx_path)
|
genie_tts/Converter/v2/T2SConverter.py
CHANGED
|
@@ -1,125 +1,125 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import onnx
|
| 3 |
-
import numpy as np
|
| 4 |
-
import json
|
| 5 |
-
import os
|
| 6 |
-
from collections import OrderedDict
|
| 7 |
-
|
| 8 |
-
from ..load_state_dict import load_gpt_model
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
class T2SModelConverter:
|
| 12 |
-
"""
|
| 13 |
-
一个专门的转换器,用于处理 t2s (Text-to-Speech) 模型。
|
| 14 |
-
- PyTorch 模型: .ckpt 文件
|
| 15 |
-
- ONNX 模型: t2s_stage_decoder_fp32.onnx
|
| 16 |
-
- 遵循特定的键名映射规则。
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
def __init__(self,
|
| 20 |
-
torch_ckpt_path: str,
|
| 21 |
-
stage_decoder_onnx_path: str,
|
| 22 |
-
first_stage_decoder_onnx_path: str,
|
| 23 |
-
key_list_file: str,
|
| 24 |
-
output_dir: str,
|
| 25 |
-
cache_dir: str,
|
| 26 |
-
):
|
| 27 |
-
self.torch_ckpt_path: str = torch_ckpt_path
|
| 28 |
-
self.stage_decoder_onnx_path: str = stage_decoder_onnx_path
|
| 29 |
-
self.first_stage_decoder_onnx_path: str = first_stage_decoder_onnx_path
|
| 30 |
-
self.key_list_file: str = key_list_file
|
| 31 |
-
self.output_dir: str = output_dir
|
| 32 |
-
self.cache_dir: str = cache_dir
|
| 33 |
-
|
| 34 |
-
os.makedirs(self.output_dir, exist_ok=True)
|
| 35 |
-
os.makedirs(self.output_dir, exist_ok=True)
|
| 36 |
-
|
| 37 |
-
# 定义输出文件路径
|
| 38 |
-
self.fp16_bin_path: str = os.path.join(self.output_dir, "t2s_shared_fp16.bin")
|
| 39 |
-
self.index_table_path: str = os.path.join(self.cache_dir, "t2s_weights_index_fp32.json")
|
| 40 |
-
self.relinked_encoder_path: str = os.path.join(self.output_dir, "t2s_encoder_fp32.onnx")
|
| 41 |
-
self.relinked_stage_decoder_path: str = os.path.join(self.output_dir, "t2s_stage_decoder_fp32.onnx")
|
| 42 |
-
self.relinked_first_stage_decoder_path: str = os.path.join(self.output_dir, "t2s_first_stage_decoder_fp32.onnx")
|
| 43 |
-
self.reconstructed_fp32_bin_path = os.path.join(self.output_dir, "t2s_shared_fp32.bin")
|
| 44 |
-
|
| 45 |
-
def step1_create_fp16_bin_with_key_mapping(self):
|
| 46 |
-
"""
|
| 47 |
-
(1) 根据特定的键映射规则,从 .ckpt 创建 fp16 .bin 和 fp32 索引。
|
| 48 |
-
(已根据用户验证脚本的正确逻辑进行最终修正)
|
| 49 |
-
"""
|
| 50 |
-
if not os.path.exists(self.key_list_file):
|
| 51 |
-
raise FileNotFoundError(
|
| 52 |
-
f"Error: Stage 1 requires the key list file, but it was not found: {self.key_list_file}")
|
| 53 |
-
|
| 54 |
-
with open(self.key_list_file, 'r') as f:
|
| 55 |
-
onnx_keys = [line.strip() for line in f.readlines()]
|
| 56 |
-
|
| 57 |
-
ckpt_data = load_gpt_model(self.torch_ckpt_path)
|
| 58 |
-
if 'weight' not in ckpt_data:
|
| 59 |
-
raise KeyError(
|
| 60 |
-
f"❌ Error: 'weight' key not found in the .ckpt file. Top-level keys in the file are: {list(ckpt_data.keys())}")
|
| 61 |
-
|
| 62 |
-
torch_state_dict = ckpt_data['weight']
|
| 63 |
-
|
| 64 |
-
index_table = OrderedDict()
|
| 65 |
-
current_fp32_offset = 0
|
| 66 |
-
|
| 67 |
-
with open(self.fp16_bin_path, 'wb') as f_bin:
|
| 68 |
-
for onnx_key in onnx_keys:
|
| 69 |
-
transformed_onnx_key = onnx_key.replace('transformer_encoder', 'h')
|
| 70 |
-
torch_lookup_key = f"model.{transformed_onnx_key}"
|
| 71 |
-
torch_tensor = torch_state_dict.get(torch_lookup_key)
|
| 72 |
-
numpy_array_fp16 = torch_tensor.to(torch.float16).cpu().numpy()
|
| 73 |
-
f_bin.write(numpy_array_fp16.tobytes())
|
| 74 |
-
tensor_length_fp32 = numpy_array_fp16.nbytes * 2
|
| 75 |
-
index_table[onnx_key] = {'offset': current_fp32_offset, 'length': tensor_length_fp32}
|
| 76 |
-
current_fp32_offset += tensor_length_fp32
|
| 77 |
-
|
| 78 |
-
with open(self.index_table_path, 'w') as f_json:
|
| 79 |
-
json.dump(index_table, f_json, indent=4) # type: ignore
|
| 80 |
-
|
| 81 |
-
def step2_relink_onnx_for_fp32(self, old_model: str, new_model: str):
|
| 82 |
-
"""
|
| 83 |
-
(2) 根据 fp32 索引表,修改 ONNX 模型,使其链接到未来的全精度 .bin。
|
| 84 |
-
(使用与第一个脚本相同的、更稳定的底层方法)
|
| 85 |
-
"""
|
| 86 |
-
if not os.path.exists(self.index_table_path):
|
| 87 |
-
raise FileNotFoundError(
|
| 88 |
-
f"Error: Stage 2 requires the index file, but it was not found: {self.index_table_path}")
|
| 89 |
-
|
| 90 |
-
# 加载描述 fp32 布局的索引表
|
| 91 |
-
with open(self.index_table_path, 'r') as f:
|
| 92 |
-
index_table = json.load(f)
|
| 93 |
-
|
| 94 |
-
model = onnx.load_model(old_model, load_external_data=False)
|
| 95 |
-
reconstructed_bin_filename = os.path.basename(self.reconstructed_fp32_bin_path)
|
| 96 |
-
|
| 97 |
-
for tensor in model.graph.initializer:
|
| 98 |
-
if tensor.name in index_table:
|
| 99 |
-
tensor.ClearField('raw_data')
|
| 100 |
-
tensor.data_location = onnx.TensorProto.EXTERNAL
|
| 101 |
-
info = index_table[tensor.name]
|
| 102 |
-
del tensor.external_data[:]
|
| 103 |
-
keys = ["location", "offset", "length"]
|
| 104 |
-
values = [reconstructed_bin_filename, str(info['offset']), str(info['length'])]
|
| 105 |
-
|
| 106 |
-
for k, v in zip(keys, values):
|
| 107 |
-
entry = tensor.external_data.add()
|
| 108 |
-
entry.key = k
|
| 109 |
-
entry.value = v
|
| 110 |
-
|
| 111 |
-
onnx.save(model, new_model)
|
| 112 |
-
|
| 113 |
-
@staticmethod
|
| 114 |
-
def step3_reconstruct_fp32_bin_from_fp16(fp16_bin_path: str, output_fp32_bin_path: str):
|
| 115 |
-
"""
|
| 116 |
-
(3) 静态工具函数:从半精度 .bin 文件还原出全精度 .bin 文件。
|
| 117 |
-
"""
|
| 118 |
-
fp16_array = np.fromfile(fp16_bin_path, dtype=np.float16)
|
| 119 |
-
fp32_array = fp16_array.astype(np.float32)
|
| 120 |
-
fp32_array.tofile(output_fp32_bin_path)
|
| 121 |
-
|
| 122 |
-
def run_full_process(self):
|
| 123 |
-
self.step1_create_fp16_bin_with_key_mapping()
|
| 124 |
-
self.step2_relink_onnx_for_fp32(self.stage_decoder_onnx_path, self.relinked_stage_decoder_path)
|
| 125 |
-
self.step2_relink_onnx_for_fp32(self.first_stage_decoder_onnx_path, self.relinked_first_stage_decoder_path)
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import onnx
|
| 3 |
+
import numpy as np
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from collections import OrderedDict
|
| 7 |
+
|
| 8 |
+
from ..load_state_dict import load_gpt_model
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class T2SModelConverter:
|
| 12 |
+
"""
|
| 13 |
+
一个专门的转换器,用于处理 t2s (Text-to-Speech) 模型。
|
| 14 |
+
- PyTorch 模型: .ckpt 文件
|
| 15 |
+
- ONNX 模型: t2s_stage_decoder_fp32.onnx
|
| 16 |
+
- 遵循特定的键名映射规则。
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
def __init__(self,
|
| 20 |
+
torch_ckpt_path: str,
|
| 21 |
+
stage_decoder_onnx_path: str,
|
| 22 |
+
first_stage_decoder_onnx_path: str,
|
| 23 |
+
key_list_file: str,
|
| 24 |
+
output_dir: str,
|
| 25 |
+
cache_dir: str,
|
| 26 |
+
):
|
| 27 |
+
self.torch_ckpt_path: str = torch_ckpt_path
|
| 28 |
+
self.stage_decoder_onnx_path: str = stage_decoder_onnx_path
|
| 29 |
+
self.first_stage_decoder_onnx_path: str = first_stage_decoder_onnx_path
|
| 30 |
+
self.key_list_file: str = key_list_file
|
| 31 |
+
self.output_dir: str = output_dir
|
| 32 |
+
self.cache_dir: str = cache_dir
|
| 33 |
+
|
| 34 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
| 35 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
| 36 |
+
|
| 37 |
+
# 定义输出文件路径
|
| 38 |
+
self.fp16_bin_path: str = os.path.join(self.output_dir, "t2s_shared_fp16.bin")
|
| 39 |
+
self.index_table_path: str = os.path.join(self.cache_dir, "t2s_weights_index_fp32.json")
|
| 40 |
+
self.relinked_encoder_path: str = os.path.join(self.output_dir, "t2s_encoder_fp32.onnx")
|
| 41 |
+
self.relinked_stage_decoder_path: str = os.path.join(self.output_dir, "t2s_stage_decoder_fp32.onnx")
|
| 42 |
+
self.relinked_first_stage_decoder_path: str = os.path.join(self.output_dir, "t2s_first_stage_decoder_fp32.onnx")
|
| 43 |
+
self.reconstructed_fp32_bin_path = os.path.join(self.output_dir, "t2s_shared_fp32.bin")
|
| 44 |
+
|
| 45 |
+
def step1_create_fp16_bin_with_key_mapping(self):
|
| 46 |
+
"""
|
| 47 |
+
(1) 根据特定的键映射规则,从 .ckpt 创建 fp16 .bin 和 fp32 索引。
|
| 48 |
+
(已根据用户验证脚本的正确逻辑进行最终修正)
|
| 49 |
+
"""
|
| 50 |
+
if not os.path.exists(self.key_list_file):
|
| 51 |
+
raise FileNotFoundError(
|
| 52 |
+
f"Error: Stage 1 requires the key list file, but it was not found: {self.key_list_file}")
|
| 53 |
+
|
| 54 |
+
with open(self.key_list_file, 'r') as f:
|
| 55 |
+
onnx_keys = [line.strip() for line in f.readlines()]
|
| 56 |
+
|
| 57 |
+
ckpt_data = load_gpt_model(self.torch_ckpt_path)
|
| 58 |
+
if 'weight' not in ckpt_data:
|
| 59 |
+
raise KeyError(
|
| 60 |
+
f"❌ Error: 'weight' key not found in the .ckpt file. Top-level keys in the file are: {list(ckpt_data.keys())}")
|
| 61 |
+
|
| 62 |
+
torch_state_dict = ckpt_data['weight']
|
| 63 |
+
|
| 64 |
+
index_table = OrderedDict()
|
| 65 |
+
current_fp32_offset = 0
|
| 66 |
+
|
| 67 |
+
with open(self.fp16_bin_path, 'wb') as f_bin:
|
| 68 |
+
for onnx_key in onnx_keys:
|
| 69 |
+
transformed_onnx_key = onnx_key.replace('transformer_encoder', 'h')
|
| 70 |
+
torch_lookup_key = f"model.{transformed_onnx_key}"
|
| 71 |
+
torch_tensor = torch_state_dict.get(torch_lookup_key)
|
| 72 |
+
numpy_array_fp16 = torch_tensor.to(torch.float16).cpu().numpy()
|
| 73 |
+
f_bin.write(numpy_array_fp16.tobytes())
|
| 74 |
+
tensor_length_fp32 = numpy_array_fp16.nbytes * 2
|
| 75 |
+
index_table[onnx_key] = {'offset': current_fp32_offset, 'length': tensor_length_fp32}
|
| 76 |
+
current_fp32_offset += tensor_length_fp32
|
| 77 |
+
|
| 78 |
+
with open(self.index_table_path, 'w') as f_json:
|
| 79 |
+
json.dump(index_table, f_json, indent=4) # type: ignore
|
| 80 |
+
|
| 81 |
+
def step2_relink_onnx_for_fp32(self, old_model: str, new_model: str):
|
| 82 |
+
"""
|
| 83 |
+
(2) 根据 fp32 索引表,修改 ONNX 模型,使其链接到未来的全精度 .bin。
|
| 84 |
+
(使用与第一个脚本相同的、更稳定的底层方法)
|
| 85 |
+
"""
|
| 86 |
+
if not os.path.exists(self.index_table_path):
|
| 87 |
+
raise FileNotFoundError(
|
| 88 |
+
f"Error: Stage 2 requires the index file, but it was not found: {self.index_table_path}")
|
| 89 |
+
|
| 90 |
+
# 加载描述 fp32 布局的索引表
|
| 91 |
+
with open(self.index_table_path, 'r') as f:
|
| 92 |
+
index_table = json.load(f)
|
| 93 |
+
|
| 94 |
+
model = onnx.load_model(old_model, load_external_data=False)
|
| 95 |
+
reconstructed_bin_filename = os.path.basename(self.reconstructed_fp32_bin_path)
|
| 96 |
+
|
| 97 |
+
for tensor in model.graph.initializer:
|
| 98 |
+
if tensor.name in index_table:
|
| 99 |
+
tensor.ClearField('raw_data')
|
| 100 |
+
tensor.data_location = onnx.TensorProto.EXTERNAL
|
| 101 |
+
info = index_table[tensor.name]
|
| 102 |
+
del tensor.external_data[:]
|
| 103 |
+
keys = ["location", "offset", "length"]
|
| 104 |
+
values = [reconstructed_bin_filename, str(info['offset']), str(info['length'])]
|
| 105 |
+
|
| 106 |
+
for k, v in zip(keys, values):
|
| 107 |
+
entry = tensor.external_data.add()
|
| 108 |
+
entry.key = k
|
| 109 |
+
entry.value = v
|
| 110 |
+
|
| 111 |
+
onnx.save(model, new_model)
|
| 112 |
+
|
| 113 |
+
@staticmethod
|
| 114 |
+
def step3_reconstruct_fp32_bin_from_fp16(fp16_bin_path: str, output_fp32_bin_path: str):
|
| 115 |
+
"""
|
| 116 |
+
(3) 静态工具函数:从半精度 .bin 文件还原出全精度 .bin 文件。
|
| 117 |
+
"""
|
| 118 |
+
fp16_array = np.fromfile(fp16_bin_path, dtype=np.float16)
|
| 119 |
+
fp32_array = fp16_array.astype(np.float32)
|
| 120 |
+
fp32_array.tofile(output_fp32_bin_path)
|
| 121 |
+
|
| 122 |
+
def run_full_process(self):
|
| 123 |
+
self.step1_create_fp16_bin_with_key_mapping()
|
| 124 |
+
self.step2_relink_onnx_for_fp32(self.stage_decoder_onnx_path, self.relinked_stage_decoder_path)
|
| 125 |
+
self.step2_relink_onnx_for_fp32(self.first_stage_decoder_onnx_path, self.relinked_first_stage_decoder_path)
|
genie_tts/Converter/v2/VITSConverter.py
CHANGED
|
@@ -1,129 +1,129 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import onnx
|
| 3 |
-
import numpy as np
|
| 4 |
-
import json
|
| 5 |
-
import os
|
| 6 |
-
from collections import OrderedDict
|
| 7 |
-
|
| 8 |
-
from ..load_state_dict import load_sovits_model
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
class VITSConverter:
|
| 12 |
-
"""
|
| 13 |
-
一个转换器,用于从 PyTorch 模型创建:
|
| 14 |
-
1. 一个用于分发的半精度 (fp16) .bin 权重文件。
|
| 15 |
-
2. 一个与全精度 (fp32) 布局兼容的 ONNX 模型。
|
| 16 |
-
3. 一个可以将 fp16 .bin 文件还原为 fp32 .bin 的工具函数。
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
def __init__(self,
|
| 20 |
-
torch_pth_path: str,
|
| 21 |
-
vits_onnx_path: str,
|
| 22 |
-
key_list_file: str,
|
| 23 |
-
output_dir: str,
|
| 24 |
-
cache_dir: str,
|
| 25 |
-
):
|
| 26 |
-
self.torch_pth_path: str = torch_pth_path
|
| 27 |
-
self.vits_onnx_path: str = vits_onnx_path
|
| 28 |
-
self.key_list_file: str = key_list_file
|
| 29 |
-
self.output_dir: str = output_dir
|
| 30 |
-
self.cache_dir: str = cache_dir
|
| 31 |
-
# 定义输出文件路径
|
| 32 |
-
self.fp16_bin_path: str = os.path.join(self.output_dir, "vits_fp16.bin")
|
| 33 |
-
self.index_table_path: str = os.path.join(self.cache_dir, "vits_weights_index_fp32.json")
|
| 34 |
-
self.relinked_fp32_onnx_path: str = os.path.join(self.output_dir, "vits_fp32.onnx")
|
| 35 |
-
self.reconstructed_fp32_bin_path: str = os.path.join(self.output_dir, "vits_fp32.bin")
|
| 36 |
-
|
| 37 |
-
# 确保输出目录存在
|
| 38 |
-
os.makedirs(self.cache_dir, exist_ok=True)
|
| 39 |
-
os.makedirs(self.output_dir, exist_ok=True)
|
| 40 |
-
|
| 41 |
-
if not os.path.exists(self.key_list_file):
|
| 42 |
-
raise FileNotFoundError(f"Error: Key list file not found! Path: {self.key_list_file}")
|
| 43 |
-
|
| 44 |
-
def step1_create_fp16_bin_and_fp32_index(self):
|
| 45 |
-
"""
|
| 46 |
-
(1) 创建一个半精度 (fp16) 的 .bin 文件,但生成一个
|
| 47 |
-
描述全精度 (fp32) 布局的索引表。
|
| 48 |
-
"""
|
| 49 |
-
# 加载 key 列表
|
| 50 |
-
with open(self.key_list_file, 'r') as f:
|
| 51 |
-
onnx_keys = [line.strip() for line in f.readlines()]
|
| 52 |
-
|
| 53 |
-
# 加载 PyTorch 模型权重
|
| 54 |
-
torch_state_dict = load_sovits_model(self.torch_pth_path)['weight']
|
| 55 |
-
|
| 56 |
-
index_table = OrderedDict()
|
| 57 |
-
current_fp32_offset = 0
|
| 58 |
-
|
| 59 |
-
with open(self.fp16_bin_path, 'wb') as f_bin:
|
| 60 |
-
for onnx_key in onnx_keys:
|
| 61 |
-
torch_key = onnx_key[len("vq_model."):] if onnx_key.startswith("vq_model.") else onnx_key
|
| 62 |
-
|
| 63 |
-
torch_tensor = torch_state_dict.get(torch_key)
|
| 64 |
-
if torch_tensor is None:
|
| 65 |
-
raise ValueError(f"❌ Critical error: Key '{torch_key}' not found in the PyTorch weights")
|
| 66 |
-
|
| 67 |
-
# 转换为 fp16 并写入文件
|
| 68 |
-
torch_tensor_fp16 = torch_tensor.to(torch.float16)
|
| 69 |
-
numpy_array_fp16 = torch_tensor_fp16.cpu().numpy()
|
| 70 |
-
tensor_bytes_fp16 = numpy_array_fp16.tobytes()
|
| 71 |
-
f_bin.write(tensor_bytes_fp16)
|
| 72 |
-
tensor_length_fp32 = len(tensor_bytes_fp16) * 2
|
| 73 |
-
index_table[onnx_key] = {
|
| 74 |
-
'offset': current_fp32_offset,
|
| 75 |
-
'length': tensor_length_fp32
|
| 76 |
-
}
|
| 77 |
-
current_fp32_offset += tensor_length_fp32
|
| 78 |
-
|
| 79 |
-
# 保存描述 fp32 布局的索引表
|
| 80 |
-
with open(self.index_table_path, 'w') as f_json:
|
| 81 |
-
json.dump(index_table, f_json, indent=4) # type: ignore
|
| 82 |
-
|
| 83 |
-
def step2_relink_onnx_for_fp32(self):
|
| 84 |
-
"""
|
| 85 |
-
(2) 根据 fp32 索引表,修改 ONNX 模型,使其链接到一个
|
| 86 |
-
未来的、全精度的 .bin 文件。
|
| 87 |
-
"""
|
| 88 |
-
# 加载描述 fp32 布局的索引表
|
| 89 |
-
with open(self.index_table_path, 'r') as f:
|
| 90 |
-
index_table = json.load(f)
|
| 91 |
-
|
| 92 |
-
model = onnx.load_model(self.vits_onnx_path, load_external_data=False)
|
| 93 |
-
reconstructed_bin_filename = os.path.basename(self.reconstructed_fp32_bin_path)
|
| 94 |
-
|
| 95 |
-
for tensor in model.graph.initializer:
|
| 96 |
-
if tensor.name in index_table:
|
| 97 |
-
tensor.ClearField('raw_data')
|
| 98 |
-
tensor.data_location = onnx.TensorProto.EXTERNAL
|
| 99 |
-
info = index_table[tensor.name]
|
| 100 |
-
|
| 101 |
-
del tensor.external_data[:]
|
| 102 |
-
|
| 103 |
-
keys = ["location", "offset", "length"]
|
| 104 |
-
values = [reconstructed_bin_filename, str(info['offset']), str(info['length'])]
|
| 105 |
-
|
| 106 |
-
for k, v in zip(keys, values):
|
| 107 |
-
entry = tensor.external_data.add()
|
| 108 |
-
entry.key = k
|
| 109 |
-
entry.value = v
|
| 110 |
-
|
| 111 |
-
# 保存修改后的、链接到 fp32 权重的 ONNX 模型
|
| 112 |
-
onnx.save(model, self.relinked_fp32_onnx_path)
|
| 113 |
-
|
| 114 |
-
@staticmethod
|
| 115 |
-
def step3_reconstruct_fp32_bin_from_fp16(fp16_bin_path: str, output_fp32_bin_path: str):
|
| 116 |
-
"""
|
| 117 |
-
(3) 静态工具函数:从半精度 .bin 文件还原出全精度 .bin 文件。
|
| 118 |
-
|
| 119 |
-
Args:
|
| 120 |
-
fp16_bin_path (str): 输入的半精度 .bin 文件路径。
|
| 121 |
-
output_fp32_bin_path (str): 输出的全精度 .bin 文件路径。
|
| 122 |
-
"""
|
| 123 |
-
fp16_array = np.fromfile(fp16_bin_path, dtype=np.float16)
|
| 124 |
-
fp32_array = fp16_array.astype(np.float32)
|
| 125 |
-
fp32_array.tofile(output_fp32_bin_path)
|
| 126 |
-
|
| 127 |
-
def run_full_process(self):
|
| 128 |
-
self.step1_create_fp16_bin_and_fp32_index()
|
| 129 |
-
self.step2_relink_onnx_for_fp32()
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import onnx
|
| 3 |
+
import numpy as np
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from collections import OrderedDict
|
| 7 |
+
|
| 8 |
+
from ..load_state_dict import load_sovits_model
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class VITSConverter:
|
| 12 |
+
"""
|
| 13 |
+
一个转换器,用于从 PyTorch 模型创建:
|
| 14 |
+
1. 一个用于分发的半精度 (fp16) .bin 权重文件。
|
| 15 |
+
2. 一个与全精度 (fp32) 布局兼容的 ONNX 模型。
|
| 16 |
+
3. 一个可以将 fp16 .bin 文件还原为 fp32 .bin 的工具函数。
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
def __init__(self,
|
| 20 |
+
torch_pth_path: str,
|
| 21 |
+
vits_onnx_path: str,
|
| 22 |
+
key_list_file: str,
|
| 23 |
+
output_dir: str,
|
| 24 |
+
cache_dir: str,
|
| 25 |
+
):
|
| 26 |
+
self.torch_pth_path: str = torch_pth_path
|
| 27 |
+
self.vits_onnx_path: str = vits_onnx_path
|
| 28 |
+
self.key_list_file: str = key_list_file
|
| 29 |
+
self.output_dir: str = output_dir
|
| 30 |
+
self.cache_dir: str = cache_dir
|
| 31 |
+
# 定义输出文件路径
|
| 32 |
+
self.fp16_bin_path: str = os.path.join(self.output_dir, "vits_fp16.bin")
|
| 33 |
+
self.index_table_path: str = os.path.join(self.cache_dir, "vits_weights_index_fp32.json")
|
| 34 |
+
self.relinked_fp32_onnx_path: str = os.path.join(self.output_dir, "vits_fp32.onnx")
|
| 35 |
+
self.reconstructed_fp32_bin_path: str = os.path.join(self.output_dir, "vits_fp32.bin")
|
| 36 |
+
|
| 37 |
+
# 确保输出目录存在
|
| 38 |
+
os.makedirs(self.cache_dir, exist_ok=True)
|
| 39 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
| 40 |
+
|
| 41 |
+
if not os.path.exists(self.key_list_file):
|
| 42 |
+
raise FileNotFoundError(f"Error: Key list file not found! Path: {self.key_list_file}")
|
| 43 |
+
|
| 44 |
+
def step1_create_fp16_bin_and_fp32_index(self):
|
| 45 |
+
"""
|
| 46 |
+
(1) 创建一个半精度 (fp16) 的 .bin 文件,但生成一个
|
| 47 |
+
描述全精度 (fp32) 布局的索引表。
|
| 48 |
+
"""
|
| 49 |
+
# 加载 key 列表
|
| 50 |
+
with open(self.key_list_file, 'r') as f:
|
| 51 |
+
onnx_keys = [line.strip() for line in f.readlines()]
|
| 52 |
+
|
| 53 |
+
# 加载 PyTorch 模型权重
|
| 54 |
+
torch_state_dict = load_sovits_model(self.torch_pth_path)['weight']
|
| 55 |
+
|
| 56 |
+
index_table = OrderedDict()
|
| 57 |
+
current_fp32_offset = 0
|
| 58 |
+
|
| 59 |
+
with open(self.fp16_bin_path, 'wb') as f_bin:
|
| 60 |
+
for onnx_key in onnx_keys:
|
| 61 |
+
torch_key = onnx_key[len("vq_model."):] if onnx_key.startswith("vq_model.") else onnx_key
|
| 62 |
+
|
| 63 |
+
torch_tensor = torch_state_dict.get(torch_key)
|
| 64 |
+
if torch_tensor is None:
|
| 65 |
+
raise ValueError(f"❌ Critical error: Key '{torch_key}' not found in the PyTorch weights")
|
| 66 |
+
|
| 67 |
+
# 转换为 fp16 并写入文件
|
| 68 |
+
torch_tensor_fp16 = torch_tensor.to(torch.float16)
|
| 69 |
+
numpy_array_fp16 = torch_tensor_fp16.cpu().numpy()
|
| 70 |
+
tensor_bytes_fp16 = numpy_array_fp16.tobytes()
|
| 71 |
+
f_bin.write(tensor_bytes_fp16)
|
| 72 |
+
tensor_length_fp32 = len(tensor_bytes_fp16) * 2
|
| 73 |
+
index_table[onnx_key] = {
|
| 74 |
+
'offset': current_fp32_offset,
|
| 75 |
+
'length': tensor_length_fp32
|
| 76 |
+
}
|
| 77 |
+
current_fp32_offset += tensor_length_fp32
|
| 78 |
+
|
| 79 |
+
# 保存描述 fp32 布局的索引表
|
| 80 |
+
with open(self.index_table_path, 'w') as f_json:
|
| 81 |
+
json.dump(index_table, f_json, indent=4) # type: ignore
|
| 82 |
+
|
| 83 |
+
def step2_relink_onnx_for_fp32(self):
|
| 84 |
+
"""
|
| 85 |
+
(2) 根据 fp32 索引表,修改 ONNX 模型,使其链接到一个
|
| 86 |
+
未来的、全精度的 .bin 文件。
|
| 87 |
+
"""
|
| 88 |
+
# 加载描述 fp32 布局的索引表
|
| 89 |
+
with open(self.index_table_path, 'r') as f:
|
| 90 |
+
index_table = json.load(f)
|
| 91 |
+
|
| 92 |
+
model = onnx.load_model(self.vits_onnx_path, load_external_data=False)
|
| 93 |
+
reconstructed_bin_filename = os.path.basename(self.reconstructed_fp32_bin_path)
|
| 94 |
+
|
| 95 |
+
for tensor in model.graph.initializer:
|
| 96 |
+
if tensor.name in index_table:
|
| 97 |
+
tensor.ClearField('raw_data')
|
| 98 |
+
tensor.data_location = onnx.TensorProto.EXTERNAL
|
| 99 |
+
info = index_table[tensor.name]
|
| 100 |
+
|
| 101 |
+
del tensor.external_data[:]
|
| 102 |
+
|
| 103 |
+
keys = ["location", "offset", "length"]
|
| 104 |
+
values = [reconstructed_bin_filename, str(info['offset']), str(info['length'])]
|
| 105 |
+
|
| 106 |
+
for k, v in zip(keys, values):
|
| 107 |
+
entry = tensor.external_data.add()
|
| 108 |
+
entry.key = k
|
| 109 |
+
entry.value = v
|
| 110 |
+
|
| 111 |
+
# 保存修改后的、链接到 fp32 权重的 ONNX 模型
|
| 112 |
+
onnx.save(model, self.relinked_fp32_onnx_path)
|
| 113 |
+
|
| 114 |
+
@staticmethod
|
| 115 |
+
def step3_reconstruct_fp32_bin_from_fp16(fp16_bin_path: str, output_fp32_bin_path: str):
|
| 116 |
+
"""
|
| 117 |
+
(3) 静态工具函数:从半精度 .bin 文件还原出全精度 .bin 文件。
|
| 118 |
+
|
| 119 |
+
Args:
|
| 120 |
+
fp16_bin_path (str): 输入的半精度 .bin 文件路径。
|
| 121 |
+
output_fp32_bin_path (str): 输出的全精度 .bin 文件路径。
|
| 122 |
+
"""
|
| 123 |
+
fp16_array = np.fromfile(fp16_bin_path, dtype=np.float16)
|
| 124 |
+
fp32_array = fp16_array.astype(np.float32)
|
| 125 |
+
fp32_array.tofile(output_fp32_bin_path)
|
| 126 |
+
|
| 127 |
+
def run_full_process(self):
|
| 128 |
+
self.step1_create_fp16_bin_and_fp32_index()
|
| 129 |
+
self.step2_relink_onnx_for_fp32()
|
genie_tts/Converter/v2/__pycache__/Converter.cpython-311.pyc
ADDED
|
Binary file (7.94 kB). View file
|
|
|
genie_tts/Converter/v2/__pycache__/EncoderConverter.cpython-311.pyc
ADDED
|
Binary file (5.57 kB). View file
|
|
|
genie_tts/Converter/v2/__pycache__/T2SConverter.cpython-311.pyc
ADDED
|
Binary file (9.11 kB). View file
|
|
|
genie_tts/Converter/v2/__pycache__/VITSConverter.cpython-311.pyc
ADDED
|
Binary file (8.21 kB). View file
|
|
|
genie_tts/Converter/v2/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (191 Bytes). View file
|
|
|
genie_tts/Converter/v2ProPlus/Converter.py
CHANGED
|
@@ -1,89 +1,89 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
import traceback
|
| 3 |
-
import os
|
| 4 |
-
import contextlib
|
| 5 |
-
import importlib.resources
|
| 6 |
-
|
| 7 |
-
from ...Utils.Constants import PACKAGE_NAME
|
| 8 |
-
from ..v2.VITSConverter import VITSConverter
|
| 9 |
-
from ..v2.T2SConverter import T2SModelConverter
|
| 10 |
-
from ..v2.EncoderConverter import EncoderConverter
|
| 11 |
-
from ..v2.Converter import (ENCODER_RESOURCE_PATH, STAGE_DECODER_RESOURCE_PATH,
|
| 12 |
-
FIRST_STAGE_DECODER_RESOURCE_PATH, T2S_KEYS_RESOURCE_PATH, CACHE_DIR, remove_folder)
|
| 13 |
-
from .PromptEncoderConverter import PromptEncoderConverter
|
| 14 |
-
|
| 15 |
-
logger = logging.getLogger()
|
| 16 |
-
|
| 17 |
-
# 使用 V2 ProPlus 的文件。
|
| 18 |
-
VITS_RESOURCE_PATH = "Data/v2ProPlus/Models/vits_fp32.onnx"
|
| 19 |
-
PROMPT_ENCODER_RESOURCE_PATH = "Data/v2ProPlus/Models/prompt_encoder_fp32.onnx"
|
| 20 |
-
VITS_KEYS_RESOURCE_PATH = "Data/v2ProPlus/Keys/vits_weights.txt"
|
| 21 |
-
PROMPT_ENCODER_KEYS_RESOURCE_PATH = "Data/v2ProPlus/Keys/prompt_encoder_weights.txt"
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
def convert(torch_ckpt_path: str, torch_pth_path: str, output_dir: str) -> None:
|
| 25 |
-
# 确保缓存和输出目录存在
|
| 26 |
-
os.makedirs(CACHE_DIR, exist_ok=True)
|
| 27 |
-
os.makedirs(output_dir, exist_ok=True)
|
| 28 |
-
|
| 29 |
-
if len(os.listdir(output_dir)) > 0:
|
| 30 |
-
logger.warning(f"The output directory {output_dir} is not empty!")
|
| 31 |
-
|
| 32 |
-
with contextlib.ExitStack() as stack:
|
| 33 |
-
files = importlib.resources.files(PACKAGE_NAME)
|
| 34 |
-
|
| 35 |
-
def enter(p: str) -> str:
|
| 36 |
-
return str(stack.enter_context(importlib.resources.as_file(files.joinpath(p))))
|
| 37 |
-
|
| 38 |
-
encoder_onnx_path = enter(ENCODER_RESOURCE_PATH)
|
| 39 |
-
stage_decoder_path = enter(STAGE_DECODER_RESOURCE_PATH)
|
| 40 |
-
first_stage_decoder_path = enter(FIRST_STAGE_DECODER_RESOURCE_PATH)
|
| 41 |
-
vits_onnx_path = enter(VITS_RESOURCE_PATH)
|
| 42 |
-
t2s_keys_path = enter(T2S_KEYS_RESOURCE_PATH)
|
| 43 |
-
vits_keys_path = enter(VITS_KEYS_RESOURCE_PATH)
|
| 44 |
-
prompt_encoder_path = enter(PROMPT_ENCODER_RESOURCE_PATH)
|
| 45 |
-
prompt_encoder_keys_path = enter(PROMPT_ENCODER_KEYS_RESOURCE_PATH)
|
| 46 |
-
|
| 47 |
-
converter_1 = T2SModelConverter(
|
| 48 |
-
torch_ckpt_path=torch_ckpt_path,
|
| 49 |
-
stage_decoder_onnx_path=stage_decoder_path,
|
| 50 |
-
first_stage_decoder_onnx_path=first_stage_decoder_path,
|
| 51 |
-
key_list_file=t2s_keys_path,
|
| 52 |
-
output_dir=output_dir,
|
| 53 |
-
cache_dir=CACHE_DIR,
|
| 54 |
-
)
|
| 55 |
-
converter_2 = VITSConverter(
|
| 56 |
-
torch_pth_path=torch_pth_path,
|
| 57 |
-
vits_onnx_path=vits_onnx_path,
|
| 58 |
-
key_list_file=vits_keys_path,
|
| 59 |
-
output_dir=output_dir,
|
| 60 |
-
cache_dir=CACHE_DIR,
|
| 61 |
-
)
|
| 62 |
-
converter_3 = EncoderConverter(
|
| 63 |
-
ckpt_path=torch_ckpt_path,
|
| 64 |
-
pth_path=torch_pth_path,
|
| 65 |
-
onnx_input_path=encoder_onnx_path,
|
| 66 |
-
output_dir=output_dir,
|
| 67 |
-
)
|
| 68 |
-
converter_4 = PromptEncoderConverter(
|
| 69 |
-
torch_pth_path=torch_pth_path,
|
| 70 |
-
prompt_encoder_onnx_path=prompt_encoder_path,
|
| 71 |
-
key_list_file=prompt_encoder_keys_path,
|
| 72 |
-
output_dir=output_dir,
|
| 73 |
-
cache_dir=CACHE_DIR,
|
| 74 |
-
)
|
| 75 |
-
|
| 76 |
-
try:
|
| 77 |
-
converter_1.run_full_process()
|
| 78 |
-
converter_2.run_full_process()
|
| 79 |
-
converter_3.run_full_process()
|
| 80 |
-
converter_4.run_full_process()
|
| 81 |
-
logger.info(f"🎉 Conversion successful! Saved to: {os.path.abspath(output_dir)}\n"
|
| 82 |
-
f"- Model Type: V2ProPlus")
|
| 83 |
-
except Exception:
|
| 84 |
-
logger.error(f"❌ A critical error occurred during the conversion process")
|
| 85 |
-
logger.error(traceback.format_exc())
|
| 86 |
-
remove_folder(output_dir) # 只在失败时清理输出目录
|
| 87 |
-
finally:
|
| 88 |
-
# 无论成功还是失败,都尝试清理缓存目录
|
| 89 |
-
remove_folder(CACHE_DIR)
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import traceback
|
| 3 |
+
import os
|
| 4 |
+
import contextlib
|
| 5 |
+
import importlib.resources
|
| 6 |
+
|
| 7 |
+
from ...Utils.Constants import PACKAGE_NAME
|
| 8 |
+
from ..v2.VITSConverter import VITSConverter
|
| 9 |
+
from ..v2.T2SConverter import T2SModelConverter
|
| 10 |
+
from ..v2.EncoderConverter import EncoderConverter
|
| 11 |
+
from ..v2.Converter import (ENCODER_RESOURCE_PATH, STAGE_DECODER_RESOURCE_PATH,
|
| 12 |
+
FIRST_STAGE_DECODER_RESOURCE_PATH, T2S_KEYS_RESOURCE_PATH, CACHE_DIR, remove_folder)
|
| 13 |
+
from .PromptEncoderConverter import PromptEncoderConverter
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger()
|
| 16 |
+
|
| 17 |
+
# 使用 V2 ProPlus 的文件。
|
| 18 |
+
VITS_RESOURCE_PATH = "Data/v2ProPlus/Models/vits_fp32.onnx"
|
| 19 |
+
PROMPT_ENCODER_RESOURCE_PATH = "Data/v2ProPlus/Models/prompt_encoder_fp32.onnx"
|
| 20 |
+
VITS_KEYS_RESOURCE_PATH = "./Data/v2ProPlus/Keys/vits_weights.txt"
|
| 21 |
+
PROMPT_ENCODER_KEYS_RESOURCE_PATH = "./Data/v2ProPlus/Keys/prompt_encoder_weights.txt"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def convert(torch_ckpt_path: str, torch_pth_path: str, output_dir: str) -> None:
|
| 25 |
+
# 确保缓存和输出目录存在
|
| 26 |
+
os.makedirs(CACHE_DIR, exist_ok=True)
|
| 27 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 28 |
+
|
| 29 |
+
if len(os.listdir(output_dir)) > 0:
|
| 30 |
+
logger.warning(f"The output directory {output_dir} is not empty!")
|
| 31 |
+
|
| 32 |
+
with contextlib.ExitStack() as stack:
|
| 33 |
+
files = importlib.resources.files(PACKAGE_NAME)
|
| 34 |
+
|
| 35 |
+
def enter(p: str) -> str:
|
| 36 |
+
return str(stack.enter_context(importlib.resources.as_file(files.joinpath(p))))
|
| 37 |
+
|
| 38 |
+
encoder_onnx_path = enter(ENCODER_RESOURCE_PATH)
|
| 39 |
+
stage_decoder_path = enter(STAGE_DECODER_RESOURCE_PATH)
|
| 40 |
+
first_stage_decoder_path = enter(FIRST_STAGE_DECODER_RESOURCE_PATH)
|
| 41 |
+
vits_onnx_path = enter(VITS_RESOURCE_PATH)
|
| 42 |
+
t2s_keys_path = enter(T2S_KEYS_RESOURCE_PATH)
|
| 43 |
+
vits_keys_path = enter(VITS_KEYS_RESOURCE_PATH)
|
| 44 |
+
prompt_encoder_path = enter(PROMPT_ENCODER_RESOURCE_PATH)
|
| 45 |
+
prompt_encoder_keys_path = enter(PROMPT_ENCODER_KEYS_RESOURCE_PATH)
|
| 46 |
+
|
| 47 |
+
converter_1 = T2SModelConverter(
|
| 48 |
+
torch_ckpt_path=torch_ckpt_path,
|
| 49 |
+
stage_decoder_onnx_path=stage_decoder_path,
|
| 50 |
+
first_stage_decoder_onnx_path=first_stage_decoder_path,
|
| 51 |
+
key_list_file=t2s_keys_path,
|
| 52 |
+
output_dir=output_dir,
|
| 53 |
+
cache_dir=CACHE_DIR,
|
| 54 |
+
)
|
| 55 |
+
converter_2 = VITSConverter(
|
| 56 |
+
torch_pth_path=torch_pth_path,
|
| 57 |
+
vits_onnx_path=vits_onnx_path,
|
| 58 |
+
key_list_file=vits_keys_path,
|
| 59 |
+
output_dir=output_dir,
|
| 60 |
+
cache_dir=CACHE_DIR,
|
| 61 |
+
)
|
| 62 |
+
converter_3 = EncoderConverter(
|
| 63 |
+
ckpt_path=torch_ckpt_path,
|
| 64 |
+
pth_path=torch_pth_path,
|
| 65 |
+
onnx_input_path=encoder_onnx_path,
|
| 66 |
+
output_dir=output_dir,
|
| 67 |
+
)
|
| 68 |
+
converter_4 = PromptEncoderConverter(
|
| 69 |
+
torch_pth_path=torch_pth_path,
|
| 70 |
+
prompt_encoder_onnx_path=prompt_encoder_path,
|
| 71 |
+
key_list_file=prompt_encoder_keys_path,
|
| 72 |
+
output_dir=output_dir,
|
| 73 |
+
cache_dir=CACHE_DIR,
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
try:
|
| 77 |
+
converter_1.run_full_process()
|
| 78 |
+
converter_2.run_full_process()
|
| 79 |
+
converter_3.run_full_process()
|
| 80 |
+
converter_4.run_full_process()
|
| 81 |
+
logger.info(f"🎉 Conversion successful! Saved to: {os.path.abspath(output_dir)}\n"
|
| 82 |
+
f"- Model Type: V2ProPlus")
|
| 83 |
+
except Exception:
|
| 84 |
+
logger.error(f"❌ A critical error occurred during the conversion process")
|
| 85 |
+
logger.error(traceback.format_exc())
|
| 86 |
+
remove_folder(output_dir) # 只在失败时清理输出目录
|
| 87 |
+
finally:
|
| 88 |
+
# 无论成功还是失败,都尝试清理缓存目录
|
| 89 |
+
remove_folder(CACHE_DIR)
|
genie_tts/Converter/v2ProPlus/PromptEncoderConverter.py
CHANGED
|
@@ -1,128 +1,128 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import onnx
|
| 3 |
-
import json
|
| 4 |
-
import os
|
| 5 |
-
from collections import OrderedDict
|
| 6 |
-
|
| 7 |
-
from ..load_state_dict import load_sovits_model
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
class PromptEncoderConverter:
|
| 11 |
-
"""
|
| 12 |
-
一个转换器,用于从 PyTorch 模型创建:
|
| 13 |
-
1. 一个用于分发的半精度 (fp16) .bin 权重文件。
|
| 14 |
-
2. 一个与全精度 (fp32) 布局兼容的 ONNX 模型。
|
| 15 |
-
3. 一个可以将 fp16 .bin 文件还原为 fp32 .bin 的工具函数。
|
| 16 |
-
"""
|
| 17 |
-
|
| 18 |
-
def __init__(self,
|
| 19 |
-
torch_pth_path: str,
|
| 20 |
-
prompt_encoder_onnx_path: str,
|
| 21 |
-
key_list_file: str,
|
| 22 |
-
output_dir: str,
|
| 23 |
-
cache_dir: str,
|
| 24 |
-
):
|
| 25 |
-
self.torch_pth_path: str = torch_pth_path
|
| 26 |
-
self.vits_onnx_path: str = prompt_encoder_onnx_path
|
| 27 |
-
self.key_list_file: str = key_list_file
|
| 28 |
-
self.output_dir: str = output_dir
|
| 29 |
-
self.cache_dir: str = cache_dir
|
| 30 |
-
# 定义输出文件路径
|
| 31 |
-
self.fp16_bin_path: str = os.path.join(self.output_dir, "prompt_encoder_fp16.bin")
|
| 32 |
-
self.index_table_path: str = os.path.join(self.cache_dir, "prompt_encoder_weights_index_fp32.json")
|
| 33 |
-
self.relinked_fp32_onnx_path: str = os.path.join(self.output_dir, "prompt_encoder_fp32.onnx")
|
| 34 |
-
self.reconstructed_fp32_bin_path: str = os.path.join(self.output_dir, "prompt_encoder_fp32.bin")
|
| 35 |
-
|
| 36 |
-
# 确保输出目录存在
|
| 37 |
-
os.makedirs(self.cache_dir, exist_ok=True)
|
| 38 |
-
os.makedirs(self.output_dir, exist_ok=True)
|
| 39 |
-
|
| 40 |
-
if not os.path.exists(self.key_list_file):
|
| 41 |
-
raise FileNotFoundError(f"错误: Key 列表文件未找到! 路径: {self.key_list_file}")
|
| 42 |
-
|
| 43 |
-
def step1_create_fp16_bin_and_fp32_index(self):
|
| 44 |
-
"""
|
| 45 |
-
(1) 创建一个半精度 (fp16) 的 .bin 文件,但生成一个
|
| 46 |
-
描述全精度 (fp32) 布局的索引表。
|
| 47 |
-
"""
|
| 48 |
-
# 加载 key 列表
|
| 49 |
-
with open(self.key_list_file, 'r') as f:
|
| 50 |
-
onnx_keys = [line.strip() for line in f.readlines()]
|
| 51 |
-
|
| 52 |
-
# 加载 PyTorch 模型权重
|
| 53 |
-
torch_state_dict = load_sovits_model(self.torch_pth_path)['weight']
|
| 54 |
-
|
| 55 |
-
index_table = OrderedDict()
|
| 56 |
-
# 这个偏移量将按照 fp32 的大小进行累加
|
| 57 |
-
current_fp32_offset = 0
|
| 58 |
-
|
| 59 |
-
with open(self.fp16_bin_path, 'wb') as f_bin:
|
| 60 |
-
for onnx_key in onnx_keys:
|
| 61 |
-
torch_key = onnx_key[len("vq_model."):] if onnx_key.startswith("vq_model.") else onnx_key
|
| 62 |
-
|
| 63 |
-
torch_tensor = torch_state_dict.get(torch_key)
|
| 64 |
-
if torch_tensor is None:
|
| 65 |
-
raise ValueError(f"❌ 严重错误: 在 PyTorch 权重中找不到 Key '{torch_key}'")
|
| 66 |
-
|
| 67 |
-
# 转换为 fp16 并写入文件
|
| 68 |
-
torch_tensor_fp16 = torch_tensor.to(torch.float16)
|
| 69 |
-
numpy_array_fp16 = torch_tensor_fp16.cpu().numpy()
|
| 70 |
-
tensor_bytes_fp16 = numpy_array_fp16.tobytes()
|
| 71 |
-
f_bin.write(tensor_bytes_fp16)
|
| 72 |
-
|
| 73 |
-
# 关键步骤:计算并记录 fp32 的长度和偏移量
|
| 74 |
-
# 一个 fp32 = 4 字节, 一个 fp16 = 2 字节。所以 fp32 长度是 fp16 的两倍。
|
| 75 |
-
tensor_length_fp32 = len(tensor_bytes_fp16) * 2
|
| 76 |
-
|
| 77 |
-
index_table[onnx_key] = {
|
| 78 |
-
'offset': current_fp32_offset,
|
| 79 |
-
'length': tensor_length_fp32
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
# 偏移量也按照 fp32 的长度进行累加
|
| 83 |
-
current_fp32_offset += tensor_length_fp32
|
| 84 |
-
|
| 85 |
-
# 保存描述 fp32 布局的索引表
|
| 86 |
-
with open(self.index_table_path, 'w') as f_json:
|
| 87 |
-
json.dump(index_table, f_json, indent=4) # type: ignore
|
| 88 |
-
|
| 89 |
-
def step2_relink_onnx_for_fp32(self):
|
| 90 |
-
"""
|
| 91 |
-
(2) 根据 fp32 索引表,修改 ONNX 模型,使其链接到一个
|
| 92 |
-
未来的、全精度的 .bin 文件。
|
| 93 |
-
"""
|
| 94 |
-
# 加载描述 fp32 布局的索引表
|
| 95 |
-
with open(self.index_table_path, 'r') as f:
|
| 96 |
-
index_table = json.load(f)
|
| 97 |
-
|
| 98 |
-
# 加载 ONNX 模型结构
|
| 99 |
-
model = onnx.load_model(self.vits_onnx_path, load_external_data=False)
|
| 100 |
-
|
| 101 |
-
# 这个 ONNX 模型将要链接的 .bin 文件名
|
| 102 |
-
reconstructed_bin_filename = os.path.basename(self.reconstructed_fp32_bin_path)
|
| 103 |
-
|
| 104 |
-
for tensor in model.graph.initializer:
|
| 105 |
-
if tensor.name in index_table:
|
| 106 |
-
tensor.ClearField('raw_data')
|
| 107 |
-
tensor.data_location = onnx.TensorProto.EXTERNAL
|
| 108 |
-
info = index_table[tensor.name]
|
| 109 |
-
|
| 110 |
-
del tensor.external_data[:]
|
| 111 |
-
|
| 112 |
-
keys = ["location", "offset", "length"]
|
| 113 |
-
values = [reconstructed_bin_filename, str(info['offset']), str(info['length'])]
|
| 114 |
-
|
| 115 |
-
for k, v in zip(keys, values):
|
| 116 |
-
entry = tensor.external_data.add()
|
| 117 |
-
entry.key = k
|
| 118 |
-
entry.value = v
|
| 119 |
-
|
| 120 |
-
# 保存修改后的、链接到 fp32 权重的 ONNX 模型
|
| 121 |
-
onnx.save(model, self.relinked_fp32_onnx_path)
|
| 122 |
-
|
| 123 |
-
def run_full_process(self):
|
| 124 |
-
"""
|
| 125 |
-
按顺序执行核心的转换步骤 (1 和 2)。
|
| 126 |
-
"""
|
| 127 |
-
self.step1_create_fp16_bin_and_fp32_index()
|
| 128 |
-
self.step2_relink_onnx_for_fp32()
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import onnx
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from collections import OrderedDict
|
| 6 |
+
|
| 7 |
+
from ..load_state_dict import load_sovits_model
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class PromptEncoderConverter:
|
| 11 |
+
"""
|
| 12 |
+
一个转换器,用于从 PyTorch 模型创建:
|
| 13 |
+
1. 一个用于分发的半精度 (fp16) .bin 权重文件。
|
| 14 |
+
2. 一个与全精度 (fp32) 布局兼容的 ONNX 模型。
|
| 15 |
+
3. 一个可以将 fp16 .bin 文件还原为 fp32 .bin 的工具函数。
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def __init__(self,
|
| 19 |
+
torch_pth_path: str,
|
| 20 |
+
prompt_encoder_onnx_path: str,
|
| 21 |
+
key_list_file: str,
|
| 22 |
+
output_dir: str,
|
| 23 |
+
cache_dir: str,
|
| 24 |
+
):
|
| 25 |
+
self.torch_pth_path: str = torch_pth_path
|
| 26 |
+
self.vits_onnx_path: str = prompt_encoder_onnx_path
|
| 27 |
+
self.key_list_file: str = key_list_file
|
| 28 |
+
self.output_dir: str = output_dir
|
| 29 |
+
self.cache_dir: str = cache_dir
|
| 30 |
+
# 定义输出文件路径
|
| 31 |
+
self.fp16_bin_path: str = os.path.join(self.output_dir, "prompt_encoder_fp16.bin")
|
| 32 |
+
self.index_table_path: str = os.path.join(self.cache_dir, "prompt_encoder_weights_index_fp32.json")
|
| 33 |
+
self.relinked_fp32_onnx_path: str = os.path.join(self.output_dir, "prompt_encoder_fp32.onnx")
|
| 34 |
+
self.reconstructed_fp32_bin_path: str = os.path.join(self.output_dir, "prompt_encoder_fp32.bin")
|
| 35 |
+
|
| 36 |
+
# 确保输出目录存在
|
| 37 |
+
os.makedirs(self.cache_dir, exist_ok=True)
|
| 38 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
| 39 |
+
|
| 40 |
+
if not os.path.exists(self.key_list_file):
|
| 41 |
+
raise FileNotFoundError(f"错误: Key 列表文件未找到! 路径: {self.key_list_file}")
|
| 42 |
+
|
| 43 |
+
def step1_create_fp16_bin_and_fp32_index(self):
|
| 44 |
+
"""
|
| 45 |
+
(1) 创建一个半精度 (fp16) 的 .bin 文件,但生成一个
|
| 46 |
+
描述全精度 (fp32) 布局的索引表。
|
| 47 |
+
"""
|
| 48 |
+
# 加载 key 列表
|
| 49 |
+
with open(self.key_list_file, 'r') as f:
|
| 50 |
+
onnx_keys = [line.strip() for line in f.readlines()]
|
| 51 |
+
|
| 52 |
+
# 加载 PyTorch 模型权重
|
| 53 |
+
torch_state_dict = load_sovits_model(self.torch_pth_path)['weight']
|
| 54 |
+
|
| 55 |
+
index_table = OrderedDict()
|
| 56 |
+
# 这个偏移量将按照 fp32 的大小进行累加
|
| 57 |
+
current_fp32_offset = 0
|
| 58 |
+
|
| 59 |
+
with open(self.fp16_bin_path, 'wb') as f_bin:
|
| 60 |
+
for onnx_key in onnx_keys:
|
| 61 |
+
torch_key = onnx_key[len("vq_model."):] if onnx_key.startswith("vq_model.") else onnx_key
|
| 62 |
+
|
| 63 |
+
torch_tensor = torch_state_dict.get(torch_key)
|
| 64 |
+
if torch_tensor is None:
|
| 65 |
+
raise ValueError(f"❌ 严重错误: 在 PyTorch 权重中找不到 Key '{torch_key}'")
|
| 66 |
+
|
| 67 |
+
# 转换为 fp16 并写入文件
|
| 68 |
+
torch_tensor_fp16 = torch_tensor.to(torch.float16)
|
| 69 |
+
numpy_array_fp16 = torch_tensor_fp16.cpu().numpy()
|
| 70 |
+
tensor_bytes_fp16 = numpy_array_fp16.tobytes()
|
| 71 |
+
f_bin.write(tensor_bytes_fp16)
|
| 72 |
+
|
| 73 |
+
# 关键步骤:计算并记录 fp32 的长度和偏移量
|
| 74 |
+
# 一个 fp32 = 4 字节, 一个 fp16 = 2 字节。所以 fp32 长度是 fp16 的两倍。
|
| 75 |
+
tensor_length_fp32 = len(tensor_bytes_fp16) * 2
|
| 76 |
+
|
| 77 |
+
index_table[onnx_key] = {
|
| 78 |
+
'offset': current_fp32_offset,
|
| 79 |
+
'length': tensor_length_fp32
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
# 偏移量也按照 fp32 的长度进行累加
|
| 83 |
+
current_fp32_offset += tensor_length_fp32
|
| 84 |
+
|
| 85 |
+
# 保存描述 fp32 布局的索引表
|
| 86 |
+
with open(self.index_table_path, 'w') as f_json:
|
| 87 |
+
json.dump(index_table, f_json, indent=4) # type: ignore
|
| 88 |
+
|
| 89 |
+
def step2_relink_onnx_for_fp32(self):
|
| 90 |
+
"""
|
| 91 |
+
(2) 根据 fp32 索引表,修改 ONNX 模型,使其链接到一个
|
| 92 |
+
未来的、全精度的 .bin 文件。
|
| 93 |
+
"""
|
| 94 |
+
# 加载描述 fp32 布局的索引表
|
| 95 |
+
with open(self.index_table_path, 'r') as f:
|
| 96 |
+
index_table = json.load(f)
|
| 97 |
+
|
| 98 |
+
# 加载 ONNX 模型结构
|
| 99 |
+
model = onnx.load_model(self.vits_onnx_path, load_external_data=False)
|
| 100 |
+
|
| 101 |
+
# 这个 ONNX 模型将要链接的 .bin 文件名
|
| 102 |
+
reconstructed_bin_filename = os.path.basename(self.reconstructed_fp32_bin_path)
|
| 103 |
+
|
| 104 |
+
for tensor in model.graph.initializer:
|
| 105 |
+
if tensor.name in index_table:
|
| 106 |
+
tensor.ClearField('raw_data')
|
| 107 |
+
tensor.data_location = onnx.TensorProto.EXTERNAL
|
| 108 |
+
info = index_table[tensor.name]
|
| 109 |
+
|
| 110 |
+
del tensor.external_data[:]
|
| 111 |
+
|
| 112 |
+
keys = ["location", "offset", "length"]
|
| 113 |
+
values = [reconstructed_bin_filename, str(info['offset']), str(info['length'])]
|
| 114 |
+
|
| 115 |
+
for k, v in zip(keys, values):
|
| 116 |
+
entry = tensor.external_data.add()
|
| 117 |
+
entry.key = k
|
| 118 |
+
entry.value = v
|
| 119 |
+
|
| 120 |
+
# 保存修改后的、链接到 fp32 权重的 ONNX 模型
|
| 121 |
+
onnx.save(model, self.relinked_fp32_onnx_path)
|
| 122 |
+
|
| 123 |
+
def run_full_process(self):
|
| 124 |
+
"""
|
| 125 |
+
按顺序执行核心的转换步骤 (1 和 2)。
|
| 126 |
+
"""
|
| 127 |
+
self.step1_create_fp16_bin_and_fp32_index()
|
| 128 |
+
self.step2_relink_onnx_for_fp32()
|
genie_tts/Converter/v2ProPlus/__pycache__/Converter.cpython-311.pyc
ADDED
|
Binary file (5.34 kB). View file
|
|
|
genie_tts/Converter/v2ProPlus/__pycache__/PromptEncoderConverter.cpython-311.pyc
ADDED
|
Binary file (7.51 kB). View file
|
|
|
genie_tts/Core/Resources.py
CHANGED
|
@@ -1,76 +1,76 @@
|
|
| 1 |
-
import os
|
| 2 |
-
from huggingface_hub import snapshot_download
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def download_genie_data() -> None:
|
| 6 |
-
print(f"🚀 Starting download Genie-TTS resources… This may take a few moments. ⏳")
|
| 7 |
-
snapshot_download(
|
| 8 |
-
repo_id="High-Logic/Genie",
|
| 9 |
-
repo_type="model",
|
| 10 |
-
allow_patterns="GenieData/*",
|
| 11 |
-
local_dir=".",
|
| 12 |
-
local_dir_use_symlinks=True, # 软链接
|
| 13 |
-
)
|
| 14 |
-
print("✅ Genie-TTS resources downloaded successfully.")
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
def ensure_exists(path: str, name: str):
|
| 18 |
-
if not os.path.exists(path):
|
| 19 |
-
raise FileNotFoundError(
|
| 20 |
-
f"Required directory or file '{name}' was not found at: {path}\n"
|
| 21 |
-
f"Please download the pretrained models and place them under './GenieData', "
|
| 22 |
-
f"or set the environment variable GENIE_DATA_DIR to the correct directory."
|
| 23 |
-
)
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
"""
|
| 27 |
-
文件结构与项目 Midori 同步。
|
| 28 |
-
"""
|
| 29 |
-
|
| 30 |
-
GENIE_DATA_DIR: str = os.getenv(
|
| 31 |
-
"GENIE_DATA_DIR",
|
| 32 |
-
"./GenieData"
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
"""
|
| 36 |
-
Japanese_G2P_DIR: str = os.getenv(
|
| 37 |
-
"Japanese_G2P_DIR",
|
| 38 |
-
f"{GENIE_DATA_DIR}/G2P/JapaneseG2P"
|
| 39 |
-
)
|
| 40 |
-
"""
|
| 41 |
-
|
| 42 |
-
English_G2P_DIR: str = os.getenv(
|
| 43 |
-
"English_G2P_DIR",
|
| 44 |
-
f"{GENIE_DATA_DIR}/G2P/EnglishG2P"
|
| 45 |
-
)
|
| 46 |
-
|
| 47 |
-
Chinese_G2P_DIR: str = os.getenv(
|
| 48 |
-
"Chinese_G2P_DIR",
|
| 49 |
-
f"{GENIE_DATA_DIR}/G2P/ChineseG2P"
|
| 50 |
-
)
|
| 51 |
-
|
| 52 |
-
HUBERT_MODEL_DIR: str = os.getenv(
|
| 53 |
-
"HUBERT_MODEL_DIR",
|
| 54 |
-
f"{GENIE_DATA_DIR}/chinese-hubert-base"
|
| 55 |
-
)
|
| 56 |
-
|
| 57 |
-
SV_MODEL: str = os.getenv(
|
| 58 |
-
"SV_MODEL",
|
| 59 |
-
f"{GENIE_DATA_DIR}/speaker_encoder.onnx"
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
-
ROBERTA_MODEL_DIR: str = os.getenv(
|
| 63 |
-
"ROBERTA_MODEL_DIR",
|
| 64 |
-
f"{GENIE_DATA_DIR}/RoBERTa"
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
if not os.path.exists(GENIE_DATA_DIR):
|
| 68 |
-
print("⚠️ GenieData folder not found.")
|
| 69 |
-
choice = input("Would you like to download it automatically from HuggingFace? (y/N): ").strip().lower()
|
| 70 |
-
if choice == "y":
|
| 71 |
-
download_genie_data()
|
| 72 |
-
|
| 73 |
-
# ---- Run directory checks ----
|
| 74 |
-
ensure_exists(HUBERT_MODEL_DIR, "HUBERT_MODEL_DIR")
|
| 75 |
-
ensure_exists(SV_MODEL, "SV_MODEL")
|
| 76 |
-
# ensure_exists(ROBERTA_MODEL_DIR, "ROBERTA_MODEL_DIR")
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from huggingface_hub import snapshot_download
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def download_genie_data() -> None:
|
| 6 |
+
print(f"🚀 Starting download Genie-TTS resources… This may take a few moments. ⏳")
|
| 7 |
+
snapshot_download(
|
| 8 |
+
repo_id="High-Logic/Genie",
|
| 9 |
+
repo_type="model",
|
| 10 |
+
allow_patterns="GenieData/*",
|
| 11 |
+
local_dir=".",
|
| 12 |
+
local_dir_use_symlinks=True, # 软链接
|
| 13 |
+
)
|
| 14 |
+
print("✅ Genie-TTS resources downloaded successfully.")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def ensure_exists(path: str, name: str):
|
| 18 |
+
if not os.path.exists(path):
|
| 19 |
+
raise FileNotFoundError(
|
| 20 |
+
f"Required directory or file '{name}' was not found at: {path}\n"
|
| 21 |
+
f"Please download the pretrained models and place them under './GenieData', "
|
| 22 |
+
f"or set the environment variable GENIE_DATA_DIR to the correct directory."
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
"""
|
| 27 |
+
文件结构与项目 Midori 同步。
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
GENIE_DATA_DIR: str = os.getenv(
|
| 31 |
+
"GENIE_DATA_DIR",
|
| 32 |
+
"./GenieData"
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
"""
|
| 36 |
+
Japanese_G2P_DIR: str = os.getenv(
|
| 37 |
+
"Japanese_G2P_DIR",
|
| 38 |
+
f"{GENIE_DATA_DIR}/G2P/JapaneseG2P"
|
| 39 |
+
)
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
English_G2P_DIR: str = os.getenv(
|
| 43 |
+
"English_G2P_DIR",
|
| 44 |
+
f"{GENIE_DATA_DIR}/G2P/EnglishG2P"
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
Chinese_G2P_DIR: str = os.getenv(
|
| 48 |
+
"Chinese_G2P_DIR",
|
| 49 |
+
f"{GENIE_DATA_DIR}/G2P/ChineseG2P"
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
HUBERT_MODEL_DIR: str = os.getenv(
|
| 53 |
+
"HUBERT_MODEL_DIR",
|
| 54 |
+
f"{GENIE_DATA_DIR}/chinese-hubert-base"
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
SV_MODEL: str = os.getenv(
|
| 58 |
+
"SV_MODEL",
|
| 59 |
+
f"{GENIE_DATA_DIR}/speaker_encoder.onnx"
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
ROBERTA_MODEL_DIR: str = os.getenv(
|
| 63 |
+
"ROBERTA_MODEL_DIR",
|
| 64 |
+
f"{GENIE_DATA_DIR}/RoBERTa"
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
if not os.path.exists(GENIE_DATA_DIR):
|
| 68 |
+
print("⚠️ GenieData folder not found.")
|
| 69 |
+
choice = input("Would you like to download it automatically from HuggingFace? (y/N): ").strip().lower()
|
| 70 |
+
if choice == "y":
|
| 71 |
+
download_genie_data()
|
| 72 |
+
|
| 73 |
+
# ---- Run directory checks ----
|
| 74 |
+
ensure_exists(HUBERT_MODEL_DIR, "HUBERT_MODEL_DIR")
|
| 75 |
+
ensure_exists(SV_MODEL, "SV_MODEL")
|
| 76 |
+
# ensure_exists(ROBERTA_MODEL_DIR, "ROBERTA_MODEL_DIR")
|
genie_tts/Core/__pycache__/Inference.cpython-311.pyc
ADDED
|
Binary file (4.79 kB). View file
|
|
|
genie_tts/Core/__pycache__/Resources.cpython-311.pyc
ADDED
|
Binary file (2.89 kB). View file
|
|
|
genie_tts/Core/__pycache__/TTSPlayer.cpython-311.pyc
ADDED
|
Binary file (15 kB). View file
|
|
|
genie_tts/Core/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (183 Bytes). View file
|
|
|
genie_tts/Data/v2/Keys/t2s_onnx_keys.txt
CHANGED
|
@@ -1,291 +1,291 @@
|
|
| 1 |
-
ar_audio_embedding.word_embeddings.weight
|
| 2 |
-
ar_audio_position.alpha
|
| 3 |
-
transformer_encoder.layers.0.self_attn.in_proj_weight
|
| 4 |
-
transformer_encoder.layers.0.self_attn.in_proj_bias
|
| 5 |
-
transformer_encoder.layers.0.self_attn.out_proj.weight
|
| 6 |
-
transformer_encoder.layers.0.self_attn.out_proj.bias
|
| 7 |
-
transformer_encoder.layers.0.linear1.weight
|
| 8 |
-
transformer_encoder.layers.0.linear1.bias
|
| 9 |
-
transformer_encoder.layers.0.linear2.weight
|
| 10 |
-
transformer_encoder.layers.0.linear2.bias
|
| 11 |
-
transformer_encoder.layers.0.norm1.weight
|
| 12 |
-
transformer_encoder.layers.0.norm1.bias
|
| 13 |
-
transformer_encoder.layers.0.norm2.weight
|
| 14 |
-
transformer_encoder.layers.0.norm2.bias
|
| 15 |
-
transformer_encoder.layers.1.self_attn.in_proj_weight
|
| 16 |
-
transformer_encoder.layers.1.self_attn.in_proj_bias
|
| 17 |
-
transformer_encoder.layers.1.self_attn.out_proj.weight
|
| 18 |
-
transformer_encoder.layers.1.self_attn.out_proj.bias
|
| 19 |
-
transformer_encoder.layers.1.linear1.weight
|
| 20 |
-
transformer_encoder.layers.1.linear1.bias
|
| 21 |
-
transformer_encoder.layers.1.linear2.weight
|
| 22 |
-
transformer_encoder.layers.1.linear2.bias
|
| 23 |
-
transformer_encoder.layers.1.norm1.weight
|
| 24 |
-
transformer_encoder.layers.1.norm1.bias
|
| 25 |
-
transformer_encoder.layers.1.norm2.weight
|
| 26 |
-
transformer_encoder.layers.1.norm2.bias
|
| 27 |
-
transformer_encoder.layers.2.self_attn.in_proj_weight
|
| 28 |
-
transformer_encoder.layers.2.self_attn.in_proj_bias
|
| 29 |
-
transformer_encoder.layers.2.self_attn.out_proj.weight
|
| 30 |
-
transformer_encoder.layers.2.self_attn.out_proj.bias
|
| 31 |
-
transformer_encoder.layers.2.linear1.weight
|
| 32 |
-
transformer_encoder.layers.2.linear1.bias
|
| 33 |
-
transformer_encoder.layers.2.linear2.weight
|
| 34 |
-
transformer_encoder.layers.2.linear2.bias
|
| 35 |
-
transformer_encoder.layers.2.norm1.weight
|
| 36 |
-
transformer_encoder.layers.2.norm1.bias
|
| 37 |
-
transformer_encoder.layers.2.norm2.weight
|
| 38 |
-
transformer_encoder.layers.2.norm2.bias
|
| 39 |
-
transformer_encoder.layers.3.self_attn.in_proj_weight
|
| 40 |
-
transformer_encoder.layers.3.self_attn.in_proj_bias
|
| 41 |
-
transformer_encoder.layers.3.self_attn.out_proj.weight
|
| 42 |
-
transformer_encoder.layers.3.self_attn.out_proj.bias
|
| 43 |
-
transformer_encoder.layers.3.linear1.weight
|
| 44 |
-
transformer_encoder.layers.3.linear1.bias
|
| 45 |
-
transformer_encoder.layers.3.linear2.weight
|
| 46 |
-
transformer_encoder.layers.3.linear2.bias
|
| 47 |
-
transformer_encoder.layers.3.norm1.weight
|
| 48 |
-
transformer_encoder.layers.3.norm1.bias
|
| 49 |
-
transformer_encoder.layers.3.norm2.weight
|
| 50 |
-
transformer_encoder.layers.3.norm2.bias
|
| 51 |
-
transformer_encoder.layers.4.self_attn.in_proj_weight
|
| 52 |
-
transformer_encoder.layers.4.self_attn.in_proj_bias
|
| 53 |
-
transformer_encoder.layers.4.self_attn.out_proj.weight
|
| 54 |
-
transformer_encoder.layers.4.self_attn.out_proj.bias
|
| 55 |
-
transformer_encoder.layers.4.linear1.weight
|
| 56 |
-
transformer_encoder.layers.4.linear1.bias
|
| 57 |
-
transformer_encoder.layers.4.linear2.weight
|
| 58 |
-
transformer_encoder.layers.4.linear2.bias
|
| 59 |
-
transformer_encoder.layers.4.norm1.weight
|
| 60 |
-
transformer_encoder.layers.4.norm1.bias
|
| 61 |
-
transformer_encoder.layers.4.norm2.weight
|
| 62 |
-
transformer_encoder.layers.4.norm2.bias
|
| 63 |
-
transformer_encoder.layers.5.self_attn.in_proj_weight
|
| 64 |
-
transformer_encoder.layers.5.self_attn.in_proj_bias
|
| 65 |
-
transformer_encoder.layers.5.self_attn.out_proj.weight
|
| 66 |
-
transformer_encoder.layers.5.self_attn.out_proj.bias
|
| 67 |
-
transformer_encoder.layers.5.linear1.weight
|
| 68 |
-
transformer_encoder.layers.5.linear1.bias
|
| 69 |
-
transformer_encoder.layers.5.linear2.weight
|
| 70 |
-
transformer_encoder.layers.5.linear2.bias
|
| 71 |
-
transformer_encoder.layers.5.norm1.weight
|
| 72 |
-
transformer_encoder.layers.5.norm1.bias
|
| 73 |
-
transformer_encoder.layers.5.norm2.weight
|
| 74 |
-
transformer_encoder.layers.5.norm2.bias
|
| 75 |
-
transformer_encoder.layers.6.self_attn.in_proj_weight
|
| 76 |
-
transformer_encoder.layers.6.self_attn.in_proj_bias
|
| 77 |
-
transformer_encoder.layers.6.self_attn.out_proj.weight
|
| 78 |
-
transformer_encoder.layers.6.self_attn.out_proj.bias
|
| 79 |
-
transformer_encoder.layers.6.linear1.weight
|
| 80 |
-
transformer_encoder.layers.6.linear1.bias
|
| 81 |
-
transformer_encoder.layers.6.linear2.weight
|
| 82 |
-
transformer_encoder.layers.6.linear2.bias
|
| 83 |
-
transformer_encoder.layers.6.norm1.weight
|
| 84 |
-
transformer_encoder.layers.6.norm1.bias
|
| 85 |
-
transformer_encoder.layers.6.norm2.weight
|
| 86 |
-
transformer_encoder.layers.6.norm2.bias
|
| 87 |
-
transformer_encoder.layers.7.self_attn.in_proj_weight
|
| 88 |
-
transformer_encoder.layers.7.self_attn.in_proj_bias
|
| 89 |
-
transformer_encoder.layers.7.self_attn.out_proj.weight
|
| 90 |
-
transformer_encoder.layers.7.self_attn.out_proj.bias
|
| 91 |
-
transformer_encoder.layers.7.linear1.weight
|
| 92 |
-
transformer_encoder.layers.7.linear1.bias
|
| 93 |
-
transformer_encoder.layers.7.linear2.weight
|
| 94 |
-
transformer_encoder.layers.7.linear2.bias
|
| 95 |
-
transformer_encoder.layers.7.norm1.weight
|
| 96 |
-
transformer_encoder.layers.7.norm1.bias
|
| 97 |
-
transformer_encoder.layers.7.norm2.weight
|
| 98 |
-
transformer_encoder.layers.7.norm2.bias
|
| 99 |
-
transformer_encoder.layers.8.self_attn.in_proj_weight
|
| 100 |
-
transformer_encoder.layers.8.self_attn.in_proj_bias
|
| 101 |
-
transformer_encoder.layers.8.self_attn.out_proj.weight
|
| 102 |
-
transformer_encoder.layers.8.self_attn.out_proj.bias
|
| 103 |
-
transformer_encoder.layers.8.linear1.weight
|
| 104 |
-
transformer_encoder.layers.8.linear1.bias
|
| 105 |
-
transformer_encoder.layers.8.linear2.weight
|
| 106 |
-
transformer_encoder.layers.8.linear2.bias
|
| 107 |
-
transformer_encoder.layers.8.norm1.weight
|
| 108 |
-
transformer_encoder.layers.8.norm1.bias
|
| 109 |
-
transformer_encoder.layers.8.norm2.weight
|
| 110 |
-
transformer_encoder.layers.8.norm2.bias
|
| 111 |
-
transformer_encoder.layers.9.self_attn.in_proj_weight
|
| 112 |
-
transformer_encoder.layers.9.self_attn.in_proj_bias
|
| 113 |
-
transformer_encoder.layers.9.self_attn.out_proj.weight
|
| 114 |
-
transformer_encoder.layers.9.self_attn.out_proj.bias
|
| 115 |
-
transformer_encoder.layers.9.linear1.weight
|
| 116 |
-
transformer_encoder.layers.9.linear1.bias
|
| 117 |
-
transformer_encoder.layers.9.linear2.weight
|
| 118 |
-
transformer_encoder.layers.9.linear2.bias
|
| 119 |
-
transformer_encoder.layers.9.norm1.weight
|
| 120 |
-
transformer_encoder.layers.9.norm1.bias
|
| 121 |
-
transformer_encoder.layers.9.norm2.weight
|
| 122 |
-
transformer_encoder.layers.9.norm2.bias
|
| 123 |
-
transformer_encoder.layers.10.self_attn.in_proj_weight
|
| 124 |
-
transformer_encoder.layers.10.self_attn.in_proj_bias
|
| 125 |
-
transformer_encoder.layers.10.self_attn.out_proj.weight
|
| 126 |
-
transformer_encoder.layers.10.self_attn.out_proj.bias
|
| 127 |
-
transformer_encoder.layers.10.linear1.weight
|
| 128 |
-
transformer_encoder.layers.10.linear1.bias
|
| 129 |
-
transformer_encoder.layers.10.linear2.weight
|
| 130 |
-
transformer_encoder.layers.10.linear2.bias
|
| 131 |
-
transformer_encoder.layers.10.norm1.weight
|
| 132 |
-
transformer_encoder.layers.10.norm1.bias
|
| 133 |
-
transformer_encoder.layers.10.norm2.weight
|
| 134 |
-
transformer_encoder.layers.10.norm2.bias
|
| 135 |
-
transformer_encoder.layers.11.self_attn.in_proj_weight
|
| 136 |
-
transformer_encoder.layers.11.self_attn.in_proj_bias
|
| 137 |
-
transformer_encoder.layers.11.self_attn.out_proj.weight
|
| 138 |
-
transformer_encoder.layers.11.self_attn.out_proj.bias
|
| 139 |
-
transformer_encoder.layers.11.linear1.weight
|
| 140 |
-
transformer_encoder.layers.11.linear1.bias
|
| 141 |
-
transformer_encoder.layers.11.linear2.weight
|
| 142 |
-
transformer_encoder.layers.11.linear2.bias
|
| 143 |
-
transformer_encoder.layers.11.norm1.weight
|
| 144 |
-
transformer_encoder.layers.11.norm1.bias
|
| 145 |
-
transformer_encoder.layers.11.norm2.weight
|
| 146 |
-
transformer_encoder.layers.11.norm2.bias
|
| 147 |
-
transformer_encoder.layers.12.self_attn.in_proj_weight
|
| 148 |
-
transformer_encoder.layers.12.self_attn.in_proj_bias
|
| 149 |
-
transformer_encoder.layers.12.self_attn.out_proj.weight
|
| 150 |
-
transformer_encoder.layers.12.self_attn.out_proj.bias
|
| 151 |
-
transformer_encoder.layers.12.linear1.weight
|
| 152 |
-
transformer_encoder.layers.12.linear1.bias
|
| 153 |
-
transformer_encoder.layers.12.linear2.weight
|
| 154 |
-
transformer_encoder.layers.12.linear2.bias
|
| 155 |
-
transformer_encoder.layers.12.norm1.weight
|
| 156 |
-
transformer_encoder.layers.12.norm1.bias
|
| 157 |
-
transformer_encoder.layers.12.norm2.weight
|
| 158 |
-
transformer_encoder.layers.12.norm2.bias
|
| 159 |
-
transformer_encoder.layers.13.self_attn.in_proj_weight
|
| 160 |
-
transformer_encoder.layers.13.self_attn.in_proj_bias
|
| 161 |
-
transformer_encoder.layers.13.self_attn.out_proj.weight
|
| 162 |
-
transformer_encoder.layers.13.self_attn.out_proj.bias
|
| 163 |
-
transformer_encoder.layers.13.linear1.weight
|
| 164 |
-
transformer_encoder.layers.13.linear1.bias
|
| 165 |
-
transformer_encoder.layers.13.linear2.weight
|
| 166 |
-
transformer_encoder.layers.13.linear2.bias
|
| 167 |
-
transformer_encoder.layers.13.norm1.weight
|
| 168 |
-
transformer_encoder.layers.13.norm1.bias
|
| 169 |
-
transformer_encoder.layers.13.norm2.weight
|
| 170 |
-
transformer_encoder.layers.13.norm2.bias
|
| 171 |
-
transformer_encoder.layers.14.self_attn.in_proj_weight
|
| 172 |
-
transformer_encoder.layers.14.self_attn.in_proj_bias
|
| 173 |
-
transformer_encoder.layers.14.self_attn.out_proj.weight
|
| 174 |
-
transformer_encoder.layers.14.self_attn.out_proj.bias
|
| 175 |
-
transformer_encoder.layers.14.linear1.weight
|
| 176 |
-
transformer_encoder.layers.14.linear1.bias
|
| 177 |
-
transformer_encoder.layers.14.linear2.weight
|
| 178 |
-
transformer_encoder.layers.14.linear2.bias
|
| 179 |
-
transformer_encoder.layers.14.norm1.weight
|
| 180 |
-
transformer_encoder.layers.14.norm1.bias
|
| 181 |
-
transformer_encoder.layers.14.norm2.weight
|
| 182 |
-
transformer_encoder.layers.14.norm2.bias
|
| 183 |
-
transformer_encoder.layers.15.self_attn.in_proj_weight
|
| 184 |
-
transformer_encoder.layers.15.self_attn.in_proj_bias
|
| 185 |
-
transformer_encoder.layers.15.self_attn.out_proj.weight
|
| 186 |
-
transformer_encoder.layers.15.self_attn.out_proj.bias
|
| 187 |
-
transformer_encoder.layers.15.linear1.weight
|
| 188 |
-
transformer_encoder.layers.15.linear1.bias
|
| 189 |
-
transformer_encoder.layers.15.linear2.weight
|
| 190 |
-
transformer_encoder.layers.15.linear2.bias
|
| 191 |
-
transformer_encoder.layers.15.norm1.weight
|
| 192 |
-
transformer_encoder.layers.15.norm1.bias
|
| 193 |
-
transformer_encoder.layers.15.norm2.weight
|
| 194 |
-
transformer_encoder.layers.15.norm2.bias
|
| 195 |
-
transformer_encoder.layers.16.self_attn.in_proj_weight
|
| 196 |
-
transformer_encoder.layers.16.self_attn.in_proj_bias
|
| 197 |
-
transformer_encoder.layers.16.self_attn.out_proj.weight
|
| 198 |
-
transformer_encoder.layers.16.self_attn.out_proj.bias
|
| 199 |
-
transformer_encoder.layers.16.linear1.weight
|
| 200 |
-
transformer_encoder.layers.16.linear1.bias
|
| 201 |
-
transformer_encoder.layers.16.linear2.weight
|
| 202 |
-
transformer_encoder.layers.16.linear2.bias
|
| 203 |
-
transformer_encoder.layers.16.norm1.weight
|
| 204 |
-
transformer_encoder.layers.16.norm1.bias
|
| 205 |
-
transformer_encoder.layers.16.norm2.weight
|
| 206 |
-
transformer_encoder.layers.16.norm2.bias
|
| 207 |
-
transformer_encoder.layers.17.self_attn.in_proj_weight
|
| 208 |
-
transformer_encoder.layers.17.self_attn.in_proj_bias
|
| 209 |
-
transformer_encoder.layers.17.self_attn.out_proj.weight
|
| 210 |
-
transformer_encoder.layers.17.self_attn.out_proj.bias
|
| 211 |
-
transformer_encoder.layers.17.linear1.weight
|
| 212 |
-
transformer_encoder.layers.17.linear1.bias
|
| 213 |
-
transformer_encoder.layers.17.linear2.weight
|
| 214 |
-
transformer_encoder.layers.17.linear2.bias
|
| 215 |
-
transformer_encoder.layers.17.norm1.weight
|
| 216 |
-
transformer_encoder.layers.17.norm1.bias
|
| 217 |
-
transformer_encoder.layers.17.norm2.weight
|
| 218 |
-
transformer_encoder.layers.17.norm2.bias
|
| 219 |
-
transformer_encoder.layers.18.self_attn.in_proj_weight
|
| 220 |
-
transformer_encoder.layers.18.self_attn.in_proj_bias
|
| 221 |
-
transformer_encoder.layers.18.self_attn.out_proj.weight
|
| 222 |
-
transformer_encoder.layers.18.self_attn.out_proj.bias
|
| 223 |
-
transformer_encoder.layers.18.linear1.weight
|
| 224 |
-
transformer_encoder.layers.18.linear1.bias
|
| 225 |
-
transformer_encoder.layers.18.linear2.weight
|
| 226 |
-
transformer_encoder.layers.18.linear2.bias
|
| 227 |
-
transformer_encoder.layers.18.norm1.weight
|
| 228 |
-
transformer_encoder.layers.18.norm1.bias
|
| 229 |
-
transformer_encoder.layers.18.norm2.weight
|
| 230 |
-
transformer_encoder.layers.18.norm2.bias
|
| 231 |
-
transformer_encoder.layers.19.self_attn.in_proj_weight
|
| 232 |
-
transformer_encoder.layers.19.self_attn.in_proj_bias
|
| 233 |
-
transformer_encoder.layers.19.self_attn.out_proj.weight
|
| 234 |
-
transformer_encoder.layers.19.self_attn.out_proj.bias
|
| 235 |
-
transformer_encoder.layers.19.linear1.weight
|
| 236 |
-
transformer_encoder.layers.19.linear1.bias
|
| 237 |
-
transformer_encoder.layers.19.linear2.weight
|
| 238 |
-
transformer_encoder.layers.19.linear2.bias
|
| 239 |
-
transformer_encoder.layers.19.norm1.weight
|
| 240 |
-
transformer_encoder.layers.19.norm1.bias
|
| 241 |
-
transformer_encoder.layers.19.norm2.weight
|
| 242 |
-
transformer_encoder.layers.19.norm2.bias
|
| 243 |
-
transformer_encoder.layers.20.self_attn.in_proj_weight
|
| 244 |
-
transformer_encoder.layers.20.self_attn.in_proj_bias
|
| 245 |
-
transformer_encoder.layers.20.self_attn.out_proj.weight
|
| 246 |
-
transformer_encoder.layers.20.self_attn.out_proj.bias
|
| 247 |
-
transformer_encoder.layers.20.linear1.weight
|
| 248 |
-
transformer_encoder.layers.20.linear1.bias
|
| 249 |
-
transformer_encoder.layers.20.linear2.weight
|
| 250 |
-
transformer_encoder.layers.20.linear2.bias
|
| 251 |
-
transformer_encoder.layers.20.norm1.weight
|
| 252 |
-
transformer_encoder.layers.20.norm1.bias
|
| 253 |
-
transformer_encoder.layers.20.norm2.weight
|
| 254 |
-
transformer_encoder.layers.20.norm2.bias
|
| 255 |
-
transformer_encoder.layers.21.self_attn.in_proj_weight
|
| 256 |
-
transformer_encoder.layers.21.self_attn.in_proj_bias
|
| 257 |
-
transformer_encoder.layers.21.self_attn.out_proj.weight
|
| 258 |
-
transformer_encoder.layers.21.self_attn.out_proj.bias
|
| 259 |
-
transformer_encoder.layers.21.linear1.weight
|
| 260 |
-
transformer_encoder.layers.21.linear1.bias
|
| 261 |
-
transformer_encoder.layers.21.linear2.weight
|
| 262 |
-
transformer_encoder.layers.21.linear2.bias
|
| 263 |
-
transformer_encoder.layers.21.norm1.weight
|
| 264 |
-
transformer_encoder.layers.21.norm1.bias
|
| 265 |
-
transformer_encoder.layers.21.norm2.weight
|
| 266 |
-
transformer_encoder.layers.21.norm2.bias
|
| 267 |
-
transformer_encoder.layers.22.self_attn.in_proj_weight
|
| 268 |
-
transformer_encoder.layers.22.self_attn.in_proj_bias
|
| 269 |
-
transformer_encoder.layers.22.self_attn.out_proj.weight
|
| 270 |
-
transformer_encoder.layers.22.self_attn.out_proj.bias
|
| 271 |
-
transformer_encoder.layers.22.linear1.weight
|
| 272 |
-
transformer_encoder.layers.22.linear1.bias
|
| 273 |
-
transformer_encoder.layers.22.linear2.weight
|
| 274 |
-
transformer_encoder.layers.22.linear2.bias
|
| 275 |
-
transformer_encoder.layers.22.norm1.weight
|
| 276 |
-
transformer_encoder.layers.22.norm1.bias
|
| 277 |
-
transformer_encoder.layers.22.norm2.weight
|
| 278 |
-
transformer_encoder.layers.22.norm2.bias
|
| 279 |
-
transformer_encoder.layers.23.self_attn.in_proj_weight
|
| 280 |
-
transformer_encoder.layers.23.self_attn.in_proj_bias
|
| 281 |
-
transformer_encoder.layers.23.self_attn.out_proj.weight
|
| 282 |
-
transformer_encoder.layers.23.self_attn.out_proj.bias
|
| 283 |
-
transformer_encoder.layers.23.linear1.weight
|
| 284 |
-
transformer_encoder.layers.23.linear1.bias
|
| 285 |
-
transformer_encoder.layers.23.linear2.weight
|
| 286 |
-
transformer_encoder.layers.23.linear2.bias
|
| 287 |
-
transformer_encoder.layers.23.norm1.weight
|
| 288 |
-
transformer_encoder.layers.23.norm1.bias
|
| 289 |
-
transformer_encoder.layers.23.norm2.weight
|
| 290 |
-
transformer_encoder.layers.23.norm2.bias
|
| 291 |
-
ar_predict_layer.weight
|
|
|
|
| 1 |
+
ar_audio_embedding.word_embeddings.weight
|
| 2 |
+
ar_audio_position.alpha
|
| 3 |
+
transformer_encoder.layers.0.self_attn.in_proj_weight
|
| 4 |
+
transformer_encoder.layers.0.self_attn.in_proj_bias
|
| 5 |
+
transformer_encoder.layers.0.self_attn.out_proj.weight
|
| 6 |
+
transformer_encoder.layers.0.self_attn.out_proj.bias
|
| 7 |
+
transformer_encoder.layers.0.linear1.weight
|
| 8 |
+
transformer_encoder.layers.0.linear1.bias
|
| 9 |
+
transformer_encoder.layers.0.linear2.weight
|
| 10 |
+
transformer_encoder.layers.0.linear2.bias
|
| 11 |
+
transformer_encoder.layers.0.norm1.weight
|
| 12 |
+
transformer_encoder.layers.0.norm1.bias
|
| 13 |
+
transformer_encoder.layers.0.norm2.weight
|
| 14 |
+
transformer_encoder.layers.0.norm2.bias
|
| 15 |
+
transformer_encoder.layers.1.self_attn.in_proj_weight
|
| 16 |
+
transformer_encoder.layers.1.self_attn.in_proj_bias
|
| 17 |
+
transformer_encoder.layers.1.self_attn.out_proj.weight
|
| 18 |
+
transformer_encoder.layers.1.self_attn.out_proj.bias
|
| 19 |
+
transformer_encoder.layers.1.linear1.weight
|
| 20 |
+
transformer_encoder.layers.1.linear1.bias
|
| 21 |
+
transformer_encoder.layers.1.linear2.weight
|
| 22 |
+
transformer_encoder.layers.1.linear2.bias
|
| 23 |
+
transformer_encoder.layers.1.norm1.weight
|
| 24 |
+
transformer_encoder.layers.1.norm1.bias
|
| 25 |
+
transformer_encoder.layers.1.norm2.weight
|
| 26 |
+
transformer_encoder.layers.1.norm2.bias
|
| 27 |
+
transformer_encoder.layers.2.self_attn.in_proj_weight
|
| 28 |
+
transformer_encoder.layers.2.self_attn.in_proj_bias
|
| 29 |
+
transformer_encoder.layers.2.self_attn.out_proj.weight
|
| 30 |
+
transformer_encoder.layers.2.self_attn.out_proj.bias
|
| 31 |
+
transformer_encoder.layers.2.linear1.weight
|
| 32 |
+
transformer_encoder.layers.2.linear1.bias
|
| 33 |
+
transformer_encoder.layers.2.linear2.weight
|
| 34 |
+
transformer_encoder.layers.2.linear2.bias
|
| 35 |
+
transformer_encoder.layers.2.norm1.weight
|
| 36 |
+
transformer_encoder.layers.2.norm1.bias
|
| 37 |
+
transformer_encoder.layers.2.norm2.weight
|
| 38 |
+
transformer_encoder.layers.2.norm2.bias
|
| 39 |
+
transformer_encoder.layers.3.self_attn.in_proj_weight
|
| 40 |
+
transformer_encoder.layers.3.self_attn.in_proj_bias
|
| 41 |
+
transformer_encoder.layers.3.self_attn.out_proj.weight
|
| 42 |
+
transformer_encoder.layers.3.self_attn.out_proj.bias
|
| 43 |
+
transformer_encoder.layers.3.linear1.weight
|
| 44 |
+
transformer_encoder.layers.3.linear1.bias
|
| 45 |
+
transformer_encoder.layers.3.linear2.weight
|
| 46 |
+
transformer_encoder.layers.3.linear2.bias
|
| 47 |
+
transformer_encoder.layers.3.norm1.weight
|
| 48 |
+
transformer_encoder.layers.3.norm1.bias
|
| 49 |
+
transformer_encoder.layers.3.norm2.weight
|
| 50 |
+
transformer_encoder.layers.3.norm2.bias
|
| 51 |
+
transformer_encoder.layers.4.self_attn.in_proj_weight
|
| 52 |
+
transformer_encoder.layers.4.self_attn.in_proj_bias
|
| 53 |
+
transformer_encoder.layers.4.self_attn.out_proj.weight
|
| 54 |
+
transformer_encoder.layers.4.self_attn.out_proj.bias
|
| 55 |
+
transformer_encoder.layers.4.linear1.weight
|
| 56 |
+
transformer_encoder.layers.4.linear1.bias
|
| 57 |
+
transformer_encoder.layers.4.linear2.weight
|
| 58 |
+
transformer_encoder.layers.4.linear2.bias
|
| 59 |
+
transformer_encoder.layers.4.norm1.weight
|
| 60 |
+
transformer_encoder.layers.4.norm1.bias
|
| 61 |
+
transformer_encoder.layers.4.norm2.weight
|
| 62 |
+
transformer_encoder.layers.4.norm2.bias
|
| 63 |
+
transformer_encoder.layers.5.self_attn.in_proj_weight
|
| 64 |
+
transformer_encoder.layers.5.self_attn.in_proj_bias
|
| 65 |
+
transformer_encoder.layers.5.self_attn.out_proj.weight
|
| 66 |
+
transformer_encoder.layers.5.self_attn.out_proj.bias
|
| 67 |
+
transformer_encoder.layers.5.linear1.weight
|
| 68 |
+
transformer_encoder.layers.5.linear1.bias
|
| 69 |
+
transformer_encoder.layers.5.linear2.weight
|
| 70 |
+
transformer_encoder.layers.5.linear2.bias
|
| 71 |
+
transformer_encoder.layers.5.norm1.weight
|
| 72 |
+
transformer_encoder.layers.5.norm1.bias
|
| 73 |
+
transformer_encoder.layers.5.norm2.weight
|
| 74 |
+
transformer_encoder.layers.5.norm2.bias
|
| 75 |
+
transformer_encoder.layers.6.self_attn.in_proj_weight
|
| 76 |
+
transformer_encoder.layers.6.self_attn.in_proj_bias
|
| 77 |
+
transformer_encoder.layers.6.self_attn.out_proj.weight
|
| 78 |
+
transformer_encoder.layers.6.self_attn.out_proj.bias
|
| 79 |
+
transformer_encoder.layers.6.linear1.weight
|
| 80 |
+
transformer_encoder.layers.6.linear1.bias
|
| 81 |
+
transformer_encoder.layers.6.linear2.weight
|
| 82 |
+
transformer_encoder.layers.6.linear2.bias
|
| 83 |
+
transformer_encoder.layers.6.norm1.weight
|
| 84 |
+
transformer_encoder.layers.6.norm1.bias
|
| 85 |
+
transformer_encoder.layers.6.norm2.weight
|
| 86 |
+
transformer_encoder.layers.6.norm2.bias
|
| 87 |
+
transformer_encoder.layers.7.self_attn.in_proj_weight
|
| 88 |
+
transformer_encoder.layers.7.self_attn.in_proj_bias
|
| 89 |
+
transformer_encoder.layers.7.self_attn.out_proj.weight
|
| 90 |
+
transformer_encoder.layers.7.self_attn.out_proj.bias
|
| 91 |
+
transformer_encoder.layers.7.linear1.weight
|
| 92 |
+
transformer_encoder.layers.7.linear1.bias
|
| 93 |
+
transformer_encoder.layers.7.linear2.weight
|
| 94 |
+
transformer_encoder.layers.7.linear2.bias
|
| 95 |
+
transformer_encoder.layers.7.norm1.weight
|
| 96 |
+
transformer_encoder.layers.7.norm1.bias
|
| 97 |
+
transformer_encoder.layers.7.norm2.weight
|
| 98 |
+
transformer_encoder.layers.7.norm2.bias
|
| 99 |
+
transformer_encoder.layers.8.self_attn.in_proj_weight
|
| 100 |
+
transformer_encoder.layers.8.self_attn.in_proj_bias
|
| 101 |
+
transformer_encoder.layers.8.self_attn.out_proj.weight
|
| 102 |
+
transformer_encoder.layers.8.self_attn.out_proj.bias
|
| 103 |
+
transformer_encoder.layers.8.linear1.weight
|
| 104 |
+
transformer_encoder.layers.8.linear1.bias
|
| 105 |
+
transformer_encoder.layers.8.linear2.weight
|
| 106 |
+
transformer_encoder.layers.8.linear2.bias
|
| 107 |
+
transformer_encoder.layers.8.norm1.weight
|
| 108 |
+
transformer_encoder.layers.8.norm1.bias
|
| 109 |
+
transformer_encoder.layers.8.norm2.weight
|
| 110 |
+
transformer_encoder.layers.8.norm2.bias
|
| 111 |
+
transformer_encoder.layers.9.self_attn.in_proj_weight
|
| 112 |
+
transformer_encoder.layers.9.self_attn.in_proj_bias
|
| 113 |
+
transformer_encoder.layers.9.self_attn.out_proj.weight
|
| 114 |
+
transformer_encoder.layers.9.self_attn.out_proj.bias
|
| 115 |
+
transformer_encoder.layers.9.linear1.weight
|
| 116 |
+
transformer_encoder.layers.9.linear1.bias
|
| 117 |
+
transformer_encoder.layers.9.linear2.weight
|
| 118 |
+
transformer_encoder.layers.9.linear2.bias
|
| 119 |
+
transformer_encoder.layers.9.norm1.weight
|
| 120 |
+
transformer_encoder.layers.9.norm1.bias
|
| 121 |
+
transformer_encoder.layers.9.norm2.weight
|
| 122 |
+
transformer_encoder.layers.9.norm2.bias
|
| 123 |
+
transformer_encoder.layers.10.self_attn.in_proj_weight
|
| 124 |
+
transformer_encoder.layers.10.self_attn.in_proj_bias
|
| 125 |
+
transformer_encoder.layers.10.self_attn.out_proj.weight
|
| 126 |
+
transformer_encoder.layers.10.self_attn.out_proj.bias
|
| 127 |
+
transformer_encoder.layers.10.linear1.weight
|
| 128 |
+
transformer_encoder.layers.10.linear1.bias
|
| 129 |
+
transformer_encoder.layers.10.linear2.weight
|
| 130 |
+
transformer_encoder.layers.10.linear2.bias
|
| 131 |
+
transformer_encoder.layers.10.norm1.weight
|
| 132 |
+
transformer_encoder.layers.10.norm1.bias
|
| 133 |
+
transformer_encoder.layers.10.norm2.weight
|
| 134 |
+
transformer_encoder.layers.10.norm2.bias
|
| 135 |
+
transformer_encoder.layers.11.self_attn.in_proj_weight
|
| 136 |
+
transformer_encoder.layers.11.self_attn.in_proj_bias
|
| 137 |
+
transformer_encoder.layers.11.self_attn.out_proj.weight
|
| 138 |
+
transformer_encoder.layers.11.self_attn.out_proj.bias
|
| 139 |
+
transformer_encoder.layers.11.linear1.weight
|
| 140 |
+
transformer_encoder.layers.11.linear1.bias
|
| 141 |
+
transformer_encoder.layers.11.linear2.weight
|
| 142 |
+
transformer_encoder.layers.11.linear2.bias
|
| 143 |
+
transformer_encoder.layers.11.norm1.weight
|
| 144 |
+
transformer_encoder.layers.11.norm1.bias
|
| 145 |
+
transformer_encoder.layers.11.norm2.weight
|
| 146 |
+
transformer_encoder.layers.11.norm2.bias
|
| 147 |
+
transformer_encoder.layers.12.self_attn.in_proj_weight
|
| 148 |
+
transformer_encoder.layers.12.self_attn.in_proj_bias
|
| 149 |
+
transformer_encoder.layers.12.self_attn.out_proj.weight
|
| 150 |
+
transformer_encoder.layers.12.self_attn.out_proj.bias
|
| 151 |
+
transformer_encoder.layers.12.linear1.weight
|
| 152 |
+
transformer_encoder.layers.12.linear1.bias
|
| 153 |
+
transformer_encoder.layers.12.linear2.weight
|
| 154 |
+
transformer_encoder.layers.12.linear2.bias
|
| 155 |
+
transformer_encoder.layers.12.norm1.weight
|
| 156 |
+
transformer_encoder.layers.12.norm1.bias
|
| 157 |
+
transformer_encoder.layers.12.norm2.weight
|
| 158 |
+
transformer_encoder.layers.12.norm2.bias
|
| 159 |
+
transformer_encoder.layers.13.self_attn.in_proj_weight
|
| 160 |
+
transformer_encoder.layers.13.self_attn.in_proj_bias
|
| 161 |
+
transformer_encoder.layers.13.self_attn.out_proj.weight
|
| 162 |
+
transformer_encoder.layers.13.self_attn.out_proj.bias
|
| 163 |
+
transformer_encoder.layers.13.linear1.weight
|
| 164 |
+
transformer_encoder.layers.13.linear1.bias
|
| 165 |
+
transformer_encoder.layers.13.linear2.weight
|
| 166 |
+
transformer_encoder.layers.13.linear2.bias
|
| 167 |
+
transformer_encoder.layers.13.norm1.weight
|
| 168 |
+
transformer_encoder.layers.13.norm1.bias
|
| 169 |
+
transformer_encoder.layers.13.norm2.weight
|
| 170 |
+
transformer_encoder.layers.13.norm2.bias
|
| 171 |
+
transformer_encoder.layers.14.self_attn.in_proj_weight
|
| 172 |
+
transformer_encoder.layers.14.self_attn.in_proj_bias
|
| 173 |
+
transformer_encoder.layers.14.self_attn.out_proj.weight
|
| 174 |
+
transformer_encoder.layers.14.self_attn.out_proj.bias
|
| 175 |
+
transformer_encoder.layers.14.linear1.weight
|
| 176 |
+
transformer_encoder.layers.14.linear1.bias
|
| 177 |
+
transformer_encoder.layers.14.linear2.weight
|
| 178 |
+
transformer_encoder.layers.14.linear2.bias
|
| 179 |
+
transformer_encoder.layers.14.norm1.weight
|
| 180 |
+
transformer_encoder.layers.14.norm1.bias
|
| 181 |
+
transformer_encoder.layers.14.norm2.weight
|
| 182 |
+
transformer_encoder.layers.14.norm2.bias
|
| 183 |
+
transformer_encoder.layers.15.self_attn.in_proj_weight
|
| 184 |
+
transformer_encoder.layers.15.self_attn.in_proj_bias
|
| 185 |
+
transformer_encoder.layers.15.self_attn.out_proj.weight
|
| 186 |
+
transformer_encoder.layers.15.self_attn.out_proj.bias
|
| 187 |
+
transformer_encoder.layers.15.linear1.weight
|
| 188 |
+
transformer_encoder.layers.15.linear1.bias
|
| 189 |
+
transformer_encoder.layers.15.linear2.weight
|
| 190 |
+
transformer_encoder.layers.15.linear2.bias
|
| 191 |
+
transformer_encoder.layers.15.norm1.weight
|
| 192 |
+
transformer_encoder.layers.15.norm1.bias
|
| 193 |
+
transformer_encoder.layers.15.norm2.weight
|
| 194 |
+
transformer_encoder.layers.15.norm2.bias
|
| 195 |
+
transformer_encoder.layers.16.self_attn.in_proj_weight
|
| 196 |
+
transformer_encoder.layers.16.self_attn.in_proj_bias
|
| 197 |
+
transformer_encoder.layers.16.self_attn.out_proj.weight
|
| 198 |
+
transformer_encoder.layers.16.self_attn.out_proj.bias
|
| 199 |
+
transformer_encoder.layers.16.linear1.weight
|
| 200 |
+
transformer_encoder.layers.16.linear1.bias
|
| 201 |
+
transformer_encoder.layers.16.linear2.weight
|
| 202 |
+
transformer_encoder.layers.16.linear2.bias
|
| 203 |
+
transformer_encoder.layers.16.norm1.weight
|
| 204 |
+
transformer_encoder.layers.16.norm1.bias
|
| 205 |
+
transformer_encoder.layers.16.norm2.weight
|
| 206 |
+
transformer_encoder.layers.16.norm2.bias
|
| 207 |
+
transformer_encoder.layers.17.self_attn.in_proj_weight
|
| 208 |
+
transformer_encoder.layers.17.self_attn.in_proj_bias
|
| 209 |
+
transformer_encoder.layers.17.self_attn.out_proj.weight
|
| 210 |
+
transformer_encoder.layers.17.self_attn.out_proj.bias
|
| 211 |
+
transformer_encoder.layers.17.linear1.weight
|
| 212 |
+
transformer_encoder.layers.17.linear1.bias
|
| 213 |
+
transformer_encoder.layers.17.linear2.weight
|
| 214 |
+
transformer_encoder.layers.17.linear2.bias
|
| 215 |
+
transformer_encoder.layers.17.norm1.weight
|
| 216 |
+
transformer_encoder.layers.17.norm1.bias
|
| 217 |
+
transformer_encoder.layers.17.norm2.weight
|
| 218 |
+
transformer_encoder.layers.17.norm2.bias
|
| 219 |
+
transformer_encoder.layers.18.self_attn.in_proj_weight
|
| 220 |
+
transformer_encoder.layers.18.self_attn.in_proj_bias
|
| 221 |
+
transformer_encoder.layers.18.self_attn.out_proj.weight
|
| 222 |
+
transformer_encoder.layers.18.self_attn.out_proj.bias
|
| 223 |
+
transformer_encoder.layers.18.linear1.weight
|
| 224 |
+
transformer_encoder.layers.18.linear1.bias
|
| 225 |
+
transformer_encoder.layers.18.linear2.weight
|
| 226 |
+
transformer_encoder.layers.18.linear2.bias
|
| 227 |
+
transformer_encoder.layers.18.norm1.weight
|
| 228 |
+
transformer_encoder.layers.18.norm1.bias
|
| 229 |
+
transformer_encoder.layers.18.norm2.weight
|
| 230 |
+
transformer_encoder.layers.18.norm2.bias
|
| 231 |
+
transformer_encoder.layers.19.self_attn.in_proj_weight
|
| 232 |
+
transformer_encoder.layers.19.self_attn.in_proj_bias
|
| 233 |
+
transformer_encoder.layers.19.self_attn.out_proj.weight
|
| 234 |
+
transformer_encoder.layers.19.self_attn.out_proj.bias
|
| 235 |
+
transformer_encoder.layers.19.linear1.weight
|
| 236 |
+
transformer_encoder.layers.19.linear1.bias
|
| 237 |
+
transformer_encoder.layers.19.linear2.weight
|
| 238 |
+
transformer_encoder.layers.19.linear2.bias
|
| 239 |
+
transformer_encoder.layers.19.norm1.weight
|
| 240 |
+
transformer_encoder.layers.19.norm1.bias
|
| 241 |
+
transformer_encoder.layers.19.norm2.weight
|
| 242 |
+
transformer_encoder.layers.19.norm2.bias
|
| 243 |
+
transformer_encoder.layers.20.self_attn.in_proj_weight
|
| 244 |
+
transformer_encoder.layers.20.self_attn.in_proj_bias
|
| 245 |
+
transformer_encoder.layers.20.self_attn.out_proj.weight
|
| 246 |
+
transformer_encoder.layers.20.self_attn.out_proj.bias
|
| 247 |
+
transformer_encoder.layers.20.linear1.weight
|
| 248 |
+
transformer_encoder.layers.20.linear1.bias
|
| 249 |
+
transformer_encoder.layers.20.linear2.weight
|
| 250 |
+
transformer_encoder.layers.20.linear2.bias
|
| 251 |
+
transformer_encoder.layers.20.norm1.weight
|
| 252 |
+
transformer_encoder.layers.20.norm1.bias
|
| 253 |
+
transformer_encoder.layers.20.norm2.weight
|
| 254 |
+
transformer_encoder.layers.20.norm2.bias
|
| 255 |
+
transformer_encoder.layers.21.self_attn.in_proj_weight
|
| 256 |
+
transformer_encoder.layers.21.self_attn.in_proj_bias
|
| 257 |
+
transformer_encoder.layers.21.self_attn.out_proj.weight
|
| 258 |
+
transformer_encoder.layers.21.self_attn.out_proj.bias
|
| 259 |
+
transformer_encoder.layers.21.linear1.weight
|
| 260 |
+
transformer_encoder.layers.21.linear1.bias
|
| 261 |
+
transformer_encoder.layers.21.linear2.weight
|
| 262 |
+
transformer_encoder.layers.21.linear2.bias
|
| 263 |
+
transformer_encoder.layers.21.norm1.weight
|
| 264 |
+
transformer_encoder.layers.21.norm1.bias
|
| 265 |
+
transformer_encoder.layers.21.norm2.weight
|
| 266 |
+
transformer_encoder.layers.21.norm2.bias
|
| 267 |
+
transformer_encoder.layers.22.self_attn.in_proj_weight
|
| 268 |
+
transformer_encoder.layers.22.self_attn.in_proj_bias
|
| 269 |
+
transformer_encoder.layers.22.self_attn.out_proj.weight
|
| 270 |
+
transformer_encoder.layers.22.self_attn.out_proj.bias
|
| 271 |
+
transformer_encoder.layers.22.linear1.weight
|
| 272 |
+
transformer_encoder.layers.22.linear1.bias
|
| 273 |
+
transformer_encoder.layers.22.linear2.weight
|
| 274 |
+
transformer_encoder.layers.22.linear2.bias
|
| 275 |
+
transformer_encoder.layers.22.norm1.weight
|
| 276 |
+
transformer_encoder.layers.22.norm1.bias
|
| 277 |
+
transformer_encoder.layers.22.norm2.weight
|
| 278 |
+
transformer_encoder.layers.22.norm2.bias
|
| 279 |
+
transformer_encoder.layers.23.self_attn.in_proj_weight
|
| 280 |
+
transformer_encoder.layers.23.self_attn.in_proj_bias
|
| 281 |
+
transformer_encoder.layers.23.self_attn.out_proj.weight
|
| 282 |
+
transformer_encoder.layers.23.self_attn.out_proj.bias
|
| 283 |
+
transformer_encoder.layers.23.linear1.weight
|
| 284 |
+
transformer_encoder.layers.23.linear1.bias
|
| 285 |
+
transformer_encoder.layers.23.linear2.weight
|
| 286 |
+
transformer_encoder.layers.23.linear2.bias
|
| 287 |
+
transformer_encoder.layers.23.norm1.weight
|
| 288 |
+
transformer_encoder.layers.23.norm1.bias
|
| 289 |
+
transformer_encoder.layers.23.norm2.weight
|
| 290 |
+
transformer_encoder.layers.23.norm2.bias
|
| 291 |
+
ar_predict_layer.weight
|
genie_tts/Data/v2/Keys/vits_onnx_keys.txt
CHANGED
|
@@ -1,668 +1,668 @@
|
|
| 1 |
-
vq_model.dec.cond.bias
|
| 2 |
-
vq_model.dec.cond.weight
|
| 3 |
-
vq_model.dec.conv_post.weight
|
| 4 |
-
vq_model.dec.conv_pre.bias
|
| 5 |
-
vq_model.dec.conv_pre.weight
|
| 6 |
-
vq_model.dec.resblocks.0.convs1.0.bias
|
| 7 |
-
vq_model.dec.resblocks.0.convs1.0.weight_g
|
| 8 |
-
vq_model.dec.resblocks.0.convs1.0.weight_v
|
| 9 |
-
vq_model.dec.resblocks.0.convs1.1.bias
|
| 10 |
-
vq_model.dec.resblocks.0.convs1.1.weight_g
|
| 11 |
-
vq_model.dec.resblocks.0.convs1.1.weight_v
|
| 12 |
-
vq_model.dec.resblocks.0.convs1.2.bias
|
| 13 |
-
vq_model.dec.resblocks.0.convs1.2.weight_g
|
| 14 |
-
vq_model.dec.resblocks.0.convs1.2.weight_v
|
| 15 |
-
vq_model.dec.resblocks.0.convs2.0.bias
|
| 16 |
-
vq_model.dec.resblocks.0.convs2.0.weight_g
|
| 17 |
-
vq_model.dec.resblocks.0.convs2.0.weight_v
|
| 18 |
-
vq_model.dec.resblocks.0.convs2.1.bias
|
| 19 |
-
vq_model.dec.resblocks.0.convs2.1.weight_g
|
| 20 |
-
vq_model.dec.resblocks.0.convs2.1.weight_v
|
| 21 |
-
vq_model.dec.resblocks.0.convs2.2.bias
|
| 22 |
-
vq_model.dec.resblocks.0.convs2.2.weight_g
|
| 23 |
-
vq_model.dec.resblocks.0.convs2.2.weight_v
|
| 24 |
-
vq_model.dec.resblocks.1.convs1.0.bias
|
| 25 |
-
vq_model.dec.resblocks.1.convs1.0.weight_g
|
| 26 |
-
vq_model.dec.resblocks.1.convs1.0.weight_v
|
| 27 |
-
vq_model.dec.resblocks.1.convs1.1.bias
|
| 28 |
-
vq_model.dec.resblocks.1.convs1.1.weight_g
|
| 29 |
-
vq_model.dec.resblocks.1.convs1.1.weight_v
|
| 30 |
-
vq_model.dec.resblocks.1.convs1.2.bias
|
| 31 |
-
vq_model.dec.resblocks.1.convs1.2.weight_g
|
| 32 |
-
vq_model.dec.resblocks.1.convs1.2.weight_v
|
| 33 |
-
vq_model.dec.resblocks.1.convs2.0.bias
|
| 34 |
-
vq_model.dec.resblocks.1.convs2.0.weight_g
|
| 35 |
-
vq_model.dec.resblocks.1.convs2.0.weight_v
|
| 36 |
-
vq_model.dec.resblocks.1.convs2.1.bias
|
| 37 |
-
vq_model.dec.resblocks.1.convs2.1.weight_g
|
| 38 |
-
vq_model.dec.resblocks.1.convs2.1.weight_v
|
| 39 |
-
vq_model.dec.resblocks.1.convs2.2.bias
|
| 40 |
-
vq_model.dec.resblocks.1.convs2.2.weight_g
|
| 41 |
-
vq_model.dec.resblocks.1.convs2.2.weight_v
|
| 42 |
-
vq_model.dec.resblocks.10.convs1.0.bias
|
| 43 |
-
vq_model.dec.resblocks.10.convs1.0.weight_g
|
| 44 |
-
vq_model.dec.resblocks.10.convs1.0.weight_v
|
| 45 |
-
vq_model.dec.resblocks.10.convs1.1.bias
|
| 46 |
-
vq_model.dec.resblocks.10.convs1.1.weight_g
|
| 47 |
-
vq_model.dec.resblocks.10.convs1.1.weight_v
|
| 48 |
-
vq_model.dec.resblocks.10.convs1.2.bias
|
| 49 |
-
vq_model.dec.resblocks.10.convs1.2.weight_g
|
| 50 |
-
vq_model.dec.resblocks.10.convs1.2.weight_v
|
| 51 |
-
vq_model.dec.resblocks.10.convs2.0.bias
|
| 52 |
-
vq_model.dec.resblocks.10.convs2.0.weight_g
|
| 53 |
-
vq_model.dec.resblocks.10.convs2.0.weight_v
|
| 54 |
-
vq_model.dec.resblocks.10.convs2.1.bias
|
| 55 |
-
vq_model.dec.resblocks.10.convs2.1.weight_g
|
| 56 |
-
vq_model.dec.resblocks.10.convs2.1.weight_v
|
| 57 |
-
vq_model.dec.resblocks.10.convs2.2.bias
|
| 58 |
-
vq_model.dec.resblocks.10.convs2.2.weight_g
|
| 59 |
-
vq_model.dec.resblocks.10.convs2.2.weight_v
|
| 60 |
-
vq_model.dec.resblocks.11.convs1.0.bias
|
| 61 |
-
vq_model.dec.resblocks.11.convs1.0.weight_g
|
| 62 |
-
vq_model.dec.resblocks.11.convs1.0.weight_v
|
| 63 |
-
vq_model.dec.resblocks.11.convs1.1.bias
|
| 64 |
-
vq_model.dec.resblocks.11.convs1.1.weight_g
|
| 65 |
-
vq_model.dec.resblocks.11.convs1.1.weight_v
|
| 66 |
-
vq_model.dec.resblocks.11.convs1.2.bias
|
| 67 |
-
vq_model.dec.resblocks.11.convs1.2.weight_g
|
| 68 |
-
vq_model.dec.resblocks.11.convs1.2.weight_v
|
| 69 |
-
vq_model.dec.resblocks.11.convs2.0.bias
|
| 70 |
-
vq_model.dec.resblocks.11.convs2.0.weight_g
|
| 71 |
-
vq_model.dec.resblocks.11.convs2.0.weight_v
|
| 72 |
-
vq_model.dec.resblocks.11.convs2.1.bias
|
| 73 |
-
vq_model.dec.resblocks.11.convs2.1.weight_g
|
| 74 |
-
vq_model.dec.resblocks.11.convs2.1.weight_v
|
| 75 |
-
vq_model.dec.resblocks.11.convs2.2.bias
|
| 76 |
-
vq_model.dec.resblocks.11.convs2.2.weight_g
|
| 77 |
-
vq_model.dec.resblocks.11.convs2.2.weight_v
|
| 78 |
-
vq_model.dec.resblocks.12.convs1.0.bias
|
| 79 |
-
vq_model.dec.resblocks.12.convs1.0.weight_g
|
| 80 |
-
vq_model.dec.resblocks.12.convs1.0.weight_v
|
| 81 |
-
vq_model.dec.resblocks.12.convs1.1.bias
|
| 82 |
-
vq_model.dec.resblocks.12.convs1.1.weight_g
|
| 83 |
-
vq_model.dec.resblocks.12.convs1.1.weight_v
|
| 84 |
-
vq_model.dec.resblocks.12.convs1.2.bias
|
| 85 |
-
vq_model.dec.resblocks.12.convs1.2.weight_g
|
| 86 |
-
vq_model.dec.resblocks.12.convs1.2.weight_v
|
| 87 |
-
vq_model.dec.resblocks.12.convs2.0.bias
|
| 88 |
-
vq_model.dec.resblocks.12.convs2.0.weight_g
|
| 89 |
-
vq_model.dec.resblocks.12.convs2.0.weight_v
|
| 90 |
-
vq_model.dec.resblocks.12.convs2.1.bias
|
| 91 |
-
vq_model.dec.resblocks.12.convs2.1.weight_g
|
| 92 |
-
vq_model.dec.resblocks.12.convs2.1.weight_v
|
| 93 |
-
vq_model.dec.resblocks.12.convs2.2.bias
|
| 94 |
-
vq_model.dec.resblocks.12.convs2.2.weight_g
|
| 95 |
-
vq_model.dec.resblocks.12.convs2.2.weight_v
|
| 96 |
-
vq_model.dec.resblocks.13.convs1.0.bias
|
| 97 |
-
vq_model.dec.resblocks.13.convs1.0.weight_g
|
| 98 |
-
vq_model.dec.resblocks.13.convs1.0.weight_v
|
| 99 |
-
vq_model.dec.resblocks.13.convs1.1.bias
|
| 100 |
-
vq_model.dec.resblocks.13.convs1.1.weight_g
|
| 101 |
-
vq_model.dec.resblocks.13.convs1.1.weight_v
|
| 102 |
-
vq_model.dec.resblocks.13.convs1.2.bias
|
| 103 |
-
vq_model.dec.resblocks.13.convs1.2.weight_g
|
| 104 |
-
vq_model.dec.resblocks.13.convs1.2.weight_v
|
| 105 |
-
vq_model.dec.resblocks.13.convs2.0.bias
|
| 106 |
-
vq_model.dec.resblocks.13.convs2.0.weight_g
|
| 107 |
-
vq_model.dec.resblocks.13.convs2.0.weight_v
|
| 108 |
-
vq_model.dec.resblocks.13.convs2.1.bias
|
| 109 |
-
vq_model.dec.resblocks.13.convs2.1.weight_g
|
| 110 |
-
vq_model.dec.resblocks.13.convs2.1.weight_v
|
| 111 |
-
vq_model.dec.resblocks.13.convs2.2.bias
|
| 112 |
-
vq_model.dec.resblocks.13.convs2.2.weight_g
|
| 113 |
-
vq_model.dec.resblocks.13.convs2.2.weight_v
|
| 114 |
-
vq_model.dec.resblocks.14.convs1.0.bias
|
| 115 |
-
vq_model.dec.resblocks.14.convs1.0.weight_g
|
| 116 |
-
vq_model.dec.resblocks.14.convs1.0.weight_v
|
| 117 |
-
vq_model.dec.resblocks.14.convs1.1.bias
|
| 118 |
-
vq_model.dec.resblocks.14.convs1.1.weight_g
|
| 119 |
-
vq_model.dec.resblocks.14.convs1.1.weight_v
|
| 120 |
-
vq_model.dec.resblocks.14.convs1.2.bias
|
| 121 |
-
vq_model.dec.resblocks.14.convs1.2.weight_g
|
| 122 |
-
vq_model.dec.resblocks.14.convs1.2.weight_v
|
| 123 |
-
vq_model.dec.resblocks.14.convs2.0.bias
|
| 124 |
-
vq_model.dec.resblocks.14.convs2.0.weight_g
|
| 125 |
-
vq_model.dec.resblocks.14.convs2.0.weight_v
|
| 126 |
-
vq_model.dec.resblocks.14.convs2.1.bias
|
| 127 |
-
vq_model.dec.resblocks.14.convs2.1.weight_g
|
| 128 |
-
vq_model.dec.resblocks.14.convs2.1.weight_v
|
| 129 |
-
vq_model.dec.resblocks.14.convs2.2.bias
|
| 130 |
-
vq_model.dec.resblocks.14.convs2.2.weight_g
|
| 131 |
-
vq_model.dec.resblocks.14.convs2.2.weight_v
|
| 132 |
-
vq_model.dec.resblocks.2.convs1.0.bias
|
| 133 |
-
vq_model.dec.resblocks.2.convs1.0.weight_g
|
| 134 |
-
vq_model.dec.resblocks.2.convs1.0.weight_v
|
| 135 |
-
vq_model.dec.resblocks.2.convs1.1.bias
|
| 136 |
-
vq_model.dec.resblocks.2.convs1.1.weight_g
|
| 137 |
-
vq_model.dec.resblocks.2.convs1.1.weight_v
|
| 138 |
-
vq_model.dec.resblocks.2.convs1.2.bias
|
| 139 |
-
vq_model.dec.resblocks.2.convs1.2.weight_g
|
| 140 |
-
vq_model.dec.resblocks.2.convs1.2.weight_v
|
| 141 |
-
vq_model.dec.resblocks.2.convs2.0.bias
|
| 142 |
-
vq_model.dec.resblocks.2.convs2.0.weight_g
|
| 143 |
-
vq_model.dec.resblocks.2.convs2.0.weight_v
|
| 144 |
-
vq_model.dec.resblocks.2.convs2.1.bias
|
| 145 |
-
vq_model.dec.resblocks.2.convs2.1.weight_g
|
| 146 |
-
vq_model.dec.resblocks.2.convs2.1.weight_v
|
| 147 |
-
vq_model.dec.resblocks.2.convs2.2.bias
|
| 148 |
-
vq_model.dec.resblocks.2.convs2.2.weight_g
|
| 149 |
-
vq_model.dec.resblocks.2.convs2.2.weight_v
|
| 150 |
-
vq_model.dec.resblocks.3.convs1.0.bias
|
| 151 |
-
vq_model.dec.resblocks.3.convs1.0.weight_g
|
| 152 |
-
vq_model.dec.resblocks.3.convs1.0.weight_v
|
| 153 |
-
vq_model.dec.resblocks.3.convs1.1.bias
|
| 154 |
-
vq_model.dec.resblocks.3.convs1.1.weight_g
|
| 155 |
-
vq_model.dec.resblocks.3.convs1.1.weight_v
|
| 156 |
-
vq_model.dec.resblocks.3.convs1.2.bias
|
| 157 |
-
vq_model.dec.resblocks.3.convs1.2.weight_g
|
| 158 |
-
vq_model.dec.resblocks.3.convs1.2.weight_v
|
| 159 |
-
vq_model.dec.resblocks.3.convs2.0.bias
|
| 160 |
-
vq_model.dec.resblocks.3.convs2.0.weight_g
|
| 161 |
-
vq_model.dec.resblocks.3.convs2.0.weight_v
|
| 162 |
-
vq_model.dec.resblocks.3.convs2.1.bias
|
| 163 |
-
vq_model.dec.resblocks.3.convs2.1.weight_g
|
| 164 |
-
vq_model.dec.resblocks.3.convs2.1.weight_v
|
| 165 |
-
vq_model.dec.resblocks.3.convs2.2.bias
|
| 166 |
-
vq_model.dec.resblocks.3.convs2.2.weight_g
|
| 167 |
-
vq_model.dec.resblocks.3.convs2.2.weight_v
|
| 168 |
-
vq_model.dec.resblocks.4.convs1.0.bias
|
| 169 |
-
vq_model.dec.resblocks.4.convs1.0.weight_g
|
| 170 |
-
vq_model.dec.resblocks.4.convs1.0.weight_v
|
| 171 |
-
vq_model.dec.resblocks.4.convs1.1.bias
|
| 172 |
-
vq_model.dec.resblocks.4.convs1.1.weight_g
|
| 173 |
-
vq_model.dec.resblocks.4.convs1.1.weight_v
|
| 174 |
-
vq_model.dec.resblocks.4.convs1.2.bias
|
| 175 |
-
vq_model.dec.resblocks.4.convs1.2.weight_g
|
| 176 |
-
vq_model.dec.resblocks.4.convs1.2.weight_v
|
| 177 |
-
vq_model.dec.resblocks.4.convs2.0.bias
|
| 178 |
-
vq_model.dec.resblocks.4.convs2.0.weight_g
|
| 179 |
-
vq_model.dec.resblocks.4.convs2.0.weight_v
|
| 180 |
-
vq_model.dec.resblocks.4.convs2.1.bias
|
| 181 |
-
vq_model.dec.resblocks.4.convs2.1.weight_g
|
| 182 |
-
vq_model.dec.resblocks.4.convs2.1.weight_v
|
| 183 |
-
vq_model.dec.resblocks.4.convs2.2.bias
|
| 184 |
-
vq_model.dec.resblocks.4.convs2.2.weight_g
|
| 185 |
-
vq_model.dec.resblocks.4.convs2.2.weight_v
|
| 186 |
-
vq_model.dec.resblocks.5.convs1.0.bias
|
| 187 |
-
vq_model.dec.resblocks.5.convs1.0.weight_g
|
| 188 |
-
vq_model.dec.resblocks.5.convs1.0.weight_v
|
| 189 |
-
vq_model.dec.resblocks.5.convs1.1.bias
|
| 190 |
-
vq_model.dec.resblocks.5.convs1.1.weight_g
|
| 191 |
-
vq_model.dec.resblocks.5.convs1.1.weight_v
|
| 192 |
-
vq_model.dec.resblocks.5.convs1.2.bias
|
| 193 |
-
vq_model.dec.resblocks.5.convs1.2.weight_g
|
| 194 |
-
vq_model.dec.resblocks.5.convs1.2.weight_v
|
| 195 |
-
vq_model.dec.resblocks.5.convs2.0.bias
|
| 196 |
-
vq_model.dec.resblocks.5.convs2.0.weight_g
|
| 197 |
-
vq_model.dec.resblocks.5.convs2.0.weight_v
|
| 198 |
-
vq_model.dec.resblocks.5.convs2.1.bias
|
| 199 |
-
vq_model.dec.resblocks.5.convs2.1.weight_g
|
| 200 |
-
vq_model.dec.resblocks.5.convs2.1.weight_v
|
| 201 |
-
vq_model.dec.resblocks.5.convs2.2.bias
|
| 202 |
-
vq_model.dec.resblocks.5.convs2.2.weight_g
|
| 203 |
-
vq_model.dec.resblocks.5.convs2.2.weight_v
|
| 204 |
-
vq_model.dec.resblocks.6.convs1.0.bias
|
| 205 |
-
vq_model.dec.resblocks.6.convs1.0.weight_g
|
| 206 |
-
vq_model.dec.resblocks.6.convs1.0.weight_v
|
| 207 |
-
vq_model.dec.resblocks.6.convs1.1.bias
|
| 208 |
-
vq_model.dec.resblocks.6.convs1.1.weight_g
|
| 209 |
-
vq_model.dec.resblocks.6.convs1.1.weight_v
|
| 210 |
-
vq_model.dec.resblocks.6.convs1.2.bias
|
| 211 |
-
vq_model.dec.resblocks.6.convs1.2.weight_g
|
| 212 |
-
vq_model.dec.resblocks.6.convs1.2.weight_v
|
| 213 |
-
vq_model.dec.resblocks.6.convs2.0.bias
|
| 214 |
-
vq_model.dec.resblocks.6.convs2.0.weight_g
|
| 215 |
-
vq_model.dec.resblocks.6.convs2.0.weight_v
|
| 216 |
-
vq_model.dec.resblocks.6.convs2.1.bias
|
| 217 |
-
vq_model.dec.resblocks.6.convs2.1.weight_g
|
| 218 |
-
vq_model.dec.resblocks.6.convs2.1.weight_v
|
| 219 |
-
vq_model.dec.resblocks.6.convs2.2.bias
|
| 220 |
-
vq_model.dec.resblocks.6.convs2.2.weight_g
|
| 221 |
-
vq_model.dec.resblocks.6.convs2.2.weight_v
|
| 222 |
-
vq_model.dec.resblocks.7.convs1.0.bias
|
| 223 |
-
vq_model.dec.resblocks.7.convs1.0.weight_g
|
| 224 |
-
vq_model.dec.resblocks.7.convs1.0.weight_v
|
| 225 |
-
vq_model.dec.resblocks.7.convs1.1.bias
|
| 226 |
-
vq_model.dec.resblocks.7.convs1.1.weight_g
|
| 227 |
-
vq_model.dec.resblocks.7.convs1.1.weight_v
|
| 228 |
-
vq_model.dec.resblocks.7.convs1.2.bias
|
| 229 |
-
vq_model.dec.resblocks.7.convs1.2.weight_g
|
| 230 |
-
vq_model.dec.resblocks.7.convs1.2.weight_v
|
| 231 |
-
vq_model.dec.resblocks.7.convs2.0.bias
|
| 232 |
-
vq_model.dec.resblocks.7.convs2.0.weight_g
|
| 233 |
-
vq_model.dec.resblocks.7.convs2.0.weight_v
|
| 234 |
-
vq_model.dec.resblocks.7.convs2.1.bias
|
| 235 |
-
vq_model.dec.resblocks.7.convs2.1.weight_g
|
| 236 |
-
vq_model.dec.resblocks.7.convs2.1.weight_v
|
| 237 |
-
vq_model.dec.resblocks.7.convs2.2.bias
|
| 238 |
-
vq_model.dec.resblocks.7.convs2.2.weight_g
|
| 239 |
-
vq_model.dec.resblocks.7.convs2.2.weight_v
|
| 240 |
-
vq_model.dec.resblocks.8.convs1.0.bias
|
| 241 |
-
vq_model.dec.resblocks.8.convs1.0.weight_g
|
| 242 |
-
vq_model.dec.resblocks.8.convs1.0.weight_v
|
| 243 |
-
vq_model.dec.resblocks.8.convs1.1.bias
|
| 244 |
-
vq_model.dec.resblocks.8.convs1.1.weight_g
|
| 245 |
-
vq_model.dec.resblocks.8.convs1.1.weight_v
|
| 246 |
-
vq_model.dec.resblocks.8.convs1.2.bias
|
| 247 |
-
vq_model.dec.resblocks.8.convs1.2.weight_g
|
| 248 |
-
vq_model.dec.resblocks.8.convs1.2.weight_v
|
| 249 |
-
vq_model.dec.resblocks.8.convs2.0.bias
|
| 250 |
-
vq_model.dec.resblocks.8.convs2.0.weight_g
|
| 251 |
-
vq_model.dec.resblocks.8.convs2.0.weight_v
|
| 252 |
-
vq_model.dec.resblocks.8.convs2.1.bias
|
| 253 |
-
vq_model.dec.resblocks.8.convs2.1.weight_g
|
| 254 |
-
vq_model.dec.resblocks.8.convs2.1.weight_v
|
| 255 |
-
vq_model.dec.resblocks.8.convs2.2.bias
|
| 256 |
-
vq_model.dec.resblocks.8.convs2.2.weight_g
|
| 257 |
-
vq_model.dec.resblocks.8.convs2.2.weight_v
|
| 258 |
-
vq_model.dec.resblocks.9.convs1.0.bias
|
| 259 |
-
vq_model.dec.resblocks.9.convs1.0.weight_g
|
| 260 |
-
vq_model.dec.resblocks.9.convs1.0.weight_v
|
| 261 |
-
vq_model.dec.resblocks.9.convs1.1.bias
|
| 262 |
-
vq_model.dec.resblocks.9.convs1.1.weight_g
|
| 263 |
-
vq_model.dec.resblocks.9.convs1.1.weight_v
|
| 264 |
-
vq_model.dec.resblocks.9.convs1.2.bias
|
| 265 |
-
vq_model.dec.resblocks.9.convs1.2.weight_g
|
| 266 |
-
vq_model.dec.resblocks.9.convs1.2.weight_v
|
| 267 |
-
vq_model.dec.resblocks.9.convs2.0.bias
|
| 268 |
-
vq_model.dec.resblocks.9.convs2.0.weight_g
|
| 269 |
-
vq_model.dec.resblocks.9.convs2.0.weight_v
|
| 270 |
-
vq_model.dec.resblocks.9.convs2.1.bias
|
| 271 |
-
vq_model.dec.resblocks.9.convs2.1.weight_g
|
| 272 |
-
vq_model.dec.resblocks.9.convs2.1.weight_v
|
| 273 |
-
vq_model.dec.resblocks.9.convs2.2.bias
|
| 274 |
-
vq_model.dec.resblocks.9.convs2.2.weight_g
|
| 275 |
-
vq_model.dec.resblocks.9.convs2.2.weight_v
|
| 276 |
-
vq_model.dec.ups.0.bias
|
| 277 |
-
vq_model.dec.ups.0.weight_g
|
| 278 |
-
vq_model.dec.ups.0.weight_v
|
| 279 |
-
vq_model.dec.ups.1.bias
|
| 280 |
-
vq_model.dec.ups.1.weight_g
|
| 281 |
-
vq_model.dec.ups.1.weight_v
|
| 282 |
-
vq_model.dec.ups.2.bias
|
| 283 |
-
vq_model.dec.ups.2.weight_g
|
| 284 |
-
vq_model.dec.ups.2.weight_v
|
| 285 |
-
vq_model.dec.ups.3.bias
|
| 286 |
-
vq_model.dec.ups.3.weight_g
|
| 287 |
-
vq_model.dec.ups.3.weight_v
|
| 288 |
-
vq_model.dec.ups.4.bias
|
| 289 |
-
vq_model.dec.ups.4.weight_g
|
| 290 |
-
vq_model.dec.ups.4.weight_v
|
| 291 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_k.bias
|
| 292 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_k.weight
|
| 293 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_o.bias
|
| 294 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_o.weight
|
| 295 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_q.bias
|
| 296 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_q.weight
|
| 297 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_v.bias
|
| 298 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_v.weight
|
| 299 |
-
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_k
|
| 300 |
-
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_v
|
| 301 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_k.bias
|
| 302 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_k.weight
|
| 303 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_o.bias
|
| 304 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_o.weight
|
| 305 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_q.bias
|
| 306 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_q.weight
|
| 307 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_v.bias
|
| 308 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_v.weight
|
| 309 |
-
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_k
|
| 310 |
-
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_v
|
| 311 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_k.bias
|
| 312 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_k.weight
|
| 313 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_o.bias
|
| 314 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_o.weight
|
| 315 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_q.bias
|
| 316 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_q.weight
|
| 317 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_v.bias
|
| 318 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_v.weight
|
| 319 |
-
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_k
|
| 320 |
-
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_v
|
| 321 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.bias
|
| 322 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.weight
|
| 323 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.bias
|
| 324 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.weight
|
| 325 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.bias
|
| 326 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.weight
|
| 327 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.bias
|
| 328 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.weight
|
| 329 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.bias
|
| 330 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.weight
|
| 331 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.bias
|
| 332 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.weight
|
| 333 |
-
vq_model.enc_p.encoder2.norm_layers_1.0.beta
|
| 334 |
-
vq_model.enc_p.encoder2.norm_layers_1.0.gamma
|
| 335 |
-
vq_model.enc_p.encoder2.norm_layers_1.1.beta
|
| 336 |
-
vq_model.enc_p.encoder2.norm_layers_1.1.gamma
|
| 337 |
-
vq_model.enc_p.encoder2.norm_layers_1.2.beta
|
| 338 |
-
vq_model.enc_p.encoder2.norm_layers_1.2.gamma
|
| 339 |
-
vq_model.enc_p.encoder2.norm_layers_2.0.beta
|
| 340 |
-
vq_model.enc_p.encoder2.norm_layers_2.0.gamma
|
| 341 |
-
vq_model.enc_p.encoder2.norm_layers_2.1.beta
|
| 342 |
-
vq_model.enc_p.encoder2.norm_layers_2.1.gamma
|
| 343 |
-
vq_model.enc_p.encoder2.norm_layers_2.2.beta
|
| 344 |
-
vq_model.enc_p.encoder2.norm_layers_2.2.gamma
|
| 345 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.bias
|
| 346 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.weight
|
| 347 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.bias
|
| 348 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.weight
|
| 349 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.bias
|
| 350 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.weight
|
| 351 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.bias
|
| 352 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.weight
|
| 353 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_k
|
| 354 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_v
|
| 355 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.bias
|
| 356 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.weight
|
| 357 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.bias
|
| 358 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.weight
|
| 359 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.bias
|
| 360 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.weight
|
| 361 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.bias
|
| 362 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.weight
|
| 363 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_k
|
| 364 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_v
|
| 365 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.bias
|
| 366 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.weight
|
| 367 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.bias
|
| 368 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.weight
|
| 369 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.bias
|
| 370 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.weight
|
| 371 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.bias
|
| 372 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.weight
|
| 373 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_k
|
| 374 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_v
|
| 375 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.bias
|
| 376 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.weight
|
| 377 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.bias
|
| 378 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.weight
|
| 379 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.bias
|
| 380 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.weight
|
| 381 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.bias
|
| 382 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.weight
|
| 383 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.bias
|
| 384 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.weight
|
| 385 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.bias
|
| 386 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.weight
|
| 387 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.0.beta
|
| 388 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.0.gamma
|
| 389 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.1.beta
|
| 390 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.1.gamma
|
| 391 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.2.beta
|
| 392 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.2.gamma
|
| 393 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.0.beta
|
| 394 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.0.gamma
|
| 395 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.1.beta
|
| 396 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.1.gamma
|
| 397 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.2.beta
|
| 398 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.2.gamma
|
| 399 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.bias
|
| 400 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.weight
|
| 401 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.bias
|
| 402 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.weight
|
| 403 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.bias
|
| 404 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.weight
|
| 405 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.bias
|
| 406 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.weight
|
| 407 |
-
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_k
|
| 408 |
-
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_v
|
| 409 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.bias
|
| 410 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.weight
|
| 411 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.bias
|
| 412 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.weight
|
| 413 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.bias
|
| 414 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.weight
|
| 415 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.bias
|
| 416 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.weight
|
| 417 |
-
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_k
|
| 418 |
-
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_v
|
| 419 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.bias
|
| 420 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.weight
|
| 421 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.bias
|
| 422 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.weight
|
| 423 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.bias
|
| 424 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.weight
|
| 425 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.bias
|
| 426 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.weight
|
| 427 |
-
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_k
|
| 428 |
-
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_v
|
| 429 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.bias
|
| 430 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.weight
|
| 431 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.bias
|
| 432 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.weight
|
| 433 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.bias
|
| 434 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.weight
|
| 435 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.bias
|
| 436 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.weight
|
| 437 |
-
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_k
|
| 438 |
-
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_v
|
| 439 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.bias
|
| 440 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.weight
|
| 441 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.bias
|
| 442 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.weight
|
| 443 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.bias
|
| 444 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.weight
|
| 445 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.bias
|
| 446 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.weight
|
| 447 |
-
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_k
|
| 448 |
-
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_v
|
| 449 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.bias
|
| 450 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.weight
|
| 451 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.bias
|
| 452 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.weight
|
| 453 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.bias
|
| 454 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.weight
|
| 455 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.bias
|
| 456 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.weight
|
| 457 |
-
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_k
|
| 458 |
-
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_v
|
| 459 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.bias
|
| 460 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.weight
|
| 461 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.bias
|
| 462 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.weight
|
| 463 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.bias
|
| 464 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.weight
|
| 465 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.bias
|
| 466 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.weight
|
| 467 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.bias
|
| 468 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.weight
|
| 469 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.bias
|
| 470 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.weight
|
| 471 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.bias
|
| 472 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.weight
|
| 473 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.bias
|
| 474 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.weight
|
| 475 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.bias
|
| 476 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.weight
|
| 477 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.bias
|
| 478 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.weight
|
| 479 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.bias
|
| 480 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.weight
|
| 481 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.bias
|
| 482 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.weight
|
| 483 |
-
vq_model.enc_p.encoder_text.norm_layers_1.0.beta
|
| 484 |
-
vq_model.enc_p.encoder_text.norm_layers_1.0.gamma
|
| 485 |
-
vq_model.enc_p.encoder_text.norm_layers_1.1.beta
|
| 486 |
-
vq_model.enc_p.encoder_text.norm_layers_1.1.gamma
|
| 487 |
-
vq_model.enc_p.encoder_text.norm_layers_1.2.beta
|
| 488 |
-
vq_model.enc_p.encoder_text.norm_layers_1.2.gamma
|
| 489 |
-
vq_model.enc_p.encoder_text.norm_layers_1.3.beta
|
| 490 |
-
vq_model.enc_p.encoder_text.norm_layers_1.3.gamma
|
| 491 |
-
vq_model.enc_p.encoder_text.norm_layers_1.4.beta
|
| 492 |
-
vq_model.enc_p.encoder_text.norm_layers_1.4.gamma
|
| 493 |
-
vq_model.enc_p.encoder_text.norm_layers_1.5.beta
|
| 494 |
-
vq_model.enc_p.encoder_text.norm_layers_1.5.gamma
|
| 495 |
-
vq_model.enc_p.encoder_text.norm_layers_2.0.beta
|
| 496 |
-
vq_model.enc_p.encoder_text.norm_layers_2.0.gamma
|
| 497 |
-
vq_model.enc_p.encoder_text.norm_layers_2.1.beta
|
| 498 |
-
vq_model.enc_p.encoder_text.norm_layers_2.1.gamma
|
| 499 |
-
vq_model.enc_p.encoder_text.norm_layers_2.2.beta
|
| 500 |
-
vq_model.enc_p.encoder_text.norm_layers_2.2.gamma
|
| 501 |
-
vq_model.enc_p.encoder_text.norm_layers_2.3.beta
|
| 502 |
-
vq_model.enc_p.encoder_text.norm_layers_2.3.gamma
|
| 503 |
-
vq_model.enc_p.encoder_text.norm_layers_2.4.beta
|
| 504 |
-
vq_model.enc_p.encoder_text.norm_layers_2.4.gamma
|
| 505 |
-
vq_model.enc_p.encoder_text.norm_layers_2.5.beta
|
| 506 |
-
vq_model.enc_p.encoder_text.norm_layers_2.5.gamma
|
| 507 |
-
vq_model.enc_p.mrte.c_post.bias
|
| 508 |
-
vq_model.enc_p.mrte.c_post.weight
|
| 509 |
-
vq_model.enc_p.mrte.c_pre.bias
|
| 510 |
-
vq_model.enc_p.mrte.c_pre.weight
|
| 511 |
-
vq_model.enc_p.mrte.cross_attention.conv_k.bias
|
| 512 |
-
vq_model.enc_p.mrte.cross_attention.conv_k.weight
|
| 513 |
-
vq_model.enc_p.mrte.cross_attention.conv_o.bias
|
| 514 |
-
vq_model.enc_p.mrte.cross_attention.conv_o.weight
|
| 515 |
-
vq_model.enc_p.mrte.cross_attention.conv_q.bias
|
| 516 |
-
vq_model.enc_p.mrte.cross_attention.conv_q.weight
|
| 517 |
-
vq_model.enc_p.mrte.cross_attention.conv_v.bias
|
| 518 |
-
vq_model.enc_p.mrte.cross_attention.conv_v.weight
|
| 519 |
-
vq_model.enc_p.mrte.text_pre.bias
|
| 520 |
-
vq_model.enc_p.mrte.text_pre.weight
|
| 521 |
-
vq_model.enc_p.proj.bias
|
| 522 |
-
vq_model.enc_p.proj.weight
|
| 523 |
-
vq_model.enc_p.ssl_proj.bias
|
| 524 |
-
vq_model.enc_p.ssl_proj.weight
|
| 525 |
-
vq_model.enc_p.text_embedding.weight
|
| 526 |
-
vq_model.flow.flows.0.enc.cond_layer.bias
|
| 527 |
-
vq_model.flow.flows.0.enc.cond_layer.weight_g
|
| 528 |
-
vq_model.flow.flows.0.enc.cond_layer.weight_v
|
| 529 |
-
vq_model.flow.flows.0.enc.in_layers.0.bias
|
| 530 |
-
vq_model.flow.flows.0.enc.in_layers.0.weight_g
|
| 531 |
-
vq_model.flow.flows.0.enc.in_layers.0.weight_v
|
| 532 |
-
vq_model.flow.flows.0.enc.in_layers.1.bias
|
| 533 |
-
vq_model.flow.flows.0.enc.in_layers.1.weight_g
|
| 534 |
-
vq_model.flow.flows.0.enc.in_layers.1.weight_v
|
| 535 |
-
vq_model.flow.flows.0.enc.in_layers.2.bias
|
| 536 |
-
vq_model.flow.flows.0.enc.in_layers.2.weight_g
|
| 537 |
-
vq_model.flow.flows.0.enc.in_layers.2.weight_v
|
| 538 |
-
vq_model.flow.flows.0.enc.in_layers.3.bias
|
| 539 |
-
vq_model.flow.flows.0.enc.in_layers.3.weight_g
|
| 540 |
-
vq_model.flow.flows.0.enc.in_layers.3.weight_v
|
| 541 |
-
vq_model.flow.flows.0.enc.res_skip_layers.0.bias
|
| 542 |
-
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_g
|
| 543 |
-
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_v
|
| 544 |
-
vq_model.flow.flows.0.enc.res_skip_layers.1.bias
|
| 545 |
-
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_g
|
| 546 |
-
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_v
|
| 547 |
-
vq_model.flow.flows.0.enc.res_skip_layers.2.bias
|
| 548 |
-
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_g
|
| 549 |
-
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_v
|
| 550 |
-
vq_model.flow.flows.0.enc.res_skip_layers.3.bias
|
| 551 |
-
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_g
|
| 552 |
-
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_v
|
| 553 |
-
vq_model.flow.flows.0.post.bias
|
| 554 |
-
vq_model.flow.flows.0.post.weight
|
| 555 |
-
vq_model.flow.flows.0.pre.bias
|
| 556 |
-
vq_model.flow.flows.0.pre.weight
|
| 557 |
-
vq_model.flow.flows.2.enc.cond_layer.bias
|
| 558 |
-
vq_model.flow.flows.2.enc.cond_layer.weight_g
|
| 559 |
-
vq_model.flow.flows.2.enc.cond_layer.weight_v
|
| 560 |
-
vq_model.flow.flows.2.enc.in_layers.0.bias
|
| 561 |
-
vq_model.flow.flows.2.enc.in_layers.0.weight_g
|
| 562 |
-
vq_model.flow.flows.2.enc.in_layers.0.weight_v
|
| 563 |
-
vq_model.flow.flows.2.enc.in_layers.1.bias
|
| 564 |
-
vq_model.flow.flows.2.enc.in_layers.1.weight_g
|
| 565 |
-
vq_model.flow.flows.2.enc.in_layers.1.weight_v
|
| 566 |
-
vq_model.flow.flows.2.enc.in_layers.2.bias
|
| 567 |
-
vq_model.flow.flows.2.enc.in_layers.2.weight_g
|
| 568 |
-
vq_model.flow.flows.2.enc.in_layers.2.weight_v
|
| 569 |
-
vq_model.flow.flows.2.enc.in_layers.3.bias
|
| 570 |
-
vq_model.flow.flows.2.enc.in_layers.3.weight_g
|
| 571 |
-
vq_model.flow.flows.2.enc.in_layers.3.weight_v
|
| 572 |
-
vq_model.flow.flows.2.enc.res_skip_layers.0.bias
|
| 573 |
-
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_g
|
| 574 |
-
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_v
|
| 575 |
-
vq_model.flow.flows.2.enc.res_skip_layers.1.bias
|
| 576 |
-
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_g
|
| 577 |
-
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_v
|
| 578 |
-
vq_model.flow.flows.2.enc.res_skip_layers.2.bias
|
| 579 |
-
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_g
|
| 580 |
-
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_v
|
| 581 |
-
vq_model.flow.flows.2.enc.res_skip_layers.3.bias
|
| 582 |
-
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_g
|
| 583 |
-
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_v
|
| 584 |
-
vq_model.flow.flows.2.post.bias
|
| 585 |
-
vq_model.flow.flows.2.post.weight
|
| 586 |
-
vq_model.flow.flows.2.pre.bias
|
| 587 |
-
vq_model.flow.flows.2.pre.weight
|
| 588 |
-
vq_model.flow.flows.4.enc.cond_layer.bias
|
| 589 |
-
vq_model.flow.flows.4.enc.cond_layer.weight_g
|
| 590 |
-
vq_model.flow.flows.4.enc.cond_layer.weight_v
|
| 591 |
-
vq_model.flow.flows.4.enc.in_layers.0.bias
|
| 592 |
-
vq_model.flow.flows.4.enc.in_layers.0.weight_g
|
| 593 |
-
vq_model.flow.flows.4.enc.in_layers.0.weight_v
|
| 594 |
-
vq_model.flow.flows.4.enc.in_layers.1.bias
|
| 595 |
-
vq_model.flow.flows.4.enc.in_layers.1.weight_g
|
| 596 |
-
vq_model.flow.flows.4.enc.in_layers.1.weight_v
|
| 597 |
-
vq_model.flow.flows.4.enc.in_layers.2.bias
|
| 598 |
-
vq_model.flow.flows.4.enc.in_layers.2.weight_g
|
| 599 |
-
vq_model.flow.flows.4.enc.in_layers.2.weight_v
|
| 600 |
-
vq_model.flow.flows.4.enc.in_layers.3.bias
|
| 601 |
-
vq_model.flow.flows.4.enc.in_layers.3.weight_g
|
| 602 |
-
vq_model.flow.flows.4.enc.in_layers.3.weight_v
|
| 603 |
-
vq_model.flow.flows.4.enc.res_skip_layers.0.bias
|
| 604 |
-
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_g
|
| 605 |
-
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_v
|
| 606 |
-
vq_model.flow.flows.4.enc.res_skip_layers.1.bias
|
| 607 |
-
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_g
|
| 608 |
-
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_v
|
| 609 |
-
vq_model.flow.flows.4.enc.res_skip_layers.2.bias
|
| 610 |
-
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_g
|
| 611 |
-
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_v
|
| 612 |
-
vq_model.flow.flows.4.enc.res_skip_layers.3.bias
|
| 613 |
-
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_g
|
| 614 |
-
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_v
|
| 615 |
-
vq_model.flow.flows.4.post.bias
|
| 616 |
-
vq_model.flow.flows.4.post.weight
|
| 617 |
-
vq_model.flow.flows.4.pre.bias
|
| 618 |
-
vq_model.flow.flows.4.pre.weight
|
| 619 |
-
vq_model.flow.flows.6.enc.cond_layer.bias
|
| 620 |
-
vq_model.flow.flows.6.enc.cond_layer.weight_g
|
| 621 |
-
vq_model.flow.flows.6.enc.cond_layer.weight_v
|
| 622 |
-
vq_model.flow.flows.6.enc.in_layers.0.bias
|
| 623 |
-
vq_model.flow.flows.6.enc.in_layers.0.weight_g
|
| 624 |
-
vq_model.flow.flows.6.enc.in_layers.0.weight_v
|
| 625 |
-
vq_model.flow.flows.6.enc.in_layers.1.bias
|
| 626 |
-
vq_model.flow.flows.6.enc.in_layers.1.weight_g
|
| 627 |
-
vq_model.flow.flows.6.enc.in_layers.1.weight_v
|
| 628 |
-
vq_model.flow.flows.6.enc.in_layers.2.bias
|
| 629 |
-
vq_model.flow.flows.6.enc.in_layers.2.weight_g
|
| 630 |
-
vq_model.flow.flows.6.enc.in_layers.2.weight_v
|
| 631 |
-
vq_model.flow.flows.6.enc.in_layers.3.bias
|
| 632 |
-
vq_model.flow.flows.6.enc.in_layers.3.weight_g
|
| 633 |
-
vq_model.flow.flows.6.enc.in_layers.3.weight_v
|
| 634 |
-
vq_model.flow.flows.6.enc.res_skip_layers.0.bias
|
| 635 |
-
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_g
|
| 636 |
-
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_v
|
| 637 |
-
vq_model.flow.flows.6.enc.res_skip_layers.1.bias
|
| 638 |
-
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_g
|
| 639 |
-
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_v
|
| 640 |
-
vq_model.flow.flows.6.enc.res_skip_layers.2.bias
|
| 641 |
-
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_g
|
| 642 |
-
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_v
|
| 643 |
-
vq_model.flow.flows.6.enc.res_skip_layers.3.bias
|
| 644 |
-
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_g
|
| 645 |
-
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_v
|
| 646 |
-
vq_model.flow.flows.6.post.bias
|
| 647 |
-
vq_model.flow.flows.6.post.weight
|
| 648 |
-
vq_model.flow.flows.6.pre.bias
|
| 649 |
-
vq_model.flow.flows.6.pre.weight
|
| 650 |
-
vq_model.quantizer.vq.layers.0._codebook.embed
|
| 651 |
-
vq_model.ref_enc.fc.fc.bias
|
| 652 |
-
vq_model.ref_enc.fc.fc.weight
|
| 653 |
-
vq_model.ref_enc.slf_attn.fc.bias
|
| 654 |
-
vq_model.ref_enc.slf_attn.fc.weight
|
| 655 |
-
vq_model.ref_enc.slf_attn.w_ks.bias
|
| 656 |
-
vq_model.ref_enc.slf_attn.w_ks.weight
|
| 657 |
-
vq_model.ref_enc.slf_attn.w_qs.bias
|
| 658 |
-
vq_model.ref_enc.slf_attn.w_qs.weight
|
| 659 |
-
vq_model.ref_enc.slf_attn.w_vs.bias
|
| 660 |
-
vq_model.ref_enc.slf_attn.w_vs.weight
|
| 661 |
-
vq_model.ref_enc.spectral.0.fc.bias
|
| 662 |
-
vq_model.ref_enc.spectral.0.fc.weight
|
| 663 |
-
vq_model.ref_enc.spectral.3.fc.bias
|
| 664 |
-
vq_model.ref_enc.spectral.3.fc.weight
|
| 665 |
-
vq_model.ref_enc.temporal.0.conv1.conv.bias
|
| 666 |
-
vq_model.ref_enc.temporal.0.conv1.conv.weight
|
| 667 |
-
vq_model.ref_enc.temporal.1.conv1.conv.bias
|
| 668 |
-
vq_model.ref_enc.temporal.1.conv1.conv.weight
|
|
|
|
| 1 |
+
vq_model.dec.cond.bias
|
| 2 |
+
vq_model.dec.cond.weight
|
| 3 |
+
vq_model.dec.conv_post.weight
|
| 4 |
+
vq_model.dec.conv_pre.bias
|
| 5 |
+
vq_model.dec.conv_pre.weight
|
| 6 |
+
vq_model.dec.resblocks.0.convs1.0.bias
|
| 7 |
+
vq_model.dec.resblocks.0.convs1.0.weight_g
|
| 8 |
+
vq_model.dec.resblocks.0.convs1.0.weight_v
|
| 9 |
+
vq_model.dec.resblocks.0.convs1.1.bias
|
| 10 |
+
vq_model.dec.resblocks.0.convs1.1.weight_g
|
| 11 |
+
vq_model.dec.resblocks.0.convs1.1.weight_v
|
| 12 |
+
vq_model.dec.resblocks.0.convs1.2.bias
|
| 13 |
+
vq_model.dec.resblocks.0.convs1.2.weight_g
|
| 14 |
+
vq_model.dec.resblocks.0.convs1.2.weight_v
|
| 15 |
+
vq_model.dec.resblocks.0.convs2.0.bias
|
| 16 |
+
vq_model.dec.resblocks.0.convs2.0.weight_g
|
| 17 |
+
vq_model.dec.resblocks.0.convs2.0.weight_v
|
| 18 |
+
vq_model.dec.resblocks.0.convs2.1.bias
|
| 19 |
+
vq_model.dec.resblocks.0.convs2.1.weight_g
|
| 20 |
+
vq_model.dec.resblocks.0.convs2.1.weight_v
|
| 21 |
+
vq_model.dec.resblocks.0.convs2.2.bias
|
| 22 |
+
vq_model.dec.resblocks.0.convs2.2.weight_g
|
| 23 |
+
vq_model.dec.resblocks.0.convs2.2.weight_v
|
| 24 |
+
vq_model.dec.resblocks.1.convs1.0.bias
|
| 25 |
+
vq_model.dec.resblocks.1.convs1.0.weight_g
|
| 26 |
+
vq_model.dec.resblocks.1.convs1.0.weight_v
|
| 27 |
+
vq_model.dec.resblocks.1.convs1.1.bias
|
| 28 |
+
vq_model.dec.resblocks.1.convs1.1.weight_g
|
| 29 |
+
vq_model.dec.resblocks.1.convs1.1.weight_v
|
| 30 |
+
vq_model.dec.resblocks.1.convs1.2.bias
|
| 31 |
+
vq_model.dec.resblocks.1.convs1.2.weight_g
|
| 32 |
+
vq_model.dec.resblocks.1.convs1.2.weight_v
|
| 33 |
+
vq_model.dec.resblocks.1.convs2.0.bias
|
| 34 |
+
vq_model.dec.resblocks.1.convs2.0.weight_g
|
| 35 |
+
vq_model.dec.resblocks.1.convs2.0.weight_v
|
| 36 |
+
vq_model.dec.resblocks.1.convs2.1.bias
|
| 37 |
+
vq_model.dec.resblocks.1.convs2.1.weight_g
|
| 38 |
+
vq_model.dec.resblocks.1.convs2.1.weight_v
|
| 39 |
+
vq_model.dec.resblocks.1.convs2.2.bias
|
| 40 |
+
vq_model.dec.resblocks.1.convs2.2.weight_g
|
| 41 |
+
vq_model.dec.resblocks.1.convs2.2.weight_v
|
| 42 |
+
vq_model.dec.resblocks.10.convs1.0.bias
|
| 43 |
+
vq_model.dec.resblocks.10.convs1.0.weight_g
|
| 44 |
+
vq_model.dec.resblocks.10.convs1.0.weight_v
|
| 45 |
+
vq_model.dec.resblocks.10.convs1.1.bias
|
| 46 |
+
vq_model.dec.resblocks.10.convs1.1.weight_g
|
| 47 |
+
vq_model.dec.resblocks.10.convs1.1.weight_v
|
| 48 |
+
vq_model.dec.resblocks.10.convs1.2.bias
|
| 49 |
+
vq_model.dec.resblocks.10.convs1.2.weight_g
|
| 50 |
+
vq_model.dec.resblocks.10.convs1.2.weight_v
|
| 51 |
+
vq_model.dec.resblocks.10.convs2.0.bias
|
| 52 |
+
vq_model.dec.resblocks.10.convs2.0.weight_g
|
| 53 |
+
vq_model.dec.resblocks.10.convs2.0.weight_v
|
| 54 |
+
vq_model.dec.resblocks.10.convs2.1.bias
|
| 55 |
+
vq_model.dec.resblocks.10.convs2.1.weight_g
|
| 56 |
+
vq_model.dec.resblocks.10.convs2.1.weight_v
|
| 57 |
+
vq_model.dec.resblocks.10.convs2.2.bias
|
| 58 |
+
vq_model.dec.resblocks.10.convs2.2.weight_g
|
| 59 |
+
vq_model.dec.resblocks.10.convs2.2.weight_v
|
| 60 |
+
vq_model.dec.resblocks.11.convs1.0.bias
|
| 61 |
+
vq_model.dec.resblocks.11.convs1.0.weight_g
|
| 62 |
+
vq_model.dec.resblocks.11.convs1.0.weight_v
|
| 63 |
+
vq_model.dec.resblocks.11.convs1.1.bias
|
| 64 |
+
vq_model.dec.resblocks.11.convs1.1.weight_g
|
| 65 |
+
vq_model.dec.resblocks.11.convs1.1.weight_v
|
| 66 |
+
vq_model.dec.resblocks.11.convs1.2.bias
|
| 67 |
+
vq_model.dec.resblocks.11.convs1.2.weight_g
|
| 68 |
+
vq_model.dec.resblocks.11.convs1.2.weight_v
|
| 69 |
+
vq_model.dec.resblocks.11.convs2.0.bias
|
| 70 |
+
vq_model.dec.resblocks.11.convs2.0.weight_g
|
| 71 |
+
vq_model.dec.resblocks.11.convs2.0.weight_v
|
| 72 |
+
vq_model.dec.resblocks.11.convs2.1.bias
|
| 73 |
+
vq_model.dec.resblocks.11.convs2.1.weight_g
|
| 74 |
+
vq_model.dec.resblocks.11.convs2.1.weight_v
|
| 75 |
+
vq_model.dec.resblocks.11.convs2.2.bias
|
| 76 |
+
vq_model.dec.resblocks.11.convs2.2.weight_g
|
| 77 |
+
vq_model.dec.resblocks.11.convs2.2.weight_v
|
| 78 |
+
vq_model.dec.resblocks.12.convs1.0.bias
|
| 79 |
+
vq_model.dec.resblocks.12.convs1.0.weight_g
|
| 80 |
+
vq_model.dec.resblocks.12.convs1.0.weight_v
|
| 81 |
+
vq_model.dec.resblocks.12.convs1.1.bias
|
| 82 |
+
vq_model.dec.resblocks.12.convs1.1.weight_g
|
| 83 |
+
vq_model.dec.resblocks.12.convs1.1.weight_v
|
| 84 |
+
vq_model.dec.resblocks.12.convs1.2.bias
|
| 85 |
+
vq_model.dec.resblocks.12.convs1.2.weight_g
|
| 86 |
+
vq_model.dec.resblocks.12.convs1.2.weight_v
|
| 87 |
+
vq_model.dec.resblocks.12.convs2.0.bias
|
| 88 |
+
vq_model.dec.resblocks.12.convs2.0.weight_g
|
| 89 |
+
vq_model.dec.resblocks.12.convs2.0.weight_v
|
| 90 |
+
vq_model.dec.resblocks.12.convs2.1.bias
|
| 91 |
+
vq_model.dec.resblocks.12.convs2.1.weight_g
|
| 92 |
+
vq_model.dec.resblocks.12.convs2.1.weight_v
|
| 93 |
+
vq_model.dec.resblocks.12.convs2.2.bias
|
| 94 |
+
vq_model.dec.resblocks.12.convs2.2.weight_g
|
| 95 |
+
vq_model.dec.resblocks.12.convs2.2.weight_v
|
| 96 |
+
vq_model.dec.resblocks.13.convs1.0.bias
|
| 97 |
+
vq_model.dec.resblocks.13.convs1.0.weight_g
|
| 98 |
+
vq_model.dec.resblocks.13.convs1.0.weight_v
|
| 99 |
+
vq_model.dec.resblocks.13.convs1.1.bias
|
| 100 |
+
vq_model.dec.resblocks.13.convs1.1.weight_g
|
| 101 |
+
vq_model.dec.resblocks.13.convs1.1.weight_v
|
| 102 |
+
vq_model.dec.resblocks.13.convs1.2.bias
|
| 103 |
+
vq_model.dec.resblocks.13.convs1.2.weight_g
|
| 104 |
+
vq_model.dec.resblocks.13.convs1.2.weight_v
|
| 105 |
+
vq_model.dec.resblocks.13.convs2.0.bias
|
| 106 |
+
vq_model.dec.resblocks.13.convs2.0.weight_g
|
| 107 |
+
vq_model.dec.resblocks.13.convs2.0.weight_v
|
| 108 |
+
vq_model.dec.resblocks.13.convs2.1.bias
|
| 109 |
+
vq_model.dec.resblocks.13.convs2.1.weight_g
|
| 110 |
+
vq_model.dec.resblocks.13.convs2.1.weight_v
|
| 111 |
+
vq_model.dec.resblocks.13.convs2.2.bias
|
| 112 |
+
vq_model.dec.resblocks.13.convs2.2.weight_g
|
| 113 |
+
vq_model.dec.resblocks.13.convs2.2.weight_v
|
| 114 |
+
vq_model.dec.resblocks.14.convs1.0.bias
|
| 115 |
+
vq_model.dec.resblocks.14.convs1.0.weight_g
|
| 116 |
+
vq_model.dec.resblocks.14.convs1.0.weight_v
|
| 117 |
+
vq_model.dec.resblocks.14.convs1.1.bias
|
| 118 |
+
vq_model.dec.resblocks.14.convs1.1.weight_g
|
| 119 |
+
vq_model.dec.resblocks.14.convs1.1.weight_v
|
| 120 |
+
vq_model.dec.resblocks.14.convs1.2.bias
|
| 121 |
+
vq_model.dec.resblocks.14.convs1.2.weight_g
|
| 122 |
+
vq_model.dec.resblocks.14.convs1.2.weight_v
|
| 123 |
+
vq_model.dec.resblocks.14.convs2.0.bias
|
| 124 |
+
vq_model.dec.resblocks.14.convs2.0.weight_g
|
| 125 |
+
vq_model.dec.resblocks.14.convs2.0.weight_v
|
| 126 |
+
vq_model.dec.resblocks.14.convs2.1.bias
|
| 127 |
+
vq_model.dec.resblocks.14.convs2.1.weight_g
|
| 128 |
+
vq_model.dec.resblocks.14.convs2.1.weight_v
|
| 129 |
+
vq_model.dec.resblocks.14.convs2.2.bias
|
| 130 |
+
vq_model.dec.resblocks.14.convs2.2.weight_g
|
| 131 |
+
vq_model.dec.resblocks.14.convs2.2.weight_v
|
| 132 |
+
vq_model.dec.resblocks.2.convs1.0.bias
|
| 133 |
+
vq_model.dec.resblocks.2.convs1.0.weight_g
|
| 134 |
+
vq_model.dec.resblocks.2.convs1.0.weight_v
|
| 135 |
+
vq_model.dec.resblocks.2.convs1.1.bias
|
| 136 |
+
vq_model.dec.resblocks.2.convs1.1.weight_g
|
| 137 |
+
vq_model.dec.resblocks.2.convs1.1.weight_v
|
| 138 |
+
vq_model.dec.resblocks.2.convs1.2.bias
|
| 139 |
+
vq_model.dec.resblocks.2.convs1.2.weight_g
|
| 140 |
+
vq_model.dec.resblocks.2.convs1.2.weight_v
|
| 141 |
+
vq_model.dec.resblocks.2.convs2.0.bias
|
| 142 |
+
vq_model.dec.resblocks.2.convs2.0.weight_g
|
| 143 |
+
vq_model.dec.resblocks.2.convs2.0.weight_v
|
| 144 |
+
vq_model.dec.resblocks.2.convs2.1.bias
|
| 145 |
+
vq_model.dec.resblocks.2.convs2.1.weight_g
|
| 146 |
+
vq_model.dec.resblocks.2.convs2.1.weight_v
|
| 147 |
+
vq_model.dec.resblocks.2.convs2.2.bias
|
| 148 |
+
vq_model.dec.resblocks.2.convs2.2.weight_g
|
| 149 |
+
vq_model.dec.resblocks.2.convs2.2.weight_v
|
| 150 |
+
vq_model.dec.resblocks.3.convs1.0.bias
|
| 151 |
+
vq_model.dec.resblocks.3.convs1.0.weight_g
|
| 152 |
+
vq_model.dec.resblocks.3.convs1.0.weight_v
|
| 153 |
+
vq_model.dec.resblocks.3.convs1.1.bias
|
| 154 |
+
vq_model.dec.resblocks.3.convs1.1.weight_g
|
| 155 |
+
vq_model.dec.resblocks.3.convs1.1.weight_v
|
| 156 |
+
vq_model.dec.resblocks.3.convs1.2.bias
|
| 157 |
+
vq_model.dec.resblocks.3.convs1.2.weight_g
|
| 158 |
+
vq_model.dec.resblocks.3.convs1.2.weight_v
|
| 159 |
+
vq_model.dec.resblocks.3.convs2.0.bias
|
| 160 |
+
vq_model.dec.resblocks.3.convs2.0.weight_g
|
| 161 |
+
vq_model.dec.resblocks.3.convs2.0.weight_v
|
| 162 |
+
vq_model.dec.resblocks.3.convs2.1.bias
|
| 163 |
+
vq_model.dec.resblocks.3.convs2.1.weight_g
|
| 164 |
+
vq_model.dec.resblocks.3.convs2.1.weight_v
|
| 165 |
+
vq_model.dec.resblocks.3.convs2.2.bias
|
| 166 |
+
vq_model.dec.resblocks.3.convs2.2.weight_g
|
| 167 |
+
vq_model.dec.resblocks.3.convs2.2.weight_v
|
| 168 |
+
vq_model.dec.resblocks.4.convs1.0.bias
|
| 169 |
+
vq_model.dec.resblocks.4.convs1.0.weight_g
|
| 170 |
+
vq_model.dec.resblocks.4.convs1.0.weight_v
|
| 171 |
+
vq_model.dec.resblocks.4.convs1.1.bias
|
| 172 |
+
vq_model.dec.resblocks.4.convs1.1.weight_g
|
| 173 |
+
vq_model.dec.resblocks.4.convs1.1.weight_v
|
| 174 |
+
vq_model.dec.resblocks.4.convs1.2.bias
|
| 175 |
+
vq_model.dec.resblocks.4.convs1.2.weight_g
|
| 176 |
+
vq_model.dec.resblocks.4.convs1.2.weight_v
|
| 177 |
+
vq_model.dec.resblocks.4.convs2.0.bias
|
| 178 |
+
vq_model.dec.resblocks.4.convs2.0.weight_g
|
| 179 |
+
vq_model.dec.resblocks.4.convs2.0.weight_v
|
| 180 |
+
vq_model.dec.resblocks.4.convs2.1.bias
|
| 181 |
+
vq_model.dec.resblocks.4.convs2.1.weight_g
|
| 182 |
+
vq_model.dec.resblocks.4.convs2.1.weight_v
|
| 183 |
+
vq_model.dec.resblocks.4.convs2.2.bias
|
| 184 |
+
vq_model.dec.resblocks.4.convs2.2.weight_g
|
| 185 |
+
vq_model.dec.resblocks.4.convs2.2.weight_v
|
| 186 |
+
vq_model.dec.resblocks.5.convs1.0.bias
|
| 187 |
+
vq_model.dec.resblocks.5.convs1.0.weight_g
|
| 188 |
+
vq_model.dec.resblocks.5.convs1.0.weight_v
|
| 189 |
+
vq_model.dec.resblocks.5.convs1.1.bias
|
| 190 |
+
vq_model.dec.resblocks.5.convs1.1.weight_g
|
| 191 |
+
vq_model.dec.resblocks.5.convs1.1.weight_v
|
| 192 |
+
vq_model.dec.resblocks.5.convs1.2.bias
|
| 193 |
+
vq_model.dec.resblocks.5.convs1.2.weight_g
|
| 194 |
+
vq_model.dec.resblocks.5.convs1.2.weight_v
|
| 195 |
+
vq_model.dec.resblocks.5.convs2.0.bias
|
| 196 |
+
vq_model.dec.resblocks.5.convs2.0.weight_g
|
| 197 |
+
vq_model.dec.resblocks.5.convs2.0.weight_v
|
| 198 |
+
vq_model.dec.resblocks.5.convs2.1.bias
|
| 199 |
+
vq_model.dec.resblocks.5.convs2.1.weight_g
|
| 200 |
+
vq_model.dec.resblocks.5.convs2.1.weight_v
|
| 201 |
+
vq_model.dec.resblocks.5.convs2.2.bias
|
| 202 |
+
vq_model.dec.resblocks.5.convs2.2.weight_g
|
| 203 |
+
vq_model.dec.resblocks.5.convs2.2.weight_v
|
| 204 |
+
vq_model.dec.resblocks.6.convs1.0.bias
|
| 205 |
+
vq_model.dec.resblocks.6.convs1.0.weight_g
|
| 206 |
+
vq_model.dec.resblocks.6.convs1.0.weight_v
|
| 207 |
+
vq_model.dec.resblocks.6.convs1.1.bias
|
| 208 |
+
vq_model.dec.resblocks.6.convs1.1.weight_g
|
| 209 |
+
vq_model.dec.resblocks.6.convs1.1.weight_v
|
| 210 |
+
vq_model.dec.resblocks.6.convs1.2.bias
|
| 211 |
+
vq_model.dec.resblocks.6.convs1.2.weight_g
|
| 212 |
+
vq_model.dec.resblocks.6.convs1.2.weight_v
|
| 213 |
+
vq_model.dec.resblocks.6.convs2.0.bias
|
| 214 |
+
vq_model.dec.resblocks.6.convs2.0.weight_g
|
| 215 |
+
vq_model.dec.resblocks.6.convs2.0.weight_v
|
| 216 |
+
vq_model.dec.resblocks.6.convs2.1.bias
|
| 217 |
+
vq_model.dec.resblocks.6.convs2.1.weight_g
|
| 218 |
+
vq_model.dec.resblocks.6.convs2.1.weight_v
|
| 219 |
+
vq_model.dec.resblocks.6.convs2.2.bias
|
| 220 |
+
vq_model.dec.resblocks.6.convs2.2.weight_g
|
| 221 |
+
vq_model.dec.resblocks.6.convs2.2.weight_v
|
| 222 |
+
vq_model.dec.resblocks.7.convs1.0.bias
|
| 223 |
+
vq_model.dec.resblocks.7.convs1.0.weight_g
|
| 224 |
+
vq_model.dec.resblocks.7.convs1.0.weight_v
|
| 225 |
+
vq_model.dec.resblocks.7.convs1.1.bias
|
| 226 |
+
vq_model.dec.resblocks.7.convs1.1.weight_g
|
| 227 |
+
vq_model.dec.resblocks.7.convs1.1.weight_v
|
| 228 |
+
vq_model.dec.resblocks.7.convs1.2.bias
|
| 229 |
+
vq_model.dec.resblocks.7.convs1.2.weight_g
|
| 230 |
+
vq_model.dec.resblocks.7.convs1.2.weight_v
|
| 231 |
+
vq_model.dec.resblocks.7.convs2.0.bias
|
| 232 |
+
vq_model.dec.resblocks.7.convs2.0.weight_g
|
| 233 |
+
vq_model.dec.resblocks.7.convs2.0.weight_v
|
| 234 |
+
vq_model.dec.resblocks.7.convs2.1.bias
|
| 235 |
+
vq_model.dec.resblocks.7.convs2.1.weight_g
|
| 236 |
+
vq_model.dec.resblocks.7.convs2.1.weight_v
|
| 237 |
+
vq_model.dec.resblocks.7.convs2.2.bias
|
| 238 |
+
vq_model.dec.resblocks.7.convs2.2.weight_g
|
| 239 |
+
vq_model.dec.resblocks.7.convs2.2.weight_v
|
| 240 |
+
vq_model.dec.resblocks.8.convs1.0.bias
|
| 241 |
+
vq_model.dec.resblocks.8.convs1.0.weight_g
|
| 242 |
+
vq_model.dec.resblocks.8.convs1.0.weight_v
|
| 243 |
+
vq_model.dec.resblocks.8.convs1.1.bias
|
| 244 |
+
vq_model.dec.resblocks.8.convs1.1.weight_g
|
| 245 |
+
vq_model.dec.resblocks.8.convs1.1.weight_v
|
| 246 |
+
vq_model.dec.resblocks.8.convs1.2.bias
|
| 247 |
+
vq_model.dec.resblocks.8.convs1.2.weight_g
|
| 248 |
+
vq_model.dec.resblocks.8.convs1.2.weight_v
|
| 249 |
+
vq_model.dec.resblocks.8.convs2.0.bias
|
| 250 |
+
vq_model.dec.resblocks.8.convs2.0.weight_g
|
| 251 |
+
vq_model.dec.resblocks.8.convs2.0.weight_v
|
| 252 |
+
vq_model.dec.resblocks.8.convs2.1.bias
|
| 253 |
+
vq_model.dec.resblocks.8.convs2.1.weight_g
|
| 254 |
+
vq_model.dec.resblocks.8.convs2.1.weight_v
|
| 255 |
+
vq_model.dec.resblocks.8.convs2.2.bias
|
| 256 |
+
vq_model.dec.resblocks.8.convs2.2.weight_g
|
| 257 |
+
vq_model.dec.resblocks.8.convs2.2.weight_v
|
| 258 |
+
vq_model.dec.resblocks.9.convs1.0.bias
|
| 259 |
+
vq_model.dec.resblocks.9.convs1.0.weight_g
|
| 260 |
+
vq_model.dec.resblocks.9.convs1.0.weight_v
|
| 261 |
+
vq_model.dec.resblocks.9.convs1.1.bias
|
| 262 |
+
vq_model.dec.resblocks.9.convs1.1.weight_g
|
| 263 |
+
vq_model.dec.resblocks.9.convs1.1.weight_v
|
| 264 |
+
vq_model.dec.resblocks.9.convs1.2.bias
|
| 265 |
+
vq_model.dec.resblocks.9.convs1.2.weight_g
|
| 266 |
+
vq_model.dec.resblocks.9.convs1.2.weight_v
|
| 267 |
+
vq_model.dec.resblocks.9.convs2.0.bias
|
| 268 |
+
vq_model.dec.resblocks.9.convs2.0.weight_g
|
| 269 |
+
vq_model.dec.resblocks.9.convs2.0.weight_v
|
| 270 |
+
vq_model.dec.resblocks.9.convs2.1.bias
|
| 271 |
+
vq_model.dec.resblocks.9.convs2.1.weight_g
|
| 272 |
+
vq_model.dec.resblocks.9.convs2.1.weight_v
|
| 273 |
+
vq_model.dec.resblocks.9.convs2.2.bias
|
| 274 |
+
vq_model.dec.resblocks.9.convs2.2.weight_g
|
| 275 |
+
vq_model.dec.resblocks.9.convs2.2.weight_v
|
| 276 |
+
vq_model.dec.ups.0.bias
|
| 277 |
+
vq_model.dec.ups.0.weight_g
|
| 278 |
+
vq_model.dec.ups.0.weight_v
|
| 279 |
+
vq_model.dec.ups.1.bias
|
| 280 |
+
vq_model.dec.ups.1.weight_g
|
| 281 |
+
vq_model.dec.ups.1.weight_v
|
| 282 |
+
vq_model.dec.ups.2.bias
|
| 283 |
+
vq_model.dec.ups.2.weight_g
|
| 284 |
+
vq_model.dec.ups.2.weight_v
|
| 285 |
+
vq_model.dec.ups.3.bias
|
| 286 |
+
vq_model.dec.ups.3.weight_g
|
| 287 |
+
vq_model.dec.ups.3.weight_v
|
| 288 |
+
vq_model.dec.ups.4.bias
|
| 289 |
+
vq_model.dec.ups.4.weight_g
|
| 290 |
+
vq_model.dec.ups.4.weight_v
|
| 291 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_k.bias
|
| 292 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_k.weight
|
| 293 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_o.bias
|
| 294 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_o.weight
|
| 295 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_q.bias
|
| 296 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_q.weight
|
| 297 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_v.bias
|
| 298 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_v.weight
|
| 299 |
+
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_k
|
| 300 |
+
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_v
|
| 301 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_k.bias
|
| 302 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_k.weight
|
| 303 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_o.bias
|
| 304 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_o.weight
|
| 305 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_q.bias
|
| 306 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_q.weight
|
| 307 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_v.bias
|
| 308 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_v.weight
|
| 309 |
+
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_k
|
| 310 |
+
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_v
|
| 311 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_k.bias
|
| 312 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_k.weight
|
| 313 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_o.bias
|
| 314 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_o.weight
|
| 315 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_q.bias
|
| 316 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_q.weight
|
| 317 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_v.bias
|
| 318 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_v.weight
|
| 319 |
+
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_k
|
| 320 |
+
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_v
|
| 321 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.bias
|
| 322 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.weight
|
| 323 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.bias
|
| 324 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.weight
|
| 325 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.bias
|
| 326 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.weight
|
| 327 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.bias
|
| 328 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.weight
|
| 329 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.bias
|
| 330 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.weight
|
| 331 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.bias
|
| 332 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.weight
|
| 333 |
+
vq_model.enc_p.encoder2.norm_layers_1.0.beta
|
| 334 |
+
vq_model.enc_p.encoder2.norm_layers_1.0.gamma
|
| 335 |
+
vq_model.enc_p.encoder2.norm_layers_1.1.beta
|
| 336 |
+
vq_model.enc_p.encoder2.norm_layers_1.1.gamma
|
| 337 |
+
vq_model.enc_p.encoder2.norm_layers_1.2.beta
|
| 338 |
+
vq_model.enc_p.encoder2.norm_layers_1.2.gamma
|
| 339 |
+
vq_model.enc_p.encoder2.norm_layers_2.0.beta
|
| 340 |
+
vq_model.enc_p.encoder2.norm_layers_2.0.gamma
|
| 341 |
+
vq_model.enc_p.encoder2.norm_layers_2.1.beta
|
| 342 |
+
vq_model.enc_p.encoder2.norm_layers_2.1.gamma
|
| 343 |
+
vq_model.enc_p.encoder2.norm_layers_2.2.beta
|
| 344 |
+
vq_model.enc_p.encoder2.norm_layers_2.2.gamma
|
| 345 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.bias
|
| 346 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.weight
|
| 347 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.bias
|
| 348 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.weight
|
| 349 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.bias
|
| 350 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.weight
|
| 351 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.bias
|
| 352 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.weight
|
| 353 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_k
|
| 354 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_v
|
| 355 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.bias
|
| 356 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.weight
|
| 357 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.bias
|
| 358 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.weight
|
| 359 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.bias
|
| 360 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.weight
|
| 361 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.bias
|
| 362 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.weight
|
| 363 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_k
|
| 364 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_v
|
| 365 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.bias
|
| 366 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.weight
|
| 367 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.bias
|
| 368 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.weight
|
| 369 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.bias
|
| 370 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.weight
|
| 371 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.bias
|
| 372 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.weight
|
| 373 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_k
|
| 374 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_v
|
| 375 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.bias
|
| 376 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.weight
|
| 377 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.bias
|
| 378 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.weight
|
| 379 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.bias
|
| 380 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.weight
|
| 381 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.bias
|
| 382 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.weight
|
| 383 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.bias
|
| 384 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.weight
|
| 385 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.bias
|
| 386 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.weight
|
| 387 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.0.beta
|
| 388 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.0.gamma
|
| 389 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.1.beta
|
| 390 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.1.gamma
|
| 391 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.2.beta
|
| 392 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.2.gamma
|
| 393 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.0.beta
|
| 394 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.0.gamma
|
| 395 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.1.beta
|
| 396 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.1.gamma
|
| 397 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.2.beta
|
| 398 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.2.gamma
|
| 399 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.bias
|
| 400 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.weight
|
| 401 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.bias
|
| 402 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.weight
|
| 403 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.bias
|
| 404 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.weight
|
| 405 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.bias
|
| 406 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.weight
|
| 407 |
+
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_k
|
| 408 |
+
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_v
|
| 409 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.bias
|
| 410 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.weight
|
| 411 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.bias
|
| 412 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.weight
|
| 413 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.bias
|
| 414 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.weight
|
| 415 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.bias
|
| 416 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.weight
|
| 417 |
+
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_k
|
| 418 |
+
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_v
|
| 419 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.bias
|
| 420 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.weight
|
| 421 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.bias
|
| 422 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.weight
|
| 423 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.bias
|
| 424 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.weight
|
| 425 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.bias
|
| 426 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.weight
|
| 427 |
+
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_k
|
| 428 |
+
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_v
|
| 429 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.bias
|
| 430 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.weight
|
| 431 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.bias
|
| 432 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.weight
|
| 433 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.bias
|
| 434 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.weight
|
| 435 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.bias
|
| 436 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.weight
|
| 437 |
+
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_k
|
| 438 |
+
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_v
|
| 439 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.bias
|
| 440 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.weight
|
| 441 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.bias
|
| 442 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.weight
|
| 443 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.bias
|
| 444 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.weight
|
| 445 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.bias
|
| 446 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.weight
|
| 447 |
+
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_k
|
| 448 |
+
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_v
|
| 449 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.bias
|
| 450 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.weight
|
| 451 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.bias
|
| 452 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.weight
|
| 453 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.bias
|
| 454 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.weight
|
| 455 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.bias
|
| 456 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.weight
|
| 457 |
+
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_k
|
| 458 |
+
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_v
|
| 459 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.bias
|
| 460 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.weight
|
| 461 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.bias
|
| 462 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.weight
|
| 463 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.bias
|
| 464 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.weight
|
| 465 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.bias
|
| 466 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.weight
|
| 467 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.bias
|
| 468 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.weight
|
| 469 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.bias
|
| 470 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.weight
|
| 471 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.bias
|
| 472 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.weight
|
| 473 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.bias
|
| 474 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.weight
|
| 475 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.bias
|
| 476 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.weight
|
| 477 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.bias
|
| 478 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.weight
|
| 479 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.bias
|
| 480 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.weight
|
| 481 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.bias
|
| 482 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.weight
|
| 483 |
+
vq_model.enc_p.encoder_text.norm_layers_1.0.beta
|
| 484 |
+
vq_model.enc_p.encoder_text.norm_layers_1.0.gamma
|
| 485 |
+
vq_model.enc_p.encoder_text.norm_layers_1.1.beta
|
| 486 |
+
vq_model.enc_p.encoder_text.norm_layers_1.1.gamma
|
| 487 |
+
vq_model.enc_p.encoder_text.norm_layers_1.2.beta
|
| 488 |
+
vq_model.enc_p.encoder_text.norm_layers_1.2.gamma
|
| 489 |
+
vq_model.enc_p.encoder_text.norm_layers_1.3.beta
|
| 490 |
+
vq_model.enc_p.encoder_text.norm_layers_1.3.gamma
|
| 491 |
+
vq_model.enc_p.encoder_text.norm_layers_1.4.beta
|
| 492 |
+
vq_model.enc_p.encoder_text.norm_layers_1.4.gamma
|
| 493 |
+
vq_model.enc_p.encoder_text.norm_layers_1.5.beta
|
| 494 |
+
vq_model.enc_p.encoder_text.norm_layers_1.5.gamma
|
| 495 |
+
vq_model.enc_p.encoder_text.norm_layers_2.0.beta
|
| 496 |
+
vq_model.enc_p.encoder_text.norm_layers_2.0.gamma
|
| 497 |
+
vq_model.enc_p.encoder_text.norm_layers_2.1.beta
|
| 498 |
+
vq_model.enc_p.encoder_text.norm_layers_2.1.gamma
|
| 499 |
+
vq_model.enc_p.encoder_text.norm_layers_2.2.beta
|
| 500 |
+
vq_model.enc_p.encoder_text.norm_layers_2.2.gamma
|
| 501 |
+
vq_model.enc_p.encoder_text.norm_layers_2.3.beta
|
| 502 |
+
vq_model.enc_p.encoder_text.norm_layers_2.3.gamma
|
| 503 |
+
vq_model.enc_p.encoder_text.norm_layers_2.4.beta
|
| 504 |
+
vq_model.enc_p.encoder_text.norm_layers_2.4.gamma
|
| 505 |
+
vq_model.enc_p.encoder_text.norm_layers_2.5.beta
|
| 506 |
+
vq_model.enc_p.encoder_text.norm_layers_2.5.gamma
|
| 507 |
+
vq_model.enc_p.mrte.c_post.bias
|
| 508 |
+
vq_model.enc_p.mrte.c_post.weight
|
| 509 |
+
vq_model.enc_p.mrte.c_pre.bias
|
| 510 |
+
vq_model.enc_p.mrte.c_pre.weight
|
| 511 |
+
vq_model.enc_p.mrte.cross_attention.conv_k.bias
|
| 512 |
+
vq_model.enc_p.mrte.cross_attention.conv_k.weight
|
| 513 |
+
vq_model.enc_p.mrte.cross_attention.conv_o.bias
|
| 514 |
+
vq_model.enc_p.mrte.cross_attention.conv_o.weight
|
| 515 |
+
vq_model.enc_p.mrte.cross_attention.conv_q.bias
|
| 516 |
+
vq_model.enc_p.mrte.cross_attention.conv_q.weight
|
| 517 |
+
vq_model.enc_p.mrte.cross_attention.conv_v.bias
|
| 518 |
+
vq_model.enc_p.mrte.cross_attention.conv_v.weight
|
| 519 |
+
vq_model.enc_p.mrte.text_pre.bias
|
| 520 |
+
vq_model.enc_p.mrte.text_pre.weight
|
| 521 |
+
vq_model.enc_p.proj.bias
|
| 522 |
+
vq_model.enc_p.proj.weight
|
| 523 |
+
vq_model.enc_p.ssl_proj.bias
|
| 524 |
+
vq_model.enc_p.ssl_proj.weight
|
| 525 |
+
vq_model.enc_p.text_embedding.weight
|
| 526 |
+
vq_model.flow.flows.0.enc.cond_layer.bias
|
| 527 |
+
vq_model.flow.flows.0.enc.cond_layer.weight_g
|
| 528 |
+
vq_model.flow.flows.0.enc.cond_layer.weight_v
|
| 529 |
+
vq_model.flow.flows.0.enc.in_layers.0.bias
|
| 530 |
+
vq_model.flow.flows.0.enc.in_layers.0.weight_g
|
| 531 |
+
vq_model.flow.flows.0.enc.in_layers.0.weight_v
|
| 532 |
+
vq_model.flow.flows.0.enc.in_layers.1.bias
|
| 533 |
+
vq_model.flow.flows.0.enc.in_layers.1.weight_g
|
| 534 |
+
vq_model.flow.flows.0.enc.in_layers.1.weight_v
|
| 535 |
+
vq_model.flow.flows.0.enc.in_layers.2.bias
|
| 536 |
+
vq_model.flow.flows.0.enc.in_layers.2.weight_g
|
| 537 |
+
vq_model.flow.flows.0.enc.in_layers.2.weight_v
|
| 538 |
+
vq_model.flow.flows.0.enc.in_layers.3.bias
|
| 539 |
+
vq_model.flow.flows.0.enc.in_layers.3.weight_g
|
| 540 |
+
vq_model.flow.flows.0.enc.in_layers.3.weight_v
|
| 541 |
+
vq_model.flow.flows.0.enc.res_skip_layers.0.bias
|
| 542 |
+
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_g
|
| 543 |
+
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_v
|
| 544 |
+
vq_model.flow.flows.0.enc.res_skip_layers.1.bias
|
| 545 |
+
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_g
|
| 546 |
+
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_v
|
| 547 |
+
vq_model.flow.flows.0.enc.res_skip_layers.2.bias
|
| 548 |
+
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_g
|
| 549 |
+
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_v
|
| 550 |
+
vq_model.flow.flows.0.enc.res_skip_layers.3.bias
|
| 551 |
+
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_g
|
| 552 |
+
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_v
|
| 553 |
+
vq_model.flow.flows.0.post.bias
|
| 554 |
+
vq_model.flow.flows.0.post.weight
|
| 555 |
+
vq_model.flow.flows.0.pre.bias
|
| 556 |
+
vq_model.flow.flows.0.pre.weight
|
| 557 |
+
vq_model.flow.flows.2.enc.cond_layer.bias
|
| 558 |
+
vq_model.flow.flows.2.enc.cond_layer.weight_g
|
| 559 |
+
vq_model.flow.flows.2.enc.cond_layer.weight_v
|
| 560 |
+
vq_model.flow.flows.2.enc.in_layers.0.bias
|
| 561 |
+
vq_model.flow.flows.2.enc.in_layers.0.weight_g
|
| 562 |
+
vq_model.flow.flows.2.enc.in_layers.0.weight_v
|
| 563 |
+
vq_model.flow.flows.2.enc.in_layers.1.bias
|
| 564 |
+
vq_model.flow.flows.2.enc.in_layers.1.weight_g
|
| 565 |
+
vq_model.flow.flows.2.enc.in_layers.1.weight_v
|
| 566 |
+
vq_model.flow.flows.2.enc.in_layers.2.bias
|
| 567 |
+
vq_model.flow.flows.2.enc.in_layers.2.weight_g
|
| 568 |
+
vq_model.flow.flows.2.enc.in_layers.2.weight_v
|
| 569 |
+
vq_model.flow.flows.2.enc.in_layers.3.bias
|
| 570 |
+
vq_model.flow.flows.2.enc.in_layers.3.weight_g
|
| 571 |
+
vq_model.flow.flows.2.enc.in_layers.3.weight_v
|
| 572 |
+
vq_model.flow.flows.2.enc.res_skip_layers.0.bias
|
| 573 |
+
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_g
|
| 574 |
+
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_v
|
| 575 |
+
vq_model.flow.flows.2.enc.res_skip_layers.1.bias
|
| 576 |
+
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_g
|
| 577 |
+
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_v
|
| 578 |
+
vq_model.flow.flows.2.enc.res_skip_layers.2.bias
|
| 579 |
+
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_g
|
| 580 |
+
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_v
|
| 581 |
+
vq_model.flow.flows.2.enc.res_skip_layers.3.bias
|
| 582 |
+
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_g
|
| 583 |
+
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_v
|
| 584 |
+
vq_model.flow.flows.2.post.bias
|
| 585 |
+
vq_model.flow.flows.2.post.weight
|
| 586 |
+
vq_model.flow.flows.2.pre.bias
|
| 587 |
+
vq_model.flow.flows.2.pre.weight
|
| 588 |
+
vq_model.flow.flows.4.enc.cond_layer.bias
|
| 589 |
+
vq_model.flow.flows.4.enc.cond_layer.weight_g
|
| 590 |
+
vq_model.flow.flows.4.enc.cond_layer.weight_v
|
| 591 |
+
vq_model.flow.flows.4.enc.in_layers.0.bias
|
| 592 |
+
vq_model.flow.flows.4.enc.in_layers.0.weight_g
|
| 593 |
+
vq_model.flow.flows.4.enc.in_layers.0.weight_v
|
| 594 |
+
vq_model.flow.flows.4.enc.in_layers.1.bias
|
| 595 |
+
vq_model.flow.flows.4.enc.in_layers.1.weight_g
|
| 596 |
+
vq_model.flow.flows.4.enc.in_layers.1.weight_v
|
| 597 |
+
vq_model.flow.flows.4.enc.in_layers.2.bias
|
| 598 |
+
vq_model.flow.flows.4.enc.in_layers.2.weight_g
|
| 599 |
+
vq_model.flow.flows.4.enc.in_layers.2.weight_v
|
| 600 |
+
vq_model.flow.flows.4.enc.in_layers.3.bias
|
| 601 |
+
vq_model.flow.flows.4.enc.in_layers.3.weight_g
|
| 602 |
+
vq_model.flow.flows.4.enc.in_layers.3.weight_v
|
| 603 |
+
vq_model.flow.flows.4.enc.res_skip_layers.0.bias
|
| 604 |
+
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_g
|
| 605 |
+
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_v
|
| 606 |
+
vq_model.flow.flows.4.enc.res_skip_layers.1.bias
|
| 607 |
+
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_g
|
| 608 |
+
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_v
|
| 609 |
+
vq_model.flow.flows.4.enc.res_skip_layers.2.bias
|
| 610 |
+
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_g
|
| 611 |
+
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_v
|
| 612 |
+
vq_model.flow.flows.4.enc.res_skip_layers.3.bias
|
| 613 |
+
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_g
|
| 614 |
+
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_v
|
| 615 |
+
vq_model.flow.flows.4.post.bias
|
| 616 |
+
vq_model.flow.flows.4.post.weight
|
| 617 |
+
vq_model.flow.flows.4.pre.bias
|
| 618 |
+
vq_model.flow.flows.4.pre.weight
|
| 619 |
+
vq_model.flow.flows.6.enc.cond_layer.bias
|
| 620 |
+
vq_model.flow.flows.6.enc.cond_layer.weight_g
|
| 621 |
+
vq_model.flow.flows.6.enc.cond_layer.weight_v
|
| 622 |
+
vq_model.flow.flows.6.enc.in_layers.0.bias
|
| 623 |
+
vq_model.flow.flows.6.enc.in_layers.0.weight_g
|
| 624 |
+
vq_model.flow.flows.6.enc.in_layers.0.weight_v
|
| 625 |
+
vq_model.flow.flows.6.enc.in_layers.1.bias
|
| 626 |
+
vq_model.flow.flows.6.enc.in_layers.1.weight_g
|
| 627 |
+
vq_model.flow.flows.6.enc.in_layers.1.weight_v
|
| 628 |
+
vq_model.flow.flows.6.enc.in_layers.2.bias
|
| 629 |
+
vq_model.flow.flows.6.enc.in_layers.2.weight_g
|
| 630 |
+
vq_model.flow.flows.6.enc.in_layers.2.weight_v
|
| 631 |
+
vq_model.flow.flows.6.enc.in_layers.3.bias
|
| 632 |
+
vq_model.flow.flows.6.enc.in_layers.3.weight_g
|
| 633 |
+
vq_model.flow.flows.6.enc.in_layers.3.weight_v
|
| 634 |
+
vq_model.flow.flows.6.enc.res_skip_layers.0.bias
|
| 635 |
+
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_g
|
| 636 |
+
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_v
|
| 637 |
+
vq_model.flow.flows.6.enc.res_skip_layers.1.bias
|
| 638 |
+
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_g
|
| 639 |
+
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_v
|
| 640 |
+
vq_model.flow.flows.6.enc.res_skip_layers.2.bias
|
| 641 |
+
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_g
|
| 642 |
+
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_v
|
| 643 |
+
vq_model.flow.flows.6.enc.res_skip_layers.3.bias
|
| 644 |
+
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_g
|
| 645 |
+
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_v
|
| 646 |
+
vq_model.flow.flows.6.post.bias
|
| 647 |
+
vq_model.flow.flows.6.post.weight
|
| 648 |
+
vq_model.flow.flows.6.pre.bias
|
| 649 |
+
vq_model.flow.flows.6.pre.weight
|
| 650 |
+
vq_model.quantizer.vq.layers.0._codebook.embed
|
| 651 |
+
vq_model.ref_enc.fc.fc.bias
|
| 652 |
+
vq_model.ref_enc.fc.fc.weight
|
| 653 |
+
vq_model.ref_enc.slf_attn.fc.bias
|
| 654 |
+
vq_model.ref_enc.slf_attn.fc.weight
|
| 655 |
+
vq_model.ref_enc.slf_attn.w_ks.bias
|
| 656 |
+
vq_model.ref_enc.slf_attn.w_ks.weight
|
| 657 |
+
vq_model.ref_enc.slf_attn.w_qs.bias
|
| 658 |
+
vq_model.ref_enc.slf_attn.w_qs.weight
|
| 659 |
+
vq_model.ref_enc.slf_attn.w_vs.bias
|
| 660 |
+
vq_model.ref_enc.slf_attn.w_vs.weight
|
| 661 |
+
vq_model.ref_enc.spectral.0.fc.bias
|
| 662 |
+
vq_model.ref_enc.spectral.0.fc.weight
|
| 663 |
+
vq_model.ref_enc.spectral.3.fc.bias
|
| 664 |
+
vq_model.ref_enc.spectral.3.fc.weight
|
| 665 |
+
vq_model.ref_enc.temporal.0.conv1.conv.bias
|
| 666 |
+
vq_model.ref_enc.temporal.0.conv1.conv.weight
|
| 667 |
+
vq_model.ref_enc.temporal.1.conv1.conv.bias
|
| 668 |
+
vq_model.ref_enc.temporal.1.conv1.conv.weight
|
genie_tts/Data/v2ProPlus/Keys/prompt_encoder_weights.txt
CHANGED
|
@@ -1,23 +1,23 @@
|
|
| 1 |
-
ref_enc.spectral.0.fc.weight
|
| 2 |
-
ref_enc.spectral.0.fc.bias
|
| 3 |
-
ref_enc.spectral.3.fc.weight
|
| 4 |
-
ref_enc.spectral.3.fc.bias
|
| 5 |
-
ref_enc.temporal.0.conv1.conv.weight
|
| 6 |
-
ref_enc.temporal.0.conv1.conv.bias
|
| 7 |
-
ref_enc.temporal.1.conv1.conv.weight
|
| 8 |
-
ref_enc.temporal.1.conv1.conv.bias
|
| 9 |
-
ref_enc.slf_attn.w_qs.weight
|
| 10 |
-
ref_enc.slf_attn.w_qs.bias
|
| 11 |
-
ref_enc.slf_attn.w_ks.weight
|
| 12 |
-
ref_enc.slf_attn.w_ks.bias
|
| 13 |
-
ref_enc.slf_attn.w_vs.weight
|
| 14 |
-
ref_enc.slf_attn.w_vs.bias
|
| 15 |
-
ref_enc.slf_attn.fc.weight
|
| 16 |
-
ref_enc.slf_attn.fc.bias
|
| 17 |
-
ref_enc.fc.fc.weight
|
| 18 |
-
ref_enc.fc.fc.bias
|
| 19 |
-
sv_emb.weight
|
| 20 |
-
sv_emb.bias
|
| 21 |
-
ge_to512.weight
|
| 22 |
-
ge_to512.bias
|
| 23 |
-
prelu.weight
|
|
|
|
| 1 |
+
ref_enc.spectral.0.fc.weight
|
| 2 |
+
ref_enc.spectral.0.fc.bias
|
| 3 |
+
ref_enc.spectral.3.fc.weight
|
| 4 |
+
ref_enc.spectral.3.fc.bias
|
| 5 |
+
ref_enc.temporal.0.conv1.conv.weight
|
| 6 |
+
ref_enc.temporal.0.conv1.conv.bias
|
| 7 |
+
ref_enc.temporal.1.conv1.conv.weight
|
| 8 |
+
ref_enc.temporal.1.conv1.conv.bias
|
| 9 |
+
ref_enc.slf_attn.w_qs.weight
|
| 10 |
+
ref_enc.slf_attn.w_qs.bias
|
| 11 |
+
ref_enc.slf_attn.w_ks.weight
|
| 12 |
+
ref_enc.slf_attn.w_ks.bias
|
| 13 |
+
ref_enc.slf_attn.w_vs.weight
|
| 14 |
+
ref_enc.slf_attn.w_vs.bias
|
| 15 |
+
ref_enc.slf_attn.fc.weight
|
| 16 |
+
ref_enc.slf_attn.fc.bias
|
| 17 |
+
ref_enc.fc.fc.weight
|
| 18 |
+
ref_enc.fc.fc.bias
|
| 19 |
+
sv_emb.weight
|
| 20 |
+
sv_emb.bias
|
| 21 |
+
ge_to512.weight
|
| 22 |
+
ge_to512.bias
|
| 23 |
+
prelu.weight
|
genie_tts/Data/v2ProPlus/Keys/vits_weights.txt
CHANGED
|
@@ -1,650 +1,650 @@
|
|
| 1 |
-
vq_model.enc_p.ssl_proj.weight
|
| 2 |
-
vq_model.enc_p.ssl_proj.bias
|
| 3 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_k
|
| 4 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_v
|
| 5 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.weight
|
| 6 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.bias
|
| 7 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.weight
|
| 8 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.bias
|
| 9 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.weight
|
| 10 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.bias
|
| 11 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.weight
|
| 12 |
-
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.bias
|
| 13 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_k
|
| 14 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_v
|
| 15 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.weight
|
| 16 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.bias
|
| 17 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.weight
|
| 18 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.bias
|
| 19 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.weight
|
| 20 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.bias
|
| 21 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.weight
|
| 22 |
-
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.bias
|
| 23 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_k
|
| 24 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_v
|
| 25 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.weight
|
| 26 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.bias
|
| 27 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.weight
|
| 28 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.bias
|
| 29 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.weight
|
| 30 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.bias
|
| 31 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.weight
|
| 32 |
-
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.bias
|
| 33 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.0.gamma
|
| 34 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.0.beta
|
| 35 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.1.gamma
|
| 36 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.1.beta
|
| 37 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.2.gamma
|
| 38 |
-
vq_model.enc_p.encoder_ssl.norm_layers_1.2.beta
|
| 39 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.weight
|
| 40 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.bias
|
| 41 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.weight
|
| 42 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.bias
|
| 43 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.weight
|
| 44 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.bias
|
| 45 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.weight
|
| 46 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.bias
|
| 47 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.weight
|
| 48 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.bias
|
| 49 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.weight
|
| 50 |
-
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.bias
|
| 51 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.0.gamma
|
| 52 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.0.beta
|
| 53 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.1.gamma
|
| 54 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.1.beta
|
| 55 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.2.gamma
|
| 56 |
-
vq_model.enc_p.encoder_ssl.norm_layers_2.2.beta
|
| 57 |
-
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_k
|
| 58 |
-
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_v
|
| 59 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.weight
|
| 60 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.bias
|
| 61 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.weight
|
| 62 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.bias
|
| 63 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.weight
|
| 64 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.bias
|
| 65 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.weight
|
| 66 |
-
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.bias
|
| 67 |
-
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_k
|
| 68 |
-
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_v
|
| 69 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.weight
|
| 70 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.bias
|
| 71 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.weight
|
| 72 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.bias
|
| 73 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.weight
|
| 74 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.bias
|
| 75 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.weight
|
| 76 |
-
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.bias
|
| 77 |
-
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_k
|
| 78 |
-
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_v
|
| 79 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.weight
|
| 80 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.bias
|
| 81 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.weight
|
| 82 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.bias
|
| 83 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.weight
|
| 84 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.bias
|
| 85 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.weight
|
| 86 |
-
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.bias
|
| 87 |
-
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_k
|
| 88 |
-
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_v
|
| 89 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.weight
|
| 90 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.bias
|
| 91 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.weight
|
| 92 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.bias
|
| 93 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.weight
|
| 94 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.bias
|
| 95 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.weight
|
| 96 |
-
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.bias
|
| 97 |
-
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_k
|
| 98 |
-
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_v
|
| 99 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.weight
|
| 100 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.bias
|
| 101 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.weight
|
| 102 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.bias
|
| 103 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.weight
|
| 104 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.bias
|
| 105 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.weight
|
| 106 |
-
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.bias
|
| 107 |
-
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_k
|
| 108 |
-
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_v
|
| 109 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.weight
|
| 110 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.bias
|
| 111 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.weight
|
| 112 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.bias
|
| 113 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.weight
|
| 114 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.bias
|
| 115 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.weight
|
| 116 |
-
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.bias
|
| 117 |
-
vq_model.enc_p.encoder_text.norm_layers_1.0.gamma
|
| 118 |
-
vq_model.enc_p.encoder_text.norm_layers_1.0.beta
|
| 119 |
-
vq_model.enc_p.encoder_text.norm_layers_1.1.gamma
|
| 120 |
-
vq_model.enc_p.encoder_text.norm_layers_1.1.beta
|
| 121 |
-
vq_model.enc_p.encoder_text.norm_layers_1.2.gamma
|
| 122 |
-
vq_model.enc_p.encoder_text.norm_layers_1.2.beta
|
| 123 |
-
vq_model.enc_p.encoder_text.norm_layers_1.3.gamma
|
| 124 |
-
vq_model.enc_p.encoder_text.norm_layers_1.3.beta
|
| 125 |
-
vq_model.enc_p.encoder_text.norm_layers_1.4.gamma
|
| 126 |
-
vq_model.enc_p.encoder_text.norm_layers_1.4.beta
|
| 127 |
-
vq_model.enc_p.encoder_text.norm_layers_1.5.gamma
|
| 128 |
-
vq_model.enc_p.encoder_text.norm_layers_1.5.beta
|
| 129 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.weight
|
| 130 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.bias
|
| 131 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.weight
|
| 132 |
-
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.bias
|
| 133 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.weight
|
| 134 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.bias
|
| 135 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.weight
|
| 136 |
-
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.bias
|
| 137 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.weight
|
| 138 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.bias
|
| 139 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.weight
|
| 140 |
-
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.bias
|
| 141 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.weight
|
| 142 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.bias
|
| 143 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.weight
|
| 144 |
-
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.bias
|
| 145 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.weight
|
| 146 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.bias
|
| 147 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.weight
|
| 148 |
-
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.bias
|
| 149 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.weight
|
| 150 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.bias
|
| 151 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.weight
|
| 152 |
-
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.bias
|
| 153 |
-
vq_model.enc_p.encoder_text.norm_layers_2.0.gamma
|
| 154 |
-
vq_model.enc_p.encoder_text.norm_layers_2.0.beta
|
| 155 |
-
vq_model.enc_p.encoder_text.norm_layers_2.1.gamma
|
| 156 |
-
vq_model.enc_p.encoder_text.norm_layers_2.1.beta
|
| 157 |
-
vq_model.enc_p.encoder_text.norm_layers_2.2.gamma
|
| 158 |
-
vq_model.enc_p.encoder_text.norm_layers_2.2.beta
|
| 159 |
-
vq_model.enc_p.encoder_text.norm_layers_2.3.gamma
|
| 160 |
-
vq_model.enc_p.encoder_text.norm_layers_2.3.beta
|
| 161 |
-
vq_model.enc_p.encoder_text.norm_layers_2.4.gamma
|
| 162 |
-
vq_model.enc_p.encoder_text.norm_layers_2.4.beta
|
| 163 |
-
vq_model.enc_p.encoder_text.norm_layers_2.5.gamma
|
| 164 |
-
vq_model.enc_p.encoder_text.norm_layers_2.5.beta
|
| 165 |
-
vq_model.enc_p.text_embedding.weight
|
| 166 |
-
vq_model.enc_p.mrte.cross_attention.conv_q.weight
|
| 167 |
-
vq_model.enc_p.mrte.cross_attention.conv_q.bias
|
| 168 |
-
vq_model.enc_p.mrte.cross_attention.conv_k.weight
|
| 169 |
-
vq_model.enc_p.mrte.cross_attention.conv_k.bias
|
| 170 |
-
vq_model.enc_p.mrte.cross_attention.conv_v.weight
|
| 171 |
-
vq_model.enc_p.mrte.cross_attention.conv_v.bias
|
| 172 |
-
vq_model.enc_p.mrte.cross_attention.conv_o.weight
|
| 173 |
-
vq_model.enc_p.mrte.cross_attention.conv_o.bias
|
| 174 |
-
vq_model.enc_p.mrte.c_pre.weight
|
| 175 |
-
vq_model.enc_p.mrte.c_pre.bias
|
| 176 |
-
vq_model.enc_p.mrte.text_pre.weight
|
| 177 |
-
vq_model.enc_p.mrte.text_pre.bias
|
| 178 |
-
vq_model.enc_p.mrte.c_post.weight
|
| 179 |
-
vq_model.enc_p.mrte.c_post.bias
|
| 180 |
-
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_k
|
| 181 |
-
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_v
|
| 182 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_q.weight
|
| 183 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_q.bias
|
| 184 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_k.weight
|
| 185 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_k.bias
|
| 186 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_v.weight
|
| 187 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_v.bias
|
| 188 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_o.weight
|
| 189 |
-
vq_model.enc_p.encoder2.attn_layers.0.conv_o.bias
|
| 190 |
-
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_k
|
| 191 |
-
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_v
|
| 192 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_q.weight
|
| 193 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_q.bias
|
| 194 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_k.weight
|
| 195 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_k.bias
|
| 196 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_v.weight
|
| 197 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_v.bias
|
| 198 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_o.weight
|
| 199 |
-
vq_model.enc_p.encoder2.attn_layers.1.conv_o.bias
|
| 200 |
-
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_k
|
| 201 |
-
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_v
|
| 202 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_q.weight
|
| 203 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_q.bias
|
| 204 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_k.weight
|
| 205 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_k.bias
|
| 206 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_v.weight
|
| 207 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_v.bias
|
| 208 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_o.weight
|
| 209 |
-
vq_model.enc_p.encoder2.attn_layers.2.conv_o.bias
|
| 210 |
-
vq_model.enc_p.encoder2.norm_layers_1.0.gamma
|
| 211 |
-
vq_model.enc_p.encoder2.norm_layers_1.0.beta
|
| 212 |
-
vq_model.enc_p.encoder2.norm_layers_1.1.gamma
|
| 213 |
-
vq_model.enc_p.encoder2.norm_layers_1.1.beta
|
| 214 |
-
vq_model.enc_p.encoder2.norm_layers_1.2.gamma
|
| 215 |
-
vq_model.enc_p.encoder2.norm_layers_1.2.beta
|
| 216 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.weight
|
| 217 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.bias
|
| 218 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.weight
|
| 219 |
-
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.bias
|
| 220 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.weight
|
| 221 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.bias
|
| 222 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.weight
|
| 223 |
-
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.bias
|
| 224 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.weight
|
| 225 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.bias
|
| 226 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.weight
|
| 227 |
-
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.bias
|
| 228 |
-
vq_model.enc_p.encoder2.norm_layers_2.0.gamma
|
| 229 |
-
vq_model.enc_p.encoder2.norm_layers_2.0.beta
|
| 230 |
-
vq_model.enc_p.encoder2.norm_layers_2.1.gamma
|
| 231 |
-
vq_model.enc_p.encoder2.norm_layers_2.1.beta
|
| 232 |
-
vq_model.enc_p.encoder2.norm_layers_2.2.gamma
|
| 233 |
-
vq_model.enc_p.encoder2.norm_layers_2.2.beta
|
| 234 |
-
vq_model.enc_p.proj.weight
|
| 235 |
-
vq_model.enc_p.proj.bias
|
| 236 |
-
vq_model.dec.conv_pre.weight
|
| 237 |
-
vq_model.dec.conv_pre.bias
|
| 238 |
-
vq_model.dec.ups.0.bias
|
| 239 |
-
vq_model.dec.ups.0.weight_g
|
| 240 |
-
vq_model.dec.ups.0.weight_v
|
| 241 |
-
vq_model.dec.ups.1.bias
|
| 242 |
-
vq_model.dec.ups.1.weight_g
|
| 243 |
-
vq_model.dec.ups.1.weight_v
|
| 244 |
-
vq_model.dec.ups.2.bias
|
| 245 |
-
vq_model.dec.ups.2.weight_g
|
| 246 |
-
vq_model.dec.ups.2.weight_v
|
| 247 |
-
vq_model.dec.ups.3.bias
|
| 248 |
-
vq_model.dec.ups.3.weight_g
|
| 249 |
-
vq_model.dec.ups.3.weight_v
|
| 250 |
-
vq_model.dec.ups.4.bias
|
| 251 |
-
vq_model.dec.ups.4.weight_g
|
| 252 |
-
vq_model.dec.ups.4.weight_v
|
| 253 |
-
vq_model.dec.resblocks.0.convs1.0.bias
|
| 254 |
-
vq_model.dec.resblocks.0.convs1.0.weight_g
|
| 255 |
-
vq_model.dec.resblocks.0.convs1.0.weight_v
|
| 256 |
-
vq_model.dec.resblocks.0.convs1.1.bias
|
| 257 |
-
vq_model.dec.resblocks.0.convs1.1.weight_g
|
| 258 |
-
vq_model.dec.resblocks.0.convs1.1.weight_v
|
| 259 |
-
vq_model.dec.resblocks.0.convs1.2.bias
|
| 260 |
-
vq_model.dec.resblocks.0.convs1.2.weight_g
|
| 261 |
-
vq_model.dec.resblocks.0.convs1.2.weight_v
|
| 262 |
-
vq_model.dec.resblocks.0.convs2.0.bias
|
| 263 |
-
vq_model.dec.resblocks.0.convs2.0.weight_g
|
| 264 |
-
vq_model.dec.resblocks.0.convs2.0.weight_v
|
| 265 |
-
vq_model.dec.resblocks.0.convs2.1.bias
|
| 266 |
-
vq_model.dec.resblocks.0.convs2.1.weight_g
|
| 267 |
-
vq_model.dec.resblocks.0.convs2.1.weight_v
|
| 268 |
-
vq_model.dec.resblocks.0.convs2.2.bias
|
| 269 |
-
vq_model.dec.resblocks.0.convs2.2.weight_g
|
| 270 |
-
vq_model.dec.resblocks.0.convs2.2.weight_v
|
| 271 |
-
vq_model.dec.resblocks.1.convs1.0.bias
|
| 272 |
-
vq_model.dec.resblocks.1.convs1.0.weight_g
|
| 273 |
-
vq_model.dec.resblocks.1.convs1.0.weight_v
|
| 274 |
-
vq_model.dec.resblocks.1.convs1.1.bias
|
| 275 |
-
vq_model.dec.resblocks.1.convs1.1.weight_g
|
| 276 |
-
vq_model.dec.resblocks.1.convs1.1.weight_v
|
| 277 |
-
vq_model.dec.resblocks.1.convs1.2.bias
|
| 278 |
-
vq_model.dec.resblocks.1.convs1.2.weight_g
|
| 279 |
-
vq_model.dec.resblocks.1.convs1.2.weight_v
|
| 280 |
-
vq_model.dec.resblocks.1.convs2.0.bias
|
| 281 |
-
vq_model.dec.resblocks.1.convs2.0.weight_g
|
| 282 |
-
vq_model.dec.resblocks.1.convs2.0.weight_v
|
| 283 |
-
vq_model.dec.resblocks.1.convs2.1.bias
|
| 284 |
-
vq_model.dec.resblocks.1.convs2.1.weight_g
|
| 285 |
-
vq_model.dec.resblocks.1.convs2.1.weight_v
|
| 286 |
-
vq_model.dec.resblocks.1.convs2.2.bias
|
| 287 |
-
vq_model.dec.resblocks.1.convs2.2.weight_g
|
| 288 |
-
vq_model.dec.resblocks.1.convs2.2.weight_v
|
| 289 |
-
vq_model.dec.resblocks.2.convs1.0.bias
|
| 290 |
-
vq_model.dec.resblocks.2.convs1.0.weight_g
|
| 291 |
-
vq_model.dec.resblocks.2.convs1.0.weight_v
|
| 292 |
-
vq_model.dec.resblocks.2.convs1.1.bias
|
| 293 |
-
vq_model.dec.resblocks.2.convs1.1.weight_g
|
| 294 |
-
vq_model.dec.resblocks.2.convs1.1.weight_v
|
| 295 |
-
vq_model.dec.resblocks.2.convs1.2.bias
|
| 296 |
-
vq_model.dec.resblocks.2.convs1.2.weight_g
|
| 297 |
-
vq_model.dec.resblocks.2.convs1.2.weight_v
|
| 298 |
-
vq_model.dec.resblocks.2.convs2.0.bias
|
| 299 |
-
vq_model.dec.resblocks.2.convs2.0.weight_g
|
| 300 |
-
vq_model.dec.resblocks.2.convs2.0.weight_v
|
| 301 |
-
vq_model.dec.resblocks.2.convs2.1.bias
|
| 302 |
-
vq_model.dec.resblocks.2.convs2.1.weight_g
|
| 303 |
-
vq_model.dec.resblocks.2.convs2.1.weight_v
|
| 304 |
-
vq_model.dec.resblocks.2.convs2.2.bias
|
| 305 |
-
vq_model.dec.resblocks.2.convs2.2.weight_g
|
| 306 |
-
vq_model.dec.resblocks.2.convs2.2.weight_v
|
| 307 |
-
vq_model.dec.resblocks.3.convs1.0.bias
|
| 308 |
-
vq_model.dec.resblocks.3.convs1.0.weight_g
|
| 309 |
-
vq_model.dec.resblocks.3.convs1.0.weight_v
|
| 310 |
-
vq_model.dec.resblocks.3.convs1.1.bias
|
| 311 |
-
vq_model.dec.resblocks.3.convs1.1.weight_g
|
| 312 |
-
vq_model.dec.resblocks.3.convs1.1.weight_v
|
| 313 |
-
vq_model.dec.resblocks.3.convs1.2.bias
|
| 314 |
-
vq_model.dec.resblocks.3.convs1.2.weight_g
|
| 315 |
-
vq_model.dec.resblocks.3.convs1.2.weight_v
|
| 316 |
-
vq_model.dec.resblocks.3.convs2.0.bias
|
| 317 |
-
vq_model.dec.resblocks.3.convs2.0.weight_g
|
| 318 |
-
vq_model.dec.resblocks.3.convs2.0.weight_v
|
| 319 |
-
vq_model.dec.resblocks.3.convs2.1.bias
|
| 320 |
-
vq_model.dec.resblocks.3.convs2.1.weight_g
|
| 321 |
-
vq_model.dec.resblocks.3.convs2.1.weight_v
|
| 322 |
-
vq_model.dec.resblocks.3.convs2.2.bias
|
| 323 |
-
vq_model.dec.resblocks.3.convs2.2.weight_g
|
| 324 |
-
vq_model.dec.resblocks.3.convs2.2.weight_v
|
| 325 |
-
vq_model.dec.resblocks.4.convs1.0.bias
|
| 326 |
-
vq_model.dec.resblocks.4.convs1.0.weight_g
|
| 327 |
-
vq_model.dec.resblocks.4.convs1.0.weight_v
|
| 328 |
-
vq_model.dec.resblocks.4.convs1.1.bias
|
| 329 |
-
vq_model.dec.resblocks.4.convs1.1.weight_g
|
| 330 |
-
vq_model.dec.resblocks.4.convs1.1.weight_v
|
| 331 |
-
vq_model.dec.resblocks.4.convs1.2.bias
|
| 332 |
-
vq_model.dec.resblocks.4.convs1.2.weight_g
|
| 333 |
-
vq_model.dec.resblocks.4.convs1.2.weight_v
|
| 334 |
-
vq_model.dec.resblocks.4.convs2.0.bias
|
| 335 |
-
vq_model.dec.resblocks.4.convs2.0.weight_g
|
| 336 |
-
vq_model.dec.resblocks.4.convs2.0.weight_v
|
| 337 |
-
vq_model.dec.resblocks.4.convs2.1.bias
|
| 338 |
-
vq_model.dec.resblocks.4.convs2.1.weight_g
|
| 339 |
-
vq_model.dec.resblocks.4.convs2.1.weight_v
|
| 340 |
-
vq_model.dec.resblocks.4.convs2.2.bias
|
| 341 |
-
vq_model.dec.resblocks.4.convs2.2.weight_g
|
| 342 |
-
vq_model.dec.resblocks.4.convs2.2.weight_v
|
| 343 |
-
vq_model.dec.resblocks.5.convs1.0.bias
|
| 344 |
-
vq_model.dec.resblocks.5.convs1.0.weight_g
|
| 345 |
-
vq_model.dec.resblocks.5.convs1.0.weight_v
|
| 346 |
-
vq_model.dec.resblocks.5.convs1.1.bias
|
| 347 |
-
vq_model.dec.resblocks.5.convs1.1.weight_g
|
| 348 |
-
vq_model.dec.resblocks.5.convs1.1.weight_v
|
| 349 |
-
vq_model.dec.resblocks.5.convs1.2.bias
|
| 350 |
-
vq_model.dec.resblocks.5.convs1.2.weight_g
|
| 351 |
-
vq_model.dec.resblocks.5.convs1.2.weight_v
|
| 352 |
-
vq_model.dec.resblocks.5.convs2.0.bias
|
| 353 |
-
vq_model.dec.resblocks.5.convs2.0.weight_g
|
| 354 |
-
vq_model.dec.resblocks.5.convs2.0.weight_v
|
| 355 |
-
vq_model.dec.resblocks.5.convs2.1.bias
|
| 356 |
-
vq_model.dec.resblocks.5.convs2.1.weight_g
|
| 357 |
-
vq_model.dec.resblocks.5.convs2.1.weight_v
|
| 358 |
-
vq_model.dec.resblocks.5.convs2.2.bias
|
| 359 |
-
vq_model.dec.resblocks.5.convs2.2.weight_g
|
| 360 |
-
vq_model.dec.resblocks.5.convs2.2.weight_v
|
| 361 |
-
vq_model.dec.resblocks.6.convs1.0.bias
|
| 362 |
-
vq_model.dec.resblocks.6.convs1.0.weight_g
|
| 363 |
-
vq_model.dec.resblocks.6.convs1.0.weight_v
|
| 364 |
-
vq_model.dec.resblocks.6.convs1.1.bias
|
| 365 |
-
vq_model.dec.resblocks.6.convs1.1.weight_g
|
| 366 |
-
vq_model.dec.resblocks.6.convs1.1.weight_v
|
| 367 |
-
vq_model.dec.resblocks.6.convs1.2.bias
|
| 368 |
-
vq_model.dec.resblocks.6.convs1.2.weight_g
|
| 369 |
-
vq_model.dec.resblocks.6.convs1.2.weight_v
|
| 370 |
-
vq_model.dec.resblocks.6.convs2.0.bias
|
| 371 |
-
vq_model.dec.resblocks.6.convs2.0.weight_g
|
| 372 |
-
vq_model.dec.resblocks.6.convs2.0.weight_v
|
| 373 |
-
vq_model.dec.resblocks.6.convs2.1.bias
|
| 374 |
-
vq_model.dec.resblocks.6.convs2.1.weight_g
|
| 375 |
-
vq_model.dec.resblocks.6.convs2.1.weight_v
|
| 376 |
-
vq_model.dec.resblocks.6.convs2.2.bias
|
| 377 |
-
vq_model.dec.resblocks.6.convs2.2.weight_g
|
| 378 |
-
vq_model.dec.resblocks.6.convs2.2.weight_v
|
| 379 |
-
vq_model.dec.resblocks.7.convs1.0.bias
|
| 380 |
-
vq_model.dec.resblocks.7.convs1.0.weight_g
|
| 381 |
-
vq_model.dec.resblocks.7.convs1.0.weight_v
|
| 382 |
-
vq_model.dec.resblocks.7.convs1.1.bias
|
| 383 |
-
vq_model.dec.resblocks.7.convs1.1.weight_g
|
| 384 |
-
vq_model.dec.resblocks.7.convs1.1.weight_v
|
| 385 |
-
vq_model.dec.resblocks.7.convs1.2.bias
|
| 386 |
-
vq_model.dec.resblocks.7.convs1.2.weight_g
|
| 387 |
-
vq_model.dec.resblocks.7.convs1.2.weight_v
|
| 388 |
-
vq_model.dec.resblocks.7.convs2.0.bias
|
| 389 |
-
vq_model.dec.resblocks.7.convs2.0.weight_g
|
| 390 |
-
vq_model.dec.resblocks.7.convs2.0.weight_v
|
| 391 |
-
vq_model.dec.resblocks.7.convs2.1.bias
|
| 392 |
-
vq_model.dec.resblocks.7.convs2.1.weight_g
|
| 393 |
-
vq_model.dec.resblocks.7.convs2.1.weight_v
|
| 394 |
-
vq_model.dec.resblocks.7.convs2.2.bias
|
| 395 |
-
vq_model.dec.resblocks.7.convs2.2.weight_g
|
| 396 |
-
vq_model.dec.resblocks.7.convs2.2.weight_v
|
| 397 |
-
vq_model.dec.resblocks.8.convs1.0.bias
|
| 398 |
-
vq_model.dec.resblocks.8.convs1.0.weight_g
|
| 399 |
-
vq_model.dec.resblocks.8.convs1.0.weight_v
|
| 400 |
-
vq_model.dec.resblocks.8.convs1.1.bias
|
| 401 |
-
vq_model.dec.resblocks.8.convs1.1.weight_g
|
| 402 |
-
vq_model.dec.resblocks.8.convs1.1.weight_v
|
| 403 |
-
vq_model.dec.resblocks.8.convs1.2.bias
|
| 404 |
-
vq_model.dec.resblocks.8.convs1.2.weight_g
|
| 405 |
-
vq_model.dec.resblocks.8.convs1.2.weight_v
|
| 406 |
-
vq_model.dec.resblocks.8.convs2.0.bias
|
| 407 |
-
vq_model.dec.resblocks.8.convs2.0.weight_g
|
| 408 |
-
vq_model.dec.resblocks.8.convs2.0.weight_v
|
| 409 |
-
vq_model.dec.resblocks.8.convs2.1.bias
|
| 410 |
-
vq_model.dec.resblocks.8.convs2.1.weight_g
|
| 411 |
-
vq_model.dec.resblocks.8.convs2.1.weight_v
|
| 412 |
-
vq_model.dec.resblocks.8.convs2.2.bias
|
| 413 |
-
vq_model.dec.resblocks.8.convs2.2.weight_g
|
| 414 |
-
vq_model.dec.resblocks.8.convs2.2.weight_v
|
| 415 |
-
vq_model.dec.resblocks.9.convs1.0.bias
|
| 416 |
-
vq_model.dec.resblocks.9.convs1.0.weight_g
|
| 417 |
-
vq_model.dec.resblocks.9.convs1.0.weight_v
|
| 418 |
-
vq_model.dec.resblocks.9.convs1.1.bias
|
| 419 |
-
vq_model.dec.resblocks.9.convs1.1.weight_g
|
| 420 |
-
vq_model.dec.resblocks.9.convs1.1.weight_v
|
| 421 |
-
vq_model.dec.resblocks.9.convs1.2.bias
|
| 422 |
-
vq_model.dec.resblocks.9.convs1.2.weight_g
|
| 423 |
-
vq_model.dec.resblocks.9.convs1.2.weight_v
|
| 424 |
-
vq_model.dec.resblocks.9.convs2.0.bias
|
| 425 |
-
vq_model.dec.resblocks.9.convs2.0.weight_g
|
| 426 |
-
vq_model.dec.resblocks.9.convs2.0.weight_v
|
| 427 |
-
vq_model.dec.resblocks.9.convs2.1.bias
|
| 428 |
-
vq_model.dec.resblocks.9.convs2.1.weight_g
|
| 429 |
-
vq_model.dec.resblocks.9.convs2.1.weight_v
|
| 430 |
-
vq_model.dec.resblocks.9.convs2.2.bias
|
| 431 |
-
vq_model.dec.resblocks.9.convs2.2.weight_g
|
| 432 |
-
vq_model.dec.resblocks.9.convs2.2.weight_v
|
| 433 |
-
vq_model.dec.resblocks.10.convs1.0.bias
|
| 434 |
-
vq_model.dec.resblocks.10.convs1.0.weight_g
|
| 435 |
-
vq_model.dec.resblocks.10.convs1.0.weight_v
|
| 436 |
-
vq_model.dec.resblocks.10.convs1.1.bias
|
| 437 |
-
vq_model.dec.resblocks.10.convs1.1.weight_g
|
| 438 |
-
vq_model.dec.resblocks.10.convs1.1.weight_v
|
| 439 |
-
vq_model.dec.resblocks.10.convs1.2.bias
|
| 440 |
-
vq_model.dec.resblocks.10.convs1.2.weight_g
|
| 441 |
-
vq_model.dec.resblocks.10.convs1.2.weight_v
|
| 442 |
-
vq_model.dec.resblocks.10.convs2.0.bias
|
| 443 |
-
vq_model.dec.resblocks.10.convs2.0.weight_g
|
| 444 |
-
vq_model.dec.resblocks.10.convs2.0.weight_v
|
| 445 |
-
vq_model.dec.resblocks.10.convs2.1.bias
|
| 446 |
-
vq_model.dec.resblocks.10.convs2.1.weight_g
|
| 447 |
-
vq_model.dec.resblocks.10.convs2.1.weight_v
|
| 448 |
-
vq_model.dec.resblocks.10.convs2.2.bias
|
| 449 |
-
vq_model.dec.resblocks.10.convs2.2.weight_g
|
| 450 |
-
vq_model.dec.resblocks.10.convs2.2.weight_v
|
| 451 |
-
vq_model.dec.resblocks.11.convs1.0.bias
|
| 452 |
-
vq_model.dec.resblocks.11.convs1.0.weight_g
|
| 453 |
-
vq_model.dec.resblocks.11.convs1.0.weight_v
|
| 454 |
-
vq_model.dec.resblocks.11.convs1.1.bias
|
| 455 |
-
vq_model.dec.resblocks.11.convs1.1.weight_g
|
| 456 |
-
vq_model.dec.resblocks.11.convs1.1.weight_v
|
| 457 |
-
vq_model.dec.resblocks.11.convs1.2.bias
|
| 458 |
-
vq_model.dec.resblocks.11.convs1.2.weight_g
|
| 459 |
-
vq_model.dec.resblocks.11.convs1.2.weight_v
|
| 460 |
-
vq_model.dec.resblocks.11.convs2.0.bias
|
| 461 |
-
vq_model.dec.resblocks.11.convs2.0.weight_g
|
| 462 |
-
vq_model.dec.resblocks.11.convs2.0.weight_v
|
| 463 |
-
vq_model.dec.resblocks.11.convs2.1.bias
|
| 464 |
-
vq_model.dec.resblocks.11.convs2.1.weight_g
|
| 465 |
-
vq_model.dec.resblocks.11.convs2.1.weight_v
|
| 466 |
-
vq_model.dec.resblocks.11.convs2.2.bias
|
| 467 |
-
vq_model.dec.resblocks.11.convs2.2.weight_g
|
| 468 |
-
vq_model.dec.resblocks.11.convs2.2.weight_v
|
| 469 |
-
vq_model.dec.resblocks.12.convs1.0.bias
|
| 470 |
-
vq_model.dec.resblocks.12.convs1.0.weight_g
|
| 471 |
-
vq_model.dec.resblocks.12.convs1.0.weight_v
|
| 472 |
-
vq_model.dec.resblocks.12.convs1.1.bias
|
| 473 |
-
vq_model.dec.resblocks.12.convs1.1.weight_g
|
| 474 |
-
vq_model.dec.resblocks.12.convs1.1.weight_v
|
| 475 |
-
vq_model.dec.resblocks.12.convs1.2.bias
|
| 476 |
-
vq_model.dec.resblocks.12.convs1.2.weight_g
|
| 477 |
-
vq_model.dec.resblocks.12.convs1.2.weight_v
|
| 478 |
-
vq_model.dec.resblocks.12.convs2.0.bias
|
| 479 |
-
vq_model.dec.resblocks.12.convs2.0.weight_g
|
| 480 |
-
vq_model.dec.resblocks.12.convs2.0.weight_v
|
| 481 |
-
vq_model.dec.resblocks.12.convs2.1.bias
|
| 482 |
-
vq_model.dec.resblocks.12.convs2.1.weight_g
|
| 483 |
-
vq_model.dec.resblocks.12.convs2.1.weight_v
|
| 484 |
-
vq_model.dec.resblocks.12.convs2.2.bias
|
| 485 |
-
vq_model.dec.resblocks.12.convs2.2.weight_g
|
| 486 |
-
vq_model.dec.resblocks.12.convs2.2.weight_v
|
| 487 |
-
vq_model.dec.resblocks.13.convs1.0.bias
|
| 488 |
-
vq_model.dec.resblocks.13.convs1.0.weight_g
|
| 489 |
-
vq_model.dec.resblocks.13.convs1.0.weight_v
|
| 490 |
-
vq_model.dec.resblocks.13.convs1.1.bias
|
| 491 |
-
vq_model.dec.resblocks.13.convs1.1.weight_g
|
| 492 |
-
vq_model.dec.resblocks.13.convs1.1.weight_v
|
| 493 |
-
vq_model.dec.resblocks.13.convs1.2.bias
|
| 494 |
-
vq_model.dec.resblocks.13.convs1.2.weight_g
|
| 495 |
-
vq_model.dec.resblocks.13.convs1.2.weight_v
|
| 496 |
-
vq_model.dec.resblocks.13.convs2.0.bias
|
| 497 |
-
vq_model.dec.resblocks.13.convs2.0.weight_g
|
| 498 |
-
vq_model.dec.resblocks.13.convs2.0.weight_v
|
| 499 |
-
vq_model.dec.resblocks.13.convs2.1.bias
|
| 500 |
-
vq_model.dec.resblocks.13.convs2.1.weight_g
|
| 501 |
-
vq_model.dec.resblocks.13.convs2.1.weight_v
|
| 502 |
-
vq_model.dec.resblocks.13.convs2.2.bias
|
| 503 |
-
vq_model.dec.resblocks.13.convs2.2.weight_g
|
| 504 |
-
vq_model.dec.resblocks.13.convs2.2.weight_v
|
| 505 |
-
vq_model.dec.resblocks.14.convs1.0.bias
|
| 506 |
-
vq_model.dec.resblocks.14.convs1.0.weight_g
|
| 507 |
-
vq_model.dec.resblocks.14.convs1.0.weight_v
|
| 508 |
-
vq_model.dec.resblocks.14.convs1.1.bias
|
| 509 |
-
vq_model.dec.resblocks.14.convs1.1.weight_g
|
| 510 |
-
vq_model.dec.resblocks.14.convs1.1.weight_v
|
| 511 |
-
vq_model.dec.resblocks.14.convs1.2.bias
|
| 512 |
-
vq_model.dec.resblocks.14.convs1.2.weight_g
|
| 513 |
-
vq_model.dec.resblocks.14.convs1.2.weight_v
|
| 514 |
-
vq_model.dec.resblocks.14.convs2.0.bias
|
| 515 |
-
vq_model.dec.resblocks.14.convs2.0.weight_g
|
| 516 |
-
vq_model.dec.resblocks.14.convs2.0.weight_v
|
| 517 |
-
vq_model.dec.resblocks.14.convs2.1.bias
|
| 518 |
-
vq_model.dec.resblocks.14.convs2.1.weight_g
|
| 519 |
-
vq_model.dec.resblocks.14.convs2.1.weight_v
|
| 520 |
-
vq_model.dec.resblocks.14.convs2.2.bias
|
| 521 |
-
vq_model.dec.resblocks.14.convs2.2.weight_g
|
| 522 |
-
vq_model.dec.resblocks.14.convs2.2.weight_v
|
| 523 |
-
vq_model.dec.conv_post.weight
|
| 524 |
-
vq_model.dec.cond.weight
|
| 525 |
-
vq_model.dec.cond.bias
|
| 526 |
-
vq_model.flow.flows.0.pre.weight
|
| 527 |
-
vq_model.flow.flows.0.pre.bias
|
| 528 |
-
vq_model.flow.flows.0.enc.in_layers.0.bias
|
| 529 |
-
vq_model.flow.flows.0.enc.in_layers.0.weight_g
|
| 530 |
-
vq_model.flow.flows.0.enc.in_layers.0.weight_v
|
| 531 |
-
vq_model.flow.flows.0.enc.in_layers.1.bias
|
| 532 |
-
vq_model.flow.flows.0.enc.in_layers.1.weight_g
|
| 533 |
-
vq_model.flow.flows.0.enc.in_layers.1.weight_v
|
| 534 |
-
vq_model.flow.flows.0.enc.in_layers.2.bias
|
| 535 |
-
vq_model.flow.flows.0.enc.in_layers.2.weight_g
|
| 536 |
-
vq_model.flow.flows.0.enc.in_layers.2.weight_v
|
| 537 |
-
vq_model.flow.flows.0.enc.in_layers.3.bias
|
| 538 |
-
vq_model.flow.flows.0.enc.in_layers.3.weight_g
|
| 539 |
-
vq_model.flow.flows.0.enc.in_layers.3.weight_v
|
| 540 |
-
vq_model.flow.flows.0.enc.res_skip_layers.0.bias
|
| 541 |
-
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_g
|
| 542 |
-
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_v
|
| 543 |
-
vq_model.flow.flows.0.enc.res_skip_layers.1.bias
|
| 544 |
-
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_g
|
| 545 |
-
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_v
|
| 546 |
-
vq_model.flow.flows.0.enc.res_skip_layers.2.bias
|
| 547 |
-
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_g
|
| 548 |
-
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_v
|
| 549 |
-
vq_model.flow.flows.0.enc.res_skip_layers.3.bias
|
| 550 |
-
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_g
|
| 551 |
-
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_v
|
| 552 |
-
vq_model.flow.flows.0.enc.cond_layer.bias
|
| 553 |
-
vq_model.flow.flows.0.enc.cond_layer.weight_g
|
| 554 |
-
vq_model.flow.flows.0.enc.cond_layer.weight_v
|
| 555 |
-
vq_model.flow.flows.0.post.weight
|
| 556 |
-
vq_model.flow.flows.0.post.bias
|
| 557 |
-
vq_model.flow.flows.2.pre.weight
|
| 558 |
-
vq_model.flow.flows.2.pre.bias
|
| 559 |
-
vq_model.flow.flows.2.enc.in_layers.0.bias
|
| 560 |
-
vq_model.flow.flows.2.enc.in_layers.0.weight_g
|
| 561 |
-
vq_model.flow.flows.2.enc.in_layers.0.weight_v
|
| 562 |
-
vq_model.flow.flows.2.enc.in_layers.1.bias
|
| 563 |
-
vq_model.flow.flows.2.enc.in_layers.1.weight_g
|
| 564 |
-
vq_model.flow.flows.2.enc.in_layers.1.weight_v
|
| 565 |
-
vq_model.flow.flows.2.enc.in_layers.2.bias
|
| 566 |
-
vq_model.flow.flows.2.enc.in_layers.2.weight_g
|
| 567 |
-
vq_model.flow.flows.2.enc.in_layers.2.weight_v
|
| 568 |
-
vq_model.flow.flows.2.enc.in_layers.3.bias
|
| 569 |
-
vq_model.flow.flows.2.enc.in_layers.3.weight_g
|
| 570 |
-
vq_model.flow.flows.2.enc.in_layers.3.weight_v
|
| 571 |
-
vq_model.flow.flows.2.enc.res_skip_layers.0.bias
|
| 572 |
-
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_g
|
| 573 |
-
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_v
|
| 574 |
-
vq_model.flow.flows.2.enc.res_skip_layers.1.bias
|
| 575 |
-
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_g
|
| 576 |
-
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_v
|
| 577 |
-
vq_model.flow.flows.2.enc.res_skip_layers.2.bias
|
| 578 |
-
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_g
|
| 579 |
-
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_v
|
| 580 |
-
vq_model.flow.flows.2.enc.res_skip_layers.3.bias
|
| 581 |
-
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_g
|
| 582 |
-
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_v
|
| 583 |
-
vq_model.flow.flows.2.enc.cond_layer.bias
|
| 584 |
-
vq_model.flow.flows.2.enc.cond_layer.weight_g
|
| 585 |
-
vq_model.flow.flows.2.enc.cond_layer.weight_v
|
| 586 |
-
vq_model.flow.flows.2.post.weight
|
| 587 |
-
vq_model.flow.flows.2.post.bias
|
| 588 |
-
vq_model.flow.flows.4.pre.weight
|
| 589 |
-
vq_model.flow.flows.4.pre.bias
|
| 590 |
-
vq_model.flow.flows.4.enc.in_layers.0.bias
|
| 591 |
-
vq_model.flow.flows.4.enc.in_layers.0.weight_g
|
| 592 |
-
vq_model.flow.flows.4.enc.in_layers.0.weight_v
|
| 593 |
-
vq_model.flow.flows.4.enc.in_layers.1.bias
|
| 594 |
-
vq_model.flow.flows.4.enc.in_layers.1.weight_g
|
| 595 |
-
vq_model.flow.flows.4.enc.in_layers.1.weight_v
|
| 596 |
-
vq_model.flow.flows.4.enc.in_layers.2.bias
|
| 597 |
-
vq_model.flow.flows.4.enc.in_layers.2.weight_g
|
| 598 |
-
vq_model.flow.flows.4.enc.in_layers.2.weight_v
|
| 599 |
-
vq_model.flow.flows.4.enc.in_layers.3.bias
|
| 600 |
-
vq_model.flow.flows.4.enc.in_layers.3.weight_g
|
| 601 |
-
vq_model.flow.flows.4.enc.in_layers.3.weight_v
|
| 602 |
-
vq_model.flow.flows.4.enc.res_skip_layers.0.bias
|
| 603 |
-
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_g
|
| 604 |
-
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_v
|
| 605 |
-
vq_model.flow.flows.4.enc.res_skip_layers.1.bias
|
| 606 |
-
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_g
|
| 607 |
-
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_v
|
| 608 |
-
vq_model.flow.flows.4.enc.res_skip_layers.2.bias
|
| 609 |
-
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_g
|
| 610 |
-
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_v
|
| 611 |
-
vq_model.flow.flows.4.enc.res_skip_layers.3.bias
|
| 612 |
-
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_g
|
| 613 |
-
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_v
|
| 614 |
-
vq_model.flow.flows.4.enc.cond_layer.bias
|
| 615 |
-
vq_model.flow.flows.4.enc.cond_layer.weight_g
|
| 616 |
-
vq_model.flow.flows.4.enc.cond_layer.weight_v
|
| 617 |
-
vq_model.flow.flows.4.post.weight
|
| 618 |
-
vq_model.flow.flows.4.post.bias
|
| 619 |
-
vq_model.flow.flows.6.pre.weight
|
| 620 |
-
vq_model.flow.flows.6.pre.bias
|
| 621 |
-
vq_model.flow.flows.6.enc.in_layers.0.bias
|
| 622 |
-
vq_model.flow.flows.6.enc.in_layers.0.weight_g
|
| 623 |
-
vq_model.flow.flows.6.enc.in_layers.0.weight_v
|
| 624 |
-
vq_model.flow.flows.6.enc.in_layers.1.bias
|
| 625 |
-
vq_model.flow.flows.6.enc.in_layers.1.weight_g
|
| 626 |
-
vq_model.flow.flows.6.enc.in_layers.1.weight_v
|
| 627 |
-
vq_model.flow.flows.6.enc.in_layers.2.bias
|
| 628 |
-
vq_model.flow.flows.6.enc.in_layers.2.weight_g
|
| 629 |
-
vq_model.flow.flows.6.enc.in_layers.2.weight_v
|
| 630 |
-
vq_model.flow.flows.6.enc.in_layers.3.bias
|
| 631 |
-
vq_model.flow.flows.6.enc.in_layers.3.weight_g
|
| 632 |
-
vq_model.flow.flows.6.enc.in_layers.3.weight_v
|
| 633 |
-
vq_model.flow.flows.6.enc.res_skip_layers.0.bias
|
| 634 |
-
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_g
|
| 635 |
-
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_v
|
| 636 |
-
vq_model.flow.flows.6.enc.res_skip_layers.1.bias
|
| 637 |
-
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_g
|
| 638 |
-
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_v
|
| 639 |
-
vq_model.flow.flows.6.enc.res_skip_layers.2.bias
|
| 640 |
-
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_g
|
| 641 |
-
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_v
|
| 642 |
-
vq_model.flow.flows.6.enc.res_skip_layers.3.bias
|
| 643 |
-
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_g
|
| 644 |
-
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_v
|
| 645 |
-
vq_model.flow.flows.6.enc.cond_layer.bias
|
| 646 |
-
vq_model.flow.flows.6.enc.cond_layer.weight_g
|
| 647 |
-
vq_model.flow.flows.6.enc.cond_layer.weight_v
|
| 648 |
-
vq_model.flow.flows.6.post.weight
|
| 649 |
-
vq_model.flow.flows.6.post.bias
|
| 650 |
-
vq_model.quantizer.vq.layers.0._codebook.embed
|
|
|
|
| 1 |
+
vq_model.enc_p.ssl_proj.weight
|
| 2 |
+
vq_model.enc_p.ssl_proj.bias
|
| 3 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_k
|
| 4 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.emb_rel_v
|
| 5 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.weight
|
| 6 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_q.bias
|
| 7 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.weight
|
| 8 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_k.bias
|
| 9 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.weight
|
| 10 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_v.bias
|
| 11 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.weight
|
| 12 |
+
vq_model.enc_p.encoder_ssl.attn_layers.0.conv_o.bias
|
| 13 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_k
|
| 14 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.emb_rel_v
|
| 15 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.weight
|
| 16 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_q.bias
|
| 17 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.weight
|
| 18 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_k.bias
|
| 19 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.weight
|
| 20 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_v.bias
|
| 21 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.weight
|
| 22 |
+
vq_model.enc_p.encoder_ssl.attn_layers.1.conv_o.bias
|
| 23 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_k
|
| 24 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.emb_rel_v
|
| 25 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.weight
|
| 26 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_q.bias
|
| 27 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.weight
|
| 28 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_k.bias
|
| 29 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.weight
|
| 30 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_v.bias
|
| 31 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.weight
|
| 32 |
+
vq_model.enc_p.encoder_ssl.attn_layers.2.conv_o.bias
|
| 33 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.0.gamma
|
| 34 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.0.beta
|
| 35 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.1.gamma
|
| 36 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.1.beta
|
| 37 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.2.gamma
|
| 38 |
+
vq_model.enc_p.encoder_ssl.norm_layers_1.2.beta
|
| 39 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.weight
|
| 40 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_1.bias
|
| 41 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.weight
|
| 42 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.0.conv_2.bias
|
| 43 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.weight
|
| 44 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_1.bias
|
| 45 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.weight
|
| 46 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.1.conv_2.bias
|
| 47 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.weight
|
| 48 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_1.bias
|
| 49 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.weight
|
| 50 |
+
vq_model.enc_p.encoder_ssl.ffn_layers.2.conv_2.bias
|
| 51 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.0.gamma
|
| 52 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.0.beta
|
| 53 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.1.gamma
|
| 54 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.1.beta
|
| 55 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.2.gamma
|
| 56 |
+
vq_model.enc_p.encoder_ssl.norm_layers_2.2.beta
|
| 57 |
+
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_k
|
| 58 |
+
vq_model.enc_p.encoder_text.attn_layers.0.emb_rel_v
|
| 59 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.weight
|
| 60 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_q.bias
|
| 61 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.weight
|
| 62 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_k.bias
|
| 63 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.weight
|
| 64 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_v.bias
|
| 65 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.weight
|
| 66 |
+
vq_model.enc_p.encoder_text.attn_layers.0.conv_o.bias
|
| 67 |
+
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_k
|
| 68 |
+
vq_model.enc_p.encoder_text.attn_layers.1.emb_rel_v
|
| 69 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.weight
|
| 70 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_q.bias
|
| 71 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.weight
|
| 72 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_k.bias
|
| 73 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.weight
|
| 74 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_v.bias
|
| 75 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.weight
|
| 76 |
+
vq_model.enc_p.encoder_text.attn_layers.1.conv_o.bias
|
| 77 |
+
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_k
|
| 78 |
+
vq_model.enc_p.encoder_text.attn_layers.2.emb_rel_v
|
| 79 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.weight
|
| 80 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_q.bias
|
| 81 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.weight
|
| 82 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_k.bias
|
| 83 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.weight
|
| 84 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_v.bias
|
| 85 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.weight
|
| 86 |
+
vq_model.enc_p.encoder_text.attn_layers.2.conv_o.bias
|
| 87 |
+
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_k
|
| 88 |
+
vq_model.enc_p.encoder_text.attn_layers.3.emb_rel_v
|
| 89 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.weight
|
| 90 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_q.bias
|
| 91 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.weight
|
| 92 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_k.bias
|
| 93 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.weight
|
| 94 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_v.bias
|
| 95 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.weight
|
| 96 |
+
vq_model.enc_p.encoder_text.attn_layers.3.conv_o.bias
|
| 97 |
+
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_k
|
| 98 |
+
vq_model.enc_p.encoder_text.attn_layers.4.emb_rel_v
|
| 99 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.weight
|
| 100 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_q.bias
|
| 101 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.weight
|
| 102 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_k.bias
|
| 103 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.weight
|
| 104 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_v.bias
|
| 105 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.weight
|
| 106 |
+
vq_model.enc_p.encoder_text.attn_layers.4.conv_o.bias
|
| 107 |
+
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_k
|
| 108 |
+
vq_model.enc_p.encoder_text.attn_layers.5.emb_rel_v
|
| 109 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.weight
|
| 110 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_q.bias
|
| 111 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.weight
|
| 112 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_k.bias
|
| 113 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.weight
|
| 114 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_v.bias
|
| 115 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.weight
|
| 116 |
+
vq_model.enc_p.encoder_text.attn_layers.5.conv_o.bias
|
| 117 |
+
vq_model.enc_p.encoder_text.norm_layers_1.0.gamma
|
| 118 |
+
vq_model.enc_p.encoder_text.norm_layers_1.0.beta
|
| 119 |
+
vq_model.enc_p.encoder_text.norm_layers_1.1.gamma
|
| 120 |
+
vq_model.enc_p.encoder_text.norm_layers_1.1.beta
|
| 121 |
+
vq_model.enc_p.encoder_text.norm_layers_1.2.gamma
|
| 122 |
+
vq_model.enc_p.encoder_text.norm_layers_1.2.beta
|
| 123 |
+
vq_model.enc_p.encoder_text.norm_layers_1.3.gamma
|
| 124 |
+
vq_model.enc_p.encoder_text.norm_layers_1.3.beta
|
| 125 |
+
vq_model.enc_p.encoder_text.norm_layers_1.4.gamma
|
| 126 |
+
vq_model.enc_p.encoder_text.norm_layers_1.4.beta
|
| 127 |
+
vq_model.enc_p.encoder_text.norm_layers_1.5.gamma
|
| 128 |
+
vq_model.enc_p.encoder_text.norm_layers_1.5.beta
|
| 129 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.weight
|
| 130 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_1.bias
|
| 131 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.weight
|
| 132 |
+
vq_model.enc_p.encoder_text.ffn_layers.0.conv_2.bias
|
| 133 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.weight
|
| 134 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_1.bias
|
| 135 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.weight
|
| 136 |
+
vq_model.enc_p.encoder_text.ffn_layers.1.conv_2.bias
|
| 137 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.weight
|
| 138 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_1.bias
|
| 139 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.weight
|
| 140 |
+
vq_model.enc_p.encoder_text.ffn_layers.2.conv_2.bias
|
| 141 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.weight
|
| 142 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_1.bias
|
| 143 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.weight
|
| 144 |
+
vq_model.enc_p.encoder_text.ffn_layers.3.conv_2.bias
|
| 145 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.weight
|
| 146 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_1.bias
|
| 147 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.weight
|
| 148 |
+
vq_model.enc_p.encoder_text.ffn_layers.4.conv_2.bias
|
| 149 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.weight
|
| 150 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_1.bias
|
| 151 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.weight
|
| 152 |
+
vq_model.enc_p.encoder_text.ffn_layers.5.conv_2.bias
|
| 153 |
+
vq_model.enc_p.encoder_text.norm_layers_2.0.gamma
|
| 154 |
+
vq_model.enc_p.encoder_text.norm_layers_2.0.beta
|
| 155 |
+
vq_model.enc_p.encoder_text.norm_layers_2.1.gamma
|
| 156 |
+
vq_model.enc_p.encoder_text.norm_layers_2.1.beta
|
| 157 |
+
vq_model.enc_p.encoder_text.norm_layers_2.2.gamma
|
| 158 |
+
vq_model.enc_p.encoder_text.norm_layers_2.2.beta
|
| 159 |
+
vq_model.enc_p.encoder_text.norm_layers_2.3.gamma
|
| 160 |
+
vq_model.enc_p.encoder_text.norm_layers_2.3.beta
|
| 161 |
+
vq_model.enc_p.encoder_text.norm_layers_2.4.gamma
|
| 162 |
+
vq_model.enc_p.encoder_text.norm_layers_2.4.beta
|
| 163 |
+
vq_model.enc_p.encoder_text.norm_layers_2.5.gamma
|
| 164 |
+
vq_model.enc_p.encoder_text.norm_layers_2.5.beta
|
| 165 |
+
vq_model.enc_p.text_embedding.weight
|
| 166 |
+
vq_model.enc_p.mrte.cross_attention.conv_q.weight
|
| 167 |
+
vq_model.enc_p.mrte.cross_attention.conv_q.bias
|
| 168 |
+
vq_model.enc_p.mrte.cross_attention.conv_k.weight
|
| 169 |
+
vq_model.enc_p.mrte.cross_attention.conv_k.bias
|
| 170 |
+
vq_model.enc_p.mrte.cross_attention.conv_v.weight
|
| 171 |
+
vq_model.enc_p.mrte.cross_attention.conv_v.bias
|
| 172 |
+
vq_model.enc_p.mrte.cross_attention.conv_o.weight
|
| 173 |
+
vq_model.enc_p.mrte.cross_attention.conv_o.bias
|
| 174 |
+
vq_model.enc_p.mrte.c_pre.weight
|
| 175 |
+
vq_model.enc_p.mrte.c_pre.bias
|
| 176 |
+
vq_model.enc_p.mrte.text_pre.weight
|
| 177 |
+
vq_model.enc_p.mrte.text_pre.bias
|
| 178 |
+
vq_model.enc_p.mrte.c_post.weight
|
| 179 |
+
vq_model.enc_p.mrte.c_post.bias
|
| 180 |
+
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_k
|
| 181 |
+
vq_model.enc_p.encoder2.attn_layers.0.emb_rel_v
|
| 182 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_q.weight
|
| 183 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_q.bias
|
| 184 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_k.weight
|
| 185 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_k.bias
|
| 186 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_v.weight
|
| 187 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_v.bias
|
| 188 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_o.weight
|
| 189 |
+
vq_model.enc_p.encoder2.attn_layers.0.conv_o.bias
|
| 190 |
+
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_k
|
| 191 |
+
vq_model.enc_p.encoder2.attn_layers.1.emb_rel_v
|
| 192 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_q.weight
|
| 193 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_q.bias
|
| 194 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_k.weight
|
| 195 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_k.bias
|
| 196 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_v.weight
|
| 197 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_v.bias
|
| 198 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_o.weight
|
| 199 |
+
vq_model.enc_p.encoder2.attn_layers.1.conv_o.bias
|
| 200 |
+
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_k
|
| 201 |
+
vq_model.enc_p.encoder2.attn_layers.2.emb_rel_v
|
| 202 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_q.weight
|
| 203 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_q.bias
|
| 204 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_k.weight
|
| 205 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_k.bias
|
| 206 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_v.weight
|
| 207 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_v.bias
|
| 208 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_o.weight
|
| 209 |
+
vq_model.enc_p.encoder2.attn_layers.2.conv_o.bias
|
| 210 |
+
vq_model.enc_p.encoder2.norm_layers_1.0.gamma
|
| 211 |
+
vq_model.enc_p.encoder2.norm_layers_1.0.beta
|
| 212 |
+
vq_model.enc_p.encoder2.norm_layers_1.1.gamma
|
| 213 |
+
vq_model.enc_p.encoder2.norm_layers_1.1.beta
|
| 214 |
+
vq_model.enc_p.encoder2.norm_layers_1.2.gamma
|
| 215 |
+
vq_model.enc_p.encoder2.norm_layers_1.2.beta
|
| 216 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.weight
|
| 217 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_1.bias
|
| 218 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.weight
|
| 219 |
+
vq_model.enc_p.encoder2.ffn_layers.0.conv_2.bias
|
| 220 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.weight
|
| 221 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_1.bias
|
| 222 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.weight
|
| 223 |
+
vq_model.enc_p.encoder2.ffn_layers.1.conv_2.bias
|
| 224 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.weight
|
| 225 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_1.bias
|
| 226 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.weight
|
| 227 |
+
vq_model.enc_p.encoder2.ffn_layers.2.conv_2.bias
|
| 228 |
+
vq_model.enc_p.encoder2.norm_layers_2.0.gamma
|
| 229 |
+
vq_model.enc_p.encoder2.norm_layers_2.0.beta
|
| 230 |
+
vq_model.enc_p.encoder2.norm_layers_2.1.gamma
|
| 231 |
+
vq_model.enc_p.encoder2.norm_layers_2.1.beta
|
| 232 |
+
vq_model.enc_p.encoder2.norm_layers_2.2.gamma
|
| 233 |
+
vq_model.enc_p.encoder2.norm_layers_2.2.beta
|
| 234 |
+
vq_model.enc_p.proj.weight
|
| 235 |
+
vq_model.enc_p.proj.bias
|
| 236 |
+
vq_model.dec.conv_pre.weight
|
| 237 |
+
vq_model.dec.conv_pre.bias
|
| 238 |
+
vq_model.dec.ups.0.bias
|
| 239 |
+
vq_model.dec.ups.0.weight_g
|
| 240 |
+
vq_model.dec.ups.0.weight_v
|
| 241 |
+
vq_model.dec.ups.1.bias
|
| 242 |
+
vq_model.dec.ups.1.weight_g
|
| 243 |
+
vq_model.dec.ups.1.weight_v
|
| 244 |
+
vq_model.dec.ups.2.bias
|
| 245 |
+
vq_model.dec.ups.2.weight_g
|
| 246 |
+
vq_model.dec.ups.2.weight_v
|
| 247 |
+
vq_model.dec.ups.3.bias
|
| 248 |
+
vq_model.dec.ups.3.weight_g
|
| 249 |
+
vq_model.dec.ups.3.weight_v
|
| 250 |
+
vq_model.dec.ups.4.bias
|
| 251 |
+
vq_model.dec.ups.4.weight_g
|
| 252 |
+
vq_model.dec.ups.4.weight_v
|
| 253 |
+
vq_model.dec.resblocks.0.convs1.0.bias
|
| 254 |
+
vq_model.dec.resblocks.0.convs1.0.weight_g
|
| 255 |
+
vq_model.dec.resblocks.0.convs1.0.weight_v
|
| 256 |
+
vq_model.dec.resblocks.0.convs1.1.bias
|
| 257 |
+
vq_model.dec.resblocks.0.convs1.1.weight_g
|
| 258 |
+
vq_model.dec.resblocks.0.convs1.1.weight_v
|
| 259 |
+
vq_model.dec.resblocks.0.convs1.2.bias
|
| 260 |
+
vq_model.dec.resblocks.0.convs1.2.weight_g
|
| 261 |
+
vq_model.dec.resblocks.0.convs1.2.weight_v
|
| 262 |
+
vq_model.dec.resblocks.0.convs2.0.bias
|
| 263 |
+
vq_model.dec.resblocks.0.convs2.0.weight_g
|
| 264 |
+
vq_model.dec.resblocks.0.convs2.0.weight_v
|
| 265 |
+
vq_model.dec.resblocks.0.convs2.1.bias
|
| 266 |
+
vq_model.dec.resblocks.0.convs2.1.weight_g
|
| 267 |
+
vq_model.dec.resblocks.0.convs2.1.weight_v
|
| 268 |
+
vq_model.dec.resblocks.0.convs2.2.bias
|
| 269 |
+
vq_model.dec.resblocks.0.convs2.2.weight_g
|
| 270 |
+
vq_model.dec.resblocks.0.convs2.2.weight_v
|
| 271 |
+
vq_model.dec.resblocks.1.convs1.0.bias
|
| 272 |
+
vq_model.dec.resblocks.1.convs1.0.weight_g
|
| 273 |
+
vq_model.dec.resblocks.1.convs1.0.weight_v
|
| 274 |
+
vq_model.dec.resblocks.1.convs1.1.bias
|
| 275 |
+
vq_model.dec.resblocks.1.convs1.1.weight_g
|
| 276 |
+
vq_model.dec.resblocks.1.convs1.1.weight_v
|
| 277 |
+
vq_model.dec.resblocks.1.convs1.2.bias
|
| 278 |
+
vq_model.dec.resblocks.1.convs1.2.weight_g
|
| 279 |
+
vq_model.dec.resblocks.1.convs1.2.weight_v
|
| 280 |
+
vq_model.dec.resblocks.1.convs2.0.bias
|
| 281 |
+
vq_model.dec.resblocks.1.convs2.0.weight_g
|
| 282 |
+
vq_model.dec.resblocks.1.convs2.0.weight_v
|
| 283 |
+
vq_model.dec.resblocks.1.convs2.1.bias
|
| 284 |
+
vq_model.dec.resblocks.1.convs2.1.weight_g
|
| 285 |
+
vq_model.dec.resblocks.1.convs2.1.weight_v
|
| 286 |
+
vq_model.dec.resblocks.1.convs2.2.bias
|
| 287 |
+
vq_model.dec.resblocks.1.convs2.2.weight_g
|
| 288 |
+
vq_model.dec.resblocks.1.convs2.2.weight_v
|
| 289 |
+
vq_model.dec.resblocks.2.convs1.0.bias
|
| 290 |
+
vq_model.dec.resblocks.2.convs1.0.weight_g
|
| 291 |
+
vq_model.dec.resblocks.2.convs1.0.weight_v
|
| 292 |
+
vq_model.dec.resblocks.2.convs1.1.bias
|
| 293 |
+
vq_model.dec.resblocks.2.convs1.1.weight_g
|
| 294 |
+
vq_model.dec.resblocks.2.convs1.1.weight_v
|
| 295 |
+
vq_model.dec.resblocks.2.convs1.2.bias
|
| 296 |
+
vq_model.dec.resblocks.2.convs1.2.weight_g
|
| 297 |
+
vq_model.dec.resblocks.2.convs1.2.weight_v
|
| 298 |
+
vq_model.dec.resblocks.2.convs2.0.bias
|
| 299 |
+
vq_model.dec.resblocks.2.convs2.0.weight_g
|
| 300 |
+
vq_model.dec.resblocks.2.convs2.0.weight_v
|
| 301 |
+
vq_model.dec.resblocks.2.convs2.1.bias
|
| 302 |
+
vq_model.dec.resblocks.2.convs2.1.weight_g
|
| 303 |
+
vq_model.dec.resblocks.2.convs2.1.weight_v
|
| 304 |
+
vq_model.dec.resblocks.2.convs2.2.bias
|
| 305 |
+
vq_model.dec.resblocks.2.convs2.2.weight_g
|
| 306 |
+
vq_model.dec.resblocks.2.convs2.2.weight_v
|
| 307 |
+
vq_model.dec.resblocks.3.convs1.0.bias
|
| 308 |
+
vq_model.dec.resblocks.3.convs1.0.weight_g
|
| 309 |
+
vq_model.dec.resblocks.3.convs1.0.weight_v
|
| 310 |
+
vq_model.dec.resblocks.3.convs1.1.bias
|
| 311 |
+
vq_model.dec.resblocks.3.convs1.1.weight_g
|
| 312 |
+
vq_model.dec.resblocks.3.convs1.1.weight_v
|
| 313 |
+
vq_model.dec.resblocks.3.convs1.2.bias
|
| 314 |
+
vq_model.dec.resblocks.3.convs1.2.weight_g
|
| 315 |
+
vq_model.dec.resblocks.3.convs1.2.weight_v
|
| 316 |
+
vq_model.dec.resblocks.3.convs2.0.bias
|
| 317 |
+
vq_model.dec.resblocks.3.convs2.0.weight_g
|
| 318 |
+
vq_model.dec.resblocks.3.convs2.0.weight_v
|
| 319 |
+
vq_model.dec.resblocks.3.convs2.1.bias
|
| 320 |
+
vq_model.dec.resblocks.3.convs2.1.weight_g
|
| 321 |
+
vq_model.dec.resblocks.3.convs2.1.weight_v
|
| 322 |
+
vq_model.dec.resblocks.3.convs2.2.bias
|
| 323 |
+
vq_model.dec.resblocks.3.convs2.2.weight_g
|
| 324 |
+
vq_model.dec.resblocks.3.convs2.2.weight_v
|
| 325 |
+
vq_model.dec.resblocks.4.convs1.0.bias
|
| 326 |
+
vq_model.dec.resblocks.4.convs1.0.weight_g
|
| 327 |
+
vq_model.dec.resblocks.4.convs1.0.weight_v
|
| 328 |
+
vq_model.dec.resblocks.4.convs1.1.bias
|
| 329 |
+
vq_model.dec.resblocks.4.convs1.1.weight_g
|
| 330 |
+
vq_model.dec.resblocks.4.convs1.1.weight_v
|
| 331 |
+
vq_model.dec.resblocks.4.convs1.2.bias
|
| 332 |
+
vq_model.dec.resblocks.4.convs1.2.weight_g
|
| 333 |
+
vq_model.dec.resblocks.4.convs1.2.weight_v
|
| 334 |
+
vq_model.dec.resblocks.4.convs2.0.bias
|
| 335 |
+
vq_model.dec.resblocks.4.convs2.0.weight_g
|
| 336 |
+
vq_model.dec.resblocks.4.convs2.0.weight_v
|
| 337 |
+
vq_model.dec.resblocks.4.convs2.1.bias
|
| 338 |
+
vq_model.dec.resblocks.4.convs2.1.weight_g
|
| 339 |
+
vq_model.dec.resblocks.4.convs2.1.weight_v
|
| 340 |
+
vq_model.dec.resblocks.4.convs2.2.bias
|
| 341 |
+
vq_model.dec.resblocks.4.convs2.2.weight_g
|
| 342 |
+
vq_model.dec.resblocks.4.convs2.2.weight_v
|
| 343 |
+
vq_model.dec.resblocks.5.convs1.0.bias
|
| 344 |
+
vq_model.dec.resblocks.5.convs1.0.weight_g
|
| 345 |
+
vq_model.dec.resblocks.5.convs1.0.weight_v
|
| 346 |
+
vq_model.dec.resblocks.5.convs1.1.bias
|
| 347 |
+
vq_model.dec.resblocks.5.convs1.1.weight_g
|
| 348 |
+
vq_model.dec.resblocks.5.convs1.1.weight_v
|
| 349 |
+
vq_model.dec.resblocks.5.convs1.2.bias
|
| 350 |
+
vq_model.dec.resblocks.5.convs1.2.weight_g
|
| 351 |
+
vq_model.dec.resblocks.5.convs1.2.weight_v
|
| 352 |
+
vq_model.dec.resblocks.5.convs2.0.bias
|
| 353 |
+
vq_model.dec.resblocks.5.convs2.0.weight_g
|
| 354 |
+
vq_model.dec.resblocks.5.convs2.0.weight_v
|
| 355 |
+
vq_model.dec.resblocks.5.convs2.1.bias
|
| 356 |
+
vq_model.dec.resblocks.5.convs2.1.weight_g
|
| 357 |
+
vq_model.dec.resblocks.5.convs2.1.weight_v
|
| 358 |
+
vq_model.dec.resblocks.5.convs2.2.bias
|
| 359 |
+
vq_model.dec.resblocks.5.convs2.2.weight_g
|
| 360 |
+
vq_model.dec.resblocks.5.convs2.2.weight_v
|
| 361 |
+
vq_model.dec.resblocks.6.convs1.0.bias
|
| 362 |
+
vq_model.dec.resblocks.6.convs1.0.weight_g
|
| 363 |
+
vq_model.dec.resblocks.6.convs1.0.weight_v
|
| 364 |
+
vq_model.dec.resblocks.6.convs1.1.bias
|
| 365 |
+
vq_model.dec.resblocks.6.convs1.1.weight_g
|
| 366 |
+
vq_model.dec.resblocks.6.convs1.1.weight_v
|
| 367 |
+
vq_model.dec.resblocks.6.convs1.2.bias
|
| 368 |
+
vq_model.dec.resblocks.6.convs1.2.weight_g
|
| 369 |
+
vq_model.dec.resblocks.6.convs1.2.weight_v
|
| 370 |
+
vq_model.dec.resblocks.6.convs2.0.bias
|
| 371 |
+
vq_model.dec.resblocks.6.convs2.0.weight_g
|
| 372 |
+
vq_model.dec.resblocks.6.convs2.0.weight_v
|
| 373 |
+
vq_model.dec.resblocks.6.convs2.1.bias
|
| 374 |
+
vq_model.dec.resblocks.6.convs2.1.weight_g
|
| 375 |
+
vq_model.dec.resblocks.6.convs2.1.weight_v
|
| 376 |
+
vq_model.dec.resblocks.6.convs2.2.bias
|
| 377 |
+
vq_model.dec.resblocks.6.convs2.2.weight_g
|
| 378 |
+
vq_model.dec.resblocks.6.convs2.2.weight_v
|
| 379 |
+
vq_model.dec.resblocks.7.convs1.0.bias
|
| 380 |
+
vq_model.dec.resblocks.7.convs1.0.weight_g
|
| 381 |
+
vq_model.dec.resblocks.7.convs1.0.weight_v
|
| 382 |
+
vq_model.dec.resblocks.7.convs1.1.bias
|
| 383 |
+
vq_model.dec.resblocks.7.convs1.1.weight_g
|
| 384 |
+
vq_model.dec.resblocks.7.convs1.1.weight_v
|
| 385 |
+
vq_model.dec.resblocks.7.convs1.2.bias
|
| 386 |
+
vq_model.dec.resblocks.7.convs1.2.weight_g
|
| 387 |
+
vq_model.dec.resblocks.7.convs1.2.weight_v
|
| 388 |
+
vq_model.dec.resblocks.7.convs2.0.bias
|
| 389 |
+
vq_model.dec.resblocks.7.convs2.0.weight_g
|
| 390 |
+
vq_model.dec.resblocks.7.convs2.0.weight_v
|
| 391 |
+
vq_model.dec.resblocks.7.convs2.1.bias
|
| 392 |
+
vq_model.dec.resblocks.7.convs2.1.weight_g
|
| 393 |
+
vq_model.dec.resblocks.7.convs2.1.weight_v
|
| 394 |
+
vq_model.dec.resblocks.7.convs2.2.bias
|
| 395 |
+
vq_model.dec.resblocks.7.convs2.2.weight_g
|
| 396 |
+
vq_model.dec.resblocks.7.convs2.2.weight_v
|
| 397 |
+
vq_model.dec.resblocks.8.convs1.0.bias
|
| 398 |
+
vq_model.dec.resblocks.8.convs1.0.weight_g
|
| 399 |
+
vq_model.dec.resblocks.8.convs1.0.weight_v
|
| 400 |
+
vq_model.dec.resblocks.8.convs1.1.bias
|
| 401 |
+
vq_model.dec.resblocks.8.convs1.1.weight_g
|
| 402 |
+
vq_model.dec.resblocks.8.convs1.1.weight_v
|
| 403 |
+
vq_model.dec.resblocks.8.convs1.2.bias
|
| 404 |
+
vq_model.dec.resblocks.8.convs1.2.weight_g
|
| 405 |
+
vq_model.dec.resblocks.8.convs1.2.weight_v
|
| 406 |
+
vq_model.dec.resblocks.8.convs2.0.bias
|
| 407 |
+
vq_model.dec.resblocks.8.convs2.0.weight_g
|
| 408 |
+
vq_model.dec.resblocks.8.convs2.0.weight_v
|
| 409 |
+
vq_model.dec.resblocks.8.convs2.1.bias
|
| 410 |
+
vq_model.dec.resblocks.8.convs2.1.weight_g
|
| 411 |
+
vq_model.dec.resblocks.8.convs2.1.weight_v
|
| 412 |
+
vq_model.dec.resblocks.8.convs2.2.bias
|
| 413 |
+
vq_model.dec.resblocks.8.convs2.2.weight_g
|
| 414 |
+
vq_model.dec.resblocks.8.convs2.2.weight_v
|
| 415 |
+
vq_model.dec.resblocks.9.convs1.0.bias
|
| 416 |
+
vq_model.dec.resblocks.9.convs1.0.weight_g
|
| 417 |
+
vq_model.dec.resblocks.9.convs1.0.weight_v
|
| 418 |
+
vq_model.dec.resblocks.9.convs1.1.bias
|
| 419 |
+
vq_model.dec.resblocks.9.convs1.1.weight_g
|
| 420 |
+
vq_model.dec.resblocks.9.convs1.1.weight_v
|
| 421 |
+
vq_model.dec.resblocks.9.convs1.2.bias
|
| 422 |
+
vq_model.dec.resblocks.9.convs1.2.weight_g
|
| 423 |
+
vq_model.dec.resblocks.9.convs1.2.weight_v
|
| 424 |
+
vq_model.dec.resblocks.9.convs2.0.bias
|
| 425 |
+
vq_model.dec.resblocks.9.convs2.0.weight_g
|
| 426 |
+
vq_model.dec.resblocks.9.convs2.0.weight_v
|
| 427 |
+
vq_model.dec.resblocks.9.convs2.1.bias
|
| 428 |
+
vq_model.dec.resblocks.9.convs2.1.weight_g
|
| 429 |
+
vq_model.dec.resblocks.9.convs2.1.weight_v
|
| 430 |
+
vq_model.dec.resblocks.9.convs2.2.bias
|
| 431 |
+
vq_model.dec.resblocks.9.convs2.2.weight_g
|
| 432 |
+
vq_model.dec.resblocks.9.convs2.2.weight_v
|
| 433 |
+
vq_model.dec.resblocks.10.convs1.0.bias
|
| 434 |
+
vq_model.dec.resblocks.10.convs1.0.weight_g
|
| 435 |
+
vq_model.dec.resblocks.10.convs1.0.weight_v
|
| 436 |
+
vq_model.dec.resblocks.10.convs1.1.bias
|
| 437 |
+
vq_model.dec.resblocks.10.convs1.1.weight_g
|
| 438 |
+
vq_model.dec.resblocks.10.convs1.1.weight_v
|
| 439 |
+
vq_model.dec.resblocks.10.convs1.2.bias
|
| 440 |
+
vq_model.dec.resblocks.10.convs1.2.weight_g
|
| 441 |
+
vq_model.dec.resblocks.10.convs1.2.weight_v
|
| 442 |
+
vq_model.dec.resblocks.10.convs2.0.bias
|
| 443 |
+
vq_model.dec.resblocks.10.convs2.0.weight_g
|
| 444 |
+
vq_model.dec.resblocks.10.convs2.0.weight_v
|
| 445 |
+
vq_model.dec.resblocks.10.convs2.1.bias
|
| 446 |
+
vq_model.dec.resblocks.10.convs2.1.weight_g
|
| 447 |
+
vq_model.dec.resblocks.10.convs2.1.weight_v
|
| 448 |
+
vq_model.dec.resblocks.10.convs2.2.bias
|
| 449 |
+
vq_model.dec.resblocks.10.convs2.2.weight_g
|
| 450 |
+
vq_model.dec.resblocks.10.convs2.2.weight_v
|
| 451 |
+
vq_model.dec.resblocks.11.convs1.0.bias
|
| 452 |
+
vq_model.dec.resblocks.11.convs1.0.weight_g
|
| 453 |
+
vq_model.dec.resblocks.11.convs1.0.weight_v
|
| 454 |
+
vq_model.dec.resblocks.11.convs1.1.bias
|
| 455 |
+
vq_model.dec.resblocks.11.convs1.1.weight_g
|
| 456 |
+
vq_model.dec.resblocks.11.convs1.1.weight_v
|
| 457 |
+
vq_model.dec.resblocks.11.convs1.2.bias
|
| 458 |
+
vq_model.dec.resblocks.11.convs1.2.weight_g
|
| 459 |
+
vq_model.dec.resblocks.11.convs1.2.weight_v
|
| 460 |
+
vq_model.dec.resblocks.11.convs2.0.bias
|
| 461 |
+
vq_model.dec.resblocks.11.convs2.0.weight_g
|
| 462 |
+
vq_model.dec.resblocks.11.convs2.0.weight_v
|
| 463 |
+
vq_model.dec.resblocks.11.convs2.1.bias
|
| 464 |
+
vq_model.dec.resblocks.11.convs2.1.weight_g
|
| 465 |
+
vq_model.dec.resblocks.11.convs2.1.weight_v
|
| 466 |
+
vq_model.dec.resblocks.11.convs2.2.bias
|
| 467 |
+
vq_model.dec.resblocks.11.convs2.2.weight_g
|
| 468 |
+
vq_model.dec.resblocks.11.convs2.2.weight_v
|
| 469 |
+
vq_model.dec.resblocks.12.convs1.0.bias
|
| 470 |
+
vq_model.dec.resblocks.12.convs1.0.weight_g
|
| 471 |
+
vq_model.dec.resblocks.12.convs1.0.weight_v
|
| 472 |
+
vq_model.dec.resblocks.12.convs1.1.bias
|
| 473 |
+
vq_model.dec.resblocks.12.convs1.1.weight_g
|
| 474 |
+
vq_model.dec.resblocks.12.convs1.1.weight_v
|
| 475 |
+
vq_model.dec.resblocks.12.convs1.2.bias
|
| 476 |
+
vq_model.dec.resblocks.12.convs1.2.weight_g
|
| 477 |
+
vq_model.dec.resblocks.12.convs1.2.weight_v
|
| 478 |
+
vq_model.dec.resblocks.12.convs2.0.bias
|
| 479 |
+
vq_model.dec.resblocks.12.convs2.0.weight_g
|
| 480 |
+
vq_model.dec.resblocks.12.convs2.0.weight_v
|
| 481 |
+
vq_model.dec.resblocks.12.convs2.1.bias
|
| 482 |
+
vq_model.dec.resblocks.12.convs2.1.weight_g
|
| 483 |
+
vq_model.dec.resblocks.12.convs2.1.weight_v
|
| 484 |
+
vq_model.dec.resblocks.12.convs2.2.bias
|
| 485 |
+
vq_model.dec.resblocks.12.convs2.2.weight_g
|
| 486 |
+
vq_model.dec.resblocks.12.convs2.2.weight_v
|
| 487 |
+
vq_model.dec.resblocks.13.convs1.0.bias
|
| 488 |
+
vq_model.dec.resblocks.13.convs1.0.weight_g
|
| 489 |
+
vq_model.dec.resblocks.13.convs1.0.weight_v
|
| 490 |
+
vq_model.dec.resblocks.13.convs1.1.bias
|
| 491 |
+
vq_model.dec.resblocks.13.convs1.1.weight_g
|
| 492 |
+
vq_model.dec.resblocks.13.convs1.1.weight_v
|
| 493 |
+
vq_model.dec.resblocks.13.convs1.2.bias
|
| 494 |
+
vq_model.dec.resblocks.13.convs1.2.weight_g
|
| 495 |
+
vq_model.dec.resblocks.13.convs1.2.weight_v
|
| 496 |
+
vq_model.dec.resblocks.13.convs2.0.bias
|
| 497 |
+
vq_model.dec.resblocks.13.convs2.0.weight_g
|
| 498 |
+
vq_model.dec.resblocks.13.convs2.0.weight_v
|
| 499 |
+
vq_model.dec.resblocks.13.convs2.1.bias
|
| 500 |
+
vq_model.dec.resblocks.13.convs2.1.weight_g
|
| 501 |
+
vq_model.dec.resblocks.13.convs2.1.weight_v
|
| 502 |
+
vq_model.dec.resblocks.13.convs2.2.bias
|
| 503 |
+
vq_model.dec.resblocks.13.convs2.2.weight_g
|
| 504 |
+
vq_model.dec.resblocks.13.convs2.2.weight_v
|
| 505 |
+
vq_model.dec.resblocks.14.convs1.0.bias
|
| 506 |
+
vq_model.dec.resblocks.14.convs1.0.weight_g
|
| 507 |
+
vq_model.dec.resblocks.14.convs1.0.weight_v
|
| 508 |
+
vq_model.dec.resblocks.14.convs1.1.bias
|
| 509 |
+
vq_model.dec.resblocks.14.convs1.1.weight_g
|
| 510 |
+
vq_model.dec.resblocks.14.convs1.1.weight_v
|
| 511 |
+
vq_model.dec.resblocks.14.convs1.2.bias
|
| 512 |
+
vq_model.dec.resblocks.14.convs1.2.weight_g
|
| 513 |
+
vq_model.dec.resblocks.14.convs1.2.weight_v
|
| 514 |
+
vq_model.dec.resblocks.14.convs2.0.bias
|
| 515 |
+
vq_model.dec.resblocks.14.convs2.0.weight_g
|
| 516 |
+
vq_model.dec.resblocks.14.convs2.0.weight_v
|
| 517 |
+
vq_model.dec.resblocks.14.convs2.1.bias
|
| 518 |
+
vq_model.dec.resblocks.14.convs2.1.weight_g
|
| 519 |
+
vq_model.dec.resblocks.14.convs2.1.weight_v
|
| 520 |
+
vq_model.dec.resblocks.14.convs2.2.bias
|
| 521 |
+
vq_model.dec.resblocks.14.convs2.2.weight_g
|
| 522 |
+
vq_model.dec.resblocks.14.convs2.2.weight_v
|
| 523 |
+
vq_model.dec.conv_post.weight
|
| 524 |
+
vq_model.dec.cond.weight
|
| 525 |
+
vq_model.dec.cond.bias
|
| 526 |
+
vq_model.flow.flows.0.pre.weight
|
| 527 |
+
vq_model.flow.flows.0.pre.bias
|
| 528 |
+
vq_model.flow.flows.0.enc.in_layers.0.bias
|
| 529 |
+
vq_model.flow.flows.0.enc.in_layers.0.weight_g
|
| 530 |
+
vq_model.flow.flows.0.enc.in_layers.0.weight_v
|
| 531 |
+
vq_model.flow.flows.0.enc.in_layers.1.bias
|
| 532 |
+
vq_model.flow.flows.0.enc.in_layers.1.weight_g
|
| 533 |
+
vq_model.flow.flows.0.enc.in_layers.1.weight_v
|
| 534 |
+
vq_model.flow.flows.0.enc.in_layers.2.bias
|
| 535 |
+
vq_model.flow.flows.0.enc.in_layers.2.weight_g
|
| 536 |
+
vq_model.flow.flows.0.enc.in_layers.2.weight_v
|
| 537 |
+
vq_model.flow.flows.0.enc.in_layers.3.bias
|
| 538 |
+
vq_model.flow.flows.0.enc.in_layers.3.weight_g
|
| 539 |
+
vq_model.flow.flows.0.enc.in_layers.3.weight_v
|
| 540 |
+
vq_model.flow.flows.0.enc.res_skip_layers.0.bias
|
| 541 |
+
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_g
|
| 542 |
+
vq_model.flow.flows.0.enc.res_skip_layers.0.weight_v
|
| 543 |
+
vq_model.flow.flows.0.enc.res_skip_layers.1.bias
|
| 544 |
+
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_g
|
| 545 |
+
vq_model.flow.flows.0.enc.res_skip_layers.1.weight_v
|
| 546 |
+
vq_model.flow.flows.0.enc.res_skip_layers.2.bias
|
| 547 |
+
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_g
|
| 548 |
+
vq_model.flow.flows.0.enc.res_skip_layers.2.weight_v
|
| 549 |
+
vq_model.flow.flows.0.enc.res_skip_layers.3.bias
|
| 550 |
+
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_g
|
| 551 |
+
vq_model.flow.flows.0.enc.res_skip_layers.3.weight_v
|
| 552 |
+
vq_model.flow.flows.0.enc.cond_layer.bias
|
| 553 |
+
vq_model.flow.flows.0.enc.cond_layer.weight_g
|
| 554 |
+
vq_model.flow.flows.0.enc.cond_layer.weight_v
|
| 555 |
+
vq_model.flow.flows.0.post.weight
|
| 556 |
+
vq_model.flow.flows.0.post.bias
|
| 557 |
+
vq_model.flow.flows.2.pre.weight
|
| 558 |
+
vq_model.flow.flows.2.pre.bias
|
| 559 |
+
vq_model.flow.flows.2.enc.in_layers.0.bias
|
| 560 |
+
vq_model.flow.flows.2.enc.in_layers.0.weight_g
|
| 561 |
+
vq_model.flow.flows.2.enc.in_layers.0.weight_v
|
| 562 |
+
vq_model.flow.flows.2.enc.in_layers.1.bias
|
| 563 |
+
vq_model.flow.flows.2.enc.in_layers.1.weight_g
|
| 564 |
+
vq_model.flow.flows.2.enc.in_layers.1.weight_v
|
| 565 |
+
vq_model.flow.flows.2.enc.in_layers.2.bias
|
| 566 |
+
vq_model.flow.flows.2.enc.in_layers.2.weight_g
|
| 567 |
+
vq_model.flow.flows.2.enc.in_layers.2.weight_v
|
| 568 |
+
vq_model.flow.flows.2.enc.in_layers.3.bias
|
| 569 |
+
vq_model.flow.flows.2.enc.in_layers.3.weight_g
|
| 570 |
+
vq_model.flow.flows.2.enc.in_layers.3.weight_v
|
| 571 |
+
vq_model.flow.flows.2.enc.res_skip_layers.0.bias
|
| 572 |
+
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_g
|
| 573 |
+
vq_model.flow.flows.2.enc.res_skip_layers.0.weight_v
|
| 574 |
+
vq_model.flow.flows.2.enc.res_skip_layers.1.bias
|
| 575 |
+
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_g
|
| 576 |
+
vq_model.flow.flows.2.enc.res_skip_layers.1.weight_v
|
| 577 |
+
vq_model.flow.flows.2.enc.res_skip_layers.2.bias
|
| 578 |
+
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_g
|
| 579 |
+
vq_model.flow.flows.2.enc.res_skip_layers.2.weight_v
|
| 580 |
+
vq_model.flow.flows.2.enc.res_skip_layers.3.bias
|
| 581 |
+
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_g
|
| 582 |
+
vq_model.flow.flows.2.enc.res_skip_layers.3.weight_v
|
| 583 |
+
vq_model.flow.flows.2.enc.cond_layer.bias
|
| 584 |
+
vq_model.flow.flows.2.enc.cond_layer.weight_g
|
| 585 |
+
vq_model.flow.flows.2.enc.cond_layer.weight_v
|
| 586 |
+
vq_model.flow.flows.2.post.weight
|
| 587 |
+
vq_model.flow.flows.2.post.bias
|
| 588 |
+
vq_model.flow.flows.4.pre.weight
|
| 589 |
+
vq_model.flow.flows.4.pre.bias
|
| 590 |
+
vq_model.flow.flows.4.enc.in_layers.0.bias
|
| 591 |
+
vq_model.flow.flows.4.enc.in_layers.0.weight_g
|
| 592 |
+
vq_model.flow.flows.4.enc.in_layers.0.weight_v
|
| 593 |
+
vq_model.flow.flows.4.enc.in_layers.1.bias
|
| 594 |
+
vq_model.flow.flows.4.enc.in_layers.1.weight_g
|
| 595 |
+
vq_model.flow.flows.4.enc.in_layers.1.weight_v
|
| 596 |
+
vq_model.flow.flows.4.enc.in_layers.2.bias
|
| 597 |
+
vq_model.flow.flows.4.enc.in_layers.2.weight_g
|
| 598 |
+
vq_model.flow.flows.4.enc.in_layers.2.weight_v
|
| 599 |
+
vq_model.flow.flows.4.enc.in_layers.3.bias
|
| 600 |
+
vq_model.flow.flows.4.enc.in_layers.3.weight_g
|
| 601 |
+
vq_model.flow.flows.4.enc.in_layers.3.weight_v
|
| 602 |
+
vq_model.flow.flows.4.enc.res_skip_layers.0.bias
|
| 603 |
+
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_g
|
| 604 |
+
vq_model.flow.flows.4.enc.res_skip_layers.0.weight_v
|
| 605 |
+
vq_model.flow.flows.4.enc.res_skip_layers.1.bias
|
| 606 |
+
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_g
|
| 607 |
+
vq_model.flow.flows.4.enc.res_skip_layers.1.weight_v
|
| 608 |
+
vq_model.flow.flows.4.enc.res_skip_layers.2.bias
|
| 609 |
+
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_g
|
| 610 |
+
vq_model.flow.flows.4.enc.res_skip_layers.2.weight_v
|
| 611 |
+
vq_model.flow.flows.4.enc.res_skip_layers.3.bias
|
| 612 |
+
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_g
|
| 613 |
+
vq_model.flow.flows.4.enc.res_skip_layers.3.weight_v
|
| 614 |
+
vq_model.flow.flows.4.enc.cond_layer.bias
|
| 615 |
+
vq_model.flow.flows.4.enc.cond_layer.weight_g
|
| 616 |
+
vq_model.flow.flows.4.enc.cond_layer.weight_v
|
| 617 |
+
vq_model.flow.flows.4.post.weight
|
| 618 |
+
vq_model.flow.flows.4.post.bias
|
| 619 |
+
vq_model.flow.flows.6.pre.weight
|
| 620 |
+
vq_model.flow.flows.6.pre.bias
|
| 621 |
+
vq_model.flow.flows.6.enc.in_layers.0.bias
|
| 622 |
+
vq_model.flow.flows.6.enc.in_layers.0.weight_g
|
| 623 |
+
vq_model.flow.flows.6.enc.in_layers.0.weight_v
|
| 624 |
+
vq_model.flow.flows.6.enc.in_layers.1.bias
|
| 625 |
+
vq_model.flow.flows.6.enc.in_layers.1.weight_g
|
| 626 |
+
vq_model.flow.flows.6.enc.in_layers.1.weight_v
|
| 627 |
+
vq_model.flow.flows.6.enc.in_layers.2.bias
|
| 628 |
+
vq_model.flow.flows.6.enc.in_layers.2.weight_g
|
| 629 |
+
vq_model.flow.flows.6.enc.in_layers.2.weight_v
|
| 630 |
+
vq_model.flow.flows.6.enc.in_layers.3.bias
|
| 631 |
+
vq_model.flow.flows.6.enc.in_layers.3.weight_g
|
| 632 |
+
vq_model.flow.flows.6.enc.in_layers.3.weight_v
|
| 633 |
+
vq_model.flow.flows.6.enc.res_skip_layers.0.bias
|
| 634 |
+
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_g
|
| 635 |
+
vq_model.flow.flows.6.enc.res_skip_layers.0.weight_v
|
| 636 |
+
vq_model.flow.flows.6.enc.res_skip_layers.1.bias
|
| 637 |
+
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_g
|
| 638 |
+
vq_model.flow.flows.6.enc.res_skip_layers.1.weight_v
|
| 639 |
+
vq_model.flow.flows.6.enc.res_skip_layers.2.bias
|
| 640 |
+
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_g
|
| 641 |
+
vq_model.flow.flows.6.enc.res_skip_layers.2.weight_v
|
| 642 |
+
vq_model.flow.flows.6.enc.res_skip_layers.3.bias
|
| 643 |
+
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_g
|
| 644 |
+
vq_model.flow.flows.6.enc.res_skip_layers.3.weight_v
|
| 645 |
+
vq_model.flow.flows.6.enc.cond_layer.bias
|
| 646 |
+
vq_model.flow.flows.6.enc.cond_layer.weight_g
|
| 647 |
+
vq_model.flow.flows.6.enc.cond_layer.weight_v
|
| 648 |
+
vq_model.flow.flows.6.post.weight
|
| 649 |
+
vq_model.flow.flows.6.post.bias
|
| 650 |
+
vq_model.quantizer.vq.layers.0._codebook.embed
|
genie_tts/G2P/Chinese/CorrectPronunciation.py
CHANGED
|
@@ -1,50 +1,50 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import pickle
|
| 3 |
-
from typing import List, Dict, Any, Union
|
| 4 |
-
|
| 5 |
-
from ...Core.Resources import Chinese_G2P_DIR
|
| 6 |
-
|
| 7 |
-
# 常量定义
|
| 8 |
-
DEFAULT_CACHE_PATH = os.path.join(Chinese_G2P_DIR, "polyphonic.pickle")
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
class PolyphonicDictManager:
|
| 12 |
-
_data: Dict[str, Any] = {}
|
| 13 |
-
|
| 14 |
-
@classmethod
|
| 15 |
-
def get_data(cls, path: str = DEFAULT_CACHE_PATH) -> Dict[str, Any]:
|
| 16 |
-
if not cls._data:
|
| 17 |
-
with open(path, "rb") as f:
|
| 18 |
-
cls._data = pickle.load(f)
|
| 19 |
-
return cls._data
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def correct_pronunciation(word: str, word_pinyin: List[str]) -> Union[List[str], str]:
|
| 23 |
-
"""
|
| 24 |
-
根据加载的字典修正发音,作为供外部程序调用的独立接口。
|
| 25 |
-
逻辑:优先查找整词修正,如果没有整词匹配,则遍历每个字符进行单字修正。
|
| 26 |
-
|
| 27 |
-
Input:
|
| 28 |
-
word (str): 原始中文字符串,例如 "银行"。
|
| 29 |
-
word_pinyins (List[str]): 当前预测的拼音列表,例如 ['yin2', 'xing2']。
|
| 30 |
-
|
| 31 |
-
Output:
|
| 32 |
-
Union[List[str], str]: 修正后的拼音列表或字符串。
|
| 33 |
-
|
| 34 |
-
Example:
|
| 35 |
-
# 字典包含整词 {'银行': ['yin2', 'hang2']}
|
| 36 |
-
result = correct_pronunciation("银行", ["yin2", "xing2"])
|
| 37 |
-
# Result: ["yin2", "hang2"]
|
| 38 |
-
"""
|
| 39 |
-
pp_dict = PolyphonicDictManager.get_data()
|
| 40 |
-
new_word_pinyin = list(word_pinyin)
|
| 41 |
-
# 1. 尝试整词匹配
|
| 42 |
-
if new_pinyin := pp_dict.get(word):
|
| 43 |
-
return new_pinyin
|
| 44 |
-
# 2. 逐字修正
|
| 45 |
-
for idx, w in enumerate(word):
|
| 46 |
-
if idx >= len(new_word_pinyin):
|
| 47 |
-
break
|
| 48 |
-
if w_pinyin := pp_dict.get(w):
|
| 49 |
-
new_word_pinyin[idx] = w_pinyin[0]
|
| 50 |
-
return new_word_pinyin
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pickle
|
| 3 |
+
from typing import List, Dict, Any, Union
|
| 4 |
+
|
| 5 |
+
from ...Core.Resources import Chinese_G2P_DIR
|
| 6 |
+
|
| 7 |
+
# 常量定义
|
| 8 |
+
DEFAULT_CACHE_PATH = os.path.join(Chinese_G2P_DIR, "polyphonic.pickle")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class PolyphonicDictManager:
|
| 12 |
+
_data: Dict[str, Any] = {}
|
| 13 |
+
|
| 14 |
+
@classmethod
|
| 15 |
+
def get_data(cls, path: str = DEFAULT_CACHE_PATH) -> Dict[str, Any]:
|
| 16 |
+
if not cls._data:
|
| 17 |
+
with open(path, "rb") as f:
|
| 18 |
+
cls._data = pickle.load(f)
|
| 19 |
+
return cls._data
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def correct_pronunciation(word: str, word_pinyin: List[str]) -> Union[List[str], str]:
|
| 23 |
+
"""
|
| 24 |
+
根据加载的字典修正发音,作为供外部程序调用的独立接口。
|
| 25 |
+
逻辑:优先查找整词修正,如果没有整词匹配,则遍历每个字符进行单字修正。
|
| 26 |
+
|
| 27 |
+
Input:
|
| 28 |
+
word (str): 原始中文字符串,例如 "银行"。
|
| 29 |
+
word_pinyins (List[str]): 当前预测的拼音列表,例如 ['yin2', 'xing2']。
|
| 30 |
+
|
| 31 |
+
Output:
|
| 32 |
+
Union[List[str], str]: 修正后的拼音列表或字符串。
|
| 33 |
+
|
| 34 |
+
Example:
|
| 35 |
+
# 字典包含整词 {'银行': ['yin2', 'hang2']}
|
| 36 |
+
result = correct_pronunciation("银行", ["yin2", "xing2"])
|
| 37 |
+
# Result: ["yin2", "hang2"]
|
| 38 |
+
"""
|
| 39 |
+
pp_dict = PolyphonicDictManager.get_data()
|
| 40 |
+
new_word_pinyin = list(word_pinyin)
|
| 41 |
+
# 1. 尝试整词匹配
|
| 42 |
+
if new_pinyin := pp_dict.get(word):
|
| 43 |
+
return new_pinyin
|
| 44 |
+
# 2. 逐字修正
|
| 45 |
+
for idx, w in enumerate(word):
|
| 46 |
+
if idx >= len(new_word_pinyin):
|
| 47 |
+
break
|
| 48 |
+
if w_pinyin := pp_dict.get(w):
|
| 49 |
+
new_word_pinyin[idx] = w_pinyin[0]
|
| 50 |
+
return new_word_pinyin
|
genie_tts/G2P/Chinese/Erhua.py
CHANGED
|
@@ -1,49 +1,49 @@
|
|
| 1 |
-
from typing import List, Tuple, Set
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
class ErhuaProcessor:
|
| 5 |
-
"""
|
| 6 |
-
处理中文G2P中的儿化音逻辑。
|
| 7 |
-
"""
|
| 8 |
-
|
| 9 |
-
def __init__(self):
|
| 10 |
-
self.must_erhua: Set[str] = {
|
| 11 |
-
"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
|
| 12 |
-
}
|
| 13 |
-
self.not_erhua: Set[str] = {
|
| 14 |
-
"虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿",
|
| 15 |
-
"妻儿", "拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿",
|
| 16 |
-
"脑瘫儿", "流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿",
|
| 17 |
-
"侄儿", "孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿",
|
| 18 |
-
"猪儿", "猫儿", "狗儿", "少儿",
|
| 19 |
-
}
|
| 20 |
-
|
| 21 |
-
def merge_erhua(self, initials: List[str], finals: List[str], word: str, pos: str) -> Tuple[List[str], List[str]]:
|
| 22 |
-
# 1. 修正 er1 发音为 er2 (当'儿'在词尾且发音为er1时)
|
| 23 |
-
for i, phn in enumerate(finals):
|
| 24 |
-
if i == len(finals) - 1 and word[i] == "儿" and phn == "er1":
|
| 25 |
-
finals[i] = "er2"
|
| 26 |
-
# 2. 检查是否跳过儿化处理
|
| 27 |
-
if word not in self.must_erhua and (word in self.not_erhua or pos in {"a", "j", "nr"}):
|
| 28 |
-
return initials, finals
|
| 29 |
-
# 3. 长度校验 (处理如 "……" 等长度不一致的特殊符号情况)
|
| 30 |
-
if len(finals) != len(word):
|
| 31 |
-
return initials, finals
|
| 32 |
-
# 4. 执行儿化合并逻辑 (与前一个字发同音)
|
| 33 |
-
new_initials = []
|
| 34 |
-
new_finals = []
|
| 35 |
-
for i, phn in enumerate(finals):
|
| 36 |
-
# 判断是否需要合并儿化音
|
| 37 |
-
# 条件: 是最后一个字 + 是"儿" + 发音是er2/er5 + 后两字不在非儿化表中 + 前面已有韵母
|
| 38 |
-
if (
|
| 39 |
-
i == len(finals) - 1
|
| 40 |
-
and word[i] == "儿"
|
| 41 |
-
and phn in {"er2", "er5"}
|
| 42 |
-
and word[-2:] not in self.not_erhua
|
| 43 |
-
and new_finals
|
| 44 |
-
):
|
| 45 |
-
# 将 'er' 加上前一个字的声调
|
| 46 |
-
phn = "er" + new_finals[-1][-1]
|
| 47 |
-
new_initials.append(initials[i])
|
| 48 |
-
new_finals.append(phn)
|
| 49 |
-
return new_initials, new_finals
|
|
|
|
| 1 |
+
from typing import List, Tuple, Set
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class ErhuaProcessor:
|
| 5 |
+
"""
|
| 6 |
+
处理中文G2P中的儿化音逻辑。
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
def __init__(self):
|
| 10 |
+
self.must_erhua: Set[str] = {
|
| 11 |
+
"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
|
| 12 |
+
}
|
| 13 |
+
self.not_erhua: Set[str] = {
|
| 14 |
+
"虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿",
|
| 15 |
+
"妻儿", "拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿",
|
| 16 |
+
"脑瘫儿", "流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿",
|
| 17 |
+
"侄儿", "孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿",
|
| 18 |
+
"猪儿", "猫儿", "狗儿", "少儿",
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
def merge_erhua(self, initials: List[str], finals: List[str], word: str, pos: str) -> Tuple[List[str], List[str]]:
|
| 22 |
+
# 1. 修正 er1 发音为 er2 (当'儿'在词尾且发音为er1时)
|
| 23 |
+
for i, phn in enumerate(finals):
|
| 24 |
+
if i == len(finals) - 1 and word[i] == "儿" and phn == "er1":
|
| 25 |
+
finals[i] = "er2"
|
| 26 |
+
# 2. 检查是否跳过儿化处理
|
| 27 |
+
if word not in self.must_erhua and (word in self.not_erhua or pos in {"a", "j", "nr"}):
|
| 28 |
+
return initials, finals
|
| 29 |
+
# 3. 长度校验 (处理如 "……" 等长度不一致的特殊符号情况)
|
| 30 |
+
if len(finals) != len(word):
|
| 31 |
+
return initials, finals
|
| 32 |
+
# 4. 执行儿化合并逻辑 (与前一个字发同音)
|
| 33 |
+
new_initials = []
|
| 34 |
+
new_finals = []
|
| 35 |
+
for i, phn in enumerate(finals):
|
| 36 |
+
# 判断是否需要合并儿化音
|
| 37 |
+
# 条件: 是最后一个字 + 是"儿" + 发音是er2/er5 + 后两字不在非儿化表中 + 前面已有韵母
|
| 38 |
+
if (
|
| 39 |
+
i == len(finals) - 1
|
| 40 |
+
and word[i] == "儿"
|
| 41 |
+
and phn in {"er2", "er5"}
|
| 42 |
+
and word[-2:] not in self.not_erhua
|
| 43 |
+
and new_finals
|
| 44 |
+
):
|
| 45 |
+
# 将 'er' 加上前一个字的声调
|
| 46 |
+
phn = "er" + new_finals[-1][-1]
|
| 47 |
+
new_initials.append(initials[i])
|
| 48 |
+
new_finals.append(phn)
|
| 49 |
+
return new_initials, new_finals
|
genie_tts/G2P/Chinese/Normalization/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (204 Bytes). View file
|
|
|
genie_tts/G2P/Chinese/Normalization/__pycache__/char_convert.cpython-311.pyc
ADDED
|
Binary file (66.1 kB). View file
|
|
|
genie_tts/G2P/Chinese/Normalization/__pycache__/chronology.cpython-311.pyc
ADDED
|
Binary file (4.52 kB). View file
|
|
|
genie_tts/G2P/Chinese/Normalization/__pycache__/constants.cpython-311.pyc
ADDED
|
Binary file (2.36 kB). View file
|
|
|
genie_tts/G2P/Chinese/Normalization/__pycache__/num.cpython-311.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
genie_tts/G2P/Chinese/Normalization/__pycache__/phonecode.cpython-311.pyc
ADDED
|
Binary file (2.26 kB). View file
|
|
|
genie_tts/G2P/Chinese/Normalization/__pycache__/quantifier.cpython-311.pyc
ADDED
|
Binary file (1.94 kB). View file
|
|
|
genie_tts/G2P/Chinese/Normalization/__pycache__/text_normlization.cpython-311.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
genie_tts/G2P/Chinese/ToneSandhi.py
CHANGED
|
@@ -1,354 +1,354 @@
|
|
| 1 |
-
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
| 2 |
-
#
|
| 3 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
-
# you may not use this file except in compliance with the License.
|
| 5 |
-
# You may obtain a copy of the License at
|
| 6 |
-
#
|
| 7 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
-
#
|
| 9 |
-
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
-
# See the License for the specific language governing permissions and
|
| 13 |
-
# limitations under the License.
|
| 14 |
-
|
| 15 |
-
"""
|
| 16 |
-
中文拼音变调(Tone Sandhi)自动处理器
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
from typing import List
|
| 20 |
-
from typing import Tuple
|
| 21 |
-
import jieba_fast as jieba
|
| 22 |
-
from pypinyin import lazy_pinyin
|
| 23 |
-
from pypinyin import Style
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
class ToneSandhi:
|
| 27 |
-
def __init__(self):
|
| 28 |
-
self.must_neural_tone_words = {
|
| 29 |
-
"麻烦", "麻利", "鸳鸯", "高粱", "骨头", "骆驼", "马虎", "首饰", "馒头", "馄饨",
|
| 30 |
-
"风筝", "难为", "队伍", "阔气", "闺女", "门道", "锄头", "铺盖", "铃铛", "铁匠",
|
| 31 |
-
"钥匙", "里脊", "里头", "部分", "那么", "道士", "造化", "迷糊", "连累", "这么",
|
| 32 |
-
"这个", "运气", "过去", "软和", "转悠", "踏实", "跳蚤", "跟头", "趔趄", "财主",
|
| 33 |
-
"豆腐", "讲究", "记性", "记号", "认识", "规矩", "见识", "裁缝", "补丁", "衣裳",
|
| 34 |
-
"衣服", "衙门", "街坊", "行李", "行当", "蛤蟆", "蘑菇", "薄荷", "葫芦", "葡萄",
|
| 35 |
-
"萝卜", "荸荠", "苗条", "苗头", "苍蝇", "芝麻", "舒服", "舒坦", "舌头", "自在",
|
| 36 |
-
"膏药", "脾气", "脑袋", "脊梁", "能耐", "胳膊", "胭脂", "胡萝", "胡琴", "胡同",
|
| 37 |
-
"聪明", "耽误", "耽搁", "耷拉", "耳朵", "老爷", "老实", "老婆", "老头", "老太",
|
| 38 |
-
"翻腾", "罗嗦", "罐头", "编辑", "结实", "红火", "累赘", "糨糊", "糊涂", "精神",
|
| 39 |
-
"粮食", "簸箕", "篱笆", "算计", "算盘", "答应", "笤帚", "笑语", "笑话", "窟窿",
|
| 40 |
-
"窝囊", "窗户", "稳当", "稀罕", "称呼", "秧歌", "秀气", "秀才", "福气", "祖宗",
|
| 41 |
-
"砚台", "码头", "石榴", "石头", "石匠", "知识", "眼睛", "眯缝", "眨巴", "眉毛",
|
| 42 |
-
"相声", "盘算", "白净", "痢疾", "痛快", "疟疾", "疙瘩", "疏忽", "畜生", "生意",
|
| 43 |
-
"甘蔗", "琵琶", "琢磨", "琉璃", "玻璃", "玫瑰", "玄乎", "狐狸", "状元", "特务",
|
| 44 |
-
"牲口", "牙碜", "牌楼", "爽快", "爱人", "热闹", "烧饼", "烟筒", "烂糊", "点心",
|
| 45 |
-
"炊帚", "灯笼", "火候", "漂亮", "滑溜", "溜达", "温和", "清楚", "消息", "浪头",
|
| 46 |
-
"活泼", "比方", "正经", "欺负", "模糊", "槟榔", "棺材", "棒槌", "棉花", "核桃",
|
| 47 |
-
"栅栏", "柴火", "架势", "枕头", "枇杷", "机灵", "本事", "木头", "木匠", "朋友",
|
| 48 |
-
"月饼", "月亮", "暖和", "明白", "时候", "新鲜", "故事", "收拾", "收成", "提防",
|
| 49 |
-
"挖苦", "挑剔", "指甲", "指头", "拾掇", "拳头", "拨弄", "招牌", "招呼", "抬举",
|
| 50 |
-
"护士", "折腾", "扫帚", "打量", "打算", "打点", "打扮", "打听", "打发", "扎实",
|
| 51 |
-
"扁担", "戒指", "懒得", "意识", "意思", "情形", "悟性", "怪物", "思量", "怎么",
|
| 52 |
-
"念头", "念叨", "快活", "忙活", "志气", "心思", "得罪", "张罗", "弟兄", "开通",
|
| 53 |
-
"应酬", "庄稼", "干事", "帮手", "帐篷", "希罕", "师父", "师傅", "巴结", "巴掌",
|
| 54 |
-
"差事", "工夫", "岁数", "屁股", "尾巴", "少爷", "小气", "小伙", "将就", "对头",
|
| 55 |
-
"对付", "寡妇", "家伙", "客气", "实在", "官司", "学问", "学生", "字号", "嫁妆",
|
| 56 |
-
"媳妇", "媒人", "婆家", "娘家", "委屈", "姑娘", "姐夫", "妯娌", "妥当", "妖精",
|
| 57 |
-
"奴才", "女婿", "头发", "太阳", "大爷", "大方", "大意", "大夫", "多少", "多么",
|
| 58 |
-
"外甥", "壮实", "地道", "地方", "在乎", "困难", "嘴巴", "嘱咐", "嘟囔", "嘀咕",
|
| 59 |
-
"喜欢", "喇嘛", "喇叭", "商量", "唾沫", "哑巴", "哈欠", "哆嗦", "咳嗽", "和尚",
|
| 60 |
-
"告诉", "告示", "含糊", "吓唬", "后头", "名字", "名堂", "合同", "吆喝", "叫唤",
|
| 61 |
-
"口袋", "厚道", "厉害", "千斤", "包袱", "包涵", "匀称", "勤快", "动静", "动弹",
|
| 62 |
-
"功夫", "力气", "前头", "刺猬", "刺激", "别扭", "利落", "利索", "利害", "分析",
|
| 63 |
-
"出息", "凑合", "凉快", "冷战", "冤枉", "冒失", "养活", "关系", "先生", "兄弟",
|
| 64 |
-
"便宜", "使唤", "佩服", "作坊", "体面", "位置", "似的", "伙计", "休息", "什么",
|
| 65 |
-
"人家", "亲戚", "亲家", "
|
| 66 |
-
"两口", "东西", "东家", "世故", "不由", "不在", "下水", "下巴", "上头", "上司",
|
| 67 |
-
"丈夫", "丈人", "一辈", "那个", "菩萨", "父亲", "母亲", "咕噜", "邋遢", "费用",
|
| 68 |
-
"冤家", "甜头", "介绍", "荒唐", "大人", "泥鳅", "幸福", "熟悉", "计划", "扑腾",
|
| 69 |
-
"蜡烛", "姥爷", "照顾", "喉咙", "吉他", "弄堂", "蚂蚱", "凤凰", "拖沓", "寒碜",
|
| 70 |
-
"糟蹋", "倒腾", "报复", "逻辑", "盘缠", "喽啰", "牢骚", "咖喱", "扫把", "惦记",
|
| 71 |
-
}
|
| 72 |
-
self.must_not_neural_tone_words = {
|
| 73 |
-
"男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子", "人人",
|
| 74 |
-
"虎虎", "幺幺", "干嘛", "学子", "哈哈", "数数", "袅袅", "局地", "以下", "娃哈哈",
|
| 75 |
-
"花花草草", "留得", "耕地", "想想", "熙熙", "攘攘", "卵子", "死死", "冉冉", "恳恳",
|
| 76 |
-
"佼佼", "吵吵", "打打", "考考", "整整", "莘莘", "落地", "算子", "家家户户", "青青",
|
| 77 |
-
}
|
| 78 |
-
self.punc = ":,;。?!“”‘’':,;.?!"
|
| 79 |
-
|
| 80 |
-
# the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
|
| 81 |
-
# e.g.
|
| 82 |
-
# word: "家里"
|
| 83 |
-
# pos: "s"
|
| 84 |
-
# finals: ['ia1', 'i3']
|
| 85 |
-
def _neural_sandhi(self, word: str, pos: str, finals: List[str]) -> List[str]:
|
| 86 |
-
# reduplication words for n. and v. e.g. 奶奶, 试试, 旺旺
|
| 87 |
-
for j, item in enumerate(word):
|
| 88 |
-
if (
|
| 89 |
-
j - 1 >= 0
|
| 90 |
-
and item == word[j - 1]
|
| 91 |
-
and pos[0] in {"n", "v", "a"}
|
| 92 |
-
and word not in self.must_not_neural_tone_words
|
| 93 |
-
):
|
| 94 |
-
finals[j] = finals[j][:-1] + "5"
|
| 95 |
-
ge_idx = word.find("个")
|
| 96 |
-
if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶":
|
| 97 |
-
finals[-1] = finals[-1][:-1] + "5"
|
| 98 |
-
elif len(word) >= 1 and word[-1] in "的地得":
|
| 99 |
-
finals[-1] = finals[-1][:-1] + "5"
|
| 100 |
-
# e.g. 走了, 看着, 去过
|
| 101 |
-
elif len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"}:
|
| 102 |
-
finals[-1] = finals[-1][:-1] + "5"
|
| 103 |
-
elif len(word) > 1 and word[-1] in "们子" and pos in {"r", "n"} and word not in self.must_not_neural_tone_words:
|
| 104 |
-
finals[-1] = finals[-1][:-1] + "5"
|
| 105 |
-
# e.g. 桌上, 地下, 家里
|
| 106 |
-
elif len(word) > 1 and word[-1] in "上下里" and pos in {"s", "l", "f"}:
|
| 107 |
-
finals[-1] = finals[-1][:-1] + "5"
|
| 108 |
-
# e.g. 上来, 下去
|
| 109 |
-
elif len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开":
|
| 110 |
-
finals[-1] = finals[-1][:-1] + "5"
|
| 111 |
-
# 个做量词
|
| 112 |
-
elif (
|
| 113 |
-
ge_idx >= 1 and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")
|
| 114 |
-
) or word == "个":
|
| 115 |
-
finals[ge_idx] = finals[ge_idx][:-1] + "5"
|
| 116 |
-
else:
|
| 117 |
-
if word in self.must_neural_tone_words or word[-2:] in self.must_neural_tone_words:
|
| 118 |
-
finals[-1] = finals[-1][:-1] + "5"
|
| 119 |
-
|
| 120 |
-
word_list = self._split_word(word)
|
| 121 |
-
finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]):]]
|
| 122 |
-
for i, word in enumerate(word_list):
|
| 123 |
-
# conventional neural in Chinese
|
| 124 |
-
if word in self.must_neural_tone_words or word[-2:] in self.must_neural_tone_words:
|
| 125 |
-
finals_list[i][-1] = finals_list[i][-1][:-1] + "5"
|
| 126 |
-
finals = sum(finals_list, [])
|
| 127 |
-
return finals
|
| 128 |
-
|
| 129 |
-
@staticmethod
|
| 130 |
-
def _bu_sandhi(word: str, finals: List[str]) -> List[str]:
|
| 131 |
-
# e.g. 看不懂
|
| 132 |
-
if len(word) == 3 and word[1] == "不":
|
| 133 |
-
finals[1] = finals[1][:-1] + "5"
|
| 134 |
-
else:
|
| 135 |
-
for i, char in enumerate(word):
|
| 136 |
-
# "不" before tone4 should be bu2, e.g. 不怕
|
| 137 |
-
if char == "不" and i + 1 < len(word) and finals[i + 1][-1] == "4":
|
| 138 |
-
finals[i] = finals[i][:-1] + "2"
|
| 139 |
-
return finals
|
| 140 |
-
|
| 141 |
-
def _yi_sandhi(self, word: str, finals: List[str]) -> List[str]:
|
| 142 |
-
# "一" in number sequences, e.g. 一零零, 二一零
|
| 143 |
-
if word.find("一") != -1 and all([item.isnumeric() for item in word if item != "一"]):
|
| 144 |
-
return finals
|
| 145 |
-
# "一" between reduplication words should be yi5, e.g. 看一看
|
| 146 |
-
elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
|
| 147 |
-
finals[1] = finals[1][:-1] + "5"
|
| 148 |
-
# when "一" is ordinal word, it should be yi1
|
| 149 |
-
elif word.startswith("第一"):
|
| 150 |
-
finals[1] = finals[1][:-1] + "1"
|
| 151 |
-
else:
|
| 152 |
-
for i, char in enumerate(word):
|
| 153 |
-
if char == "一" and i + 1 < len(word):
|
| 154 |
-
# "一" before tone4 should be yi2, e.g. 一段
|
| 155 |
-
if finals[i + 1][-1] == "4":
|
| 156 |
-
finals[i] = finals[i][:-1] + "2"
|
| 157 |
-
# "一" before non-tone4 should be yi4, e.g. 一天
|
| 158 |
-
else:
|
| 159 |
-
# "一" 后面如果是标点,还读一声
|
| 160 |
-
if word[i + 1] not in self.punc:
|
| 161 |
-
finals[i] = finals[i][:-1] + "4"
|
| 162 |
-
return finals
|
| 163 |
-
|
| 164 |
-
@staticmethod
|
| 165 |
-
def _split_word(word: str) -> List[str]:
|
| 166 |
-
word_list = jieba.cut_for_search(word)
|
| 167 |
-
word_list = sorted(word_list, key=lambda i: len(i), reverse=False)
|
| 168 |
-
first_subword = word_list[0]
|
| 169 |
-
first_begin_idx = word.find(first_subword)
|
| 170 |
-
if first_begin_idx == 0:
|
| 171 |
-
second_subword = word[len(first_subword):]
|
| 172 |
-
new_word_list = [first_subword, second_subword]
|
| 173 |
-
else:
|
| 174 |
-
second_subword = word[: -len(first_subword)]
|
| 175 |
-
new_word_list = [second_subword, first_subword]
|
| 176 |
-
return new_word_list
|
| 177 |
-
|
| 178 |
-
def _three_sandhi(self, word: str, finals: List[str]) -> List[str]:
|
| 179 |
-
if len(word) == 2 and self._all_tone_three(finals):
|
| 180 |
-
finals[0] = finals[0][:-1] + "2"
|
| 181 |
-
elif len(word) == 3:
|
| 182 |
-
word_list = self._split_word(word)
|
| 183 |
-
if self._all_tone_three(finals):
|
| 184 |
-
# disyllabic + monosyllabic, e.g. 蒙古/包
|
| 185 |
-
if len(word_list[0]) == 2:
|
| 186 |
-
finals[0] = finals[0][:-1] + "2"
|
| 187 |
-
finals[1] = finals[1][:-1] + "2"
|
| 188 |
-
# monosyllabic + disyllabic, e.g. 纸/老虎
|
| 189 |
-
elif len(word_list[0]) == 1:
|
| 190 |
-
finals[1] = finals[1][:-1] + "2"
|
| 191 |
-
else:
|
| 192 |
-
finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]):]]
|
| 193 |
-
if len(finals_list) == 2:
|
| 194 |
-
for i, sub in enumerate(finals_list):
|
| 195 |
-
# e.g. 所有/人
|
| 196 |
-
if self._all_tone_three(sub) and len(sub) == 2:
|
| 197 |
-
finals_list[i][0] = finals_list[i][0][:-1] + "2"
|
| 198 |
-
# e.g. 好/喜欢
|
| 199 |
-
elif (
|
| 200 |
-
i == 1
|
| 201 |
-
and not self._all_tone_three(sub)
|
| 202 |
-
and finals_list[i][0][-1] == "3"
|
| 203 |
-
and finals_list[0][-1][-1] == "3"
|
| 204 |
-
):
|
| 205 |
-
finals_list[0][-1] = finals_list[0][-1][:-1] + "2"
|
| 206 |
-
finals = sum(finals_list, [])
|
| 207 |
-
# split idiom into two words whose length is 2
|
| 208 |
-
elif len(word) == 4:
|
| 209 |
-
finals_list = [finals[:2], finals[2:]]
|
| 210 |
-
finals = []
|
| 211 |
-
for sub in finals_list:
|
| 212 |
-
if self._all_tone_three(sub):
|
| 213 |
-
sub[0] = sub[0][:-1] + "2"
|
| 214 |
-
finals += sub
|
| 215 |
-
|
| 216 |
-
return finals
|
| 217 |
-
|
| 218 |
-
@staticmethod
|
| 219 |
-
def _all_tone_three(finals: List[str]) -> bool:
|
| 220 |
-
# 增加 len(x) > 0 的判断,防止空字符串导致崩溃
|
| 221 |
-
return all(len(x) > 0 and x[-1] == "3" for x in finals)
|
| 222 |
-
|
| 223 |
-
@staticmethod
|
| 224 |
-
def _merge_bu(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 225 |
-
new_seg = []
|
| 226 |
-
last_word = ""
|
| 227 |
-
for word, pos in seg:
|
| 228 |
-
if last_word == "不":
|
| 229 |
-
word = last_word + word
|
| 230 |
-
if word != "不":
|
| 231 |
-
new_seg.append((word, pos))
|
| 232 |
-
last_word = word[:]
|
| 233 |
-
if last_word == "不":
|
| 234 |
-
new_seg.append((last_word, "d"))
|
| 235 |
-
return new_seg
|
| 236 |
-
|
| 237 |
-
@staticmethod
|
| 238 |
-
def _merge_yi(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 239 |
-
new_seg = []
|
| 240 |
-
i = 0
|
| 241 |
-
# function 1
|
| 242 |
-
while i < len(seg):
|
| 243 |
-
word, pos = seg[i]
|
| 244 |
-
merged = False
|
| 245 |
-
if i - 1 >= 0 and word == "一" and i + 1 < len(seg):
|
| 246 |
-
last = new_seg[-1] if new_seg else seg[i - 1]
|
| 247 |
-
if last[0] == seg[i + 1][0] and last[1] == "v" and seg[i + 1][1] == "v":
|
| 248 |
-
combined = last[0] + "一" + seg[i + 1][0]
|
| 249 |
-
new_seg[-1] = [combined, last[1]]
|
| 250 |
-
i += 2
|
| 251 |
-
merged = True
|
| 252 |
-
if not merged:
|
| 253 |
-
new_seg.append([word, pos])
|
| 254 |
-
i += 1
|
| 255 |
-
seg = new_seg
|
| 256 |
-
new_seg = []
|
| 257 |
-
# function 2
|
| 258 |
-
for word, pos in seg:
|
| 259 |
-
if new_seg and new_seg[-1][0] == "一":
|
| 260 |
-
new_seg[-1][0] = new_seg[-1][0] + word
|
| 261 |
-
else:
|
| 262 |
-
new_seg.append([word, pos])
|
| 263 |
-
return new_seg
|
| 264 |
-
|
| 265 |
-
# the first and the second words are all_tone_three
|
| 266 |
-
def _merge_continuous_three_tones(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 267 |
-
new_seg = []
|
| 268 |
-
sub_finals_list = [
|
| 269 |
-
lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) for (word, pos) in seg
|
| 270 |
-
]
|
| 271 |
-
assert len(sub_finals_list) == len(seg)
|
| 272 |
-
merge_last = [False] * len(seg)
|
| 273 |
-
for i, (word, pos) in enumerate(seg):
|
| 274 |
-
if (
|
| 275 |
-
i - 1 >= 0
|
| 276 |
-
and self._all_tone_three(sub_finals_list[i - 1])
|
| 277 |
-
and self._all_tone_three(sub_finals_list[i])
|
| 278 |
-
and not merge_last[i - 1]
|
| 279 |
-
):
|
| 280 |
-
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
|
| 281 |
-
if not self._is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(seg[i][0]) <= 3:
|
| 282 |
-
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 283 |
-
merge_last[i] = True
|
| 284 |
-
else:
|
| 285 |
-
new_seg.append([word, pos])
|
| 286 |
-
else:
|
| 287 |
-
new_seg.append([word, pos])
|
| 288 |
-
|
| 289 |
-
return new_seg
|
| 290 |
-
|
| 291 |
-
@staticmethod
|
| 292 |
-
def _is_reduplication(word: str) -> bool:
|
| 293 |
-
return len(word) == 2 and word[0] == word[1]
|
| 294 |
-
|
| 295 |
-
# the last char of first word and the first char of second word is tone_three
|
| 296 |
-
def _merge_continuous_three_tones_2(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 297 |
-
new_seg = []
|
| 298 |
-
sub_finals_list = [
|
| 299 |
-
lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) for (word, pos) in seg
|
| 300 |
-
]
|
| 301 |
-
assert len(sub_finals_list) == len(seg)
|
| 302 |
-
merge_last = [False] * len(seg)
|
| 303 |
-
for i, (word, pos) in enumerate(seg):
|
| 304 |
-
if (
|
| 305 |
-
i - 1 >= 0
|
| 306 |
-
and sub_finals_list[i - 1][-1][-1] == "3"
|
| 307 |
-
and sub_finals_list[i][0][-1] == "3"
|
| 308 |
-
and not merge_last[i - 1]
|
| 309 |
-
):
|
| 310 |
-
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
|
| 311 |
-
if not self._is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(seg[i][0]) <= 3:
|
| 312 |
-
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 313 |
-
merge_last[i] = True
|
| 314 |
-
else:
|
| 315 |
-
new_seg.append([word, pos])
|
| 316 |
-
else:
|
| 317 |
-
new_seg.append([word, pos])
|
| 318 |
-
return new_seg
|
| 319 |
-
|
| 320 |
-
@staticmethod
|
| 321 |
-
def _merge_er(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 322 |
-
new_seg = []
|
| 323 |
-
for i, (word, pos) in enumerate(seg):
|
| 324 |
-
if i - 1 >= 0 and word == "儿" and seg[i - 1][0] != "#":
|
| 325 |
-
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 326 |
-
else:
|
| 327 |
-
new_seg.append([word, pos])
|
| 328 |
-
return new_seg
|
| 329 |
-
|
| 330 |
-
@staticmethod
|
| 331 |
-
def _merge_reduplication(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 332 |
-
new_seg = []
|
| 333 |
-
for i, (word, pos) in enumerate(seg):
|
| 334 |
-
if new_seg and word == new_seg[-1][0]:
|
| 335 |
-
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 336 |
-
else:
|
| 337 |
-
new_seg.append([word, pos])
|
| 338 |
-
return new_seg
|
| 339 |
-
|
| 340 |
-
def pre_merge_for_modify(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 341 |
-
seg = self._merge_bu(seg)
|
| 342 |
-
seg = self._merge_yi(seg)
|
| 343 |
-
seg = self._merge_reduplication(seg)
|
| 344 |
-
seg = self._merge_continuous_three_tones(seg)
|
| 345 |
-
seg = self._merge_continuous_three_tones_2(seg)
|
| 346 |
-
seg = self._merge_er(seg)
|
| 347 |
-
return seg
|
| 348 |
-
|
| 349 |
-
def modified_tone(self, word: str, pos: str, finals: List[str]) -> List[str]:
|
| 350 |
-
finals = self._bu_sandhi(word, finals)
|
| 351 |
-
finals = self._yi_sandhi(word, finals)
|
| 352 |
-
finals = self._neural_sandhi(word, pos, finals)
|
| 353 |
-
finals = self._three_sandhi(word, finals)
|
| 354 |
-
return finals
|
|
|
|
| 1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
"""
|
| 16 |
+
中文拼音变调(Tone Sandhi)自动处理器
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from typing import List
|
| 20 |
+
from typing import Tuple
|
| 21 |
+
import jieba_fast as jieba
|
| 22 |
+
from pypinyin import lazy_pinyin
|
| 23 |
+
from pypinyin import Style
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class ToneSandhi:
|
| 27 |
+
def __init__(self):
|
| 28 |
+
self.must_neural_tone_words = {
|
| 29 |
+
"麻烦", "麻利", "鸳鸯", "高粱", "骨头", "骆驼", "马虎", "首饰", "馒头", "馄饨",
|
| 30 |
+
"风筝", "难为", "队伍", "阔气", "闺女", "门道", "锄头", "铺盖", "铃铛", "铁匠",
|
| 31 |
+
"钥匙", "里脊", "里头", "部分", "那么", "道士", "造化", "迷糊", "连累", "这么",
|
| 32 |
+
"这个", "运气", "过去", "软和", "转悠", "踏实", "跳蚤", "跟头", "趔趄", "财主",
|
| 33 |
+
"豆腐", "讲究", "记性", "记号", "认识", "规矩", "见识", "裁缝", "补丁", "衣裳",
|
| 34 |
+
"衣服", "衙门", "街坊", "行李", "行当", "蛤蟆", "蘑菇", "薄荷", "葫芦", "葡萄",
|
| 35 |
+
"萝卜", "荸荠", "苗条", "苗头", "苍蝇", "芝麻", "舒服", "舒坦", "舌头", "自在",
|
| 36 |
+
"膏药", "脾气", "脑袋", "脊梁", "能耐", "胳膊", "胭脂", "胡萝", "胡琴", "胡同",
|
| 37 |
+
"聪明", "耽误", "耽搁", "耷拉", "耳朵", "老爷", "老实", "老婆", "老头", "老太",
|
| 38 |
+
"翻腾", "罗嗦", "罐头", "编辑", "结实", "红火", "累赘", "糨糊", "糊涂", "精神",
|
| 39 |
+
"粮食", "簸箕", "篱笆", "算计", "算盘", "答应", "笤帚", "笑语", "笑话", "窟窿",
|
| 40 |
+
"窝囊", "窗户", "稳当", "稀罕", "称呼", "秧歌", "秀气", "秀才", "福气", "祖宗",
|
| 41 |
+
"砚台", "码头", "石榴", "石头", "石匠", "知识", "眼睛", "眯缝", "眨巴", "眉毛",
|
| 42 |
+
"相声", "盘算", "白净", "痢疾", "痛快", "疟疾", "疙瘩", "疏忽", "畜生", "生意",
|
| 43 |
+
"甘蔗", "琵琶", "琢磨", "琉璃", "玻璃", "玫瑰", "玄乎", "狐狸", "状元", "特务",
|
| 44 |
+
"牲口", "牙碜", "牌楼", "爽快", "爱人", "热闹", "烧饼", "烟筒", "烂糊", "点心",
|
| 45 |
+
"炊帚", "灯笼", "火候", "漂亮", "滑溜", "溜达", "温和", "清楚", "消息", "浪头",
|
| 46 |
+
"活泼", "比方", "正经", "欺负", "模糊", "槟榔", "棺材", "棒槌", "棉花", "核桃",
|
| 47 |
+
"栅栏", "柴火", "架势", "枕头", "枇杷", "机灵", "本事", "木头", "木匠", "朋友",
|
| 48 |
+
"月饼", "月亮", "暖和", "明白", "时候", "新鲜", "故事", "收拾", "收成", "提防",
|
| 49 |
+
"挖苦", "挑剔", "指甲", "指头", "拾掇", "拳头", "拨弄", "招牌", "招呼", "抬举",
|
| 50 |
+
"护士", "折腾", "扫帚", "打量", "打算", "打点", "打扮", "打听", "打发", "扎实",
|
| 51 |
+
"扁担", "戒指", "懒得", "意识", "意思", "情形", "悟性", "怪物", "思量", "怎么",
|
| 52 |
+
"念头", "念叨", "快活", "忙活", "志气", "心思", "得罪", "张罗", "弟兄", "开通",
|
| 53 |
+
"应酬", "庄稼", "干事", "帮手", "帐篷", "希罕", "师父", "师傅", "巴结", "巴掌",
|
| 54 |
+
"差事", "工夫", "岁数", "屁股", "尾巴", "少爷", "小气", "小伙", "将就", "对头",
|
| 55 |
+
"对付", "寡妇", "家伙", "客气", "实在", "官司", "学问", "学生", "字号", "嫁妆",
|
| 56 |
+
"媳妇", "媒人", "婆家", "娘家", "委屈", "姑娘", "姐夫", "妯娌", "妥当", "妖精",
|
| 57 |
+
"奴才", "女婿", "头发", "太阳", "大爷", "大方", "大意", "大夫", "多少", "多么",
|
| 58 |
+
"外甥", "壮实", "地道", "地方", "在乎", "困难", "嘴巴", "嘱咐", "嘟囔", "嘀咕",
|
| 59 |
+
"喜欢", "喇嘛", "喇叭", "商量", "唾沫", "哑巴", "哈欠", "哆嗦", "咳嗽", "和尚",
|
| 60 |
+
"告诉", "告示", "含糊", "吓唬", "后头", "名字", "名堂", "合同", "吆喝", "叫唤",
|
| 61 |
+
"口袋", "厚道", "厉害", "千斤", "包袱", "包涵", "匀称", "勤快", "动静", "动弹",
|
| 62 |
+
"功夫", "力气", "前头", "刺猬", "刺激", "别扭", "利落", "利索", "利害", "分析",
|
| 63 |
+
"出息", "凑合", "凉快", "冷战", "冤枉", "冒失", "养活", "关系", "先生", "兄弟",
|
| 64 |
+
"便宜", "使唤", "佩服", "作坊", "体面", "位置", "似的", "伙计", "休息", "什么",
|
| 65 |
+
"人家", "亲戚", "亲家", "交情", "云彩", "事情", "买卖", "主意", "丫头", "丧气",
|
| 66 |
+
"两口", "东西", "东家", "世故", "不由", "不在", "下水", "下巴", "上头", "上司",
|
| 67 |
+
"丈夫", "丈人", "一辈", "那个", "菩萨", "父亲", "母亲", "咕噜", "邋遢", "费用",
|
| 68 |
+
"冤家", "甜头", "介绍", "荒唐", "大人", "泥鳅", "幸福", "熟悉", "计划", "扑腾",
|
| 69 |
+
"蜡烛", "姥爷", "照顾", "喉咙", "吉他", "弄堂", "蚂蚱", "凤凰", "拖沓", "寒碜",
|
| 70 |
+
"糟蹋", "倒腾", "报复", "逻辑", "盘缠", "喽啰", "牢骚", "咖喱", "扫把", "惦记",
|
| 71 |
+
}
|
| 72 |
+
self.must_not_neural_tone_words = {
|
| 73 |
+
"男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子", "人人",
|
| 74 |
+
"虎虎", "幺幺", "干嘛", "学子", "哈哈", "数数", "袅袅", "局地", "以下", "娃哈哈",
|
| 75 |
+
"花花草草", "留得", "耕地", "想想", "熙熙", "攘攘", "卵子", "死死", "冉冉", "恳恳",
|
| 76 |
+
"佼佼", "吵吵", "打打", "考考", "整整", "莘莘", "落地", "算子", "家家户户", "青青",
|
| 77 |
+
}
|
| 78 |
+
self.punc = ":,;。?!“”‘’':,;.?!"
|
| 79 |
+
|
| 80 |
+
# the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
|
| 81 |
+
# e.g.
|
| 82 |
+
# word: "家里"
|
| 83 |
+
# pos: "s"
|
| 84 |
+
# finals: ['ia1', 'i3']
|
| 85 |
+
def _neural_sandhi(self, word: str, pos: str, finals: List[str]) -> List[str]:
|
| 86 |
+
# reduplication words for n. and v. e.g. 奶奶, 试试, 旺旺
|
| 87 |
+
for j, item in enumerate(word):
|
| 88 |
+
if (
|
| 89 |
+
j - 1 >= 0
|
| 90 |
+
and item == word[j - 1]
|
| 91 |
+
and pos[0] in {"n", "v", "a"}
|
| 92 |
+
and word not in self.must_not_neural_tone_words
|
| 93 |
+
):
|
| 94 |
+
finals[j] = finals[j][:-1] + "5"
|
| 95 |
+
ge_idx = word.find("个")
|
| 96 |
+
if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶":
|
| 97 |
+
finals[-1] = finals[-1][:-1] + "5"
|
| 98 |
+
elif len(word) >= 1 and word[-1] in "的地得":
|
| 99 |
+
finals[-1] = finals[-1][:-1] + "5"
|
| 100 |
+
# e.g. 走了, 看着, 去过
|
| 101 |
+
elif len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"}:
|
| 102 |
+
finals[-1] = finals[-1][:-1] + "5"
|
| 103 |
+
elif len(word) > 1 and word[-1] in "们子" and pos in {"r", "n"} and word not in self.must_not_neural_tone_words:
|
| 104 |
+
finals[-1] = finals[-1][:-1] + "5"
|
| 105 |
+
# e.g. 桌上, 地下, 家里
|
| 106 |
+
elif len(word) > 1 and word[-1] in "上下里" and pos in {"s", "l", "f"}:
|
| 107 |
+
finals[-1] = finals[-1][:-1] + "5"
|
| 108 |
+
# e.g. 上来, 下去
|
| 109 |
+
elif len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开":
|
| 110 |
+
finals[-1] = finals[-1][:-1] + "5"
|
| 111 |
+
# 个做量词
|
| 112 |
+
elif (
|
| 113 |
+
ge_idx >= 1 and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")
|
| 114 |
+
) or word == "个":
|
| 115 |
+
finals[ge_idx] = finals[ge_idx][:-1] + "5"
|
| 116 |
+
else:
|
| 117 |
+
if word in self.must_neural_tone_words or word[-2:] in self.must_neural_tone_words:
|
| 118 |
+
finals[-1] = finals[-1][:-1] + "5"
|
| 119 |
+
|
| 120 |
+
word_list = self._split_word(word)
|
| 121 |
+
finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]):]]
|
| 122 |
+
for i, word in enumerate(word_list):
|
| 123 |
+
# conventional neural in Chinese
|
| 124 |
+
if word in self.must_neural_tone_words or word[-2:] in self.must_neural_tone_words:
|
| 125 |
+
finals_list[i][-1] = finals_list[i][-1][:-1] + "5"
|
| 126 |
+
finals = sum(finals_list, [])
|
| 127 |
+
return finals
|
| 128 |
+
|
| 129 |
+
@staticmethod
|
| 130 |
+
def _bu_sandhi(word: str, finals: List[str]) -> List[str]:
|
| 131 |
+
# e.g. 看不懂
|
| 132 |
+
if len(word) == 3 and word[1] == "不":
|
| 133 |
+
finals[1] = finals[1][:-1] + "5"
|
| 134 |
+
else:
|
| 135 |
+
for i, char in enumerate(word):
|
| 136 |
+
# "不" before tone4 should be bu2, e.g. 不怕
|
| 137 |
+
if char == "不" and i + 1 < len(word) and finals[i + 1][-1] == "4":
|
| 138 |
+
finals[i] = finals[i][:-1] + "2"
|
| 139 |
+
return finals
|
| 140 |
+
|
| 141 |
+
def _yi_sandhi(self, word: str, finals: List[str]) -> List[str]:
|
| 142 |
+
# "一" in number sequences, e.g. 一零零, 二一零
|
| 143 |
+
if word.find("一") != -1 and all([item.isnumeric() for item in word if item != "一"]):
|
| 144 |
+
return finals
|
| 145 |
+
# "一" between reduplication words should be yi5, e.g. 看一看
|
| 146 |
+
elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
|
| 147 |
+
finals[1] = finals[1][:-1] + "5"
|
| 148 |
+
# when "一" is ordinal word, it should be yi1
|
| 149 |
+
elif word.startswith("第一"):
|
| 150 |
+
finals[1] = finals[1][:-1] + "1"
|
| 151 |
+
else:
|
| 152 |
+
for i, char in enumerate(word):
|
| 153 |
+
if char == "一" and i + 1 < len(word):
|
| 154 |
+
# "一" before tone4 should be yi2, e.g. 一段
|
| 155 |
+
if finals[i + 1][-1] == "4":
|
| 156 |
+
finals[i] = finals[i][:-1] + "2"
|
| 157 |
+
# "一" before non-tone4 should be yi4, e.g. 一天
|
| 158 |
+
else:
|
| 159 |
+
# "一" 后面如果是标点,还读一声
|
| 160 |
+
if word[i + 1] not in self.punc:
|
| 161 |
+
finals[i] = finals[i][:-1] + "4"
|
| 162 |
+
return finals
|
| 163 |
+
|
| 164 |
+
@staticmethod
|
| 165 |
+
def _split_word(word: str) -> List[str]:
|
| 166 |
+
word_list = jieba.cut_for_search(word)
|
| 167 |
+
word_list = sorted(word_list, key=lambda i: len(i), reverse=False)
|
| 168 |
+
first_subword = word_list[0]
|
| 169 |
+
first_begin_idx = word.find(first_subword)
|
| 170 |
+
if first_begin_idx == 0:
|
| 171 |
+
second_subword = word[len(first_subword):]
|
| 172 |
+
new_word_list = [first_subword, second_subword]
|
| 173 |
+
else:
|
| 174 |
+
second_subword = word[: -len(first_subword)]
|
| 175 |
+
new_word_list = [second_subword, first_subword]
|
| 176 |
+
return new_word_list
|
| 177 |
+
|
| 178 |
+
def _three_sandhi(self, word: str, finals: List[str]) -> List[str]:
|
| 179 |
+
if len(word) == 2 and self._all_tone_three(finals):
|
| 180 |
+
finals[0] = finals[0][:-1] + "2"
|
| 181 |
+
elif len(word) == 3:
|
| 182 |
+
word_list = self._split_word(word)
|
| 183 |
+
if self._all_tone_three(finals):
|
| 184 |
+
# disyllabic + monosyllabic, e.g. 蒙古/包
|
| 185 |
+
if len(word_list[0]) == 2:
|
| 186 |
+
finals[0] = finals[0][:-1] + "2"
|
| 187 |
+
finals[1] = finals[1][:-1] + "2"
|
| 188 |
+
# monosyllabic + disyllabic, e.g. 纸/老虎
|
| 189 |
+
elif len(word_list[0]) == 1:
|
| 190 |
+
finals[1] = finals[1][:-1] + "2"
|
| 191 |
+
else:
|
| 192 |
+
finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]):]]
|
| 193 |
+
if len(finals_list) == 2:
|
| 194 |
+
for i, sub in enumerate(finals_list):
|
| 195 |
+
# e.g. 所有/人
|
| 196 |
+
if self._all_tone_three(sub) and len(sub) == 2:
|
| 197 |
+
finals_list[i][0] = finals_list[i][0][:-1] + "2"
|
| 198 |
+
# e.g. 好/喜欢
|
| 199 |
+
elif (
|
| 200 |
+
i == 1
|
| 201 |
+
and not self._all_tone_three(sub)
|
| 202 |
+
and finals_list[i][0][-1] == "3"
|
| 203 |
+
and finals_list[0][-1][-1] == "3"
|
| 204 |
+
):
|
| 205 |
+
finals_list[0][-1] = finals_list[0][-1][:-1] + "2"
|
| 206 |
+
finals = sum(finals_list, [])
|
| 207 |
+
# split idiom into two words whose length is 2
|
| 208 |
+
elif len(word) == 4:
|
| 209 |
+
finals_list = [finals[:2], finals[2:]]
|
| 210 |
+
finals = []
|
| 211 |
+
for sub in finals_list:
|
| 212 |
+
if self._all_tone_three(sub):
|
| 213 |
+
sub[0] = sub[0][:-1] + "2"
|
| 214 |
+
finals += sub
|
| 215 |
+
|
| 216 |
+
return finals
|
| 217 |
+
|
| 218 |
+
@staticmethod
|
| 219 |
+
def _all_tone_three(finals: List[str]) -> bool:
|
| 220 |
+
# 增加 len(x) > 0 的判断,防止空字符串导致崩溃
|
| 221 |
+
return all(len(x) > 0 and x[-1] == "3" for x in finals)
|
| 222 |
+
|
| 223 |
+
@staticmethod
|
| 224 |
+
def _merge_bu(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 225 |
+
new_seg = []
|
| 226 |
+
last_word = ""
|
| 227 |
+
for word, pos in seg:
|
| 228 |
+
if last_word == "不":
|
| 229 |
+
word = last_word + word
|
| 230 |
+
if word != "不":
|
| 231 |
+
new_seg.append((word, pos))
|
| 232 |
+
last_word = word[:]
|
| 233 |
+
if last_word == "不":
|
| 234 |
+
new_seg.append((last_word, "d"))
|
| 235 |
+
return new_seg
|
| 236 |
+
|
| 237 |
+
@staticmethod
|
| 238 |
+
def _merge_yi(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 239 |
+
new_seg = []
|
| 240 |
+
i = 0
|
| 241 |
+
# function 1
|
| 242 |
+
while i < len(seg):
|
| 243 |
+
word, pos = seg[i]
|
| 244 |
+
merged = False
|
| 245 |
+
if i - 1 >= 0 and word == "一" and i + 1 < len(seg):
|
| 246 |
+
last = new_seg[-1] if new_seg else seg[i - 1]
|
| 247 |
+
if last[0] == seg[i + 1][0] and last[1] == "v" and seg[i + 1][1] == "v":
|
| 248 |
+
combined = last[0] + "一" + seg[i + 1][0]
|
| 249 |
+
new_seg[-1] = [combined, last[1]]
|
| 250 |
+
i += 2
|
| 251 |
+
merged = True
|
| 252 |
+
if not merged:
|
| 253 |
+
new_seg.append([word, pos])
|
| 254 |
+
i += 1
|
| 255 |
+
seg = new_seg
|
| 256 |
+
new_seg = []
|
| 257 |
+
# function 2
|
| 258 |
+
for word, pos in seg:
|
| 259 |
+
if new_seg and new_seg[-1][0] == "一":
|
| 260 |
+
new_seg[-1][0] = new_seg[-1][0] + word
|
| 261 |
+
else:
|
| 262 |
+
new_seg.append([word, pos])
|
| 263 |
+
return new_seg
|
| 264 |
+
|
| 265 |
+
# the first and the second words are all_tone_three
|
| 266 |
+
def _merge_continuous_three_tones(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 267 |
+
new_seg = []
|
| 268 |
+
sub_finals_list = [
|
| 269 |
+
lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) for (word, pos) in seg
|
| 270 |
+
]
|
| 271 |
+
assert len(sub_finals_list) == len(seg)
|
| 272 |
+
merge_last = [False] * len(seg)
|
| 273 |
+
for i, (word, pos) in enumerate(seg):
|
| 274 |
+
if (
|
| 275 |
+
i - 1 >= 0
|
| 276 |
+
and self._all_tone_three(sub_finals_list[i - 1])
|
| 277 |
+
and self._all_tone_three(sub_finals_list[i])
|
| 278 |
+
and not merge_last[i - 1]
|
| 279 |
+
):
|
| 280 |
+
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
|
| 281 |
+
if not self._is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(seg[i][0]) <= 3:
|
| 282 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 283 |
+
merge_last[i] = True
|
| 284 |
+
else:
|
| 285 |
+
new_seg.append([word, pos])
|
| 286 |
+
else:
|
| 287 |
+
new_seg.append([word, pos])
|
| 288 |
+
|
| 289 |
+
return new_seg
|
| 290 |
+
|
| 291 |
+
@staticmethod
|
| 292 |
+
def _is_reduplication(word: str) -> bool:
|
| 293 |
+
return len(word) == 2 and word[0] == word[1]
|
| 294 |
+
|
| 295 |
+
# the last char of first word and the first char of second word is tone_three
|
| 296 |
+
def _merge_continuous_three_tones_2(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 297 |
+
new_seg = []
|
| 298 |
+
sub_finals_list = [
|
| 299 |
+
lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) for (word, pos) in seg
|
| 300 |
+
]
|
| 301 |
+
assert len(sub_finals_list) == len(seg)
|
| 302 |
+
merge_last = [False] * len(seg)
|
| 303 |
+
for i, (word, pos) in enumerate(seg):
|
| 304 |
+
if (
|
| 305 |
+
i - 1 >= 0
|
| 306 |
+
and sub_finals_list[i - 1][-1][-1] == "3"
|
| 307 |
+
and sub_finals_list[i][0][-1] == "3"
|
| 308 |
+
and not merge_last[i - 1]
|
| 309 |
+
):
|
| 310 |
+
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
|
| 311 |
+
if not self._is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(seg[i][0]) <= 3:
|
| 312 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 313 |
+
merge_last[i] = True
|
| 314 |
+
else:
|
| 315 |
+
new_seg.append([word, pos])
|
| 316 |
+
else:
|
| 317 |
+
new_seg.append([word, pos])
|
| 318 |
+
return new_seg
|
| 319 |
+
|
| 320 |
+
@staticmethod
|
| 321 |
+
def _merge_er(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 322 |
+
new_seg = []
|
| 323 |
+
for i, (word, pos) in enumerate(seg):
|
| 324 |
+
if i - 1 >= 0 and word == "儿" and seg[i - 1][0] != "#":
|
| 325 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 326 |
+
else:
|
| 327 |
+
new_seg.append([word, pos])
|
| 328 |
+
return new_seg
|
| 329 |
+
|
| 330 |
+
@staticmethod
|
| 331 |
+
def _merge_reduplication(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 332 |
+
new_seg = []
|
| 333 |
+
for i, (word, pos) in enumerate(seg):
|
| 334 |
+
if new_seg and word == new_seg[-1][0]:
|
| 335 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
| 336 |
+
else:
|
| 337 |
+
new_seg.append([word, pos])
|
| 338 |
+
return new_seg
|
| 339 |
+
|
| 340 |
+
def pre_merge_for_modify(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
| 341 |
+
seg = self._merge_bu(seg)
|
| 342 |
+
seg = self._merge_yi(seg)
|
| 343 |
+
seg = self._merge_reduplication(seg)
|
| 344 |
+
seg = self._merge_continuous_three_tones(seg)
|
| 345 |
+
seg = self._merge_continuous_three_tones_2(seg)
|
| 346 |
+
seg = self._merge_er(seg)
|
| 347 |
+
return seg
|
| 348 |
+
|
| 349 |
+
def modified_tone(self, word: str, pos: str, finals: List[str]) -> List[str]:
|
| 350 |
+
finals = self._bu_sandhi(word, finals)
|
| 351 |
+
finals = self._yi_sandhi(word, finals)
|
| 352 |
+
finals = self._neural_sandhi(word, pos, finals)
|
| 353 |
+
finals = self._three_sandhi(word, finals)
|
| 354 |
+
return finals
|
genie_tts/G2P/Chinese/__pycache__/ChineseG2P.cpython-311.pyc
ADDED
|
Binary file (11.9 kB). View file
|
|
|
genie_tts/G2P/Chinese/__pycache__/CorrectPronunciation.cpython-311.pyc
ADDED
|
Binary file (2.99 kB). View file
|
|
|
genie_tts/G2P/Chinese/__pycache__/Erhua.cpython-311.pyc
ADDED
|
Binary file (2.88 kB). View file
|
|
|
genie_tts/G2P/Chinese/__pycache__/ToneSandhi.cpython-311.pyc
ADDED
|
Binary file (23.8 kB). View file
|
|
|
genie_tts/G2P/Chinese/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (190 Bytes). View file
|
|
|
genie_tts/G2P/English/EnglishG2P.py
CHANGED
|
@@ -1,296 +1,296 @@
|
|
| 1 |
-
import pickle
|
| 2 |
-
import os
|
| 3 |
-
import re
|
| 4 |
-
from typing import List, Dict, Tuple
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
import nltk
|
| 8 |
-
from nltk.tokenize import TweetTokenizer
|
| 9 |
-
from nltk import pos_tag
|
| 10 |
-
|
| 11 |
-
from .Normalization import normalize
|
| 12 |
-
from .WordSegment import segment_text
|
| 13 |
-
from ..SymbolsV2 import symbols_v2, symbol_to_id_v2
|
| 14 |
-
from ..SymbolsV2 import PUNCTUATION
|
| 15 |
-
from ...Core.Resources import English_G2P_DIR
|
| 16 |
-
|
| 17 |
-
# nltk 路径和分词器初始化
|
| 18 |
-
nltk.data.path.append(English_G2P_DIR)
|
| 19 |
-
word_tokenize = TweetTokenizer().tokenize
|
| 20 |
-
|
| 21 |
-
# 路径定义
|
| 22 |
-
CMU_DICT_PATH = os.path.join(English_G2P_DIR, "cmudict.rep")
|
| 23 |
-
CMU_DICT_FAST_PATH = os.path.join(English_G2P_DIR, "cmudict-fast.rep")
|
| 24 |
-
CMU_DICT_HOT_PATH = os.path.join(English_G2P_DIR, "engdict-hot.rep")
|
| 25 |
-
CACHE_PATH = os.path.join(English_G2P_DIR, "engdict_cache.pickle")
|
| 26 |
-
NAMECACHE_PATH = os.path.join(English_G2P_DIR, "namedict_cache.pickle")
|
| 27 |
-
MODEL_PATH = os.path.join(English_G2P_DIR, "checkpoint20.npz")
|
| 28 |
-
|
| 29 |
-
# 正则表达式和映射
|
| 30 |
-
REP_MAP = {
|
| 31 |
-
"[;::,;]": ",",
|
| 32 |
-
'["’]': "'",
|
| 33 |
-
"。": ".",
|
| 34 |
-
"!": "!",
|
| 35 |
-
"?": "?",
|
| 36 |
-
}
|
| 37 |
-
REP_MAP_PATTERN = re.compile("|".join(re.escape(p) for p in REP_MAP.keys()))
|
| 38 |
-
PUNCTUATIONS_FOR_REGEX = "".join(re.escape(p) for p in PUNCTUATION)
|
| 39 |
-
CONSECUTIVE_PUNCTUATION_PATTERN = re.compile(rf"([{PUNCTUATIONS_FOR_REGEX}\s])([{PUNCTUATIONS_FOR_REGEX}])+")
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
# 辅助函数
|
| 43 |
-
def _read_cmu_dict(file_path: str) -> Dict[str, List[str]]:
|
| 44 |
-
g2p_dict = {}
|
| 45 |
-
with open(file_path, 'r', encoding='utf-8') as f:
|
| 46 |
-
for line in f:
|
| 47 |
-
line = line.strip()
|
| 48 |
-
if not line or line.startswith(';;;'): continue
|
| 49 |
-
parts = re.split(r'\s+', line, maxsplit=1)
|
| 50 |
-
if len(parts) < 2: continue
|
| 51 |
-
word, pron_str = parts[0].lower(), parts[1]
|
| 52 |
-
pron = pron_str.split(" ")
|
| 53 |
-
word = re.sub(r'\(\d+\)$', '', word)
|
| 54 |
-
if word not in g2p_dict: g2p_dict[word] = [pron]
|
| 55 |
-
return g2p_dict
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def _load_and_cache_dict() -> Dict[str, List[List[str]]]:
|
| 59 |
-
with open(CACHE_PATH, "rb") as f:
|
| 60 |
-
g2p_dict = pickle.load(f)
|
| 61 |
-
hot_dict = _read_cmu_dict(CMU_DICT_HOT_PATH)
|
| 62 |
-
if hot_dict: g2p_dict.update(hot_dict)
|
| 63 |
-
return g2p_dict
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
def replace_phs(phs: List[str]) -> List[str]:
|
| 67 |
-
rep_map = {"'": "-"}
|
| 68 |
-
phs_new = []
|
| 69 |
-
for ph in phs:
|
| 70 |
-
if ph in symbols_v2:
|
| 71 |
-
phs_new.append(ph)
|
| 72 |
-
elif ph in rep_map:
|
| 73 |
-
phs_new.append(rep_map[ph])
|
| 74 |
-
return phs_new
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
def replace_consecutive_punctuation(text: str) -> str:
|
| 78 |
-
return CONSECUTIVE_PUNCTUATION_PATTERN.sub(r"\1", text)
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
def text_normalize(text: str) -> str:
|
| 82 |
-
text = REP_MAP_PATTERN.sub(lambda x: REP_MAP[x.group()], text)
|
| 83 |
-
text = normalize(text)
|
| 84 |
-
text = replace_consecutive_punctuation(text)
|
| 85 |
-
return text
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
class CleanG2p:
|
| 89 |
-
"""
|
| 90 |
-
一个集成了神经网络预测功能的、独立的英文G2P转换器。
|
| 91 |
-
- 不再依赖 g2p_en 库,将模型推理逻辑直接内置。
|
| 92 |
-
- 依赖 numpy 库进行计算。
|
| 93 |
-
"""
|
| 94 |
-
|
| 95 |
-
def __init__(self):
|
| 96 |
-
# 1. 初始化标准组件
|
| 97 |
-
self.cmu = _load_and_cache_dict()
|
| 98 |
-
self.namedict = self._load_name_dict()
|
| 99 |
-
for word in ["AE", "AI", "AR", "IOS", "HUD", "OS"]:
|
| 100 |
-
self.cmu.pop(word.lower(), None)
|
| 101 |
-
self._setup_homographs()
|
| 102 |
-
|
| 103 |
-
# 2. 初始化神经网络模型组件
|
| 104 |
-
self._setup_nn_components()
|
| 105 |
-
self._load_nn_model()
|
| 106 |
-
|
| 107 |
-
def _setup_nn_components(self):
|
| 108 |
-
"""设置 G2P 神经网络所需的字母和音素表。"""
|
| 109 |
-
self.graphemes = ["<pad>", "<unk>", "</s>"] + list("abcdefghijklmnopqrstuvwxyz")
|
| 110 |
-
self.phonemes = ["<pad>", "<unk>", "<s>", "</s>"] + ['AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1',
|
| 111 |
-
'AH2', 'AO0',
|
| 112 |
-
'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2',
|
| 113 |
-
'B', 'CH', 'D', 'DH',
|
| 114 |
-
'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1',
|
| 115 |
-
'EY2', 'F', 'G', 'HH',
|
| 116 |
-
'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L',
|
| 117 |
-
'M', 'N', 'NG', 'OW0', 'OW1',
|
| 118 |
-
'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH',
|
| 119 |
-
'UH0', 'UH1', 'UH2', 'UW',
|
| 120 |
-
'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH']
|
| 121 |
-
self.g2idx = {g: idx for idx, g in enumerate(self.graphemes)}
|
| 122 |
-
self.idx2g = {idx: g for idx, g in enumerate(self.graphemes)}
|
| 123 |
-
self.p2idx = {p: idx for idx, p in enumerate(self.phonemes)}
|
| 124 |
-
self.idx2p = {idx: p for idx, p in enumerate(self.phonemes)}
|
| 125 |
-
|
| 126 |
-
def _load_nn_model(self):
|
| 127 |
-
"""从 .npz 文件加载预训练的神经网络权重。"""
|
| 128 |
-
if not os.path.exists(MODEL_PATH):
|
| 129 |
-
raise FileNotFoundError(f"G2P model file not found at: {MODEL_PATH}. "
|
| 130 |
-
f"Please ensure 'checkpoint20.npz' is in the correct directory.")
|
| 131 |
-
|
| 132 |
-
variables = np.load(MODEL_PATH)
|
| 133 |
-
self.enc_emb = variables["enc_emb"]
|
| 134 |
-
self.enc_w_ih = variables["enc_w_ih"]
|
| 135 |
-
self.enc_w_hh = variables["enc_w_hh"]
|
| 136 |
-
self.enc_b_ih = variables["enc_b_ih"]
|
| 137 |
-
self.enc_b_hh = variables["enc_b_hh"]
|
| 138 |
-
self.dec_emb = variables["dec_emb"]
|
| 139 |
-
self.dec_w_ih = variables["dec_w_ih"]
|
| 140 |
-
self.dec_w_hh = variables["dec_w_hh"]
|
| 141 |
-
self.dec_b_ih = variables["dec_b_ih"]
|
| 142 |
-
self.dec_b_hh = variables["dec_b_hh"]
|
| 143 |
-
self.fc_w = variables["fc_w"]
|
| 144 |
-
self.fc_b = variables["fc_b"]
|
| 145 |
-
# logger.info("G2P neural network model loaded successfully.")
|
| 146 |
-
|
| 147 |
-
@staticmethod
|
| 148 |
-
def _sigmoid(x):
|
| 149 |
-
return 1 / (1 + np.exp(-x))
|
| 150 |
-
|
| 151 |
-
def _grucell(self, x, h, w_ih, w_hh, b_ih, b_hh):
|
| 152 |
-
rzn_ih = np.matmul(x, w_ih.T) + b_ih
|
| 153 |
-
rzn_hh = np.matmul(h, w_hh.T) + b_hh
|
| 154 |
-
rz_ih, n_ih = rzn_ih[:, :rzn_ih.shape[-1] * 2 // 3], rzn_ih[:, rzn_ih.shape[-1] * 2 // 3:]
|
| 155 |
-
rz_hh, n_hh = rzn_hh[:, :rzn_hh.shape[-1] * 2 // 3], rzn_hh[:, rzn_hh.shape[-1] * 2 // 3:]
|
| 156 |
-
rz = self._sigmoid(rz_ih + rz_hh)
|
| 157 |
-
r, z = np.split(rz, 2, -1)
|
| 158 |
-
n = np.tanh(n_ih + r * n_hh)
|
| 159 |
-
h = (1 - z) * n + z * h
|
| 160 |
-
return h
|
| 161 |
-
|
| 162 |
-
def _gru(self, x, steps, w_ih, w_hh, b_ih, b_hh, h0=None):
|
| 163 |
-
if h0 is None:
|
| 164 |
-
h0 = np.zeros((x.shape[0], w_hh.shape[1]), np.float32)
|
| 165 |
-
h = h0
|
| 166 |
-
outputs = np.zeros((x.shape[0], steps, w_hh.shape[1]), np.float32)
|
| 167 |
-
for t in range(steps):
|
| 168 |
-
h = self._grucell(x[:, t, :], h, w_ih, w_hh, b_ih, b_hh)
|
| 169 |
-
outputs[:, t, ::] = h
|
| 170 |
-
return outputs
|
| 171 |
-
|
| 172 |
-
def _encode(self, word: str) -> np.ndarray:
|
| 173 |
-
chars = list(word.lower()) + ["</s>"]
|
| 174 |
-
x = [self.g2idx.get(char, self.g2idx["<unk>"]) for char in chars]
|
| 175 |
-
x = np.take(self.enc_emb, np.expand_dims(x, 0), axis=0)
|
| 176 |
-
return x
|
| 177 |
-
|
| 178 |
-
def predict(self, word: str) -> List[str]:
|
| 179 |
-
"""使用内置的神经网络模型预测单词的发音。"""
|
| 180 |
-
# Encoder
|
| 181 |
-
enc = self._encode(word)
|
| 182 |
-
enc = self._gru(enc, len(word) + 1, self.enc_w_ih, self.enc_w_hh,
|
| 183 |
-
self.enc_b_ih, self.enc_b_hh, h0=np.zeros((1, self.enc_w_hh.shape[-1]), np.float32))
|
| 184 |
-
last_hidden = enc[:, -1, :]
|
| 185 |
-
|
| 186 |
-
# Decoder
|
| 187 |
-
dec = np.take(self.dec_emb, [self.p2idx["<s>"]], axis=0) # Start with <s>
|
| 188 |
-
h = last_hidden
|
| 189 |
-
preds = []
|
| 190 |
-
for _ in range(20): # Max steps
|
| 191 |
-
h = self._grucell(dec, h, self.dec_w_ih, self.dec_w_hh, self.dec_b_ih, self.dec_b_hh)
|
| 192 |
-
logits = np.matmul(h, self.fc_w.T) + self.fc_b
|
| 193 |
-
pred_idx = logits.argmax()
|
| 194 |
-
if pred_idx == self.p2idx["</s>"]: break
|
| 195 |
-
preds.append(pred_idx)
|
| 196 |
-
dec = np.take(self.dec_emb, [pred_idx], axis=0)
|
| 197 |
-
|
| 198 |
-
return [self.idx2p.get(idx, "<unk>") for idx in preds]
|
| 199 |
-
|
| 200 |
-
# --- 标准 G2P 逻辑 ---
|
| 201 |
-
|
| 202 |
-
@staticmethod
|
| 203 |
-
def _load_name_dict() -> Dict[str, List[List[str]]]:
|
| 204 |
-
if os.path.exists(NAMECACHE_PATH):
|
| 205 |
-
with open(NAMECACHE_PATH, "rb") as f: return pickle.load(f)
|
| 206 |
-
return {}
|
| 207 |
-
|
| 208 |
-
def _setup_homographs(self):
|
| 209 |
-
self.homograph2features: Dict[str, Tuple[List[str], List[str], str]] = {
|
| 210 |
-
"read": (["R", "EH1", "D"], ["R", "IY1", "D"], "VBD"),
|
| 211 |
-
"complex": (["K", "AH0", "M", "P", "L", "EH1", "K", "S"], ["K", "AA1", "M", "P", "L", "EH0", "K", "S"],
|
| 212 |
-
"JJ"),
|
| 213 |
-
"lead": (["L", "IY1", "D"], ["L", "EH1", "D"], "NN"),
|
| 214 |
-
"presents": (["P", "R", "IY0", "Z", "EH1", "N", "T", "S"], ["P", "R", "EH1", "Z", "AH0", "N", "T", "S"],
|
| 215 |
-
"VBZ"),
|
| 216 |
-
}
|
| 217 |
-
|
| 218 |
-
def __call__(self, text: str) -> List[str]:
    """Convert text to a flat phoneme list, with " " separating words.

    Pipeline: tokenize the raw text, tokenize the normalized text,
    re-align the two token streams, POS-tag, then resolve each word via
    homograph table / dictionary / NN fallback.
    """
    original_words = word_tokenize(text)
    normalized_text = text_normalize(text)
    normalized_words = word_tokenize(normalized_text)

    # Re-align original and normalized token streams.
    # NOTE(review): the normalizer appears to expand a bare "I" into the
    # Roman-numeral reading "the first" (two tokens); this loop undoes that
    # expansion so the pronoun "I" is kept — confirm against normalize().
    corrected_words = []
    original_idx, normalized_idx = 0, 0
    while original_idx < len(original_words) and normalized_idx < len(normalized_words):
        if original_words[original_idx] == "I" and \
                " ".join(normalized_words[normalized_idx:normalized_idx + 2]) == "the first":
            corrected_words.append("I")
            original_idx += 1
            normalized_idx += 2
        else:
            corrected_words.append(normalized_words[normalized_idx])
            original_idx += 1
            normalized_idx += 1
    # Keep any normalized tokens left over after the original stream ends.
    if normalized_idx < len(normalized_words):
        corrected_words.extend(normalized_words[normalized_idx:])

    if not corrected_words: return []

    tokens = pos_tag(corrected_words)
    prons = []
    for o_word, pos in tokens:
        word = o_word.lower()
        if re.search("[a-z]", word) is None:
            # No alphabetic content (punctuation etc.) — pass through verbatim.
            pron = [word]
        elif word in self.homograph2features:
            # POS-disambiguated homograph: first pronunciation when the tag
            # starts with the stored prefix, second otherwise.
            pron1, pron2, pos1 = self.homograph2features[word]
            pron = pron1 if pos.startswith(pos1) else pron2
        else:
            pron = self._query_word(o_word)
        prons.extend(pron)
        prons.extend([" "])  # word separator
    # Drop the trailing separator.
    return prons[:-1] if prons else []
|
| 254 |
-
|
| 255 |
-
def _query_word(self, o_word: str) -> List[str]:
    """Resolve one word to phonemes, trying sources from cheapest to costliest.

    Order: CMU dictionary → proper-name dictionary (title-cased words only)
    → possessive "'s" suffix rule → hyphen split → statistical word
    segmentation → neural-network prediction as the last resort.
    """
    word = o_word.lower()
    if word in self.cmu:
        # Special-case the article "A" (upper-case original) as unstressed schwa.
        if o_word == "A": return ["AH0"]
        return self.cmu[word][0]
    if o_word.istitle() and word in self.namedict:
        return self.namedict[word][0]
    if word.endswith("'s") and len(word) > 2:
        # Possessive: derive from the base word plus an English /s~z~əz/ rule.
        base_pron = self._query_word(word[:-2])
        if base_pron:
            last_ph = base_pron[-1]
            if last_ph in {"S", "Z", "SH", "ZH", "CH", "JH"}: return base_pron + ["AH0", "Z"]
            if last_ph in {"P", "T", "K", "F", "TH"}: return base_pron + ["S"]
            return base_pron + ["Z"]
    if "-" in word and len(word) > 1:
        # Hyphenated compound: concatenate each part's pronunciation.
        parts = [p for p in word.split("-") if p]
        if len(parts) > 1:
            result = [ph for part in parts for ph in self._query_word(part)]
            if result: return result
    # Try splitting an unknown compound into known sub-words.
    segments = segment_text(word)
    if len(segments) > 1 and "".join(segments) == word:
        result = [ph for segment in segments for ph in self._query_word(segment)]
        if result: return result

    # Everything failed — fall back to the neural model.
    return self.predict(o_word)
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
# Module-level singleton, built eagerly at import time (loads dictionaries and NN weights).
_g2p_instance: CleanG2p = CleanG2p()
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
def g2p(text: str) -> List[str]:
    """Convert English text to a cleaned list of phoneme symbols."""
    if _g2p_instance is None: raise RuntimeError("G2P model is not available.")
    raw_phonemes = _g2p_instance(text)
    # Drop separators/control tokens; the bare (stress-less) "UW" symbol is
    # filtered here as well, keeping only UW0/UW1/UW2.
    undesired = {" ", "<pad>", "UW", "</s>", "<s>"}
    phones = ["UNK" if ph == "<unk>" else ph for ph in raw_phonemes if ph not in undesired]
    return replace_phs(phones)
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
def english_to_phones(text: str) -> List[int]:
    """Convert English text to a sequence of v2 symbol-table IDs."""
    phones = g2p(text)
    phones = [symbol_to_id_v2[ph] for ph in phones]
    return phones
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
from typing import List, Dict, Tuple
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import nltk
|
| 8 |
+
from nltk.tokenize import TweetTokenizer
|
| 9 |
+
from nltk import pos_tag
|
| 10 |
+
|
| 11 |
+
from .Normalization import normalize
|
| 12 |
+
from .WordSegment import segment_text
|
| 13 |
+
from ..SymbolsV2 import symbols_v2, symbol_to_id_v2
|
| 14 |
+
from ..SymbolsV2 import PUNCTUATION
|
| 15 |
+
from ...Core.Resources import English_G2P_DIR
|
| 16 |
+
|
| 17 |
+
# nltk 路径和分词器初始化
|
| 18 |
+
nltk.data.path.append(English_G2P_DIR)
|
| 19 |
+
word_tokenize = TweetTokenizer().tokenize
|
| 20 |
+
|
| 21 |
+
# 路径定义
|
| 22 |
+
CMU_DICT_PATH = os.path.join(English_G2P_DIR, "cmudict.rep")
|
| 23 |
+
CMU_DICT_FAST_PATH = os.path.join(English_G2P_DIR, "cmudict-fast.rep")
|
| 24 |
+
CMU_DICT_HOT_PATH = os.path.join(English_G2P_DIR, "engdict-hot.rep")
|
| 25 |
+
CACHE_PATH = os.path.join(English_G2P_DIR, "engdict_cache.pickle")
|
| 26 |
+
NAMECACHE_PATH = os.path.join(English_G2P_DIR, "namedict_cache.pickle")
|
| 27 |
+
MODEL_PATH = os.path.join(English_G2P_DIR, "checkpoint20.npz")
|
| 28 |
+
|
| 29 |
+
# 正则表达式和映射
|
| 30 |
+
REP_MAP = {
|
| 31 |
+
"[;::,;]": ",",
|
| 32 |
+
'["’]': "'",
|
| 33 |
+
"。": ".",
|
| 34 |
+
"!": "!",
|
| 35 |
+
"?": "?",
|
| 36 |
+
}
|
| 37 |
+
REP_MAP_PATTERN = re.compile("|".join(re.escape(p) for p in REP_MAP.keys()))
|
| 38 |
+
PUNCTUATIONS_FOR_REGEX = "".join(re.escape(p) for p in PUNCTUATION)
|
| 39 |
+
CONSECUTIVE_PUNCTUATION_PATTERN = re.compile(rf"([{PUNCTUATIONS_FOR_REGEX}\s])([{PUNCTUATIONS_FOR_REGEX}])+")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# 辅助函数
|
| 43 |
+
def _read_cmu_dict(file_path: str) -> Dict[str, List[str]]:
|
| 44 |
+
g2p_dict = {}
|
| 45 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 46 |
+
for line in f:
|
| 47 |
+
line = line.strip()
|
| 48 |
+
if not line or line.startswith(';;;'): continue
|
| 49 |
+
parts = re.split(r'\s+', line, maxsplit=1)
|
| 50 |
+
if len(parts) < 2: continue
|
| 51 |
+
word, pron_str = parts[0].lower(), parts[1]
|
| 52 |
+
pron = pron_str.split(" ")
|
| 53 |
+
word = re.sub(r'\(\d+\)$', '', word)
|
| 54 |
+
if word not in g2p_dict: g2p_dict[word] = [pron]
|
| 55 |
+
return g2p_dict
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _load_and_cache_dict() -> Dict[str, List[List[str]]]:
    """Load the pickled CMU dictionary cache, then overlay hot-fix entries.

    Entries in the hot dictionary file override the cached pronunciations.
    """
    with open(CACHE_PATH, "rb") as cache_file:
        dictionary = pickle.load(cache_file)
    overrides = _read_cmu_dict(CMU_DICT_HOT_PATH)
    if overrides:
        dictionary.update(overrides)
    return dictionary
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def replace_phs(phs: List[str]) -> List[str]:
    """Filter a phoneme list down to symbols known to the v2 symbol table.

    An apostrophe is remapped to "-"; anything else not in ``symbols_v2``
    is silently dropped.
    """
    remap = {"'": "-"}
    return [
        ph if ph in symbols_v2 else remap[ph]
        for ph in phs
        if ph in symbols_v2 or ph in remap
    ]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def replace_consecutive_punctuation(text: str) -> str:
    """Collapse each run of consecutive punctuation to its first character."""
    collapsed = CONSECUTIVE_PUNCTUATION_PATTERN.sub(r"\1", text)
    return collapsed
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def text_normalize(text: str) -> str:
    """Normalize raw English text for G2P.

    Steps: map listed punctuation variants to ASCII equivalents, run the
    shared text normalizer, then collapse consecutive punctuation.
    """
    replaced = REP_MAP_PATTERN.sub(lambda m: REP_MAP[m.group()], text)
    normalized = normalize(replaced)
    return replace_consecutive_punctuation(normalized)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class CleanG2p:
    """
    A self-contained English grapheme-to-phoneme (G2P) converter with an
    integrated neural-network predictor for out-of-vocabulary words.
    - Does not depend on the g2p_en package; model inference logic is inlined.
    - Relies on numpy for all computation.
    """

    def __init__(self):
        # 1. Standard lookup components (CMU dictionary + proper-name dictionary).
        self.cmu = _load_and_cache_dict()
        self.namedict = self._load_name_dict()
        # Drop these entries from the dictionary — presumably to force the NN
        # spelling-out path for acronym readings; verify against callers.
        for word in ["AE", "AI", "AR", "IOS", "HUD", "OS"]:
            self.cmu.pop(word.lower(), None)
        self._setup_homographs()

        # 2. Neural-network model components.
        self._setup_nn_components()
        self._load_nn_model()

    def _setup_nn_components(self):
        """Build the grapheme/phoneme vocabularies and index maps for the NN model."""
        self.graphemes = ["<pad>", "<unk>", "</s>"] + list("abcdefghijklmnopqrstuvwxyz")
        self.phonemes = ["<pad>", "<unk>", "<s>", "</s>"] + ['AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1',
                                                             'AH2', 'AO0',
                                                             'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2',
                                                             'B', 'CH', 'D', 'DH',
                                                             'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1',
                                                             'EY2', 'F', 'G', 'HH',
                                                             'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L',
                                                             'M', 'N', 'NG', 'OW0', 'OW1',
                                                             'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH',
                                                             'UH0', 'UH1', 'UH2', 'UW',
                                                             'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH']
        self.g2idx = {g: idx for idx, g in enumerate(self.graphemes)}
        self.idx2g = {idx: g for idx, g in enumerate(self.graphemes)}
        self.p2idx = {p: idx for idx, p in enumerate(self.phonemes)}
        self.idx2p = {idx: p for idx, p in enumerate(self.phonemes)}

    def _load_nn_model(self):
        """Load the pretrained seq2seq GRU weights from the .npz checkpoint."""
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(f"G2P model file not found at: {MODEL_PATH}. "
                                    f"Please ensure 'checkpoint20.npz' is in the correct directory.")

        variables = np.load(MODEL_PATH)
        # Encoder GRU parameters.
        self.enc_emb = variables["enc_emb"]
        self.enc_w_ih = variables["enc_w_ih"]
        self.enc_w_hh = variables["enc_w_hh"]
        self.enc_b_ih = variables["enc_b_ih"]
        self.enc_b_hh = variables["enc_b_hh"]
        # Decoder GRU parameters.
        self.dec_emb = variables["dec_emb"]
        self.dec_w_ih = variables["dec_w_ih"]
        self.dec_w_hh = variables["dec_w_hh"]
        self.dec_b_ih = variables["dec_b_ih"]
        self.dec_b_hh = variables["dec_b_hh"]
        # Output projection to phoneme logits.
        self.fc_w = variables["fc_w"]
        self.fc_b = variables["fc_b"]

    @staticmethod
    def _sigmoid(x):
        """Numpy logistic sigmoid."""
        return 1 / (1 + np.exp(-x))

    def _grucell(self, x, h, w_ih, w_hh, b_ih, b_hh):
        """One GRU step; weights pack the r/z/n gates along the last axis."""
        rzn_ih = np.matmul(x, w_ih.T) + b_ih
        rzn_hh = np.matmul(h, w_hh.T) + b_hh
        # First 2/3 of the pre-activations are the r and z gates, last 1/3 is n.
        rz_ih, n_ih = rzn_ih[:, :rzn_ih.shape[-1] * 2 // 3], rzn_ih[:, rzn_ih.shape[-1] * 2 // 3:]
        rz_hh, n_hh = rzn_hh[:, :rzn_hh.shape[-1] * 2 // 3], rzn_hh[:, rzn_hh.shape[-1] * 2 // 3:]
        rz = self._sigmoid(rz_ih + rz_hh)
        r, z = np.split(rz, 2, -1)
        n = np.tanh(n_ih + r * n_hh)
        h = (1 - z) * n + z * h
        return h

    def _gru(self, x, steps, w_ih, w_hh, b_ih, b_hh, h0=None):
        """Unroll the GRU cell over `steps` timesteps, returning all hidden states."""
        if h0 is None:
            h0 = np.zeros((x.shape[0], w_hh.shape[1]), np.float32)
        h = h0
        outputs = np.zeros((x.shape[0], steps, w_hh.shape[1]), np.float32)
        for t in range(steps):
            h = self._grucell(x[:, t, :], h, w_ih, w_hh, b_ih, b_hh)
            outputs[:, t, ::] = h
        return outputs

    def _encode(self, word: str) -> np.ndarray:
        """Embed a word's characters (plus </s>) into the encoder input tensor."""
        chars = list(word.lower()) + ["</s>"]
        x = [self.g2idx.get(char, self.g2idx["<unk>"]) for char in chars]
        x = np.take(self.enc_emb, np.expand_dims(x, 0), axis=0)
        return x

    def predict(self, word: str) -> List[str]:
        """Predict a word's pronunciation with the built-in neural-network model."""
        # Encoder: run the GRU over the characters; keep the final hidden state.
        enc = self._encode(word)
        enc = self._gru(enc, len(word) + 1, self.enc_w_ih, self.enc_w_hh,
                        self.enc_b_ih, self.enc_b_hh, h0=np.zeros((1, self.enc_w_hh.shape[-1]), np.float32))
        last_hidden = enc[:, -1, :]

        # Decoder: greedy argmax decoding, up to 20 phonemes.
        dec = np.take(self.dec_emb, [self.p2idx["<s>"]], axis=0)  # Start with <s>
        h = last_hidden
        preds = []
        for _ in range(20):  # Max steps
            h = self._grucell(dec, h, self.dec_w_ih, self.dec_w_hh, self.dec_b_ih, self.dec_b_hh)
            logits = np.matmul(h, self.fc_w.T) + self.fc_b
            pred_idx = logits.argmax()
            if pred_idx == self.p2idx["</s>"]: break
            preds.append(pred_idx)
            # Feed the predicted phoneme's embedding back as the next input.
            dec = np.take(self.dec_emb, [pred_idx], axis=0)

        return [self.idx2p.get(idx, "<unk>") for idx in preds]

    # --- Standard G2P logic ---

    @staticmethod
    def _load_name_dict() -> Dict[str, List[List[str]]]:
        """Load the pickled proper-name pronunciation dictionary, or {} if absent."""
        if os.path.exists(NAMECACHE_PATH):
            with open(NAMECACHE_PATH, "rb") as f: return pickle.load(f)
        return {}

    def _setup_homographs(self):
        """Register POS-disambiguated homographs.

        Each value is ``(pron_if_pos_matches, pron_otherwise, pos_prefix)``.
        """
        self.homograph2features: Dict[str, Tuple[List[str], List[str], str]] = {
            "read": (["R", "EH1", "D"], ["R", "IY1", "D"], "VBD"),
            "complex": (["K", "AH0", "M", "P", "L", "EH1", "K", "S"], ["K", "AA1", "M", "P", "L", "EH0", "K", "S"],
                        "JJ"),
            "lead": (["L", "IY1", "D"], ["L", "EH1", "D"], "NN"),
            "presents": (["P", "R", "IY0", "Z", "EH1", "N", "T", "S"], ["P", "R", "EH1", "Z", "AH0", "N", "T", "S"],
                         "VBZ"),
        }

    def __call__(self, text: str) -> List[str]:
        """Convert text to a flat phoneme list with " " separating words."""
        original_words = word_tokenize(text)
        normalized_text = text_normalize(text)
        normalized_words = word_tokenize(normalized_text)

        # Re-align the original and normalized token streams.
        # NOTE(review): the normalizer appears to expand a bare "I" into the
        # Roman-numeral reading "the first"; this loop undoes that so the
        # pronoun "I" is preserved — confirm against Normalization.normalize.
        corrected_words = []
        original_idx, normalized_idx = 0, 0
        while original_idx < len(original_words) and normalized_idx < len(normalized_words):
            if original_words[original_idx] == "I" and \
                    " ".join(normalized_words[normalized_idx:normalized_idx + 2]) == "the first":
                corrected_words.append("I")
                original_idx += 1
                normalized_idx += 2
            else:
                corrected_words.append(normalized_words[normalized_idx])
                original_idx += 1
                normalized_idx += 1
        # Keep any normalized tokens left over after the original stream ends.
        if normalized_idx < len(normalized_words):
            corrected_words.extend(normalized_words[normalized_idx:])

        if not corrected_words: return []

        tokens = pos_tag(corrected_words)
        prons = []
        for o_word, pos in tokens:
            word = o_word.lower()
            if re.search("[a-z]", word) is None:
                # No alphabetic content (punctuation etc.) — pass through verbatim.
                pron = [word]
            elif word in self.homograph2features:
                pron1, pron2, pos1 = self.homograph2features[word]
                pron = pron1 if pos.startswith(pos1) else pron2
            else:
                pron = self._query_word(o_word)
            prons.extend(pron)
            prons.extend([" "])  # word separator
        # Drop the trailing separator.
        return prons[:-1] if prons else []

    def _query_word(self, o_word: str) -> List[str]:
        """Resolve one word to phonemes, trying sources from cheapest to costliest.

        Order: CMU dictionary → proper-name dictionary (title-cased words)
        → possessive "'s" rule → hyphen split → word segmentation → NN model.
        """
        word = o_word.lower()
        if word in self.cmu:
            # Special-case the article "A" as unstressed schwa.
            if o_word == "A": return ["AH0"]
            return self.cmu[word][0]
        if o_word.istitle() and word in self.namedict:
            return self.namedict[word][0]
        if word.endswith("'s") and len(word) > 2:
            # Possessive: base pronunciation plus the English /s~z~əz/ rule.
            base_pron = self._query_word(word[:-2])
            if base_pron:
                last_ph = base_pron[-1]
                if last_ph in {"S", "Z", "SH", "ZH", "CH", "JH"}: return base_pron + ["AH0", "Z"]
                if last_ph in {"P", "T", "K", "F", "TH"}: return base_pron + ["S"]
                return base_pron + ["Z"]
        if "-" in word and len(word) > 1:
            # Hyphenated compound: concatenate each part's pronunciation.
            parts = [p for p in word.split("-") if p]
            if len(parts) > 1:
                result = [ph for part in parts for ph in self._query_word(part)]
                if result: return result
        # Try splitting an unknown compound into known sub-words.
        segments = segment_text(word)
        if len(segments) > 1 and "".join(segments) == word:
            result = [ph for segment in segments for ph in self._query_word(segment)]
            if result: return result

        # Everything failed — fall back to the neural model.
        return self.predict(o_word)
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
# Module-level singleton, built eagerly at import time (loads dictionaries and NN weights).
_g2p_instance: CleanG2p = CleanG2p()
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def g2p(text: str) -> List[str]:
    """Convert English text to a cleaned list of phoneme symbols.

    Control tokens, word separators and the bare "UW" symbol are removed;
    "<unk>" is rewritten to "UNK" before the final symbol-table filter.
    """
    if _g2p_instance is None:
        raise RuntimeError("G2P model is not available.")
    skip = {" ", "<pad>", "UW", "</s>", "<s>"}
    cleaned: List[str] = []
    for ph in _g2p_instance(text):
        if ph in skip:
            continue
        cleaned.append("UNK" if ph == "<unk>" else ph)
    return replace_phs(cleaned)
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def english_to_phones(text: str) -> List[int]:
    """Convert English text to a sequence of v2 symbol-table IDs."""
    return [symbol_to_id_v2[ph] for ph in g2p(text)]
|