修复其他语言默认无法重定向到英语的错误
Browse files- app.py +613 -611
- i18n/i18n.py +4 -1
app.py
CHANGED
|
@@ -1,611 +1,613 @@
|
|
| 1 |
-
import time
|
| 2 |
-
import os
|
| 3 |
-
import logging
|
| 4 |
-
|
| 5 |
-
import gradio as gr
|
| 6 |
-
import numpy as np
|
| 7 |
-
import pandas as pd
|
| 8 |
-
from pypinyin import lazy_pinyin
|
| 9 |
-
from i18n import gettext, Translate
|
| 10 |
-
|
| 11 |
-
from api import generate_api, get_audio, generate_voice, load_characters_csv
|
| 12 |
-
from utils import get_length
|
| 13 |
-
|
| 14 |
-
# 翻译文件位置
|
| 15 |
-
trans_file = os.path.join(os.path.dirname(__file__), "i18n", "translations.json")
|
| 16 |
-
|
| 17 |
-
# 关闭aiohttp的DEBUG日志
|
| 18 |
-
logging.getLogger("aiohttp").setLevel(logging.WARNING)
|
| 19 |
-
# logging.getLogger("gradio").setLevel(logging.WARNING)
|
| 20 |
-
|
| 21 |
-
# 带有时间的log
|
| 22 |
-
logging.basicConfig(
|
| 23 |
-
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
header = """header"""
|
| 28 |
-
|
| 29 |
-
terms = "terms"
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
def update_all_characters(lang, category):
    """Reload the character table after the UI language changes.

    The incoming ``category`` value is not read; it is replaced by the
    category list that ``load_characters_csv`` returns for ``lang``.

    Returns:
        A 5-tuple: the character table, the characters of the first
        category, a freshly built gallery showing them, the category list,
        and a dropdown update that selects the first category.
    """
    character_table, categories = load_characters_csv(lang)
    default_kind = categories[0]
    visible_characters = get_characters(
        kind=default_kind, all_characters=character_table
    )

    gallery_items = [[entry["头像"], entry["名称"]] for entry in visible_characters]
    # A new Gallery component (rather than a bare value list) is returned so
    # that selected_index is reset along with the contents.
    refreshed_gallery = gr.Gallery(
        value=gallery_items,
        show_label=False,
        elem_id="character_gallery",
        columns=[11],
        object_fit="contain",
        height="auto",
        interactive=False,
        allow_preview=False,
        selected_index=None,
    )
    kind_update = gr.update(choices=categories, value=default_kind)

    return (
        character_table,
        visible_characters,
        refreshed_gallery,
        categories,
        kind_update,
    )
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
def get_characters(
    query=None, page=1, per_page=400, kind="原神", lang="zh", all_characters=None
):
    """Return one page of unique characters for a category, optionally filtered.

    Args:
        query: Optional search text, matched case-insensitively as a literal
            substring of the "名称" column.
        page: 1-based page number.
        per_page: Number of records per page.
        kind: Value of the "类别" column to keep.
        lang: UI language code; only "zh" enables the pinyin fallback search.
        all_characters: DataFrame holding the character table. ``None``
            yields an empty list.

    Returns:
        A list of record dicts, one per distinct name (first row of each
        name group), ordered by the "id" column.
    """
    import pickle

    if all_characters is None:
        return []

    in_category = all_characters[all_characters["类别"] == kind]
    filtered_characters = in_category

    if query:
        # The query comes straight from a search box, so it is matched as a
        # literal substring: characters such as "(" or "*" must not be parsed
        # as a regular expression. na=False keeps rows with a missing name
        # from producing a non-boolean mask.
        name_hits = in_category["名称"].str.contains(
            query, case=False, regex=False, na=False
        )
        filtered_characters = in_category[name_hits]
        if filtered_characters.empty and lang == "zh":
            # Nothing matched the written name: retry against its pinyin.
            pinyin_names = in_category["名称"].apply(
                lambda x: "".join(lazy_pinyin(x))
            )
            pinyin_hits = pinyin_names.str.contains(
                query, case=False, regex=False, na=False
            )
            filtered_characters = in_category[pinyin_hits]

    # Group by name and keep the first record of each group.
    unique_characters = (
        filtered_characters.groupby("名称").first().reset_index().sort_values(by="id")
    )

    def process_avatar(avatar):
        """Unpickle a non-string avatar; return it unchanged on failure."""
        if isinstance(avatar, str):
            return avatar
        try:
            # NOTE(review): pickle.loads can execute arbitrary code. This is
            # only acceptable if the avatar column comes from a trusted
            # source - confirm against load_characters_csv.
            return pickle.loads(bytes(avatar))
        except Exception:
            return avatar

    unique_characters["头像"] = unique_characters["头像"].apply(process_avatar)

    # Apply pagination.
    start_index = (page - 1) * per_page
    end_index = start_index + per_page

    return unique_characters.iloc[start_index:end_index].to_dict("records")
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
async def generate(selected_character=None, selected_characters=None, text="", lang="zh"):
    """Synthesize ``text`` with the voices of the chosen characters.

    Args:
        selected_character: Character dict currently highlighted in the
            gallery, or a falsy value when nothing is highlighted.
        selected_characters: Character dicts already added to the selection;
            ``None`` is treated as an empty list.
        text: Text to synthesize.
        lang: UI language code used to pick user-facing messages. Codes other
            than zh/en/ja/ko fall back to the English message.

    Returns:
        ``(audio, cost_time)``: the value returned by ``generate_api`` and a
        localized elapsed-time string.

    Raises:
        gr.Error: nothing is selected, no selected character has a voice_id,
            a placeholder voice_id is selected, the text is empty or too
            long, or ``generate_api`` returned an error string.
    """

    def localized(messages):
        # Unsupported language codes fall back to English instead of
        # silently skipping the message.
        return messages.get(lang, messages["en"])

    selected_characters = list(selected_characters or [])
    if selected_character:
        characters = [selected_character] + selected_characters
    else:
        characters = selected_characters

    if not characters:
        raise gr.Error(
            localized(
                {
                    "zh": "请先选择一个角色",
                    "en": "Please select a character first",
                    "ja": "まず、キャラクターを選択してください",
                    "ko": "먼저 캐릭터를 선택하세요",
                }
            )
        )

    voice_ids = [char.get("voice_id") for char in characters if char.get("voice_id")]
    if not voice_ids:
        raise gr.Error("所选角色没有关联的 voice_id")

    # The original compared the whole list with "1", which is never true.
    # NOTE(review): "1" is presumably the placeholder id of a character whose
    # voice has not been created yet - confirm against the character CSV.
    if any(str(voice_id) == "1" for voice_id in voice_ids):
        raise gr.Error(
            localized(
                {
                    "zh": "该角色暂未创建语音",
                    "en": "The character has not been created yet",
                    "ja": "そのキャラクターの音声はまだ作成されていません",
                    "ko": "해당 캐릭터의 음성이 아직 생성되지 않았습니다",
                }
            )
        )

    if not text:
        raise gr.Error(
            localized(
                {
                    "zh": "请输入需要合成的文本",
                    "en": "Please enter the text to be synthesized",
                    "ja": "合成するテキストを入力してください",
                    "ko": "합성할 텍스트를 입력하세요",
                }
            )
        )

    if get_length(text) > 1024:
        raise gr.Error(
            localized(
                {
                    "zh": "长度请控制在1024个字符以内",
                    "en": "The text length exceeds 1024 words",
                    "ja": "テキストの長さが1024文字を超えています",
                    "ko": "텍스트 길이가 1024자를 초과합니다",
                }
            )
        )

    start_time = time.time()
    audio = await generate_api(voice_ids, text)
    elapsed = time.time() - start_time

    # generate_api reports failures by returning a message string.
    if isinstance(audio, str):
        logging.error(audio)
        raise gr.Error(audio)

    cost_time = localized(
        {
            "zh": f"合成共花费{elapsed:.2f}秒",
            "en": f"Total time spent synthesizing: {elapsed:.2f} seconds",
            "ja": f"合成にかかった時間: {elapsed:.2f}秒",
            "ko": f"합성에 소요된 시간: {elapsed:.2f}초",
        }
    )
    return audio, cost_time
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
def get_character_emotions(character, all_characters):
    """List the distinct emotion records available for one character.

    Args:
        character: Mapping with at least the "名称" key.
        all_characters: DataFrame with "名称" and "情绪" columns.

    Returns:
        A list of full record dicts, one per distinct "情绪" value among the
        rows whose name matches. When no row matches, a single placeholder
        record is returned; it carries an empty "voice_id" so that callers
        which index that key do not raise KeyError.
    """
    name = character["名称"]

    # Keep every row that belongs to this character.
    same_name = all_characters[all_characters["名称"] == name]

    # One record per emotion, keeping the first row of each.
    emotion_records = same_name.drop_duplicates(subset=["情绪"]).to_dict("records")
    if emotion_records:
        return emotion_records

    return [{"名称": name, "情绪": "默认情绪", "voice_id": ""}]
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
def update_character_info(character_name, emotion, current_character, all_characters):
    """Look up the record for a (name, emotion) pair.

    Args:
        character_name: Name shown in the "currently selected" textbox.
        emotion: Emotion chosen in the dropdown.
        current_character: The record currently held in state; returned
            unchanged when no new record can be resolved.
        all_characters: DataFrame with "名称" and "情绪" columns.

    Returns:
        ``None`` when no character name is set (as before). Otherwise a
        ``(record, all_characters)`` tuple, where ``record`` is the first
        matching row as a dict, or ``current_character`` when the emotion is
        empty or no row matches.
    """
    if not character_name:
        # Kept from the original contract: a cleared name yields a bare None.
        return None

    if not emotion:
        # Previously this path crashed by calling .iloc on None.
        return current_character, all_characters

    matches = all_characters[
        (all_characters["名称"] == character_name)
        & (all_characters["情绪"] == emotion)
    ]
    if matches.empty:
        # Previously an IndexError: e.g. the name changed before the emotion
        # dropdown caught up with it.
        return current_character, all_characters

    return matches.iloc[0].to_dict(), all_characters
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
def add_new_voice(current_character, selected_characters, kind, lang, all_characters):
    """Add the highlighted character to the selection and reset the picker.

    Args:
        current_character: Record dict of the highlighted character, or a
            falsy value when nothing is highlighted.
        selected_characters: List of already selected record dicts; it is
            modified in place.
        kind: Category currently shown in the gallery.
        lang: UI language code; codes other than zh/en/ja/ko get the English
            message.
        all_characters: DataFrame holding the character table.

    Returns:
        An 8-tuple: cleared current character (None), cleared name textbox
        update, cleared emotion dropdown update, the selection list, the
        characters of ``kind``, a fresh gallery, a visibility update, and
        ``all_characters``.

    Raises:
        gr.Error: nothing is highlighted, or a new character would exceed
            the limit of 5 selected characters.
    """
    if not current_character:
        messages = {
            "zh": "请先选择一个角色",
            "en": "Please select a character first",
            "ja": "まず、キャラクターを選択してください",
            "ko": "먼저 캐릭터를 선택하세요",
        }
        # Unsupported language codes fall back to English rather than
        # falling through and failing on current_character["名称"] below.
        raise gr.Error(messages.get(lang, messages["en"]))

    # Check whether the same character is already selected.
    existing_char = next(
        (
            char
            for char in selected_characters
            if char["名称"] == current_character["名称"]
        ),
        None,
    )
    if existing_char is not None:
        # Same character with a different emotion: update the entry in place.
        if existing_char["情绪"] != current_character["情绪"]:
            existing_char["情绪"] = current_character["情绪"]
            # The voice_id must follow the emotion, otherwise synthesis keeps
            # using the voice of the previous emotion.
            if "voice_id" in current_character:
                existing_char["voice_id"] = current_character["voice_id"]
    else:
        # The limit only applies when the list would actually grow.
        if len(selected_characters) >= 5:
            raise gr.Error("已达到最大选择数(5个)")
        selected_characters.insert(0, current_character)

    updated_characters = get_characters(
        kind=kind, lang=lang, all_characters=all_characters
    )
    # ! Returning a brand-new Gallery is required to clear the selection;
    # returning a plain value list keeps the previously selected item.
    updated_gallery = gr.Gallery(
        value=[[char["头像"], char["名称"]] for char in updated_characters],
        show_label=False,
        elem_id="character_gallery",
        columns=[11],
        object_fit="contain",
        height="auto",
        interactive=False,
        allow_preview=False,
        selected_index=None,
    )

    return (
        None,
        gr.update(value=""),
        gr.update(choices=[]),
        selected_characters,
        updated_characters,
        updated_gallery,
        gr.update(visible=True),
        all_characters,
    )
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
def update_selected_chars_display(selected_characters):
    """Build the component updates for the "selected characters" rows.

    For every entry of the module-level ``selected_chars_rows`` list four
    updates are produced, in order. Rows backed by a selected character are
    shown with its name and emotion; the remaining rows are blanked and
    hidden.

    Returns:
        A flat list holding four updates per row.
    """
    chosen_count = len(selected_characters)
    updates = []
    for slot, (_name_box, _emotion_box, _delete_btn, _row) in enumerate(
        selected_chars_rows
    ):
        if slot >= chosen_count:
            # No character for this row: clear it and hide all four parts.
            updates += [
                gr.update(value="", visible=False),
                gr.update(value="", visible=False),
                gr.update(visible=False),
                gr.update(visible=False),
            ]
            continue

        entry = selected_characters[slot]
        updates += [
            gr.update(value=entry["名称"], visible=True),
            gr.update(value=entry["情绪"], visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
        ]
    return updates
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
def remove_character(index, selected_characters):
    """Drop the selected character at ``index`` if that position exists.

    The list is modified in place. An out-of-range index leaves it untouched.

    Returns:
        ``(selected_characters, update)`` where ``update`` sets
        ``visible=True``.
    """
    index_is_valid = 0 <= index < len(selected_characters)
    if index_is_valid:
        selected_characters.pop(index)
    visibility = gr.update(visible=True)
    return selected_characters, visibility
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
def update_gallery(kind, query, all_characters, lang="zh"):
    """Recompute the gallery contents for a category and search query.

    Args:
        kind: Category to show.
        query: Search text typed by the user (may be empty).
        all_characters: DataFrame holding the character table.
        lang: UI language code forwarded to ``get_characters``. The original
            body read the module-level name ``lang``, which in the visible
            code is the language Radio component rather than a string, so
            the "zh" pinyin fallback could never trigger. Event bindings
            should pass the language value as a fourth input; the default
            matches ``get_characters``' own default.

    Returns:
        ``(characters, gallery_items, all_characters)`` where
        ``gallery_items`` is a list of ``[avatar, name]`` pairs.
    """
    updated_characters = get_characters(
        kind=kind, query=query, lang=lang, all_characters=all_characters
    )
    gallery_items = [[char["头像"], char["名称"]] for char in updated_characters]
    return (
        updated_characters,
        gallery_items,
        all_characters,
    )
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
def on_select(evt: gr.SelectData, characters, selected_characters, all_characters):
    """Handle a click on a gallery item.

    Args:
        evt: Selection event; ``evt.index`` is used as the position of the
            clicked item in ``characters``.
        characters: Record dicts currently shown in the gallery.
        selected_characters: Current selection list, passed through unchanged.
        all_characters: DataFrame holding the character table.

    Returns:
        A 4-tuple: the character name, an emotion Dropdown pre-set to the
        default emotion, a copy of the record carrying that emotion and its
        voice_id, and ``selected_characters``.
    """
    selected = characters[evt.index]
    emotions = get_character_emotions(selected, all_characters)

    # Prefer the neutral emotion when one exists, otherwise take the first.
    neutral_labels = ("正常", "보통", "normal")
    default_record = next(
        (record for record in emotions if record["情绪"] in neutral_labels),
        emotions[0] if emotions else None,
    )

    if default_record is None:
        default_emotion = ""
        default_voice_id = ""
    else:
        default_emotion = default_record["情绪"]
        # .get: a placeholder record may not carry a voice_id key.
        default_voice_id = default_record.get("voice_id", "")

    character_dict = selected.copy()
    character_dict["情绪"] = default_emotion
    character_dict["voice_id"] = default_voice_id

    return (
        selected["名称"],
        gr.Dropdown(
            choices=[record["情绪"] for record in emotions], value=default_emotion
        ),
        character_dict,
        selected_characters,
    )
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
async def update_prompt_audio(current_character):
    """Fetch the reference audio for the highlighted character.

    Returns ``None`` when no character is highlighted; otherwise the result
    of ``get_audio`` for the character's "voice_id" entry.
    """
    if not current_character:
        return None
    voice_id = current_character.get("voice_id")
    return await get_audio(voice_id)
|
| 336 |
-
|
| 337 |
-
async def create_voice(avatar, name, emotion, tags, gender, audio_data, lang):
    """Validate the "Create Voice" form and submit it.

    Args:
        avatar: Uploaded avatar image, or None.
        name, emotion, tags: Text fields of the form.
        gender: Value of the gender dropdown (None when nothing is chosen).
        audio_data: Uploaded prompt audio, indexed as
            ``(sample_rate, samples)``, or None.
        lang: UI language code; codes other than zh/en/ja/ko get the English
            message.

    Returns:
        Six values for (avatar, name, emotion, tags, gender, audio_data):
        component updates when a field is missing, otherwise the inputs
        unchanged.
    """
    field_order = ["avatar", "name", "emotion", "tags", "gender", "audio_data"]
    values = dict(zip(field_order, (avatar, name, emotion, tags, gender, audio_data)))

    def localized(messages):
        # Unsupported language codes fall back to English instead of showing
        # no message at all.
        return messages.get(lang, messages["en"])

    updates = {}
    for field, value in values.items():
        if field in ("avatar", "audio_data"):
            if value is None or (isinstance(value, np.ndarray) and value.size == 0):
                updates[field] = gr.update(value=None)
        elif value is None:
            # e.g. a dropdown with nothing chosen; previously this slipped
            # through because only "" was treated as missing.
            updates[field] = gr.update(value=None)
        elif value == "":
            updates[field] = gr.update(value="")

    if updates:
        gr.Warning(
            localized(
                {
                    "zh": "请填写完整信息",
                    "en": "Please fill in all the information",
                    "ja": "すべての情報を入力してください",
                    "ko": "모든 정보를 입력하세요",
                }
            )
        )
        return tuple(updates.get(field, gr.update()) for field in field_order)

    # Duration in seconds = number of samples / sample rate.
    duration = len(audio_data[1]) / audio_data[0]
    if duration < 3.2 or duration > 8:
        gr.Warning(
            localized(
                {
                    "zh": "音频时长请控制在3.2-8秒之间",
                    "en": "The audio duration should be between 3.2 and 8 seconds",
                    "ja": "音声の長さは3.2秒から8秒の間にしてください",
                    "ko": "음성 길이는 3.2초에서 8초 사이로 설정해야 합니다",
                }
            )
        )
        return avatar, name, emotion, tags, gender, audio_data

    await generate_voice(avatar, name, emotion, tags, gender, audio_data, lang)
    gr.Info(
        localized(
            {
                "zh": "创建成功,您创建的语音将在审核后上线",
                "en": "Creation successful. The voice you created will be available after review.",
                "ja": "作成が完了しました。作成された音声は審査後に公開されます。",
                "ko": "생성 완료. 귀하가 생성한 음성은 검토 후 공개될 예정입니다.",
            }
        )
    )
    return avatar, name, emotion, tags, gender, audio_data
|
| 377 |
-
|
| 378 |
-
head = """
|
| 379 |
-
<title>Free Online Text to Speech (TTS) | Convert Text to Audio</title>
|
| 380 |
-
<meta name="description" content="Text to Speech(TTS) for free! 5-second voice cloning, no sign-up required.">
|
| 381 |
-
<meta name="keywords" content="text to speech, TTS, free TTS, online TTS, speech synthesis, voice generator">
|
| 382 |
-
"""
|
| 383 |
-
with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as demo:
|
| 384 |
-
gr.Markdown(
|
| 385 |
-
"Online Free TTS(Text-to-Speech). Ultra-low latency, 5-second voice cloning."
|
| 386 |
-
)
|
| 387 |
-
lang = gr.Radio(
|
| 388 |
-
choices=[("中文", "zh"), ("English", "en"), ("日本語", "ja"), ("한국인", "ko")],
|
| 389 |
-
label=gettext("Language"),
|
| 390 |
-
value="en",
|
| 391 |
-
scale=1,
|
| 392 |
-
)
|
| 393 |
-
all_characters_state = gr.State(load_characters_csv("en")[0])
|
| 394 |
-
category = gr.State(load_characters_csv("en")[1])
|
| 395 |
-
|
| 396 |
-
with Translate(trans_file, lang, placeholder_langs=["en", "zh", "ja", "ko"]):
|
| 397 |
-
gr.Markdown(value=gettext(header))
|
| 398 |
-
with gr.Group():
|
| 399 |
-
initial_characters = get_characters(
|
| 400 |
-
kind="原神", lang="zh", all_characters=all_characters_state.value
|
| 401 |
-
)
|
| 402 |
-
characters = gr.State(initial_characters)
|
| 403 |
-
selected_characters = gr.State([])
|
| 404 |
-
current_character = gr.State(None)
|
| 405 |
-
|
| 406 |
-
with gr.Tab(gettext("Synthesis Voice")):
|
| 407 |
-
with gr.Blocks():
|
| 408 |
-
with gr.Row():
|
| 409 |
-
kind = gr.Dropdown(
|
| 410 |
-
choices=category.value,
|
| 411 |
-
value=
|
| 412 |
-
label=gettext("Select character category"),
|
| 413 |
-
)
|
| 414 |
-
query = gr.Textbox(
|
| 415 |
-
label=gettext("Search character"),
|
| 416 |
-
value="",
|
| 417 |
-
lines=1,
|
| 418 |
-
max_lines=1,
|
| 419 |
-
interactive=True,
|
| 420 |
-
)
|
| 421 |
-
with gr.Blocks():
|
| 422 |
-
gallery = gr.Gallery(
|
| 423 |
-
value=[
|
| 424 |
-
[char["头像"], char["名称"]] for char in characters.value
|
| 425 |
-
],
|
| 426 |
-
show_label=False,
|
| 427 |
-
elem_id="character_gallery",
|
| 428 |
-
columns=[11],
|
| 429 |
-
object_fit="contain",
|
| 430 |
-
height="auto",
|
| 431 |
-
interactive=False,
|
| 432 |
-
allow_preview=False,
|
| 433 |
-
selected_index=None,
|
| 434 |
-
)
|
| 435 |
-
with gr.Row():
|
| 436 |
-
character_name = gr.Textbox(
|
| 437 |
-
label=gettext("Currently selected character"),
|
| 438 |
-
interactive=False,
|
| 439 |
-
max_lines=1,
|
| 440 |
-
)
|
| 441 |
-
info_type = gr.Dropdown(choices=[], label=gettext("Select emotion"))
|
| 442 |
-
with gr.Row():
|
| 443 |
-
add_voice_button = gr.Button(
|
| 444 |
-
gettext("Add new voice"), variant="primary"
|
| 445 |
-
)
|
| 446 |
-
|
| 447 |
-
selected_chars_container = gr.Column(
|
| 448 |
-
elem_id="selected_chars_container", visible=False
|
| 449 |
-
)
|
| 450 |
-
|
| 451 |
-
with selected_chars_container:
|
| 452 |
-
gr.Markdown(gettext("### Selected characters"))
|
| 453 |
-
selected_chars_rows = []
|
| 454 |
-
for i in range(5): # 假设最多选择5个角色
|
| 455 |
-
with gr.Row() as row:
|
| 456 |
-
name = gr.Textbox(
|
| 457 |
-
label=gettext("Name"), interactive=False, max_lines=1
|
| 458 |
-
)
|
| 459 |
-
emotion = gr.Textbox(
|
| 460 |
-
label=gettext("Emotion"), interactive=False, max_lines=1
|
| 461 |
-
)
|
| 462 |
-
delete_btn = gr.Button(gettext("Delete"), scale=0)
|
| 463 |
-
selected_chars_rows.append((name, emotion, delete_btn, row))
|
| 464 |
-
|
| 465 |
-
with gr.Row():
|
| 466 |
-
with gr.Column():
|
| 467 |
-
text = gr.Textbox(
|
| 468 |
-
label=gettext("Text to synthesize"),
|
| 469 |
-
value="",
|
| 470 |
-
lines=10,
|
| 471 |
-
max_lines=10,
|
| 472 |
-
)
|
| 473 |
-
inference_button = gr.Button(
|
| 474 |
-
gettext("🎉 Synthesize Voice 🎉"), variant="primary", size="lg"
|
| 475 |
-
)
|
| 476 |
-
with gr.Column():
|
| 477 |
-
prompt_audio = gr.Audio(
|
| 478 |
-
label=gettext("Reference audio for synthesis"),
|
| 479 |
-
interactive=False,
|
| 480 |
-
type="numpy",
|
| 481 |
-
)
|
| 482 |
-
output = gr.Audio(
|
| 483 |
-
label=gettext("Output audio"), interactive=False, type="numpy"
|
| 484 |
-
)
|
| 485 |
-
cost_time = gr.Textbox(
|
| 486 |
-
label=gettext("Synthesis time"),
|
| 487 |
-
interactive=False,
|
| 488 |
-
show_label=False,
|
| 489 |
-
max_lines=1,
|
| 490 |
-
)
|
| 491 |
-
try:
|
| 492 |
-
inference_button.click(
|
| 493 |
-
fn=generate,
|
| 494 |
-
inputs=[current_character, selected_characters, text, lang],
|
| 495 |
-
outputs=[output, cost_time],
|
| 496 |
-
)
|
| 497 |
-
except gr.Error as e:
|
| 498 |
-
gr.Error(e)
|
| 499 |
-
except Exception as e:
|
| 500 |
-
pass
|
| 501 |
-
|
| 502 |
-
with gr.Tab(gettext("Create Voice")):
|
| 503 |
-
with gr.Row():
|
| 504 |
-
avatar = gr.Image(label=gettext("Avatar"), interactive=True, type="pil", image_mode="RGBA")
|
| 505 |
-
with gr.Column():
|
| 506 |
-
with gr.Row():
|
| 507 |
-
name = gr.Textbox(
|
| 508 |
-
label=gettext("Name"), interactive=True, max_lines=1
|
| 509 |
-
)
|
| 510 |
-
emotion = gr.Textbox(
|
| 511 |
-
label=gettext("Emotion\n(Happy, Sad, Angry)"), interactive=True, max_lines=1
|
| 512 |
-
)
|
| 513 |
-
tags = gr.Textbox(
|
| 514 |
-
label=gettext("Tags\n(Genshin, Cute, Girl, Boy, etc.)"), interactive=True, max_lines=1
|
| 515 |
-
)
|
| 516 |
-
gender = gr.Dropdown(
|
| 517 |
-
label=gettext("Gender"),
|
| 518 |
-
choices=[
|
| 519 |
-
(gettext("Male"), "male"),
|
| 520 |
-
(gettext("Female"), "female"),
|
| 521 |
-
(gettext("Non-Binary"), "non-binary"),
|
| 522 |
-
],
|
| 523 |
-
interactive=True,
|
| 524 |
-
)
|
| 525 |
-
audio_data = gr.Audio(label=gettext("Prompt Audio(min 3.2s, max 8s)"), interactive=True)
|
| 526 |
-
create_button = gr.Button(gettext("Create Voice"), variant="primary")
|
| 527 |
-
|
| 528 |
-
gr.Markdown(gettext(terms))
|
| 529 |
-
# -------------- 绑定事件 --------------
|
| 530 |
-
|
| 531 |
-
lang.change(
|
| 532 |
-
fn=update_all_characters,
|
| 533 |
-
inputs=[lang, category],
|
| 534 |
-
outputs=[all_characters_state, characters, gallery, category, kind],
|
| 535 |
-
)
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
)
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
)
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
)
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from pypinyin import lazy_pinyin
|
| 9 |
+
from i18n import gettext, Translate
|
| 10 |
+
|
| 11 |
+
from api import generate_api, get_audio, generate_voice, load_characters_csv
|
| 12 |
+
from utils import get_length
|
| 13 |
+
|
| 14 |
+
# 翻译文件位置
|
| 15 |
+
trans_file = os.path.join(os.path.dirname(__file__), "i18n", "translations.json")
|
| 16 |
+
|
| 17 |
+
# 关闭aiohttp的DEBUG日志
|
| 18 |
+
logging.getLogger("aiohttp").setLevel(logging.WARNING)
|
| 19 |
+
# logging.getLogger("gradio").setLevel(logging.WARNING)
|
| 20 |
+
|
| 21 |
+
# 带有时间的log
|
| 22 |
+
logging.basicConfig(
|
| 23 |
+
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
header = """header"""
|
| 28 |
+
|
| 29 |
+
terms = "terms"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def update_all_characters(lang, category):
    """Reload the character table after the UI language changes.

    The incoming ``category`` value is not read; it is replaced by the
    category list that ``load_characters_csv`` returns for ``lang``.

    Returns:
        A 5-tuple: the character table, the characters of the first
        category, a freshly built gallery showing them, the category list,
        and a dropdown update that selects the first category.
    """
    character_table, categories = load_characters_csv(lang)
    default_kind = categories[0]
    visible_characters = get_characters(
        kind=default_kind, all_characters=character_table
    )

    gallery_items = [[entry["头像"], entry["名称"]] for entry in visible_characters]
    # A new Gallery component (rather than a bare value list) is returned so
    # that selected_index is reset along with the contents.
    refreshed_gallery = gr.Gallery(
        value=gallery_items,
        show_label=False,
        elem_id="character_gallery",
        columns=[11],
        object_fit="contain",
        height="auto",
        interactive=False,
        allow_preview=False,
        selected_index=None,
    )
    kind_update = gr.update(choices=categories, value=default_kind)

    return (
        character_table,
        visible_characters,
        refreshed_gallery,
        categories,
        kind_update,
    )
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_characters(
    query=None, page=1, per_page=400, kind="原神", lang="zh", all_characters=None
):
    """Return one page of unique characters for a category, optionally filtered.

    Args:
        query: Optional search text, matched case-insensitively as a literal
            substring of the "名称" column.
        page: 1-based page number.
        per_page: Number of records per page.
        kind: Value of the "类别" column to keep.
        lang: UI language code; only "zh" enables the pinyin fallback search.
        all_characters: DataFrame holding the character table. ``None``
            yields an empty list.

    Returns:
        A list of record dicts, one per distinct name (first row of each
        name group), ordered by the "id" column.
    """
    import pickle

    if all_characters is None:
        return []

    in_category = all_characters[all_characters["类别"] == kind]
    filtered_characters = in_category

    if query:
        # The query comes straight from a search box, so it is matched as a
        # literal substring: characters such as "(" or "*" must not be parsed
        # as a regular expression. na=False keeps rows with a missing name
        # from producing a non-boolean mask.
        name_hits = in_category["名称"].str.contains(
            query, case=False, regex=False, na=False
        )
        filtered_characters = in_category[name_hits]
        if filtered_characters.empty and lang == "zh":
            # Nothing matched the written name: retry against its pinyin.
            pinyin_names = in_category["名称"].apply(
                lambda x: "".join(lazy_pinyin(x))
            )
            pinyin_hits = pinyin_names.str.contains(
                query, case=False, regex=False, na=False
            )
            filtered_characters = in_category[pinyin_hits]

    # Group by name and keep the first record of each group.
    unique_characters = (
        filtered_characters.groupby("名称").first().reset_index().sort_values(by="id")
    )

    def process_avatar(avatar):
        """Unpickle a non-string avatar; return it unchanged on failure."""
        if isinstance(avatar, str):
            return avatar
        try:
            # NOTE(review): pickle.loads can execute arbitrary code. This is
            # only acceptable if the avatar column comes from a trusted
            # source - confirm against load_characters_csv.
            return pickle.loads(bytes(avatar))
        except Exception:
            return avatar

    unique_characters["头像"] = unique_characters["头像"].apply(process_avatar)

    # Apply pagination.
    start_index = (page - 1) * per_page
    end_index = start_index + per_page

    return unique_characters.iloc[start_index:end_index].to_dict("records")
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
async def generate(selected_character=None, selected_characters=None, text="", lang="zh"):
    """Synthesize ``text`` with the voices of the chosen characters.

    Args:
        selected_character: Character dict currently highlighted in the
            gallery, or a falsy value when nothing is highlighted.
        selected_characters: Character dicts already added to the selection;
            ``None`` is treated as an empty list.
        text: Text to synthesize.
        lang: UI language code used to pick user-facing messages. Codes other
            than zh/en/ja/ko fall back to the English message.

    Returns:
        ``(audio, cost_time)``: the value returned by ``generate_api`` and a
        localized elapsed-time string.

    Raises:
        gr.Error: nothing is selected, no selected character has a voice_id,
            a placeholder voice_id is selected, the text is empty or too
            long, or ``generate_api`` returned an error string.
    """

    def localized(messages):
        # Unsupported language codes fall back to English instead of
        # silently skipping the message.
        return messages.get(lang, messages["en"])

    selected_characters = list(selected_characters or [])
    if selected_character:
        characters = [selected_character] + selected_characters
    else:
        characters = selected_characters

    if not characters:
        raise gr.Error(
            localized(
                {
                    "zh": "请先选择一个角色",
                    "en": "Please select a character first",
                    "ja": "まず、キャラクターを選択してください",
                    "ko": "먼저 캐릭터를 선택하세요",
                }
            )
        )

    voice_ids = [char.get("voice_id") for char in characters if char.get("voice_id")]
    if not voice_ids:
        raise gr.Error("所选角色没有关联的 voice_id")

    # The original compared the whole list with "1", which is never true.
    # NOTE(review): "1" is presumably the placeholder id of a character whose
    # voice has not been created yet - confirm against the character CSV.
    if any(str(voice_id) == "1" for voice_id in voice_ids):
        raise gr.Error(
            localized(
                {
                    "zh": "该角色暂未创建语音",
                    "en": "The character has not been created yet",
                    "ja": "そのキャラクターの音声はまだ作成されていません",
                    "ko": "해당 캐릭터의 음성이 아직 생성되지 않았습니다",
                }
            )
        )

    if not text:
        raise gr.Error(
            localized(
                {
                    "zh": "请输入需要合成的文本",
                    "en": "Please enter the text to be synthesized",
                    "ja": "合成するテキストを入力してください",
                    "ko": "합성할 텍스트를 입력하세요",
                }
            )
        )

    if get_length(text) > 1024:
        raise gr.Error(
            localized(
                {
                    "zh": "长度请控制在1024个字符以内",
                    "en": "The text length exceeds 1024 words",
                    "ja": "テキストの長さが1024文字を超えています",
                    "ko": "텍스트 길이가 1024자를 초과합니다",
                }
            )
        )

    start_time = time.time()
    audio = await generate_api(voice_ids, text)
    elapsed = time.time() - start_time

    # generate_api reports failures by returning a message string.
    if isinstance(audio, str):
        logging.error(audio)
        raise gr.Error(audio)

    cost_time = localized(
        {
            "zh": f"合成共花费{elapsed:.2f}秒",
            "en": f"Total time spent synthesizing: {elapsed:.2f} seconds",
            "ja": f"合成にかかった時間: {elapsed:.2f}秒",
            "ko": f"합성에 소요된 시간: {elapsed:.2f}초",
        }
    )
    return audio, cost_time
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def get_character_emotions(character, all_characters):
    """List the distinct emotion records available for one character.

    Args:
        character: Mapping with at least the "名称" key.
        all_characters: DataFrame with "名称" and "情绪" columns.

    Returns:
        A list of full record dicts, one per distinct "情绪" value among the
        rows whose name matches. When no row matches, a single placeholder
        record is returned; it carries an empty "voice_id" so that callers
        which index that key do not raise KeyError.
    """
    name = character["名称"]

    # Keep every row that belongs to this character.
    same_name = all_characters[all_characters["名称"] == name]

    # One record per emotion, keeping the first row of each.
    emotion_records = same_name.drop_duplicates(subset=["情绪"]).to_dict("records")
    if emotion_records:
        return emotion_records

    return [{"名称": name, "情绪": "默认情绪", "voice_id": ""}]
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def update_character_info(character_name, emotion, current_character, all_characters):
    """Look up the record for a (name, emotion) pair.

    Args:
        character_name: Name shown in the "currently selected" textbox.
        emotion: Emotion chosen in the dropdown.
        current_character: The record currently held in state; returned
            unchanged when no new record can be resolved.
        all_characters: DataFrame with "名称" and "情绪" columns.

    Returns:
        ``None`` when no character name is set (as before). Otherwise a
        ``(record, all_characters)`` tuple, where ``record`` is the first
        matching row as a dict, or ``current_character`` when the emotion is
        empty or no row matches.
    """
    if not character_name:
        # Kept from the original contract: a cleared name yields a bare None.
        return None

    if not emotion:
        # Previously this path crashed by calling .iloc on None.
        return current_character, all_characters

    matches = all_characters[
        (all_characters["名称"] == character_name)
        & (all_characters["情绪"] == emotion)
    ]
    if matches.empty:
        # Previously an IndexError: e.g. the name changed before the emotion
        # dropdown caught up with it.
        return current_character, all_characters

    return matches.iloc[0].to_dict(), all_characters
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def add_new_voice(current_character, selected_characters, kind, lang, all_characters):
    """Add the highlighted character to the selection and reset the picker.

    Args:
        current_character: Record dict of the highlighted character, or a
            falsy value when nothing is highlighted.
        selected_characters: List of already selected record dicts; it is
            modified in place.
        kind: Category currently shown in the gallery.
        lang: UI language code; codes other than zh/en/ja/ko get the English
            message.
        all_characters: DataFrame holding the character table.

    Returns:
        An 8-tuple: cleared current character (None), cleared name textbox
        update, cleared emotion dropdown update, the selection list, the
        characters of ``kind``, a fresh gallery, a visibility update, and
        ``all_characters``.

    Raises:
        gr.Error: nothing is highlighted, or a new character would exceed
            the limit of 5 selected characters.
    """
    if not current_character:
        messages = {
            "zh": "请先选择一个角色",
            "en": "Please select a character first",
            "ja": "まず、キャラクターを選択してください",
            "ko": "먼저 캐릭터를 선택하세요",
        }
        # Unsupported language codes fall back to English rather than
        # falling through and failing on current_character["名称"] below.
        raise gr.Error(messages.get(lang, messages["en"]))

    # Check whether the same character is already selected.
    existing_char = next(
        (
            char
            for char in selected_characters
            if char["名称"] == current_character["名称"]
        ),
        None,
    )
    if existing_char is not None:
        # Same character with a different emotion: update the entry in place.
        if existing_char["情绪"] != current_character["情绪"]:
            existing_char["情绪"] = current_character["情绪"]
            # The voice_id must follow the emotion, otherwise synthesis keeps
            # using the voice of the previous emotion.
            if "voice_id" in current_character:
                existing_char["voice_id"] = current_character["voice_id"]
    else:
        # The limit only applies when the list would actually grow.
        if len(selected_characters) >= 5:
            raise gr.Error("已达到最大选择数(5个)")
        selected_characters.insert(0, current_character)

    updated_characters = get_characters(
        kind=kind, lang=lang, all_characters=all_characters
    )
    # ! Returning a brand-new Gallery is required to clear the selection;
    # returning a plain value list keeps the previously selected item.
    updated_gallery = gr.Gallery(
        value=[[char["头像"], char["名称"]] for char in updated_characters],
        show_label=False,
        elem_id="character_gallery",
        columns=[11],
        object_fit="contain",
        height="auto",
        interactive=False,
        allow_preview=False,
        selected_index=None,
    )

    return (
        None,
        gr.update(value=""),
        gr.update(choices=[]),
        selected_characters,
        updated_characters,
        updated_gallery,
        gr.update(visible=True),
        all_characters,
    )
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def update_selected_chars_display(selected_characters):
    """Build the component updates for the "selected characters" rows.

    For every entry of the module-level ``selected_chars_rows`` list four
    updates are produced, in order. Rows backed by a selected character are
    shown with its name and emotion; the remaining rows are blanked and
    hidden.

    Returns:
        A flat list holding four updates per row.
    """
    chosen_count = len(selected_characters)
    updates = []
    for slot, (_name_box, _emotion_box, _delete_btn, _row) in enumerate(
        selected_chars_rows
    ):
        if slot >= chosen_count:
            # No character for this row: clear it and hide all four parts.
            updates += [
                gr.update(value="", visible=False),
                gr.update(value="", visible=False),
                gr.update(visible=False),
                gr.update(visible=False),
            ]
            continue

        entry = selected_characters[slot]
        updates += [
            gr.update(value=entry["名称"], visible=True),
            gr.update(value=entry["情绪"], visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
        ]
    return updates
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def remove_character(index, selected_characters):
    """Drop the selected character at ``index`` if that position exists.

    The list is modified in place. An out-of-range index leaves it untouched.

    Returns:
        ``(selected_characters, update)`` where ``update`` sets
        ``visible=True``.
    """
    index_is_valid = 0 <= index < len(selected_characters)
    if index_is_valid:
        selected_characters.pop(index)
    visibility = gr.update(visible=True)
    return selected_characters, visibility
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def update_gallery(kind, query, all_characters, lang="zh"):
    """Recompute the gallery contents for a category and search query.

    Args:
        kind: Category to show.
        query: Search text typed by the user (may be empty).
        all_characters: DataFrame holding the character table.
        lang: UI language code forwarded to ``get_characters``. The original
            body read the module-level name ``lang``, which in the visible
            code is the language Radio component rather than a string, so
            the "zh" pinyin fallback could never trigger. Event bindings
            should pass the language value as a fourth input; the default
            matches ``get_characters``' own default.

    Returns:
        ``(characters, gallery_items, all_characters)`` where
        ``gallery_items`` is a list of ``[avatar, name]`` pairs.
    """
    updated_characters = get_characters(
        kind=kind, query=query, lang=lang, all_characters=all_characters
    )
    gallery_items = [[char["头像"], char["名称"]] for char in updated_characters]
    return (
        updated_characters,
        gallery_items,
        all_characters,
    )
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def on_select(evt: gr.SelectData, characters, selected_characters, all_characters):
    """Handle the selection of a character in the gallery.

    Looks up the emotions of the clicked character and pre-selects the
    neutral one (``"正常"``, ``"보통"`` or ``"normal"``); when none of those is
    present the first emotion is used.

    Args:
        evt: Selection event; ``evt.index`` indexes into ``characters``.
        characters: Characters currently shown in the gallery.
        selected_characters: Characters already added to the selection.
        all_characters: Full character data passed to
            ``get_character_emotions``.

    Returns:
        A 4-tuple: the character name, a ``gr.Dropdown`` listing the
        available emotions with the default chosen, a copy of the character
        extended with ``"情绪"`` and ``"voice_id"``, and the selection list.
    """
    # If no character has been selected yet, hand back a fresh empty list
    # when switching characters.
    if len(selected_characters) == 0:
        selected_characters = []

    picked = characters[evt.index]
    emotions = get_character_emotions(picked, all_characters)

    # Position of the first neutral emotion; defaults to the first entry.
    neutral_labels = ("正常", "보통", "normal")
    neutral_pos = next(
        (
            pos
            for pos, entry in enumerate(emotions)
            if entry["情绪"] in neutral_labels
        ),
        0,
    )

    if emotions:
        default_emotion = emotions[neutral_pos]["情绪"]
        default_voice_id = emotions[neutral_pos]["voice_id"]
    else:
        default_emotion = ""
        default_voice_id = ""

    character_dict = picked.copy()
    character_dict["情绪"] = default_emotion
    character_dict["voice_id"] = default_voice_id

    emotion_choices = [entry["情绪"] for entry in emotions]
    return (
        picked["名称"],
        gr.Dropdown(choices=emotion_choices, value=default_emotion),
        character_dict,
        selected_characters,
    )
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
async def update_prompt_audio(current_character):
    """Fetch the reference audio for the current character.

    Args:
        current_character: Character dict carrying a ``"voice_id"`` entry,
            or a falsy value when nothing is selected.

    Returns:
        Whatever ``get_audio`` yields for the character's voice id, or
        ``None`` when ``current_character`` is falsy.
    """
    if not current_character:
        return None
    voice_id = current_character.get("voice_id")
    return await get_audio(voice_id)
|
| 336 |
+
|
| 337 |
+
# User-facing notices shown by create_voice, keyed by notice kind and then by
# UI language code.
_CREATE_VOICE_MESSAGES = {
    "incomplete": {
        "zh": "请填写完整信息",
        "en": "Please fill in all the information",
        "ja": "すべての情報を入力してください",
        "ko": "모든 정보를 입력하세요",
    },
    "duration": {
        "zh": "音频时长请控制在3.2-8秒之间",
        "en": "The audio duration should be between 3.2 and 8 seconds",
        "ja": "音声の長さは3.2秒から8秒の間にしてください",
        "ko": "음성 길이는 3.2초에서 8초 사이로 설정해야 합니다",
    },
    "created": {
        "zh": "创建成功,您创建的语音将在审核后上线",
        "en": "Creation successful. The voice you created will be available after review.",
        "ja": "作成が完了しました。作成された音声は審査後に公開されます。",
        "ko": "생성 완료. 귀하가 생성한 음성은 검토 후 공개될 예정입니다.",
    },
}


def _create_voice_message(notice, lang):
    """Return the text of ``notice`` in ``lang``, falling back to English."""
    texts = _CREATE_VOICE_MESSAGES[notice]
    return texts.get(lang, texts["en"])


async def create_voice(avatar, name, emotion, tags, gender, audio_data, lang):
    """Validate the "create voice" form and submit it.

    Args:
        avatar: Avatar image, or ``None`` when none was provided.
        name: Voice name.
        emotion: Emotion label.
        tags: Tags text.
        gender: Selected gender value.
        audio_data: Prompt audio; indexed as ``audio_data[0]`` (rate) and
            ``audio_data[1]`` (samples) to compute its duration.
        lang: UI language code used to pick the notice text. Codes without
            a translation fall back to English.

    Returns:
        Six values, one per form field in the order avatar, name, emotion,
        tags, gender, audio_data. When a field is missing, the missing
        fields get a resetting ``gr.update`` and the others a no-op
        ``gr.update()``. Otherwise the inputs are returned unchanged, both
        when the audio duration is rejected and after a successful
        submission.
    """
    field_names = ("avatar", "name", "emotion", "tags", "gender", "audio_data")
    field_values = (avatar, name, emotion, tags, gender, audio_data)

    updates = {}
    for field, value in zip(field_names, field_values):
        if field in ("avatar", "audio_data"):
            if value is None or (isinstance(value, np.ndarray) and value.size == 0):
                updates[field] = gr.update(value=None)
        elif value is None:
            # e.g. a dropdown with nothing selected counts as missing too.
            updates[field] = gr.update(value=None)
        elif value == "":
            updates[field] = gr.update(value="")

    if updates:
        gr.Warning(_create_voice_message("incomplete", lang))
        return tuple(updates.get(field, gr.update()) for field in field_names)

    duration = len(audio_data[1]) / audio_data[0]
    if duration < 3.2 or duration > 8:
        gr.Warning(_create_voice_message("duration", lang))
        return avatar, name, emotion, tags, gender, audio_data

    await generate_voice(avatar, name, emotion, tags, gender, audio_data, lang)
    gr.Info(_create_voice_message("created", lang))
    return avatar, name, emotion, tags, gender, audio_data
|
| 377 |
+
|
| 378 |
+
head = """
|
| 379 |
+
<title>Free Online Text to Speech (TTS) | Convert Text to Audio</title>
|
| 380 |
+
<meta name="description" content="Text to Speech(TTS) for free! 5-second voice cloning, no sign-up required.">
|
| 381 |
+
<meta name="keywords" content="text to speech, TTS, free TTS, online TTS, speech synthesis, voice generator">
|
| 382 |
+
"""
|
| 383 |
+
with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as demo:
|
| 384 |
+
gr.Markdown(
|
| 385 |
+
"Online Free TTS(Text-to-Speech). Ultra-low latency, 5-second voice cloning."
|
| 386 |
+
)
|
| 387 |
+
lang = gr.Radio(
|
| 388 |
+
choices=[("中文", "zh"), ("English", "en"), ("日本語", "ja"), ("한국인", "ko")],
|
| 389 |
+
label=gettext("Language"),
|
| 390 |
+
value="en",
|
| 391 |
+
scale=1,
|
| 392 |
+
)
|
| 393 |
+
all_characters_state = gr.State(load_characters_csv("en")[0])
|
| 394 |
+
category = gr.State(load_characters_csv("en")[1])
|
| 395 |
+
|
| 396 |
+
with Translate(trans_file, lang, placeholder_langs=["en", "zh", "ja", "ko"]):
|
| 397 |
+
gr.Markdown(value=gettext(header))
|
| 398 |
+
with gr.Group():
|
| 399 |
+
initial_characters = get_characters(
|
| 400 |
+
kind="原神", lang="zh", all_characters=all_characters_state.value
|
| 401 |
+
)
|
| 402 |
+
characters = gr.State(initial_characters)
|
| 403 |
+
selected_characters = gr.State([])
|
| 404 |
+
current_character = gr.State(None)
|
| 405 |
+
|
| 406 |
+
with gr.Tab(gettext("Synthesis Voice")):
|
| 407 |
+
with gr.Blocks():
|
| 408 |
+
with gr.Row():
|
| 409 |
+
kind = gr.Dropdown(
|
| 410 |
+
choices=category.value,
|
| 411 |
+
value=category.value[0],
|
| 412 |
+
label=gettext("Select character category"),
|
| 413 |
+
)
|
| 414 |
+
query = gr.Textbox(
|
| 415 |
+
label=gettext("Search character"),
|
| 416 |
+
value="",
|
| 417 |
+
lines=1,
|
| 418 |
+
max_lines=1,
|
| 419 |
+
interactive=True,
|
| 420 |
+
)
|
| 421 |
+
with gr.Blocks():
|
| 422 |
+
gallery = gr.Gallery(
|
| 423 |
+
value=[
|
| 424 |
+
[char["头像"], char["名称"]] for char in characters.value
|
| 425 |
+
],
|
| 426 |
+
show_label=False,
|
| 427 |
+
elem_id="character_gallery",
|
| 428 |
+
columns=[11],
|
| 429 |
+
object_fit="contain",
|
| 430 |
+
height="auto",
|
| 431 |
+
interactive=False,
|
| 432 |
+
allow_preview=False,
|
| 433 |
+
selected_index=None,
|
| 434 |
+
)
|
| 435 |
+
with gr.Row():
|
| 436 |
+
character_name = gr.Textbox(
|
| 437 |
+
label=gettext("Currently selected character"),
|
| 438 |
+
interactive=False,
|
| 439 |
+
max_lines=1,
|
| 440 |
+
)
|
| 441 |
+
info_type = gr.Dropdown(choices=[], label=gettext("Select emotion"))
|
| 442 |
+
with gr.Row():
|
| 443 |
+
add_voice_button = gr.Button(
|
| 444 |
+
gettext("Add new voice"), variant="primary"
|
| 445 |
+
)
|
| 446 |
+
|
| 447 |
+
selected_chars_container = gr.Column(
|
| 448 |
+
elem_id="selected_chars_container", visible=False
|
| 449 |
+
)
|
| 450 |
+
|
| 451 |
+
with selected_chars_container:
|
| 452 |
+
gr.Markdown(gettext("### Selected characters"))
|
| 453 |
+
selected_chars_rows = []
|
| 454 |
+
for i in range(5): # 假设最多选择5个角色
|
| 455 |
+
with gr.Row() as row:
|
| 456 |
+
name = gr.Textbox(
|
| 457 |
+
label=gettext("Name"), interactive=False, max_lines=1
|
| 458 |
+
)
|
| 459 |
+
emotion = gr.Textbox(
|
| 460 |
+
label=gettext("Emotion"), interactive=False, max_lines=1
|
| 461 |
+
)
|
| 462 |
+
delete_btn = gr.Button(gettext("Delete"), scale=0)
|
| 463 |
+
selected_chars_rows.append((name, emotion, delete_btn, row))
|
| 464 |
+
|
| 465 |
+
with gr.Row():
|
| 466 |
+
with gr.Column():
|
| 467 |
+
text = gr.Textbox(
|
| 468 |
+
label=gettext("Text to synthesize"),
|
| 469 |
+
value="",
|
| 470 |
+
lines=10,
|
| 471 |
+
max_lines=10,
|
| 472 |
+
)
|
| 473 |
+
inference_button = gr.Button(
|
| 474 |
+
gettext("🎉 Synthesize Voice 🎉"), variant="primary", size="lg"
|
| 475 |
+
)
|
| 476 |
+
with gr.Column():
|
| 477 |
+
prompt_audio = gr.Audio(
|
| 478 |
+
label=gettext("Reference audio for synthesis"),
|
| 479 |
+
interactive=False,
|
| 480 |
+
type="numpy",
|
| 481 |
+
)
|
| 482 |
+
output = gr.Audio(
|
| 483 |
+
label=gettext("Output audio"), interactive=False, type="numpy"
|
| 484 |
+
)
|
| 485 |
+
cost_time = gr.Textbox(
|
| 486 |
+
label=gettext("Synthesis time"),
|
| 487 |
+
interactive=False,
|
| 488 |
+
show_label=False,
|
| 489 |
+
max_lines=1,
|
| 490 |
+
)
|
| 491 |
+
# Wire the synthesize button to `generate`. This try only wraps the
# registration call itself, not later invocations of `generate`. A failure
# here is logged with its traceback instead of being silently discarded, and
# the rest of the UI is still built.
try:
    inference_button.click(
        fn=generate,
        inputs=[current_character, selected_characters, text, lang],
        outputs=[output, cost_time],
    )
except Exception:
    logging.exception("Failed to register the synthesis click handler")
|
| 501 |
+
|
| 502 |
+
with gr.Tab(gettext("Create Voice")):
|
| 503 |
+
with gr.Row():
|
| 504 |
+
avatar = gr.Image(label=gettext("Avatar"), interactive=True, type="pil", image_mode="RGBA")
|
| 505 |
+
with gr.Column():
|
| 506 |
+
with gr.Row():
|
| 507 |
+
name = gr.Textbox(
|
| 508 |
+
label=gettext("Name"), interactive=True, max_lines=1
|
| 509 |
+
)
|
| 510 |
+
emotion = gr.Textbox(
|
| 511 |
+
label=gettext("Emotion\n(Happy, Sad, Angry)"), interactive=True, max_lines=1
|
| 512 |
+
)
|
| 513 |
+
tags = gr.Textbox(
|
| 514 |
+
label=gettext("Tags\n(Genshin, Cute, Girl, Boy, etc.)"), interactive=True, max_lines=1
|
| 515 |
+
)
|
| 516 |
+
gender = gr.Dropdown(
|
| 517 |
+
label=gettext("Gender"),
|
| 518 |
+
choices=[
|
| 519 |
+
(gettext("Male"), "male"),
|
| 520 |
+
(gettext("Female"), "female"),
|
| 521 |
+
(gettext("Non-Binary"), "non-binary"),
|
| 522 |
+
],
|
| 523 |
+
interactive=True,
|
| 524 |
+
)
|
| 525 |
+
audio_data = gr.Audio(label=gettext("Prompt Audio(min 3.2s, max 8s)"), interactive=True)
|
| 526 |
+
create_button = gr.Button(gettext("Create Voice"), variant="primary")
|
| 527 |
+
|
| 528 |
+
gr.Markdown(gettext(terms))
|
| 529 |
+
# -------------- 绑定事件 --------------
|
| 530 |
+
|
| 531 |
+
lang.change(
|
| 532 |
+
fn=update_all_characters,
|
| 533 |
+
inputs=[lang, category],
|
| 534 |
+
outputs=[all_characters_state, characters, gallery, category, kind],
|
| 535 |
+
)
|
| 536 |
+
|
| 537 |
+
demo.load(update_all_characters, inputs=[lang, category], outputs=[all_characters_state, characters, gallery, category, kind])
|
| 538 |
+
|
| 539 |
+
add_voice_button.click(
|
| 540 |
+
fn=add_new_voice,
|
| 541 |
+
inputs=[
|
| 542 |
+
current_character,
|
| 543 |
+
selected_characters,
|
| 544 |
+
kind,
|
| 545 |
+
lang,
|
| 546 |
+
all_characters_state,
|
| 547 |
+
],
|
| 548 |
+
outputs=[
|
| 549 |
+
current_character,
|
| 550 |
+
character_name,
|
| 551 |
+
info_type,
|
| 552 |
+
selected_characters,
|
| 553 |
+
characters,
|
| 554 |
+
gallery,
|
| 555 |
+
selected_chars_container,
|
| 556 |
+
all_characters_state,
|
| 557 |
+
],
|
| 558 |
+
).then(
|
| 559 |
+
fn=update_selected_chars_display,
|
| 560 |
+
inputs=[selected_characters],
|
| 561 |
+
outputs=[item for row in selected_chars_rows for item in row],
|
| 562 |
+
)
|
| 563 |
+
|
| 564 |
+
gallery.select(
|
| 565 |
+
fn=on_select,
|
| 566 |
+
inputs=[characters, selected_characters, all_characters_state],
|
| 567 |
+
outputs=[character_name, info_type, current_character, selected_characters],
|
| 568 |
+
).then(
|
| 569 |
+
fn=update_prompt_audio, inputs=[current_character], outputs=[prompt_audio]
|
| 570 |
+
)
|
| 571 |
+
|
| 572 |
+
info_type.change(
|
| 573 |
+
fn=update_character_info,
|
| 574 |
+
inputs=[character_name, info_type, current_character, all_characters_state],
|
| 575 |
+
outputs=[current_character, all_characters_state],
|
| 576 |
+
).then(
|
| 577 |
+
fn=update_prompt_audio, inputs=[current_character], outputs=[prompt_audio]
|
| 578 |
+
)
|
| 579 |
+
|
| 580 |
+
for i, (_, _, delete_btn, _) in enumerate(selected_chars_rows):
|
| 581 |
+
delete_btn.click(
|
| 582 |
+
fn=remove_character,
|
| 583 |
+
inputs=[gr.Number(value=i, visible=False), selected_characters],
|
| 584 |
+
outputs=[selected_characters, selected_chars_container],
|
| 585 |
+
).then(
|
| 586 |
+
fn=update_selected_chars_display,
|
| 587 |
+
inputs=[selected_characters],
|
| 588 |
+
outputs=[item for row in selected_chars_rows for item in row],
|
| 589 |
+
)
|
| 590 |
+
|
| 591 |
+
kind.change(
|
| 592 |
+
fn=update_gallery,
|
| 593 |
+
inputs=[kind, query, all_characters_state],
|
| 594 |
+
outputs=[characters, gallery, all_characters_state],
|
| 595 |
+
)
|
| 596 |
+
|
| 597 |
+
query.change(
|
| 598 |
+
fn=update_gallery,
|
| 599 |
+
inputs=[kind, query, all_characters_state],
|
| 600 |
+
outputs=[characters, gallery, all_characters_state],
|
| 601 |
+
)
|
| 602 |
+
|
| 603 |
+
create_button.click(
|
| 604 |
+
fn=create_voice,
|
| 605 |
+
inputs=[avatar, name, emotion, tags, gender, audio_data, lang],
|
| 606 |
+
outputs=[avatar, name, emotion, tags, gender, audio_data],
|
| 607 |
+
)
|
| 608 |
+
|
| 609 |
+
|
| 610 |
+
if __name__ == "__main__":
|
| 611 |
+
demo.queue(default_concurrency_limit=None).launch(
|
| 612 |
+
show_api=False
|
| 613 |
+
)
|
i18n/i18n.py
CHANGED
|
@@ -225,7 +225,10 @@ def translate_blocks(
|
|
| 225 |
TranslateContext.add_translation(translation)
|
| 226 |
|
| 227 |
def on_load(request: gr.Request):
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
def on_lang_change(request: gr.Request, lang: str):
|
| 231 |
TranslateContext.lang_per_session[request.session_hash] = lang
|
|
|
|
| 225 |
TranslateContext.add_translation(translation)
|
| 226 |
|
| 227 |
def on_load(request: gr.Request):
    """Pick the initial UI language for a request.

    Uses the language reported by ``get_lang_from_request`` when
    ``translation`` has an entry for it, and ``"en"`` otherwise.
    """
    requested = get_lang_from_request(request)
    return requested if requested in translation.keys() else "en"
|
| 232 |
|
| 233 |
def on_lang_change(request: gr.Request, lang: str):
|
| 234 |
TranslateContext.lang_per_session[request.session_hash] = lang
|