Fix OAuth workshop login and TTS defaults
#1
by ShadowInk - opened
- app.py +4 -4
- modal_apps/modal_tts.py +6 -1
- src/character_registry.py +8 -8
- src/character_workshop.py +14 -2
- src/tts_engine.py +8 -1
- tests/test_character_workshop.py +14 -0
- tests/test_tts_engine.py +43 -0
app.py
CHANGED
|
@@ -853,7 +853,7 @@ def _initial_state(character_id: str) -> dict:
|
|
| 853 |
"events": [],
|
| 854 |
"last_vision_note": None,
|
| 855 |
"character": character,
|
| 856 |
-
"voice": _default_voice_state(character, enabled=
|
| 857 |
}
|
| 858 |
|
| 859 |
|
|
@@ -933,8 +933,8 @@ def switch_character(character_id: str):
|
|
| 933 |
gr.update(value=voice["style"]),
|
| 934 |
gr.update(value=voice["speed"]),
|
| 935 |
gr.update(value=voice["energy"]),
|
| 936 |
-
gr.update(value=
|
| 937 |
-
"
|
| 938 |
None,
|
| 939 |
)
|
| 940 |
|
|
@@ -1557,7 +1557,7 @@ def build_demo() -> gr.Blocks:
|
|
| 1557 |
audio_status = gr.Markdown(value="等待新的语音回复。", elem_classes=["vc-audio-status"])
|
| 1558 |
|
| 1559 |
with gr.Accordion("语音控制", open=False):
|
| 1560 |
-
voice_enabled = gr.Checkbox(value=
|
| 1561 |
voice_id = gr.Dropdown(
|
| 1562 |
choices=_voice_choices(default_character),
|
| 1563 |
value=default_voice["voice_id"],
|
|
|
|
| 853 |
"events": [],
|
| 854 |
"last_vision_note": None,
|
| 855 |
"character": character,
|
| 856 |
+
"voice": _default_voice_state(character, enabled=False),
|
| 857 |
}
|
| 858 |
|
| 859 |
|
|
|
|
| 933 |
gr.update(value=voice["style"]),
|
| 934 |
gr.update(value=voice["speed"]),
|
| 935 |
gr.update(value=voice["energy"]),
|
| 936 |
+
gr.update(value=False),
|
| 937 |
+
"语音生成默认关闭,打开后会额外等待 TTS 服务。",
|
| 938 |
None,
|
| 939 |
)
|
| 940 |
|
|
|
|
| 1557 |
audio_status = gr.Markdown(value="等待新的语音回复。", elem_classes=["vc-audio-status"])
|
| 1558 |
|
| 1559 |
with gr.Accordion("语音控制", open=False):
|
| 1560 |
+
voice_enabled = gr.Checkbox(value=False, label="生成语音回复(会额外等待 TTS)")
|
| 1561 |
voice_id = gr.Dropdown(
|
| 1562 |
choices=_voice_choices(default_character),
|
| 1563 |
value=default_voice["voice_id"],
|
modal_apps/modal_tts.py
CHANGED
|
@@ -15,6 +15,11 @@ LANGUAGE_ID = os.environ.get("VC_TTS_LANGUAGE_ID", "zh")
|
|
| 15 |
VOICE_DIR = "/voices"
|
| 16 |
HF_SECRET_NAME = os.environ.get("VC_HF_SECRET_NAME", "hf-token")
|
| 17 |
HF_SECRETS = [] if os.environ.get("VC_SKIP_HF_SECRET") == "1" else [modal.Secret.from_name(HF_SECRET_NAME)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
image = (
|
|
@@ -212,7 +217,7 @@ class CharacterTTS:
|
|
| 212 |
import torchaudio as ta
|
| 213 |
|
| 214 |
if self.backend == "kokoro":
|
| 215 |
-
voice =
|
| 216 |
generator = self.model(text, voice=voice)
|
| 217 |
chunks = []
|
| 218 |
for _, _, audio in generator:
|
|
|
|
| 15 |
VOICE_DIR = "/voices"
|
| 16 |
HF_SECRET_NAME = os.environ.get("VC_HF_SECRET_NAME", "hf-token")
|
| 17 |
HF_SECRETS = [] if os.environ.get("VC_SKIP_HF_SECRET") == "1" else [modal.Secret.from_name(HF_SECRET_NAME)]
|
| 18 |
+
KOKORO_VOICE_ALIASES = {
|
| 19 |
+
"default": "zm_yunxi",
|
| 20 |
+
"star_neutral": "zm_yunxi",
|
| 21 |
+
"star_soft": "zm_yunxi",
|
| 22 |
+
}
|
| 23 |
|
| 24 |
|
| 25 |
image = (
|
|
|
|
| 217 |
import torchaudio as ta
|
| 218 |
|
| 219 |
if self.backend == "kokoro":
|
| 220 |
+
voice = KOKORO_VOICE_ALIASES.get(voice_id, voice_id)
|
| 221 |
generator = self.model(text, voice=voice)
|
| 222 |
chunks = []
|
| 223 |
for _, _, audio in generator:
|
src/character_registry.py
CHANGED
|
@@ -49,16 +49,16 @@ CHARACTER_PACKAGES: dict[str, dict] = {
|
|
| 49 |
},
|
| 50 |
"skills": ["daily_chat", "emotional_support", "battle_focus", "style_guard"],
|
| 51 |
"voice": {
|
| 52 |
-
"voice_id": "
|
| 53 |
-
"voice_label": "
|
| 54 |
-
"backend": "
|
| 55 |
-
"audio_prompt_path":
|
| 56 |
-
"pace": "
|
| 57 |
-
"energy": 0.
|
| 58 |
-
"default_style": "
|
| 59 |
"allowed_emotions": ["neutral", "soft", "happy", "concerned", "battle_focus"],
|
| 60 |
},
|
| 61 |
-
"voice_options": [("
|
| 62 |
"visual": {
|
| 63 |
"accent": "#67e8f9",
|
| 64 |
"background": "#111827",
|
|
|
|
| 49 |
},
|
| 50 |
"skills": ["daily_chat", "emotional_support", "battle_focus", "style_guard"],
|
| 51 |
"voice": {
|
| 52 |
+
"voice_id": "star_neutral",
|
| 53 |
+
"voice_label": "中性通讯",
|
| 54 |
+
"backend": "kokoro",
|
| 55 |
+
"audio_prompt_path": None,
|
| 56 |
+
"pace": "normal",
|
| 57 |
+
"energy": 0.48,
|
| 58 |
+
"default_style": "neutral",
|
| 59 |
"allowed_emotions": ["neutral", "soft", "happy", "concerned", "battle_focus"],
|
| 60 |
},
|
| 61 |
+
"voice_options": [("中性通讯", "star_neutral"), ("低声叙述", "zm_yunxi"), ("明亮少女", "zf_xiaoxiao")],
|
| 62 |
"visual": {
|
| 63 |
"accent": "#67e8f9",
|
| 64 |
"background": "#111827",
|
src/character_workshop.py
CHANGED
|
@@ -845,9 +845,21 @@ def _is_relative_to(path: Path, root: Path) -> bool:
|
|
| 845 |
def _profile_value(profile: Any, key: str) -> str:
|
| 846 |
if profile is None:
|
| 847 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 848 |
if isinstance(profile, dict):
|
| 849 |
-
|
| 850 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 851 |
|
| 852 |
|
| 853 |
def _hash_user_id(value: str) -> str:
|
|
|
|
| 845 |
def _profile_value(profile: Any, key: str) -> str:
|
| 846 |
if profile is None:
|
| 847 |
return ""
|
| 848 |
+
aliases = {
|
| 849 |
+
"username": ("username", "preferred_username", "login"),
|
| 850 |
+
"name": ("name", "display_name", "username", "preferred_username"),
|
| 851 |
+
}.get(key, (key,))
|
| 852 |
if isinstance(profile, dict):
|
| 853 |
+
for alias in aliases:
|
| 854 |
+
value = profile.get(alias)
|
| 855 |
+
if value:
|
| 856 |
+
return str(value).strip()
|
| 857 |
+
return ""
|
| 858 |
+
for alias in aliases:
|
| 859 |
+
value = getattr(profile, alias, "")
|
| 860 |
+
if value:
|
| 861 |
+
return str(value).strip()
|
| 862 |
+
return ""
|
| 863 |
|
| 864 |
|
| 865 |
def _hash_user_id(value: str) -> str:
|
src/tts_engine.py
CHANGED
|
@@ -6,6 +6,7 @@ from urllib.parse import urlparse
|
|
| 6 |
|
| 7 |
|
| 8 |
DEFAULT_TTS_URL = "https://veronicaulises0--virtual-characters-tts-charactertts-tts.modal.run"
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def synthesize_sentence(text: str, character: dict, voice_state: dict | None = None) -> str | None:
|
|
@@ -26,7 +27,13 @@ def synthesize_sentence(text: str, character: dict, voice_state: dict | None = N
|
|
| 26 |
"audio_prompt_path": voice_state.get("audio_prompt_path") or voice.get("audio_prompt_path"),
|
| 27 |
"backend": voice.get("backend", "chatterbox"),
|
| 28 |
}
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
response.raise_for_status()
|
| 31 |
path = Path(tempfile.gettempdir()) / f"virtual_characters_tts_{uuid.uuid4().hex}.wav"
|
| 32 |
path.write_bytes(response.content)
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
DEFAULT_TTS_URL = "https://veronicaulises0--virtual-characters-tts-charactertts-tts.modal.run"
|
| 9 |
+
DEFAULT_TTS_READ_TIMEOUT_SECONDS = 45.0
|
| 10 |
|
| 11 |
|
| 12 |
def synthesize_sentence(text: str, character: dict, voice_state: dict | None = None) -> str | None:
|
|
|
|
| 27 |
"audio_prompt_path": voice_state.get("audio_prompt_path") or voice.get("audio_prompt_path"),
|
| 28 |
"backend": voice.get("backend", "chatterbox"),
|
| 29 |
}
|
| 30 |
+
timeout = httpx.Timeout(
|
| 31 |
+
connect=float(os.environ.get("VC_TTS_CONNECT_TIMEOUT", "8")),
|
| 32 |
+
read=float(os.environ.get("VC_TTS_READ_TIMEOUT", str(DEFAULT_TTS_READ_TIMEOUT_SECONDS))),
|
| 33 |
+
write=8,
|
| 34 |
+
pool=8,
|
| 35 |
+
)
|
| 36 |
+
response = httpx.post(url, json=payload, timeout=timeout, trust_env=False)
|
| 37 |
response.raise_for_status()
|
| 38 |
path = Path(tempfile.gettempdir()) / f"virtual_characters_tts_{uuid.uuid4().hex}.wav"
|
| 39 |
path.write_bytes(response.content)
|
tests/test_character_workshop.py
CHANGED
|
@@ -148,6 +148,20 @@ class CharacterWorkshopTests(unittest.TestCase):
|
|
| 148 |
with self.assertRaises(ValueError):
|
| 149 |
workshop.load_workshop_run(alice_runs[0][1], user=bob)
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
def test_resume_main_candidates_from_manifest(self) -> None:
|
| 152 |
user = workshop.get_current_user(_Profile("alice", "Alice"))
|
| 153 |
state = workshop.create_initial_state(
|
|
|
|
| 148 |
with self.assertRaises(ValueError):
|
| 149 |
workshop.load_workshop_run(alice_runs[0][1], user=bob)
|
| 150 |
|
| 151 |
+
def test_hf_oauth_profile_uses_preferred_username(self) -> None:
|
| 152 |
+
profile = {
|
| 153 |
+
"name": "Shadow Ink",
|
| 154 |
+
"preferred_username": "ShadowInk",
|
| 155 |
+
"profile": "https://huggingface.co/ShadowInk",
|
| 156 |
+
"picture": "",
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
user = workshop.get_current_user(profile)
|
| 160 |
+
|
| 161 |
+
self.assertTrue(user.authenticated)
|
| 162 |
+
self.assertEqual(user.username, "ShadowInk")
|
| 163 |
+
self.assertEqual(user.storage_key, "shadowink")
|
| 164 |
+
|
| 165 |
def test_resume_main_candidates_from_manifest(self) -> None:
|
| 166 |
user = workshop.get_current_user(_Profile("alice", "Alice"))
|
| 167 |
state = workshop.create_initial_state(
|
tests/test_tts_engine.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import unittest
|
| 4 |
+
from unittest.mock import patch
|
| 5 |
+
|
| 6 |
+
from src import tts_engine
|
| 7 |
+
from modal_apps import modal_tts
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TtsEngineTests(unittest.TestCase):
|
| 11 |
+
def test_kokoro_star_voice_maps_to_neutral_voice(self) -> None:
|
| 12 |
+
self.assertEqual(modal_tts.KOKORO_VOICE_ALIASES["star_neutral"], "zm_yunxi")
|
| 13 |
+
self.assertEqual(modal_tts.KOKORO_VOICE_ALIASES["default"], "zm_yunxi")
|
| 14 |
+
|
| 15 |
+
def test_synthesize_uses_short_timeout(self) -> None:
|
| 16 |
+
class Response:
|
| 17 |
+
content = b"RIFF....WAVE"
|
| 18 |
+
|
| 19 |
+
def raise_for_status(self):
|
| 20 |
+
return None
|
| 21 |
+
|
| 22 |
+
calls = []
|
| 23 |
+
|
| 24 |
+
def fake_post(url, json, timeout, trust_env):
|
| 25 |
+
calls.append({"url": url, "json": json, "timeout": timeout, "trust_env": trust_env})
|
| 26 |
+
return Response()
|
| 27 |
+
|
| 28 |
+
with patch.dict("os.environ", {"VC_MODAL_TTS_URL": "https://tts.example.test"}, clear=False):
|
| 29 |
+
with patch("httpx.post", fake_post):
|
| 30 |
+
path = tts_engine.synthesize_sentence(
|
| 31 |
+
"你好。",
|
| 32 |
+
{"voice": {"voice_id": "star_neutral", "backend": "kokoro"}},
|
| 33 |
+
{"voice_id": "star_neutral", "enabled": True},
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
self.assertIsNotNone(path)
|
| 37 |
+
self.assertEqual(calls[0]["json"]["voice_id"], "star_neutral")
|
| 38 |
+
self.assertEqual(calls[0]["timeout"].read, tts_engine.DEFAULT_TTS_READ_TIMEOUT_SECONDS)
|
| 39 |
+
self.assertFalse(calls[0]["trust_env"])
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
if __name__ == "__main__":
|
| 43 |
+
unittest.main()
|