Fix OAuth workshop login and TTS defaults

#1
by ShadowInk - opened
app.py CHANGED
@@ -853,7 +853,7 @@ def _initial_state(character_id: str) -> dict:
853
  "events": [],
854
  "last_vision_note": None,
855
  "character": character,
856
- "voice": _default_voice_state(character, enabled=True),
857
  }
858
 
859
 
@@ -933,8 +933,8 @@ def switch_character(character_id: str):
933
  gr.update(value=voice["style"]),
934
  gr.update(value=voice["speed"]),
935
  gr.update(value=voice["energy"]),
936
- gr.update(value=True),
937
- "等待新的语音回复。",
938
  None,
939
  )
940
 
@@ -1557,7 +1557,7 @@ def build_demo() -> gr.Blocks:
1557
  audio_status = gr.Markdown(value="等待新的语音回复。", elem_classes=["vc-audio-status"])
1558
 
1559
  with gr.Accordion("语音控制", open=False):
1560
- voice_enabled = gr.Checkbox(value=True, label="生成语音回复")
1561
  voice_id = gr.Dropdown(
1562
  choices=_voice_choices(default_character),
1563
  value=default_voice["voice_id"],
 
853
  "events": [],
854
  "last_vision_note": None,
855
  "character": character,
856
+ "voice": _default_voice_state(character, enabled=False),
857
  }
858
 
859
 
 
933
  gr.update(value=voice["style"]),
934
  gr.update(value=voice["speed"]),
935
  gr.update(value=voice["energy"]),
936
+ gr.update(value=False),
937
+ "语音生成默认关闭,打开后会额外等待 TTS 服务。",
938
  None,
939
  )
940
 
 
1557
  audio_status = gr.Markdown(value="等待新的语音回复。", elem_classes=["vc-audio-status"])
1558
 
1559
  with gr.Accordion("语音控制", open=False):
1560
+ voice_enabled = gr.Checkbox(value=False, label="生成语音回复(会额外等待 TTS)")
1561
  voice_id = gr.Dropdown(
1562
  choices=_voice_choices(default_character),
1563
  value=default_voice["voice_id"],
modal_apps/modal_tts.py CHANGED
@@ -15,6 +15,11 @@ LANGUAGE_ID = os.environ.get("VC_TTS_LANGUAGE_ID", "zh")
15
  VOICE_DIR = "/voices"
16
  HF_SECRET_NAME = os.environ.get("VC_HF_SECRET_NAME", "hf-token")
17
  HF_SECRETS = [] if os.environ.get("VC_SKIP_HF_SECRET") == "1" else [modal.Secret.from_name(HF_SECRET_NAME)]
 
 
 
 
 
18
 
19
 
20
  image = (
@@ -212,7 +217,7 @@ class CharacterTTS:
212
  import torchaudio as ta
213
 
214
  if self.backend == "kokoro":
215
- voice = "zf_xiaoxiao" if voice_id == "default" else voice_id
216
  generator = self.model(text, voice=voice)
217
  chunks = []
218
  for _, _, audio in generator:
 
15
  VOICE_DIR = "/voices"
16
  HF_SECRET_NAME = os.environ.get("VC_HF_SECRET_NAME", "hf-token")
17
  HF_SECRETS = [] if os.environ.get("VC_SKIP_HF_SECRET") == "1" else [modal.Secret.from_name(HF_SECRET_NAME)]
18
+ KOKORO_VOICE_ALIASES = {
19
+ "default": "zm_yunxi",
20
+ "star_neutral": "zm_yunxi",
21
+ "star_soft": "zm_yunxi",
22
+ }
23
 
24
 
25
  image = (
 
217
  import torchaudio as ta
218
 
219
  if self.backend == "kokoro":
220
+ voice = KOKORO_VOICE_ALIASES.get(voice_id, voice_id)
221
  generator = self.model(text, voice=voice)
222
  chunks = []
223
  for _, _, audio in generator:
src/character_registry.py CHANGED
@@ -49,16 +49,16 @@ CHARACTER_PACKAGES: dict[str, dict] = {
49
  },
50
  "skills": ["daily_chat", "emotional_support", "battle_focus", "style_guard"],
51
  "voice": {
52
- "voice_id": "star_soft",
53
- "voice_label": "柔和通讯",
54
- "backend": "chatterbox",
55
- "audio_prompt_path": "star_soft.wav",
56
- "pace": "slow",
57
- "energy": 0.55,
58
- "default_style": "soft",
59
  "allowed_emotions": ["neutral", "soft", "happy", "concerned", "battle_focus"],
60
  },
61
- "voice_options": [("柔和通讯", "star_soft"), ("明亮少女", "zf_xiaoxiao"), ("中性叙述", "default")],
62
  "visual": {
63
  "accent": "#67e8f9",
64
  "background": "#111827",
 
49
  },
50
  "skills": ["daily_chat", "emotional_support", "battle_focus", "style_guard"],
51
  "voice": {
52
+ "voice_id": "star_neutral",
53
+ "voice_label": "中性通讯",
54
+ "backend": "kokoro",
55
+ "audio_prompt_path": None,
56
+ "pace": "normal",
57
+ "energy": 0.48,
58
+ "default_style": "neutral",
59
  "allowed_emotions": ["neutral", "soft", "happy", "concerned", "battle_focus"],
60
  },
61
+ "voice_options": [("中性通讯", "star_neutral"), ("低声叙述", "zm_yunxi"), ("明亮少女", "zf_xiaoxiao")],
62
  "visual": {
63
  "accent": "#67e8f9",
64
  "background": "#111827",
src/character_workshop.py CHANGED
@@ -845,9 +845,21 @@ def _is_relative_to(path: Path, root: Path) -> bool:
845
  def _profile_value(profile: Any, key: str) -> str:
846
  if profile is None:
847
  return ""
 
 
 
 
848
  if isinstance(profile, dict):
849
- return str(profile.get(key) or "").strip()
850
- return str(getattr(profile, key, "") or "").strip()
 
 
 
 
 
 
 
 
851
 
852
 
853
  def _hash_user_id(value: str) -> str:
 
845
  def _profile_value(profile: Any, key: str) -> str:
846
  if profile is None:
847
  return ""
848
+ aliases = {
849
+ "username": ("username", "preferred_username", "login"),
850
+ "name": ("name", "display_name", "username", "preferred_username"),
851
+ }.get(key, (key,))
852
  if isinstance(profile, dict):
853
+ for alias in aliases:
854
+ value = profile.get(alias)
855
+ if value:
856
+ return str(value).strip()
857
+ return ""
858
+ for alias in aliases:
859
+ value = getattr(profile, alias, "")
860
+ if value:
861
+ return str(value).strip()
862
+ return ""
863
 
864
 
865
  def _hash_user_id(value: str) -> str:
src/tts_engine.py CHANGED
@@ -6,6 +6,7 @@ from urllib.parse import urlparse
6
 
7
 
8
  DEFAULT_TTS_URL = "https://veronicaulises0--virtual-characters-tts-charactertts-tts.modal.run"
 
9
 
10
 
11
  def synthesize_sentence(text: str, character: dict, voice_state: dict | None = None) -> str | None:
@@ -26,7 +27,13 @@ def synthesize_sentence(text: str, character: dict, voice_state: dict | None = N
26
  "audio_prompt_path": voice_state.get("audio_prompt_path") or voice.get("audio_prompt_path"),
27
  "backend": voice.get("backend", "chatterbox"),
28
  }
29
- response = httpx.post(url, json=payload, timeout=180, trust_env=False)
 
 
 
 
 
 
30
  response.raise_for_status()
31
  path = Path(tempfile.gettempdir()) / f"virtual_characters_tts_{uuid.uuid4().hex}.wav"
32
  path.write_bytes(response.content)
 
6
 
7
 
8
  DEFAULT_TTS_URL = "https://veronicaulises0--virtual-characters-tts-charactertts-tts.modal.run"
9
+ DEFAULT_TTS_READ_TIMEOUT_SECONDS = 45.0
10
 
11
 
12
  def synthesize_sentence(text: str, character: dict, voice_state: dict | None = None) -> str | None:
 
27
  "audio_prompt_path": voice_state.get("audio_prompt_path") or voice.get("audio_prompt_path"),
28
  "backend": voice.get("backend", "chatterbox"),
29
  }
30
+ timeout = httpx.Timeout(
31
+ connect=float(os.environ.get("VC_TTS_CONNECT_TIMEOUT", "8")),
32
+ read=float(os.environ.get("VC_TTS_READ_TIMEOUT", str(DEFAULT_TTS_READ_TIMEOUT_SECONDS))),
33
+ write=8,
34
+ pool=8,
35
+ )
36
+ response = httpx.post(url, json=payload, timeout=timeout, trust_env=False)
37
  response.raise_for_status()
38
  path = Path(tempfile.gettempdir()) / f"virtual_characters_tts_{uuid.uuid4().hex}.wav"
39
  path.write_bytes(response.content)
tests/test_character_workshop.py CHANGED
@@ -148,6 +148,20 @@ class CharacterWorkshopTests(unittest.TestCase):
148
  with self.assertRaises(ValueError):
149
  workshop.load_workshop_run(alice_runs[0][1], user=bob)
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  def test_resume_main_candidates_from_manifest(self) -> None:
152
  user = workshop.get_current_user(_Profile("alice", "Alice"))
153
  state = workshop.create_initial_state(
 
148
  with self.assertRaises(ValueError):
149
  workshop.load_workshop_run(alice_runs[0][1], user=bob)
150
 
151
+ def test_hf_oauth_profile_uses_preferred_username(self) -> None:
152
+ profile = {
153
+ "name": "Shadow Ink",
154
+ "preferred_username": "ShadowInk",
155
+ "profile": "https://huggingface.co/ShadowInk",
156
+ "picture": "",
157
+ }
158
+
159
+ user = workshop.get_current_user(profile)
160
+
161
+ self.assertTrue(user.authenticated)
162
+ self.assertEqual(user.username, "ShadowInk")
163
+ self.assertEqual(user.storage_key, "shadowink")
164
+
165
  def test_resume_main_candidates_from_manifest(self) -> None:
166
  user = workshop.get_current_user(_Profile("alice", "Alice"))
167
  state = workshop.create_initial_state(
tests/test_tts_engine.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import unittest
4
+ from unittest.mock import patch
5
+
6
+ from src import tts_engine
7
+ from modal_apps import modal_tts
8
+
9
+
10
+ class TtsEngineTests(unittest.TestCase):
11
+ def test_kokoro_star_voice_maps_to_neutral_voice(self) -> None:
12
+ self.assertEqual(modal_tts.KOKORO_VOICE_ALIASES["star_neutral"], "zm_yunxi")
13
+ self.assertEqual(modal_tts.KOKORO_VOICE_ALIASES["default"], "zm_yunxi")
14
+
15
+ def test_synthesize_uses_short_timeout(self) -> None:
16
+ class Response:
17
+ content = b"RIFF....WAVE"
18
+
19
+ def raise_for_status(self):
20
+ return None
21
+
22
+ calls = []
23
+
24
+ def fake_post(url, json, timeout, trust_env):
25
+ calls.append({"url": url, "json": json, "timeout": timeout, "trust_env": trust_env})
26
+ return Response()
27
+
28
+ with patch.dict("os.environ", {"VC_MODAL_TTS_URL": "https://tts.example.test"}, clear=False):
29
+ with patch("httpx.post", fake_post):
30
+ path = tts_engine.synthesize_sentence(
31
+ "你好。",
32
+ {"voice": {"voice_id": "star_neutral", "backend": "kokoro"}},
33
+ {"voice_id": "star_neutral", "enabled": True},
34
+ )
35
+
36
+ self.assertIsNotNone(path)
37
+ self.assertEqual(calls[0]["json"]["voice_id"], "star_neutral")
38
+ self.assertEqual(calls[0]["timeout"].read, tts_engine.DEFAULT_TTS_READ_TIMEOUT_SECONDS)
39
+ self.assertFalse(calls[0]["trust_env"])
40
+
41
+
42
+ if __name__ == "__main__":
43
+ unittest.main()