Spaces:

build-small-hackathon
/

virtual-characters

Sleeping

App Files Community

Fix OAuth workshop login and TTS defaults

by ShadowInk - opened 14 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+97

-16

Files changed (7) hide show

app.py +4 -4
modal_apps/modal_tts.py +6 -1
src/character_registry.py +8 -8
src/character_workshop.py +14 -2
src/tts_engine.py +8 -1
tests/test_character_workshop.py +14 -0
tests/test_tts_engine.py +43 -0

app.py CHANGED Viewed

@@ -853,7 +853,7 @@ def _initial_state(character_id: str) -> dict:
         "events": [],
         "last_vision_note": None,
         "character": character,
-        "voice": _default_voice_state(character, enabled=True),
     }
@@ -933,8 +933,8 @@ def switch_character(character_id: str):
         gr.update(value=voice["style"]),
         gr.update(value=voice["speed"]),
         gr.update(value=voice["energy"]),
-        gr.update(value=True),
-        "等待新的语音回复。",
         None,
     )
@@ -1557,7 +1557,7 @@ def build_demo() -> gr.Blocks:
                         audio_status = gr.Markdown(value="等待新的语音回复。", elem_classes=["vc-audio-status"])
                         with gr.Accordion("语音控制", open=False):
-                            voice_enabled = gr.Checkbox(value=True, label="生成语音回复")
                             voice_id = gr.Dropdown(
                                 choices=_voice_choices(default_character),
                                 value=default_voice["voice_id"],

         "events": [],
         "last_vision_note": None,
         "character": character,
+        "voice": _default_voice_state(character, enabled=False),
     }
         gr.update(value=voice["style"]),
         gr.update(value=voice["speed"]),
         gr.update(value=voice["energy"]),
+        gr.update(value=False),
+        "语音生成默认关闭，打开后会额外等待 TTS 服务。",
         None,
     )
                         audio_status = gr.Markdown(value="等待新的语音回复。", elem_classes=["vc-audio-status"])
                         with gr.Accordion("语音控制", open=False):
+                            voice_enabled = gr.Checkbox(value=False, label="生成语音回复（会额外等待 TTS）")
                             voice_id = gr.Dropdown(
                                 choices=_voice_choices(default_character),
                                 value=default_voice["voice_id"],

modal_apps/modal_tts.py CHANGED Viewed

@@ -15,6 +15,11 @@ LANGUAGE_ID = os.environ.get("VC_TTS_LANGUAGE_ID", "zh")
 VOICE_DIR = "/voices"
 HF_SECRET_NAME = os.environ.get("VC_HF_SECRET_NAME", "hf-token")
 HF_SECRETS = [] if os.environ.get("VC_SKIP_HF_SECRET") == "1" else [modal.Secret.from_name(HF_SECRET_NAME)]
 image = (
@@ -212,7 +217,7 @@ class CharacterTTS:
         import torchaudio as ta
         if self.backend == "kokoro":
-            voice = "zf_xiaoxiao" if voice_id == "default" else voice_id
             generator = self.model(text, voice=voice)
             chunks = []
             for _, _, audio in generator:

 VOICE_DIR = "/voices"
 HF_SECRET_NAME = os.environ.get("VC_HF_SECRET_NAME", "hf-token")
 HF_SECRETS = [] if os.environ.get("VC_SKIP_HF_SECRET") == "1" else [modal.Secret.from_name(HF_SECRET_NAME)]
+KOKORO_VOICE_ALIASES = {
+    "default": "zm_yunxi",
+    "star_neutral": "zm_yunxi",
+    "star_soft": "zm_yunxi",
+}
 image = (
         import torchaudio as ta
         if self.backend == "kokoro":
+            voice = KOKORO_VOICE_ALIASES.get(voice_id, voice_id)
             generator = self.model(text, voice=voice)
             chunks = []
             for _, _, audio in generator:

src/character_registry.py CHANGED Viewed

@@ -49,16 +49,16 @@ CHARACTER_PACKAGES: dict[str, dict] = {
         },
         "skills": ["daily_chat", "emotional_support", "battle_focus", "style_guard"],
         "voice": {
-            "voice_id": "star_soft",
-            "voice_label": "柔和通讯",
-            "backend": "chatterbox",
-            "audio_prompt_path": "star_soft.wav",
-            "pace": "slow",
-            "energy": 0.55,
-            "default_style": "soft",
             "allowed_emotions": ["neutral", "soft", "happy", "concerned", "battle_focus"],
         },
-        "voice_options": [("柔和通讯", "star_soft"), ("明亮少女", "zf_xiaoxiao"), ("中性叙述", "default")],
         "visual": {
             "accent": "#67e8f9",
             "background": "#111827",

         },
         "skills": ["daily_chat", "emotional_support", "battle_focus", "style_guard"],
         "voice": {
+            "voice_id": "star_neutral",
+            "voice_label": "中性通讯",
+            "backend": "kokoro",
+            "audio_prompt_path": None,
+            "pace": "normal",
+            "energy": 0.48,
+            "default_style": "neutral",
             "allowed_emotions": ["neutral", "soft", "happy", "concerned", "battle_focus"],
         },
+        "voice_options": [("中性通讯", "star_neutral"), ("低声叙述", "zm_yunxi"), ("明亮少女", "zf_xiaoxiao")],
         "visual": {
             "accent": "#67e8f9",
             "background": "#111827",

src/character_workshop.py CHANGED Viewed

@@ -845,9 +845,21 @@ def _is_relative_to(path: Path, root: Path) -> bool:
 def _profile_value(profile: Any, key: str) -> str:
     if profile is None:
         return ""
     if isinstance(profile, dict):
-        return str(profile.get(key) or "").strip()
-    return str(getattr(profile, key, "") or "").strip()
 def _hash_user_id(value: str) -> str:

 def _profile_value(profile: Any, key: str) -> str:
     if profile is None:
         return ""
+    aliases = {
+        "username": ("username", "preferred_username", "login"),
+        "name": ("name", "display_name", "username", "preferred_username"),
+    }.get(key, (key,))
     if isinstance(profile, dict):
+        for alias in aliases:
+            value = profile.get(alias)
+            if value:
+                return str(value).strip()
+        return ""
+    for alias in aliases:
+        value = getattr(profile, alias, "")
+        if value:
+            return str(value).strip()
+    return ""
 def _hash_user_id(value: str) -> str:

src/tts_engine.py CHANGED Viewed

@@ -6,6 +6,7 @@ from urllib.parse import urlparse
 DEFAULT_TTS_URL = "https://veronicaulises0--virtual-characters-tts-charactertts-tts.modal.run"
 def synthesize_sentence(text: str, character: dict, voice_state: dict | None = None) -> str | None:
@@ -26,7 +27,13 @@ def synthesize_sentence(text: str, character: dict, voice_state: dict | None = N
         "audio_prompt_path": voice_state.get("audio_prompt_path") or voice.get("audio_prompt_path"),
         "backend": voice.get("backend", "chatterbox"),
     }
-    response = httpx.post(url, json=payload, timeout=180, trust_env=False)
     response.raise_for_status()
     path = Path(tempfile.gettempdir()) / f"virtual_characters_tts_{uuid.uuid4().hex}.wav"
     path.write_bytes(response.content)

 DEFAULT_TTS_URL = "https://veronicaulises0--virtual-characters-tts-charactertts-tts.modal.run"
+DEFAULT_TTS_READ_TIMEOUT_SECONDS = 45.0
 def synthesize_sentence(text: str, character: dict, voice_state: dict | None = None) -> str | None:
         "audio_prompt_path": voice_state.get("audio_prompt_path") or voice.get("audio_prompt_path"),
         "backend": voice.get("backend", "chatterbox"),
     }
+    timeout = httpx.Timeout(
+        connect=float(os.environ.get("VC_TTS_CONNECT_TIMEOUT", "8")),
+        read=float(os.environ.get("VC_TTS_READ_TIMEOUT", str(DEFAULT_TTS_READ_TIMEOUT_SECONDS))),
+        write=8,
+        pool=8,
+    )
+    response = httpx.post(url, json=payload, timeout=timeout, trust_env=False)
     response.raise_for_status()
     path = Path(tempfile.gettempdir()) / f"virtual_characters_tts_{uuid.uuid4().hex}.wav"
     path.write_bytes(response.content)

tests/test_character_workshop.py CHANGED Viewed

@@ -148,6 +148,20 @@ class CharacterWorkshopTests(unittest.TestCase):
         with self.assertRaises(ValueError):
             workshop.load_workshop_run(alice_runs[0][1], user=bob)
     def test_resume_main_candidates_from_manifest(self) -> None:
         user = workshop.get_current_user(_Profile("alice", "Alice"))
         state = workshop.create_initial_state(

         with self.assertRaises(ValueError):
             workshop.load_workshop_run(alice_runs[0][1], user=bob)
+    def test_hf_oauth_profile_uses_preferred_username(self) -> None:
+        profile = {
+            "name": "Shadow Ink",
+            "preferred_username": "ShadowInk",
+            "profile": "https://huggingface.co/ShadowInk",
+            "picture": "",
+        }
+        user = workshop.get_current_user(profile)
+        self.assertTrue(user.authenticated)
+        self.assertEqual(user.username, "ShadowInk")
+        self.assertEqual(user.storage_key, "shadowink")
     def test_resume_main_candidates_from_manifest(self) -> None:
         user = workshop.get_current_user(_Profile("alice", "Alice"))
         state = workshop.create_initial_state(

tests/test_tts_engine.py ADDED Viewed

@@ -0,0 +1,43 @@

+from __future__ import annotations
+import unittest
+from unittest.mock import patch
+from src import tts_engine
+from modal_apps import modal_tts
+class TtsEngineTests(unittest.TestCase):
+    def test_kokoro_star_voice_maps_to_neutral_voice(self) -> None:
+        self.assertEqual(modal_tts.KOKORO_VOICE_ALIASES["star_neutral"], "zm_yunxi")
+        self.assertEqual(modal_tts.KOKORO_VOICE_ALIASES["default"], "zm_yunxi")
+    def test_synthesize_uses_short_timeout(self) -> None:
+        class Response:
+            content = b"RIFF....WAVE"
+            def raise_for_status(self):
+                return None
+        calls = []
+        def fake_post(url, json, timeout, trust_env):
+            calls.append({"url": url, "json": json, "timeout": timeout, "trust_env": trust_env})
+            return Response()
+        with patch.dict("os.environ", {"VC_MODAL_TTS_URL": "https://tts.example.test"}, clear=False):
+            with patch("httpx.post", fake_post):
+                path = tts_engine.synthesize_sentence(
+                    "你好。",
+                    {"voice": {"voice_id": "star_neutral", "backend": "kokoro"}},
+                    {"voice_id": "star_neutral", "enabled": True},
+                )
+        self.assertIsNotNone(path)
+        self.assertEqual(calls[0]["json"]["voice_id"], "star_neutral")
+        self.assertEqual(calls[0]["timeout"].read, tts_engine.DEFAULT_TTS_READ_TIMEOUT_SECONDS)
+        self.assertFalse(calls[0]["trust_env"])
+if __name__ == "__main__":
+    unittest.main()