virtual-characters / demo_modal_stack.py
ShadowInk's picture
Deploy Virtual Characters for Build Small Hackathon
005e075 verified
Raw
History Blame Contribute Delete
4.48 kB
import re
import tempfile
import time
from pathlib import Path
import gradio as gr
from modal_apps.modal_llm import PersonaLLM
from modal_apps.modal_tts import CharacterTTS
from src.character_registry import CHARACTER_PACKAGES, get_character
from src.stage_driver import render_character_stage
# Extra CSS passed to ``launch(css=...)`` in the ``__main__`` guard. It keeps the
# element with id ``modal-demo-stage`` (the ``elem_id`` given to the stage
# ``gr.HTML`` component) and any iframe inside it at least 460px tall.
APP_CSS = """
#modal-demo-stage iframe, #modal-demo-stage { min-height: 460px; }
"""
def _character_choices() -> list[tuple[str, str]]:
    """Return ``(display_name, character_id)`` pairs for every registered character.

    Pairs follow the iteration order of ``CHARACTER_PACKAGES`` and are used as
    the choices of the character selector radio.
    """
    choices: list[tuple[str, str]] = []
    for key, package in CHARACTER_PACKAGES.items():
        choices.append((package["display_name"], key))
    return choices
def _split_sentences(text: str) -> list[str]:
return [part.strip() for part in re.split(r"(?<=[。!?!?;;])\\s*", text) if part.strip()] or [text.strip()]
def _write_wav(audio: bytes, prefix: str = "vc_tts_") -> str:
handle = tempfile.NamedTemporaryFile(prefix=prefix, suffix=".wav", delete=False)
handle.write(audio)
handle.close()
return handle.name
def chat_once(message: str, history: list[dict], character_id: str, tts_enabled: bool):
    """Stream one chat turn: the LLM reply first, then optional per-sentence audio.

    This is a generator; every ``yield`` is a ``(history, audio_path, debug)``
    triple, matching the ``[chatbot, audio, debug]`` outputs it is wired to.

    Args:
        message: Raw user input. Blank or ``None`` input short-circuits with a
            single ``{"status": "empty"}`` update.
        history: Prior chat messages as ``{"role": ..., "content": ...}`` dicts.
            ``None`` is treated as an empty history. The caller's list is not
            mutated; a new list is built for this turn.
        character_id: Identifier passed to ``get_character``.
        tts_enabled: When false, the generator stops after the LLM reply.

    Yields:
        In order: a placeholder update (status ``llm_generating``), the final
        reply (status ``llm_done``), and then, for each non-empty sentence of
        the reply when TTS is enabled, a ``tts_generating`` update followed by
        a ``tts_chunk_done`` update carrying the path of the written audio file.
    """
    # Tolerate a missing history/message so an unset component value cannot crash the handler.
    history = history or []
    if not (message or "").strip():
        yield history, None, {"status": "empty"}
        return

    character = get_character(character_id)
    # Show a placeholder assistant message while the remote LLM call is running.
    history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": "Modal LLM 正在生成..."},
    ]
    yield history, None, {"status": "llm_generating"}

    started = time.perf_counter()
    llm_result = PersonaLLM().generate_text.remote(
        user_text=message,
        character=character,
        max_new_tokens=120,
    )
    reply = llm_result["text"]
    # Replace the placeholder with the real reply.
    history[-1]["content"] = reply
    debug = {
        "status": "llm_done",
        "llm_remote_s": llm_result.get("remote_s"),
        "llm_output_tokens": llm_result.get("output_tokens"),
        "client_elapsed_s": round(time.perf_counter() - started, 3),
    }
    yield history, None, debug

    if not tts_enabled:
        return

    # One handle is enough for all sentences of this reply.
    tts = CharacterTTS()
    for index, sentence in enumerate(_split_sentences(reply), start=1):
        # _split_sentences can return [""] for an empty reply; skip it.
        if not sentence:
            continue
        debug = {**debug, "status": "tts_generating", "tts_sentence_index": index, "tts_sentence": sentence}
        yield history, None, debug

        tts_started = time.perf_counter()
        audio = tts.synthesize.remote(text=sentence, emotion="neutral")
        audio_path = _write_wav(audio)
        debug = {
            **debug,
            "status": "tts_chunk_done",
            "tts_sentence_index": index,
            "tts_remote_client_s": round(time.perf_counter() - tts_started, 3),
            "audio_path": audio_path,
        }
        yield history, audio_path, debug
def switch_character(character_id: str):
    """Return the summary and a freshly rendered idle stage for a character.

    The two return values feed the ``[character_summary, stage]`` outputs of
    the character selector's ``change`` event.
    """
    selected = get_character(character_id)
    idle_stage = {
        "expression": "idle",
        "motion": "breathe",
        "intensity": 0.35,
    }
    summary = selected["summary"]
    rendered_stage = render_character_stage(selected, idle_stage)
    return summary, rendered_stage
def build_demo() -> gr.Blocks:
    """Assemble and return the Gradio Blocks UI for the Modal smoke demo.

    The UI offers a character selector with summary and TTS toggle, a rendered
    character stage, and a chat area (chatbot, input box, audio player and a
    debug JSON view). Changing the character re-renders the summary and stage;
    submitting a message streams ``chat_once`` and then clears the input box.
    """
    # NOTE(review): the row/column nesting below follows the component order
    # and scale values; confirm it against the intended layout.
    initial_id = "memory_girl"
    initial_character = get_character(initial_id)
    idle_stage = {"expression": "idle", "motion": "breathe", "intensity": 0.35}

    with gr.Blocks(title="Modal Virtual Character Smoke Demo") as demo:
        with gr.Row():
            # Character picker, summary and TTS toggle.
            with gr.Column(scale=1, min_width=260):
                character_select = gr.Radio(
                    _character_choices(),
                    value=initial_id,
                    label="角色",
                )
                character_summary = gr.Markdown(initial_character["summary"])
                tts_enabled = gr.Checkbox(value=True, label="启用 Chatterbox TTS")

            # Rendered character stage.
            with gr.Column(scale=2, min_width=360):
                initial_stage = render_character_stage(initial_character, idle_stage)
                stage = gr.HTML(
                    initial_stage,
                    elem_id="modal-demo-stage",
                    min_height=460,
                )

            # Conversation, input, audio playback and debug output.
            with gr.Column(scale=2, min_width=360):
                chatbot = gr.Chatbot(label="Modal 对话", height=380)
                message = gr.Textbox(label="输入", lines=2, submit_btn=True)
                audio = gr.Audio(label="分句语音", autoplay=True)
                debug = gr.JSON(label="调试")

        character_select.change(
            fn=switch_character,
            inputs=[character_select],
            outputs=[character_summary, stage],
        )

        submit_event = message.submit(
            fn=chat_once,
            inputs=[message, chatbot, character_select, tts_enabled],
            outputs=[chatbot, audio, debug],
        )
        # Clear the input box once the chat generator has finished.
        submit_event.then(fn=lambda: "", outputs=[message])

    return demo
if __name__ == "__main__":
    # Build the UI, enable the queue, then serve locally with the extra CSS.
    queued_demo = build_demo().queue()
    queued_demo.launch(
        css=APP_CSS,
        server_name="127.0.0.1",
        server_port=7862,
    )