Spaces:

dsa2dsads
/

VoiceDirector

Paused

App Files Files Community

VoiceDirector / core /ui.py

dsa2dsads

Fix ZeroGPU preview callbacks to stay CPU-only and avoid main-process CUDA init

88733e7 verified about 2 months ago

raw

history blame contribute delete

31.5 kB

	import os
	from datetime import datetime, timezone
	from functools import partial
	from pathlib import Path

	# Use the real Hugging Face `spaces` package when it is importable; otherwise
	# install a minimal stand-in so `@spaces.GPU` decorators still work locally.
	try:
	    import spaces  # pyright: ignore[reportMissingImports]
	except ImportError:  # pragma: no cover - local fallback when not running on HF Spaces.
	    class _SpacesShim:
	        """No-op replacement for the `spaces` module that only provides `GPU`."""

	        @staticmethod
	        def GPU(*decorator_args, **decorator_kwargs):
	            """Return the decorated function unchanged.

	            Works both as a bare decorator (`@spaces.GPU`) and as a decorator
	            factory (`@spaces.GPU(...)`); any factory arguments are ignored.
	            """
	            # Bare usage: the single positional argument is the function itself.
	            if len(decorator_args) == 1 and callable(decorator_args[0]) and not decorator_kwargs:
	                return decorator_args[0]

	            def _decorator(func):
	                return func

	            return _decorator

	    spaces = _SpacesShim()

	from benchmarks.harness import (
	DEFAULT_STAGE1_SMOKE_SCRIPT_IDS,
	evaluate_internal_benchmark,
	evaluate_stage1_integration_smoke,
	)
	from core.adapters import (
	AVAILABLE_EDGE_TTS_VOICES,
	DEFAULT_EDGE_TTS_VOICE,
	EdgeTTSAdapter,
	FasterWhisperAdapter,
	FunASRSenseVoiceAdapter,
	MockASRAdapter,
	MockTTSAdapter,
	)
	from core.pipeline import (
	AUTO_REPLY_DELAY_SECONDS,
	create_streaming_turn_state,
	maybe_auto_reply_streaming_turn,
	preview_streaming_turn,
	run_controlled_turn,
	run_streaming_turn,
	stop_streaming_turn_state,
	)
	from utils.config_loader import (
	DEFAULT_BENCHMARK_CSV_PATH,
	DEFAULT_EVALUATION_CONTRACT_PATH,
	DEFAULT_RULES_PATH,
	TABLE_HEADERS,
	load_evaluation_contract,
	load_rule_catalog,
	rules_from_editor_rows,
	rules_to_table_rows,
	)
	from utils.tracking import LocalJsonlTracker, RunMetadata, make_run_id, resolve_code_version

	# User-facing notice (Chinese) rendered under the page title and repeated in
	# the status/summary Markdown produced by the formatting helpers in this module.
	MINIMAL_UI_NOTICE = "当前为演示版，优先展示实时接话体验。首次加载模型可能稍慢；麦克风不可用时可改用下方兜底输入。"


	def _is_zero_gpu_runtime() -> bool:
	return os.getenv("SPACES_ZERO_GPU", "").lower() in {"1", "t", "true"}


	def _build_stream_preview_asr_adapters():
	    """Create the ASR adapters used by the live-preview callbacks.

	    Returns a dict keyed by backend id ("funasr", "faster-whisper"). Under a
	    ZeroGPU runtime both adapters are restricted to CPU device orders;
	    otherwise they are built with their default arguments.
	    """
	    # ZeroGPU does not allow CUDA initialisation in the main process, and the
	    # preview callbacks are not wrapped by @spaces.GPU, so they must stay on CPU.
	    cpu_only = _is_zero_gpu_runtime()
	    funasr_options = {"device_order": ("cpu",)} if cpu_only else {}
	    whisper_options = {"device_order": (("cpu", "int8"),)} if cpu_only else {}

	    funasr_adapter = FunASRSenseVoiceAdapter(**funasr_options)
	    whisper_adapter = FasterWhisperAdapter(**whisper_options)
	    return {"funasr": funasr_adapter, "faster-whisper": whisper_adapter}


	@spaces.GPU
	def _process_turn_for_space(
	    audio_input,
	    manual_text,
	    asr_backend_key,
	    tts_backend_key,
	    tts_voice,
	    rule_rows,
	    *,
	    tracker,
	    manual_asr,
	    asr_adapters,
	    tts_adapters,
	    code_version,
	):
	    """Run one upload/manual-text turn and produce the eight UI output values.

	    The editor rows are parsed into rules, run metadata is assembled, and the
	    turn is delegated to `run_controlled_turn`. Any exception raised by that
	    call is converted into a failure tuple instead of propagating.

	    Returns:
	        A tuple of (transcript, partial transcript, matched keyword, matched
	        rule id, reply text, TTS status text, reply audio path or None,
	        run summary markdown).
	    """
	    not_matched = "未命中"
	    parsed_rules = rules_from_editor_rows(rule_rows)
	    source_audio = Path(audio_input) if audio_input else None
	    has_audio = source_audio is not None

	    if has_audio:
	        hypotheses = ["H-001", "H-002", "H-003"]
	    else:
	        hypotheses = ["H-002", "H-004"]
	    # Only the faster-whisper backend maps to route R-002; every other key uses R-001.
	    route = "R-002" if asr_backend_key == "faster-whisper" else "R-001"
	    turn_run_id = make_run_id(f"stage1-turn-{asr_backend_key or 'manual'}")
	    turn_metadata = RunMetadata(
	        run_id=turn_run_id,
	        experiment_name="stage1-controlled-turn",
	        hypothesis_ids=hypotheses,
	        baseline_run_id=None,
	        route_id=route,
	        seed=42,
	        code_version=code_version,
	        data_version=str(source_audio) if has_audio else "ui-manual-text",
	        started_at=datetime.now(timezone.utc).isoformat(),
	        status="running",
	    )

	    try:
	        outcome = run_controlled_turn(
	            run_id=turn_run_id,
	            audio_path=source_audio,
	            manual_text=manual_text,
	            rules=parsed_rules,
	            audio_asr=asr_adapters.get(asr_backend_key),
	            manual_asr=manual_asr,
	            tts_adapter=tts_adapters.get(tts_backend_key),
	            tts_voice=tts_voice,
	            tracker=tracker,
	            metadata=turn_metadata,
	        )
	    except Exception as exc:
	        # Surface the failure in the status field and summary rather than raising.
	        failure_summary = _format_run_summary(
	            run_id=turn_run_id,
	            input_mode="audio" if has_audio else "manual_text",
	            asr_backend=asr_backend_key,
	            asr_latency_ms=None,
	            rule_id=None,
	            matched_keyword=None,
	            tts_status="失败",
	            runtime_note=None,
	        )
	        return ("", "", not_matched, not_matched, "", f"处理失败：{exc}", None, failure_summary)

	    rule_match = outcome.match_result
	    recognition = outcome.asr_result
	    preview = outcome.tts_preview

	    reply_value = rule_match.reply
	    if not reply_value:
	        reply_value = "当前未命中规则。" if not rule_match.matched else ""
	    status_text = _format_tts_status(preview, rule_match.matched, finalized=True)
	    success_summary = _format_run_summary(
	        run_id=outcome.run_id,
	        input_mode=outcome.input_mode,
	        asr_backend=recognition.backend,
	        asr_latency_ms=recognition.latency_ms,
	        rule_id=rule_match.rule_id,
	        matched_keyword=rule_match.matched_keyword,
	        tts_status=status_text,
	        runtime_note=recognition.runtime_note,
	    )
	    reply_audio_path = None if preview is None else preview.audio_path
	    return (
	        recognition.transcript,
	        recognition.partial_transcript,
	        rule_match.matched_keyword or not_matched,
	        rule_match.rule_id or not_matched,
	        reply_value,
	        status_text,
	        reply_audio_path,
	        success_summary,
	    )


	def _preview_live_microphone_stream_for_space(
	    audio_chunk,
	    stream_state,
	    asr_backend_key,
	    rule_rows,
	    *,
	    asr_adapters,
	):
	    """Feed one streamed microphone chunk through the preview pipeline.

	    Returns a 9-tuple: (stream state, transcript, partial transcript, matched
	    keyword, matched rule id, reply text, status text, reply audio path,
	    summary markdown). If the preview step raises, the previous state (or a
	    fresh recording state when there is none) is echoed back together with
	    the error message.
	    """
	    not_matched = "未命中"
	    active_rules = rules_from_editor_rows(rule_rows)

	    try:
	        # The adapter lookup stays inside the try so an unknown backend key is
	        # reported like any other preview failure.
	        next_state, recognition, rule_match = preview_streaming_turn(
	            state=stream_state,
	            audio_chunk=audio_chunk,
	            rules=active_rules,
	            audio_asr=asr_adapters[asr_backend_key],
	        )
	    except Exception as exc:
	        fallback_state = stream_state or create_streaming_turn_state(recording_active=True)
	        return (
	            fallback_state,
	            fallback_state.transcript,
	            fallback_state.partial_transcript,
	            fallback_state.matched_keyword or not_matched,
	            fallback_state.matched_rule_id or not_matched,
	            fallback_state.matched_reply_text or "",
	            f"实时识别失败：{exc}",
	            fallback_state.auto_reply_audio_path,
	            _format_stream_preview_summary(state=fallback_state, error=str(exc)),
	        )

	    reply_preview = rule_match.reply
	    if not reply_preview:
	        # Only show the "no rule matched yet" hint once something was transcribed.
	        reply_preview = "当前尚未命中规则。" if next_state.transcript else ""

	    status_text = _format_live_stream_status(next_state, asr_result=recognition, finalized=False)
	    return (
	        next_state,
	        next_state.transcript,
	        next_state.partial_transcript,
	        rule_match.matched_keyword or not_matched,
	        rule_match.rule_id or not_matched,
	        reply_preview,
	        status_text,
	        next_state.auto_reply_audio_path,
	        _format_stream_preview_summary(state=next_state),
	    )


	def _auto_reply_live_microphone_stream_for_space(
	    stream_state,
	    tts_backend_key,
	    tts_voice,
	    *,
	    tts_adapters,
	):
	    """Timer callback that asks the pipeline whether an auto reply is due.

	    Returns a 4-tuple: (stream state, status text, reply audio path, summary
	    markdown). If `maybe_auto_reply_streaming_turn` raises, the incoming state
	    is returned unchanged together with the error message.
	    """
	    current_state = stream_state or create_streaming_turn_state()

	    try:
	        next_state, preview = maybe_auto_reply_streaming_turn(
	            state=current_state,
	            tts_adapter=tts_adapters.get(tts_backend_key),
	            tts_voice=tts_voice,
	        )
	    except Exception as exc:
	        error_summary = _format_stream_preview_summary(state=current_state, error=str(exc))
	        return (
	            current_state,
	            f"自动播报失败：{exc}",
	            current_state.auto_reply_audio_path,
	            error_summary,
	        )

	    status_text = _format_live_stream_status(
	        next_state,
	        finalized=False,
	        auto_reply_triggered=preview is not None,
	    )
	    summary_text = _format_stream_preview_summary(
	        state=next_state,
	        auto_reply_triggered=preview is not None,
	    )
	    return (next_state, status_text, next_state.auto_reply_audio_path, summary_text)


	@spaces.GPU
	def _finalize_live_microphone_stream_for_space(
	    stream_state,
	    asr_backend_key,
	    tts_backend_key,
	    tts_voice,
	    rule_rows,
	    *,
	    tracker,
	    manual_asr,
	    asr_adapters,
	    tts_adapters,
	    code_version,
	):
	    """Finish a live microphone turn once recording stops or pauses.

	    The stream state is stopped, a tracked run is executed through
	    `run_streaming_turn`, and a fresh stream state is handed back so the
	    next recording starts clean.

	    Returns a 9-tuple: (new stream state, transcript, partial transcript,
	    matched keyword, matched rule id, reply text, status text, reply audio
	    path, summary markdown). Exceptions from the pipeline are reported in
	    the status/summary fields instead of propagating.
	    """
	    not_matched = "未命中"
	    stopped_state = stop_streaming_turn_state(stream_state or create_streaming_turn_state())
	    active_rules = rules_from_editor_rows(rule_rows)
	    stream_run_id = make_run_id(f"stage1-live-stream-{asr_backend_key or 'manual'}")
	    stream_metadata = RunMetadata(
	        run_id=stream_run_id,
	        experiment_name="stage1-live-stream-turn",
	        hypothesis_ids=["H-001", "H-002", "H-003"],
	        baseline_run_id=None,
	        route_id="R-001" if asr_backend_key == "funasr" else "R-002",
	        seed=42,
	        code_version=code_version,
	        data_version="ui-live-microphone-stream",
	        started_at=datetime.now(timezone.utc).isoformat(),
	        status="running",
	    )

	    # An auto reply already generated for the current match is reused; in that
	    # case no TTS adapter is handed to the pipeline below.
	    reuse_auto_reply = bool(
	        stopped_state.current_match_key
	        and stopped_state.auto_reply_key == stopped_state.current_match_key
	        and stopped_state.auto_reply_audio_path
	    )

	    try:
	        outcome = run_streaming_turn(
	            run_id=stream_run_id,
	            state=stopped_state,
	            rules=active_rules,
	            audio_asr=asr_adapters[asr_backend_key],
	            manual_asr=manual_asr,
	            tts_adapter=None if reuse_auto_reply else tts_adapters.get(tts_backend_key),
	            tts_voice=tts_voice,
	            tracker=tracker,
	            metadata=stream_metadata,
	        )
	    except Exception as exc:
	        return (
	            create_streaming_turn_state(),
	            stopped_state.transcript,
	            stopped_state.partial_transcript,
	            stopped_state.matched_keyword or not_matched,
	            stopped_state.matched_rule_id or not_matched,
	            stopped_state.matched_reply_text or "",
	            f"结束录音时处理失败：{exc}",
	            stopped_state.auto_reply_audio_path,
	            _format_stream_preview_summary(state=stopped_state, error=str(exc), finalized=True),
	        )

	    rule_match = outcome.match_result
	    recognition = outcome.asr_result
	    preview = outcome.tts_preview

	    # Prefer the text that was already auto-replied, then the matched reply.
	    reply_value = stopped_state.auto_reply_text or rule_match.reply
	    if not reply_value:
	        reply_value = "当前未命中规则。" if not rule_match.matched else ""

	    status_text = _format_live_stream_status(
	        stopped_state,
	        asr_result=recognition,
	        finalized=True,
	        auto_reply_triggered=reuse_auto_reply or preview is not None,
	    )
	    run_summary_text = _format_run_summary(
	        run_id=outcome.run_id,
	        input_mode=outcome.input_mode,
	        asr_backend=recognition.backend,
	        asr_latency_ms=recognition.latency_ms,
	        rule_id=rule_match.rule_id,
	        matched_keyword=rule_match.matched_keyword,
	        tts_status=status_text,
	        runtime_note=recognition.runtime_note,
	    )
	    fresh_state = create_streaming_turn_state()

	    if reuse_auto_reply:
	        reply_audio_path = stopped_state.auto_reply_audio_path
	    elif preview is not None:
	        reply_audio_path = preview.audio_path
	    else:
	        reply_audio_path = None

	    return (
	        fresh_state,
	        recognition.transcript,
	        recognition.partial_transcript,
	        rule_match.matched_keyword or not_matched,
	        rule_match.rule_id or not_matched,
	        reply_value,
	        status_text,
	        reply_audio_path,
	        run_summary_text,
	    )


	@spaces.GPU
	def _run_stage1_smoke_for_space(
	    asr_backend_key,
	    tts_backend_key,
	    tts_voice,
	    rule_rows,
	    script_ids_text,
	    *,
	    benchmark_csv_path,
	    tracker,
	    asr_adapters,
	    tts_adapters,
	    code_version,
	):
	    """Run the stage-1 synthetic-audio smoke test and return its Markdown report.

	    The selected TTS adapter and voice are passed to the harness twice: once
	    for the input side and once for the reply side. Exceptions (including an
	    unknown backend key) propagate to the caller.
	    """
	    active_rules = rules_from_editor_rows(rule_rows)
	    chosen_script_ids = _parse_script_ids(script_ids_text)
	    smoke_run_id = make_run_id(f"stage1-smoke-{asr_backend_key}")
	    smoke_route = "R-001" if asr_backend_key == "funasr" else "R-002"
	    smoke_metadata = RunMetadata(
	        run_id=smoke_run_id,
	        experiment_name="stage1-synthetic-audio-smoke",
	        hypothesis_ids=["H-001", "H-002", "H-003", "H-005"],
	        baseline_run_id=None,
	        route_id=smoke_route,
	        seed=42,
	        code_version=code_version,
	        data_version=f"{benchmark_csv_path}#synthetic-audio-smoke",
	        started_at=datetime.now(timezone.utc).isoformat(),
	        status="running",
	    )

	    asr_adapter = asr_adapters[asr_backend_key]
	    report = evaluate_stage1_integration_smoke(
	        benchmark_csv_path,
	        active_rules,
	        asr_adapter,
	        tts_adapters[tts_backend_key],
	        tts_adapters[tts_backend_key],
	        script_ids=chosen_script_ids,
	        tracker=tracker,
	        metadata=smoke_metadata,
	        input_voice=tts_voice,
	        reply_voice=tts_voice,
	    )
	    return report.to_markdown()


	# NOTE(review): this copy of the function has lost its indentation; the
	# nesting of the nested defs and `with` blocks below must be restored from
	# the original file before it can run. The code is left untouched here.
	def build_app(
	benchmark_csv_path: Path = DEFAULT_BENCHMARK_CSV_PATH,
	rules_path: Path = DEFAULT_RULES_PATH,
	evaluation_contract_path: Path = DEFAULT_EVALUATION_CONTRACT_PATH,
	):
	"""Assemble the Gradio Blocks app for the VoiceDirector demo and return it.

	Loads the rule catalog and evaluation contract from the given paths, creates
	the tracker and the ASR/TTS adapters, binds them into the callbacks with
	functools.partial, declares the UI components and wires their events.
	"""
	# Gradio is imported lazily; a missing install raises RuntimeError here.
	gr = _require_gradio()
	seed_rules = load_rule_catalog(rules_path)
	evaluation_contract = load_evaluation_contract(evaluation_contract_path)
	# Tracking output goes to results/tracking under the parent of this file's directory.
	tracker = LocalJsonlTracker(Path(__file__).resolve().parents[1] / "results" / "tracking")
	manual_asr = MockASRAdapter()
	# Adapters for the upload, finalize and smoke paths (built with default arguments).
	asr_adapters = {
	"funasr": FunASRSenseVoiceAdapter(),
	"faster-whisper": FasterWhisperAdapter(),
	}
	# Keep preview ASR instances separate so a ZeroGPU live-preview CPU fallback
	# does not pin the finalize/upload paths to CPU as well.
	stream_preview_asr_adapters = _build_stream_preview_asr_adapters()
	tts_adapters = {
	"edge-tts": EdgeTTSAdapter(),
	"mock": MockTTSAdapter(),
	}
	code_version = resolve_code_version(Path(__file__).resolve().parents[1])

	# Round-trip the edited rows through the rule parser and refresh the table and status text.
	def apply_rules(rule_rows):
	session_rules = rules_from_editor_rows(rule_rows)
	return rules_to_table_rows(session_rules), _format_rule_status(session_rules)

	# start_recording handler: activates the 0.25 s timer, resets the stream
	# state and clears the result fields (ten outputs, see the wiring below).
	def start_live_microphone_stream():
	return (
	gr.Timer(value=0.25, active=True),
	create_streaming_turn_state(recording_active=True),
	"",
	"",
	"未命中",
	"未命中",
	"",
	f"实时监听中，命中规则后稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒将自动播放回复语音。",
	None,
	_initial_live_stream_placeholder(),
	)

	# Bind the shared dependencies as keyword arguments so the Gradio callbacks
	# only receive component values positionally.
	process_turn = partial(
	_process_turn_for_space,
	tracker=tracker,
	manual_asr=manual_asr,
	asr_adapters=asr_adapters,
	tts_adapters=tts_adapters,
	code_version=code_version,
	)
	# The preview callback gets the separate preview adapters, not `asr_adapters`.
	preview_live_stream = partial(
	_preview_live_microphone_stream_for_space,
	asr_adapters=stream_preview_asr_adapters,
	)
	auto_reply_live_stream = partial(
	_auto_reply_live_microphone_stream_for_space,
	tts_adapters=tts_adapters,
	)
	finalize_live_stream = partial(
	_finalize_live_microphone_stream_for_space,
	tracker=tracker,
	manual_asr=manual_asr,
	asr_adapters=asr_adapters,
	tts_adapters=tts_adapters,
	code_version=code_version,
	)

	# Finalize the turn and prepend a deactivated timer to the nine finalize outputs.
	def stop_live_microphone_stream(stream_state, asr_backend_key, tts_backend_key, tts_voice, rule_rows):
	result = finalize_live_stream(stream_state, asr_backend_key, tts_backend_key, tts_voice, rule_rows)
	return (gr.Timer(value=0.25, active=False), *result)

	# Run the rule-only internal benchmark against the current table rows and
	# return its Markdown report.
	def preview_benchmark(rule_rows):
	session_rules = rules_from_editor_rows(rule_rows)
	run_id = make_run_id("stage1-rule-preview")
	metadata = RunMetadata(
	run_id=run_id,
	experiment_name="stage1-rule-only-benchmark-preview",
	hypothesis_ids=["H-002"],
	baseline_run_id=None,
	route_id="R-001",
	seed=42,
	code_version=code_version,
	data_version=str(benchmark_csv_path),
	started_at=datetime.now(timezone.utc).isoformat(),
	status="running",
	)
	summary = evaluate_internal_benchmark(
	benchmark_csv_path,
	session_rules,
	tracker=tracker,
	metadata=metadata,
	)
	return summary.to_markdown()

	run_stage1_smoke = partial(
	_run_stage1_smoke_for_space,
	benchmark_csv_path=benchmark_csv_path,
	tracker=tracker,
	asr_adapters=asr_adapters,
	tts_adapters=tts_adapters,
	code_version=code_version,
	)

	# ---- UI components ----
	with gr.Blocks(title="VoiceDirector 语音场控接话演示") as app:
	# Stream state held in gr.State, plus the auto-reply timer (inactive until recording starts).
	live_stream_state = gr.State(value=create_streaming_turn_state())
	auto_reply_timer = gr.Timer(value=0.25, active=False)

	gr.Markdown("# VoiceDirector 语音场控接话演示")
	gr.Markdown("实时语音接话演示：连续麦克风转写、自动规则匹配、自动回复语音。")
	gr.Markdown(f"提示：{MINIMAL_UI_NOTICE}")

	with gr.Row():
	# Column scale=5: live microphone input, backend/voice selectors, fallback
	# inputs and the result fields.
	with gr.Column(scale=5):
	gr.Markdown(
	f"### 主路径：麦克风连续流式识别\n点击麦克风开始说话，识别文本会持续刷新；命中规则稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒后会自动播放回复语音。"
	)
	live_audio_input = gr.Audio(
	label="实时麦克风输入（连续流式）",
	sources=["microphone"],
	type="numpy",
	streaming=True,
	)
	asr_backend = gr.Dropdown(
	choices=[
	("FunASR / SenseVoice（主路径）", "funasr"),
	("faster-whisper（兜底）", "faster-whisper"),
	],
	value="funasr",
	label="识别后端",
	)
	tts_backend = gr.Dropdown(
	choices=[("edge-tts（真实语音）", "edge-tts"), ("mock（调试预览）", "mock")],
	value="edge-tts",
	label="回复语音后端",
	)
	tts_voice = gr.Dropdown(
	choices=[(voice, voice) for voice in AVAILABLE_EDGE_TTS_VOICES],
	value=DEFAULT_EDGE_TTS_VOICE,
	label="回复音色",
	)
	# Collapsed accordion with the upload / manual-text fallback inputs.
	with gr.Accordion("上传音频 / 手工输入兜底", open=False):
	upload_audio_input = gr.Audio(
	label="上传音频兜底",
	sources=["upload"],
	type="filepath",
	format="wav",
	)
	manual_text = gr.Textbox(
	label="手工文本兜底",
	placeholder="没有音频时，可直接输入中文文本做规则演示。",
	lines=3,
	)
	run_turn_button = gr.Button("执行上传/手工兜底", variant="secondary")
	# Result fields shared by the upload and live-stream callbacks.
	recognized_text = gr.Textbox(label="识别文本")
	partial_text = gr.Textbox(label="实时转写预览")
	matched_keyword = gr.Textbox(label="命中关键词")
	matched_rule = gr.Textbox(label="命中规则")
	reply_text = gr.Textbox(label="回复文本", lines=3)
	tts_status = gr.Textbox(label="当前状态")
	reply_audio = gr.Audio(label="回复语音", interactive=False, type="filepath", autoplay=True)
	run_summary = gr.Markdown(value=_initial_live_stream_placeholder())
	# Column scale=7: session rule editor and the evaluation contract text.
	with gr.Column(scale=7):
	rule_table = gr.Dataframe(
	headers=TABLE_HEADERS,
	datatype=["str", "str", "str", "str"],
	row_count=(len(seed_rules), "fixed"),
	column_count=(len(TABLE_HEADERS), "fixed"),
	value=rules_to_table_rows(seed_rules),
	label="规则编辑表（当前会话）",
	interactive=True,
	)
	apply_button = gr.Button("应用规则编辑")
	rule_status = gr.Textbox(label="规则编辑状态", value=_format_rule_status(seed_rules))
	gr.Markdown(_format_contract(evaluation_contract))

	# Collapsed accordion with the benchmark preview and end-to-end smoke test controls.
	with gr.Accordion("基准与烟测面板", open=False):
	gr.Markdown(
	"下面的规则预览保留了内部基准可见性。端到端烟测会使用 edge-tts 生成输入音频来打通真实 ASR/TTS 代码路径。"
	)
	benchmark_button = gr.Button("预览规则基准")
	benchmark_output = gr.Markdown(value=_initial_benchmark_placeholder(benchmark_csv_path))
	script_ids = gr.Textbox(
	label="烟测脚本 ID",
	value=", ".join(DEFAULT_STAGE1_SMOKE_SCRIPT_IDS),
	lines=2,
	)
	smoke_asr_backend = gr.Dropdown(
	choices=[
	("FunASR / SenseVoice（主路径）", "funasr"),
	("faster-whisper（兜底）", "faster-whisper"),
	],
	value="funasr",
	label="烟测识别后端",
	)
	smoke_tts_backend = gr.Dropdown(
	choices=[("edge-tts（真实语音）", "edge-tts"), ("mock（调试预览）", "mock")],
	value="edge-tts",
	label="烟测语音后端",
	)
	smoke_tts_voice = gr.Dropdown(
	choices=[(voice, voice) for voice in AVAILABLE_EDGE_TTS_VOICES],
	value=DEFAULT_EDGE_TTS_VOICE,
	label="烟测音色",
	)
	stage1_smoke_button = gr.Button("执行端到端烟测")
	stage1_smoke_output = gr.Markdown(value=_initial_stage1_smoke_placeholder(benchmark_csv_path))

	# ---- Event wiring ----
	# Upload / manual-text fallback button: eight outputs.
	run_turn_button.click(
	process_turn,
	inputs=[upload_audio_input, manual_text, asr_backend, tts_backend, tts_voice, rule_table],
	outputs=[recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
	)
	# Recording start: reset the outputs and activate the timer.
	live_audio_input.start_recording(
	start_live_microphone_stream,
	outputs=[auto_reply_timer, live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
	queue=False,
	show_progress="hidden",
	)
	# Streamed chunks feed the preview callback (stream_every=0.75). It shares the
	# "live-stream-session" concurrency id (limit 1) with the timer tick below.
	stream_event = live_audio_input.stream(
	preview_live_stream,
	inputs=[live_audio_input, live_stream_state, asr_backend, rule_table],
	outputs=[live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
	show_progress="hidden",
	trigger_mode="always_last",
	concurrency_limit=1,
	concurrency_id="live-stream-session",
	stream_every=0.75,
	)
	# Timer tick: runs the auto-reply callback against the current stream state.
	auto_reply_event = auto_reply_timer.tick(
	auto_reply_live_stream,
	inputs=[live_stream_state, tts_backend, tts_voice],
	outputs=[live_stream_state, tts_status, reply_audio, run_summary],
	show_progress="hidden",
	trigger_mode="always_last",
	concurrency_limit=1,
	concurrency_id="live-stream-session",
	)
	# Stop and pause are wired identically: finalize the turn and cancel the
	# pending stream and timer events.
	live_audio_input.stop_recording(
	stop_live_microphone_stream,
	inputs=[live_stream_state, asr_backend, tts_backend, tts_voice, rule_table],
	outputs=[auto_reply_timer, live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
	show_progress="minimal",
	cancels=[stream_event, auto_reply_event],
	)
	live_audio_input.pause_recording(
	stop_live_microphone_stream,
	inputs=[live_stream_state, asr_backend, tts_backend, tts_voice, rule_table],
	outputs=[auto_reply_timer, live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
	show_progress="minimal",
	cancels=[stream_event, auto_reply_event],
	)
	# Rule editing, rule-only benchmark preview and end-to-end smoke test buttons.
	apply_button.click(apply_rules, inputs=[rule_table], outputs=[rule_table, rule_status])
	benchmark_button.click(preview_benchmark, inputs=[rule_table], outputs=[benchmark_output])
	stage1_smoke_button.click(
	run_stage1_smoke,
	inputs=[smoke_asr_backend, smoke_tts_backend, smoke_tts_voice, rule_table, script_ids],
	outputs=[stage1_smoke_output],
	)

	return app


	def _format_contract(contract: dict) -> str:
	h003 = contract["h003_mos"]
	notes = contract["evaluation_constraints"]
	return "\n".join(
	[
	"## 固定评测约束",
	f"- H-003 MOS 阈值：{h003['threshold']}/{h003['scale_max']}",
	f"- 最少评审人数：{h003['minimum_raters']}",
	f"- H-004 正式验证阶段：{notes['h004_formal_validation_phase']}",
	f"- H-002 适用范围：{notes['h002_precision_scope']}",
	]
	)


	def _format_rule_status(session_rules) -> str:
	return f"当前会话已加载 {len(session_rules)} 条规则，编辑结果会立即在本次会话生效。"


	def _initial_benchmark_placeholder(benchmark_csv_path: Path) -> str:
	return "\n".join(
	[
	"### 规则基准已就绪",
	f"- 数据集：{benchmark_csv_path}",
	"- 范围：仅内部基准",
	"- 用途：查看规则与文本基准，不代表真实语音识别表现。",
	]
	)


	def _initial_stage1_smoke_placeholder(benchmark_csv_path: Path) -> str:
	return "\n".join(
	[
	"### 端到端烟测已就绪",
	f"- 数据集：{benchmark_csv_path}",
	"- 输入音频来源：edge-tts 根据脚本文本合成",
	"- 用途：检查真实 ASR/TTS 链路是否跑通。",
	]
	)


	def _initial_live_stream_placeholder() -> str:
	    """Return the Markdown shown in the summary panel while no stream is active.

	    Mentions the auto-reply delay (AUTO_REPLY_DELAY_SECONDS truncated to an
	    int) and ends with the shared MINIMAL_UI_NOTICE text.
	    """
	    placeholder_lines = (
	        "### 实时监听已就绪",
	        "- 点击麦克风开始说话，识别文本会持续刷新。",
	        f"- 命中规则稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒后，会自动播放回复语音。",
	        "- 上传音频 / 手工输入兜底仍可用。",
	        f"- 说明：{MINIMAL_UI_NOTICE}",
	    )
	    return "\n".join(placeholder_lines)


	def _parse_script_ids(raw_value: str \| None) -> list[str]:
	if not raw_value:
	return list(DEFAULT_STAGE1_SMOKE_SCRIPT_IDS)
	return [item.strip() for item in raw_value.replace("\n", ",").split(",") if item.strip()]


	def _format_tts_status(tts_preview, matched: bool, *, finalized: bool) -> str:
	if tts_preview is None:
	return "录音结束，当前未生成回复语音。" if finalized and not matched else "当前未生成回复语音。"

	status = f"已生成回复语音（{tts_preview.latency_ms} ms）"
	if tts_preview.runtime_note:
	status = f"{status}；{tts_preview.runtime_note}"
	return status


	def _format_run_summary(
	    *,
	    run_id: str,
	    input_mode: str,
	    asr_backend: str | None,
	    asr_latency_ms: int | None,
	    rule_id: str | None,
	    matched_keyword: str | None,
	    tts_status: str,
	    runtime_note: str | None,
	) -> str:
	    """Render the Markdown summary for a completed (or failed) turn.

	    Args:
	        run_id: Identifier of the tracked run.
	        input_mode: "audio" is shown as audio input; any other value is shown
	            as manual text.
	        asr_backend: Backend name; a falsy value is shown as manual text.
	        asr_latency_ms: Recognition latency; None is shown as unknown.
	        rule_id: Matched rule id; a falsy value is shown as not matched.
	        matched_keyword: Matched keyword; a falsy value is shown as not matched.
	        tts_status: Already formatted reply-audio status text.
	        runtime_note: Optional extra line appended when truthy.

	    Returns:
	        The summary as newline-joined Markdown bullet lines.
	    """
	    latency_text = f"{asr_latency_ms} ms" if asr_latency_ms is not None else "未知"
	    input_mode_text = "音频输入" if input_mode == "audio" else "手工文本"
	    lines = [
	        "### 本轮处理结果",
	        f"- 运行 ID：{run_id}",
	        f"- 输入方式：{input_mode_text}",
	        f"- 识别后端：{asr_backend or '手工文本'}",
	        f"- 最新识别延迟：{latency_text}",
	        f"- 命中规则：{rule_id or '未命中'}",
	        f"- 命中关键词：{matched_keyword or '未命中'}",
	        f"- 回复语音状态：{tts_status}",
	        f"- 说明：{MINIMAL_UI_NOTICE}",
	    ]
	    if runtime_note:
	        lines.append(f"- 运行提示：{runtime_note}")
	    return "\n".join(lines)


	def _format_live_stream_status(state, *, asr_result=None, finalized: bool, auto_reply_triggered: bool = False) -> str:
	if finalized:
	status = "录音已结束。"
	if state.current_match_key and (state.auto_reply_key == state.current_match_key or auto_reply_triggered):
	status = "录音已结束，已保留当前自动回复语音。"
	elif state.current_match_key:
	status = "录音已结束，已完成本轮规则匹配。"
	else:
	status = "录音已结束，当前未命中规则。"
	elif state.auto_reply_key == state.current_match_key and state.auto_reply_audio_path:
	status = f"已自动播放规则 {state.matched_rule_id or '未命中'} 的回复语音，继续说话可触发新的规则。"
	elif state.current_match_key:
	status = f"已命中规则 {state.matched_rule_id or '未命中'}，稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒后将自动播放回复语音。"
	elif state.transcript:
	status = "实时转写中，当前尚未命中规则。"
	else:
	status = "实时监听中，请开始说话。"

	effective_asr_result = asr_result
	if effective_asr_result is None and state.asr_latency_ms is not None:
	latency_text = f"最新识别延迟 {state.asr_latency_ms} ms。"
	status = f"{status} {latency_text}"
	elif effective_asr_result is not None:
	status = f"{status} 最新识别延迟 {effective_asr_result.latency_ms} ms。"

	runtime_note = effective_asr_result.runtime_note if effective_asr_result is not None else state.runtime_note
	if runtime_note:
	status = f"{status} {runtime_note}"
	return status


	def _format_stream_preview_summary(*, state, error: str | None = None, finalized: bool = False, auto_reply_triggered: bool = False) -> str:
	    """Render the Markdown "live status" panel for the streaming turn.

	    Args:
	        state: Streaming turn state whose counters and match fields are shown.
	        error: When truthy, a short error panel (chunk count, error text and
	            the shared notice) is returned instead of the full status.
	        finalized: True once recording has ended; overrides the auto-reply
	            status line when the state still holds a match.
	        auto_reply_triggered: True when an auto reply was just produced.

	    Returns:
	        The panel as newline-joined Markdown bullet lines.
	    """
	    if error:
	        return "\n".join(
	            [
	                "### 实时状态",
	                f"- 已接收音频片段：{state.chunk_count}",
	                f"- 错误：{error}",
	                f"- 说明：{MINIMAL_UI_NOTICE}",
	            ]
	        )

	    if state.auto_reply_key == state.current_match_key and state.auto_reply_audio_path:
	        auto_reply_status = "已自动播放"
	    elif state.current_match_key:
	        auto_reply_status = f"等待稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒"
	    else:
	        auto_reply_status = "当前未触发"

	    # Later overrides win: a just-triggered reply first, then the finalized state.
	    if auto_reply_triggered:
	        auto_reply_status = "刚刚完成自动播放"
	    if finalized and state.current_match_key:
	        auto_reply_status = "录音结束，已完成本轮处理"

	    latency_text = f"{state.asr_latency_ms} ms" if state.asr_latency_ms is not None else "未知"
	    lines = [
	        "### 实时状态",
	        f"- 已接收音频片段：{state.chunk_count}",
	        f"- 当前采样率：{state.sample_rate or '未知'} Hz",
	        f"- 识别后端：{state.asr_backend or '尚未开始'}",
	        f"- 最新识别延迟：{latency_text}",
	        f"- 当前命中规则：{state.matched_rule_id or '未命中'}",
	        f"- 当前命中关键词：{state.matched_keyword or '未命中'}",
	        f"- 自动播报状态：{auto_reply_status}",
	        f"- 说明：{MINIMAL_UI_NOTICE}",
	    ]
	    if state.runtime_note:
	        lines.append(f"- 运行提示：{state.runtime_note}")
	    return "\n".join(lines)


	def _require_gradio():
	try:
	import gradio as gr
	except ImportError as exc:
	raise RuntimeError("未安装 Gradio，请先执行 `pip install -r requirements.txt`。") from exc
	return gr