import os
from datetime import datetime, timezone
from functools import partial
from pathlib import Path

try:
    import spaces  # pyright: ignore[reportMissingImports]
except ImportError:  # pragma: no cover - local fallback when not running on HF Spaces.
    class _SpacesShim:
        """No-op stand-in used when the ``spaces`` package cannot be imported."""

        @staticmethod
        def GPU(*decorator_args, **decorator_kwargs):
            """Return the function unchanged, for both ``@GPU`` and ``@GPU(...)``."""
            used_bare = (
                len(decorator_args) == 1
                and not decorator_kwargs
                and callable(decorator_args[0])
            )
            if used_bare:
                # ``@spaces.GPU`` without parentheses: the argument is the function.
                return decorator_args[0]
            # ``@spaces.GPU(...)``: hand back an identity decorator.
            return lambda func: func

    spaces = _SpacesShim()

from benchmarks.harness import (
    DEFAULT_STAGE1_SMOKE_SCRIPT_IDS,
    evaluate_internal_benchmark,
    evaluate_stage1_integration_smoke,
)
from core.adapters import (
    AVAILABLE_EDGE_TTS_VOICES,
    DEFAULT_EDGE_TTS_VOICE,
    EdgeTTSAdapter,
    FasterWhisperAdapter,
    FunASRSenseVoiceAdapter,
    MockASRAdapter,
    MockTTSAdapter,
)
from core.pipeline import (
    AUTO_REPLY_DELAY_SECONDS,
    create_streaming_turn_state,
    maybe_auto_reply_streaming_turn,
    preview_streaming_turn,
    run_controlled_turn,
    run_streaming_turn,
    stop_streaming_turn_state,
)
from utils.config_loader import (
    DEFAULT_BENCHMARK_CSV_PATH,
    DEFAULT_EVALUATION_CONTRACT_PATH,
    DEFAULT_RULES_PATH,
    TABLE_HEADERS,
    load_evaluation_contract,
    load_rule_catalog,
    rules_from_editor_rows,
    rules_to_table_rows,
)
from utils.tracking import LocalJsonlTracker, RunMetadata, make_run_id, resolve_code_version

# User-facing notice shown in the page header and repeated in the markdown
# status summaries produced by the formatting helpers in this module.
MINIMAL_UI_NOTICE = "当前为演示版，优先展示实时接话体验。首次加载模型可能稍慢；麦克风不可用时可改用下方兜底输入。"


def _is_zero_gpu_runtime() -> bool:
    return os.getenv("SPACES_ZERO_GPU", "").lower() in {"1", "t", "true"}


def _build_stream_preview_asr_adapters():
    """Create the ASR adapters used by the live-preview callback, keyed by backend."""
    funasr_options = {}
    whisper_options = {}
    # ZeroGPU forbids CUDA init from the main process, so preview callbacks must
    # stay on CPU when they are not wrapped by @spaces.GPU.
    if _is_zero_gpu_runtime():
        funasr_options["device_order"] = ("cpu",)
        whisper_options["device_order"] = (("cpu", "int8"),)

    return {
        "funasr": FunASRSenseVoiceAdapter(**funasr_options),
        "faster-whisper": FasterWhisperAdapter(**whisper_options),
    }


@spaces.GPU
def _process_turn_for_space(
    audio_input,
    manual_text,
    asr_backend_key,
    tts_backend_key,
    tts_voice,
    rule_rows,
    *,
    tracker,
    manual_asr,
    asr_adapters,
    tts_adapters,
    code_version,
):
    """Run one upload/manual-text turn and return the eight UI output values.

    The returned tuple is ordered as: transcript, partial transcript, matched
    keyword, matched rule id, reply text, TTS status, reply audio path, and the
    markdown run summary. An exception raised while running the turn is
    reported through the status field instead of propagating.
    """
    session_rules = rules_from_editor_rows(rule_rows)
    audio_path = Path(audio_input) if audio_input else None
    has_audio = audio_path is not None

    if has_audio:
        hypothesis_ids = ["H-001", "H-002", "H-003"]
        data_version = str(audio_path)
    else:
        hypothesis_ids = ["H-002", "H-004"]
        data_version = "ui-manual-text"
    # Only faster-whisper maps to R-002; every other backend key uses R-001.
    route_id = "R-002" if asr_backend_key == "faster-whisper" else "R-001"

    run_id = make_run_id(f"stage1-turn-{asr_backend_key or 'manual'}")
    metadata = RunMetadata(
        run_id=run_id,
        experiment_name="stage1-controlled-turn",
        hypothesis_ids=hypothesis_ids,
        baseline_run_id=None,
        route_id=route_id,
        seed=42,
        code_version=code_version,
        data_version=data_version,
        started_at=datetime.now(timezone.utc).isoformat(),
        status="running",
    )

    try:
        outcome = run_controlled_turn(
            run_id=run_id,
            audio_path=audio_path,
            manual_text=manual_text,
            rules=session_rules,
            audio_asr=asr_adapters.get(asr_backend_key),
            manual_asr=manual_asr,
            tts_adapter=tts_adapters.get(tts_backend_key),
            tts_voice=tts_voice,
            tracker=tracker,
            metadata=metadata,
        )
    except Exception as exc:
        failure_status = f"处理失败：{exc}"
        failure_summary = _format_run_summary(
            run_id=run_id,
            input_mode="audio" if has_audio else "manual_text",
            asr_backend=asr_backend_key,
            asr_latency_ms=None,
            rule_id=None,
            matched_keyword=None,
            tts_status="失败",
            runtime_note=None,
        )
        return ("", "", "未命中", "未命中", "", failure_status, None, failure_summary)

    asr = outcome.asr_result
    match = outcome.match_result
    tts_preview = outcome.tts_preview

    reply_text_value = match.reply
    if not reply_text_value:
        # No reply text: show the "no rule matched" hint only for real misses.
        reply_text_value = "" if match.matched else "当前未命中规则。"

    tts_status_text = _format_tts_status(tts_preview, match.matched, finalized=True)
    summary = _format_run_summary(
        run_id=outcome.run_id,
        input_mode=outcome.input_mode,
        asr_backend=asr.backend,
        asr_latency_ms=asr.latency_ms,
        rule_id=match.rule_id,
        matched_keyword=match.matched_keyword,
        tts_status=tts_status_text,
        runtime_note=asr.runtime_note,
    )
    reply_audio_path = None if tts_preview is None else tts_preview.audio_path
    return (
        asr.transcript,
        asr.partial_transcript,
        match.matched_keyword or "未命中",
        match.rule_id or "未命中",
        reply_text_value,
        tts_status_text,
        reply_audio_path,
        summary,
    )


def _preview_live_microphone_stream_for_space(
    audio_chunk,
    stream_state,
    asr_backend_key,
    rule_rows,
    *,
    asr_adapters,
):
    """Feed one microphone chunk into the streaming preview and return nine UI values.

    The tuple is ordered as: stream state, transcript, partial transcript,
    matched keyword, matched rule id, reply preview, status text, auto-reply
    audio path, and the markdown preview summary. On failure the previous
    state (or a fresh recording state) is echoed back with the error text.
    """
    session_rules = rules_from_editor_rows(rule_rows)

    try:
        # The adapter lookup stays inside the try so an unknown backend key is
        # reported like any other preview failure.
        adapter = asr_adapters[asr_backend_key]
        updated_state, asr_result, match = preview_streaming_turn(
            state=stream_state,
            audio_chunk=audio_chunk,
            rules=session_rules,
            audio_asr=adapter,
        )
    except Exception as exc:
        fallback_state = stream_state or create_streaming_turn_state(recording_active=True)
        error_text = str(exc)
        return (
            fallback_state,
            fallback_state.transcript,
            fallback_state.partial_transcript,
            fallback_state.matched_keyword or "未命中",
            fallback_state.matched_rule_id or "未命中",
            fallback_state.matched_reply_text or "",
            f"实时识别失败：{exc}",
            fallback_state.auto_reply_audio_path,
            _format_stream_preview_summary(state=fallback_state, error=error_text),
        )

    reply_preview = match.reply
    if not reply_preview:
        # Only hint at a miss once some speech has actually been transcribed.
        reply_preview = "当前尚未命中规则。" if updated_state.transcript else ""

    status_text = _format_live_stream_status(updated_state, asr_result=asr_result, finalized=False)
    return (
        updated_state,
        updated_state.transcript,
        updated_state.partial_transcript,
        match.matched_keyword or "未命中",
        match.rule_id or "未命中",
        reply_preview,
        status_text,
        updated_state.auto_reply_audio_path,
        _format_stream_preview_summary(state=updated_state),
    )


def _auto_reply_live_microphone_stream_for_space(
    stream_state,
    tts_backend_key,
    tts_voice,
    *,
    tts_adapters,
):
    """Timer callback: attempt an automatic spoken reply and return four UI values.

    The tuple is ordered as: stream state, status text, auto-reply audio path,
    and the markdown preview summary. On failure the incoming state is returned
    unchanged together with the error text.
    """
    state = stream_state or create_streaming_turn_state()

    try:
        updated_state, tts_preview = maybe_auto_reply_streaming_turn(
            state=state,
            tts_adapter=tts_adapters.get(tts_backend_key),
            tts_voice=tts_voice,
        )
    except Exception as exc:
        failure_status = f"自动播报失败：{exc}"
        failure_summary = _format_stream_preview_summary(state=state, error=str(exc))
        return (state, failure_status, state.auto_reply_audio_path, failure_summary)

    # A non-None preview means a reply was synthesized during this tick.
    triggered = tts_preview is not None
    status_text = _format_live_stream_status(
        updated_state,
        finalized=False,
        auto_reply_triggered=triggered,
    )
    summary = _format_stream_preview_summary(state=updated_state, auto_reply_triggered=triggered)
    return (updated_state, status_text, updated_state.auto_reply_audio_path, summary)


@spaces.GPU
def _finalize_live_microphone_stream_for_space(
    stream_state,
    asr_backend_key,
    tts_backend_key,
    tts_voice,
    rule_rows,
    *,
    tracker,
    manual_asr,
    asr_adapters,
    tts_adapters,
    code_version,
):
    """Finish a live microphone turn and return nine UI output values.

    The tuple is ordered as: a fresh stream state, transcript, partial
    transcript, matched keyword, matched rule id, reply text, status text,
    reply audio path, and the markdown summary. The stream state is reset on
    both the success and the failure path.
    """
    safe_state = stop_streaming_turn_state(stream_state or create_streaming_turn_state())
    session_rules = rules_from_editor_rows(rule_rows)
    run_id = make_run_id(f"stage1-live-stream-{asr_backend_key or 'manual'}")
    route_id = "R-001" if asr_backend_key == "funasr" else "R-002"
    metadata = RunMetadata(
        run_id=run_id,
        experiment_name="stage1-live-stream-turn",
        hypothesis_ids=["H-001", "H-002", "H-003"],
        baseline_run_id=None,
        route_id=route_id,
        seed=42,
        code_version=code_version,
        data_version="ui-live-microphone-stream",
        started_at=datetime.now(timezone.utc).isoformat(),
        status="running",
    )

    # Reuse the already generated auto-reply audio when it belongs to the
    # current match, so no TTS adapter is passed for this turn.
    reuse_auto_reply = bool(
        safe_state.current_match_key
        and safe_state.auto_reply_key == safe_state.current_match_key
        and safe_state.auto_reply_audio_path
    )

    try:
        audio_asr = asr_adapters[asr_backend_key]
        tts_adapter = None if reuse_auto_reply else tts_adapters.get(tts_backend_key)
        outcome = run_streaming_turn(
            run_id=run_id,
            state=safe_state,
            rules=session_rules,
            audio_asr=audio_asr,
            manual_asr=manual_asr,
            tts_adapter=tts_adapter,
            tts_voice=tts_voice,
            tracker=tracker,
            metadata=metadata,
        )
    except Exception as exc:
        return (
            create_streaming_turn_state(),
            safe_state.transcript,
            safe_state.partial_transcript,
            safe_state.matched_keyword or "未命中",
            safe_state.matched_rule_id or "未命中",
            safe_state.matched_reply_text or "",
            f"结束录音时处理失败：{exc}",
            safe_state.auto_reply_audio_path,
            _format_stream_preview_summary(state=safe_state, error=str(exc), finalized=True),
        )

    asr = outcome.asr_result
    match = outcome.match_result
    tts_preview = outcome.tts_preview

    reply_text_value = safe_state.auto_reply_text or match.reply
    if not reply_text_value:
        reply_text_value = "" if match.matched else "当前未命中规则。"

    tts_status_text = _format_live_stream_status(
        safe_state,
        asr_result=asr,
        finalized=True,
        auto_reply_triggered=reuse_auto_reply or tts_preview is not None,
    )
    summary = _format_run_summary(
        run_id=outcome.run_id,
        input_mode=outcome.input_mode,
        asr_backend=asr.backend,
        asr_latency_ms=asr.latency_ms,
        rule_id=match.rule_id,
        matched_keyword=match.matched_keyword,
        tts_status=tts_status_text,
        runtime_note=asr.runtime_note,
    )

    if reuse_auto_reply:
        reply_audio_path = safe_state.auto_reply_audio_path
    elif tts_preview is not None:
        reply_audio_path = tts_preview.audio_path
    else:
        reply_audio_path = None

    return (
        create_streaming_turn_state(),
        asr.transcript,
        asr.partial_transcript,
        match.matched_keyword or "未命中",
        match.rule_id or "未命中",
        reply_text_value,
        tts_status_text,
        reply_audio_path,
        summary,
    )


@spaces.GPU
def _run_stage1_smoke_for_space(
    asr_backend_key,
    tts_backend_key,
    tts_voice,
    rule_rows,
    script_ids_text,
    *,
    benchmark_csv_path,
    tracker,
    asr_adapters,
    tts_adapters,
    code_version,
):
    """Run the stage-1 integration smoke evaluation and return its markdown report.

    The selected TTS adapter and voice are passed for both of the smoke
    harness's TTS adapter arguments and both of its voice arguments.
    """
    session_rules = rules_from_editor_rows(rule_rows)
    selected_script_ids = _parse_script_ids(script_ids_text)
    run_id = make_run_id(f"stage1-smoke-{asr_backend_key}")
    metadata = RunMetadata(
        run_id=run_id,
        experiment_name="stage1-synthetic-audio-smoke",
        hypothesis_ids=["H-001", "H-002", "H-003", "H-005"],
        baseline_run_id=None,
        route_id="R-001" if asr_backend_key == "funasr" else "R-002",
        seed=42,
        code_version=code_version,
        data_version=f"{benchmark_csv_path}#synthetic-audio-smoke",
        started_at=datetime.now(timezone.utc).isoformat(),
        status="running",
    )

    audio_asr = asr_adapters[asr_backend_key]
    smoke_tts = tts_adapters[tts_backend_key]
    report = evaluate_stage1_integration_smoke(
        benchmark_csv_path,
        session_rules,
        audio_asr,
        smoke_tts,
        smoke_tts,
        script_ids=selected_script_ids,
        tracker=tracker,
        metadata=metadata,
        input_voice=tts_voice,
        reply_voice=tts_voice,
    )
    return report.to_markdown()


def build_app(
    benchmark_csv_path: Path = DEFAULT_BENCHMARK_CSV_PATH,
    rules_path: Path = DEFAULT_RULES_PATH,
    evaluation_contract_path: Path = DEFAULT_EVALUATION_CONTRACT_PATH,
):
    """Build the Gradio Blocks demo app and wire all of its event handlers.

    Args:
        benchmark_csv_path: Benchmark CSV passed to the rule-preview and
            smoke-test evaluations and shown in their placeholders.
        rules_path: Rule catalog loaded to seed the editable rule table.
        evaluation_contract_path: Evaluation contract rendered next to the
            rule table.

    Returns:
        The constructed ``gr.Blocks`` app (not launched).

    Raises:
        RuntimeError: If Gradio is not installed (raised by ``_require_gradio``).
    """
    gr = _require_gradio()
    seed_rules = load_rule_catalog(rules_path)
    evaluation_contract = load_evaluation_contract(evaluation_contract_path)
    # Tracking output goes to <two levels above this file>/results/tracking.
    tracker = LocalJsonlTracker(Path(__file__).resolve().parents[1] / "results" / "tracking")
    manual_asr = MockASRAdapter()
    asr_adapters = {
        "funasr": FunASRSenseVoiceAdapter(),
        "faster-whisper": FasterWhisperAdapter(),
    }
    # Keep preview ASR instances separate so a ZeroGPU live-preview CPU fallback
    # does not pin the finalize/upload paths to CPU as well.
    stream_preview_asr_adapters = _build_stream_preview_asr_adapters()
    tts_adapters = {
        "edge-tts": EdgeTTSAdapter(),
        "mock": MockTTSAdapter(),
    }
    code_version = resolve_code_version(Path(__file__).resolve().parents[1])

    def apply_rules(rule_rows):
        """Round-trip the edited rows through the rule parser and refresh the status text."""
        session_rules = rules_from_editor_rows(rule_rows)
        return rules_to_table_rows(session_rules), _format_rule_status(session_rules)

    def start_live_microphone_stream():
        """Activate the auto-reply timer and reset every live-stream output widget."""
        return (
            gr.Timer(value=0.25, active=True),
            create_streaming_turn_state(recording_active=True),
            "",
            "",
            "未命中",
            "未命中",
            "",
            f"实时监听中，命中规则后稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒将自动播放回复语音。",
            None,
            _initial_live_stream_placeholder(),
        )

    # The module-level handlers take their shared dependencies as keyword-only
    # arguments; bind them here so Gradio only has to supply the UI inputs.
    process_turn = partial(
        _process_turn_for_space,
        tracker=tracker,
        manual_asr=manual_asr,
        asr_adapters=asr_adapters,
        tts_adapters=tts_adapters,
        code_version=code_version,
    )
    # The live preview uses the dedicated preview adapters, not ``asr_adapters``.
    preview_live_stream = partial(
        _preview_live_microphone_stream_for_space,
        asr_adapters=stream_preview_asr_adapters,
    )
    auto_reply_live_stream = partial(
        _auto_reply_live_microphone_stream_for_space,
        tts_adapters=tts_adapters,
    )
    finalize_live_stream = partial(
        _finalize_live_microphone_stream_for_space,
        tracker=tracker,
        manual_asr=manual_asr,
        asr_adapters=asr_adapters,
        tts_adapters=tts_adapters,
        code_version=code_version,
    )

    def stop_live_microphone_stream(stream_state, asr_backend_key, tts_backend_key, tts_voice, rule_rows):
        """Finalize the live turn and prepend a deactivated timer to its outputs."""
        result = finalize_live_stream(stream_state, asr_backend_key, tts_backend_key, tts_voice, rule_rows)
        return (gr.Timer(value=0.25, active=False), *result)

    def preview_benchmark(rule_rows):
        """Evaluate the current session rules against the benchmark CSV and return markdown."""
        session_rules = rules_from_editor_rows(rule_rows)
        run_id = make_run_id("stage1-rule-preview")
        metadata = RunMetadata(
            run_id=run_id,
            experiment_name="stage1-rule-only-benchmark-preview",
            hypothesis_ids=["H-002"],
            baseline_run_id=None,
            route_id="R-001",
            seed=42,
            code_version=code_version,
            data_version=str(benchmark_csv_path),
            started_at=datetime.now(timezone.utc).isoformat(),
            status="running",
        )
        summary = evaluate_internal_benchmark(
            benchmark_csv_path,
            session_rules,
            tracker=tracker,
            metadata=metadata,
        )
        return summary.to_markdown()

    run_stage1_smoke = partial(
        _run_stage1_smoke_for_space,
        benchmark_csv_path=benchmark_csv_path,
        tracker=tracker,
        asr_adapters=asr_adapters,
        tts_adapters=tts_adapters,
        code_version=code_version,
    )

    with gr.Blocks(title="VoiceDirector 语音场控接话演示") as app:
        # Live-stream state plus the timer that drives the auto-reply callback;
        # the timer starts inactive and is toggled by the start/stop handlers.
        live_stream_state = gr.State(value=create_streaming_turn_state())
        auto_reply_timer = gr.Timer(value=0.25, active=False)

        gr.Markdown("# VoiceDirector 语音场控接话演示")
        gr.Markdown("**实时语音接话演示**：连续麦克风转写、自动规则匹配、自动回复语音。")
        gr.Markdown(f"提示：{MINIMAL_UI_NOTICE}")

        with gr.Row():
            # Left column: inputs, backend selectors and per-turn result fields.
            with gr.Column(scale=5):
                gr.Markdown(
                    f"### 主路径：麦克风连续流式识别\n点击麦克风开始说话，识别文本会持续刷新；命中规则稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒后会自动播放回复语音。"
                )
                live_audio_input = gr.Audio(
                    label="实时麦克风输入（连续流式）",
                    sources=["microphone"],
                    type="numpy",
                    streaming=True,
                )
                asr_backend = gr.Dropdown(
                    choices=[
                        ("FunASR / SenseVoice（主路径）", "funasr"),
                        ("faster-whisper（兜底）", "faster-whisper"),
                    ],
                    value="funasr",
                    label="识别后端",
                )
                tts_backend = gr.Dropdown(
                    choices=[("edge-tts（真实语音）", "edge-tts"), ("mock（调试预览）", "mock")],
                    value="edge-tts",
                    label="回复语音后端",
                )
                tts_voice = gr.Dropdown(
                    choices=[(voice, voice) for voice in AVAILABLE_EDGE_TTS_VOICES],
                    value=DEFAULT_EDGE_TTS_VOICE,
                    label="回复音色",
                )
                # Fallback inputs (uploaded audio / typed text), collapsed by default.
                with gr.Accordion("上传音频 / 手工输入兜底", open=False):
                    upload_audio_input = gr.Audio(
                        label="上传音频兜底",
                        sources=["upload"],
                        type="filepath",
                        format="wav",
                    )
                    manual_text = gr.Textbox(
                        label="手工文本兜底",
                        placeholder="没有音频时，可直接输入中文文本做规则演示。",
                        lines=3,
                    )
                    run_turn_button = gr.Button("执行上传/手工兜底", variant="secondary")
                recognized_text = gr.Textbox(label="识别文本")
                partial_text = gr.Textbox(label="实时转写预览")
                matched_keyword = gr.Textbox(label="命中关键词")
                matched_rule = gr.Textbox(label="命中规则")
                reply_text = gr.Textbox(label="回复文本", lines=3)
                tts_status = gr.Textbox(label="当前状态")
                reply_audio = gr.Audio(label="回复语音", interactive=False, type="filepath", autoplay=True)
                run_summary = gr.Markdown(value=_initial_live_stream_placeholder())
            # Right column: session rule editor and the fixed evaluation contract.
            with gr.Column(scale=7):
                rule_table = gr.Dataframe(
                    headers=TABLE_HEADERS,
                    datatype=["str", "str", "str", "str"],
                    row_count=(len(seed_rules), "fixed"),
                    column_count=(len(TABLE_HEADERS), "fixed"),
                    value=rules_to_table_rows(seed_rules),
                    label="规则编辑表（当前会话）",
                    interactive=True,
                )
                apply_button = gr.Button("应用规则编辑")
                rule_status = gr.Textbox(label="规则编辑状态", value=_format_rule_status(seed_rules))
                gr.Markdown(_format_contract(evaluation_contract))

        # Benchmark / smoke-test panel with its own backend and voice selectors.
        with gr.Accordion("基准与烟测面板", open=False):
            gr.Markdown(
                "下面的规则预览保留了内部基准可见性。端到端烟测会使用 edge-tts 生成输入音频来打通真实 ASR/TTS 代码路径。"
            )
            benchmark_button = gr.Button("预览规则基准")
            benchmark_output = gr.Markdown(value=_initial_benchmark_placeholder(benchmark_csv_path))
            script_ids = gr.Textbox(
                label="烟测脚本 ID",
                value=", ".join(DEFAULT_STAGE1_SMOKE_SCRIPT_IDS),
                lines=2,
            )
            smoke_asr_backend = gr.Dropdown(
                choices=[
                    ("FunASR / SenseVoice（主路径）", "funasr"),
                    ("faster-whisper（兜底）", "faster-whisper"),
                ],
                value="funasr",
                label="烟测识别后端",
            )
            smoke_tts_backend = gr.Dropdown(
                choices=[("edge-tts（真实语音）", "edge-tts"), ("mock（调试预览）", "mock")],
                value="edge-tts",
                label="烟测语音后端",
            )
            smoke_tts_voice = gr.Dropdown(
                choices=[(voice, voice) for voice in AVAILABLE_EDGE_TTS_VOICES],
                value=DEFAULT_EDGE_TTS_VOICE,
                label="烟测音色",
            )
            stage1_smoke_button = gr.Button("执行端到端烟测")
            stage1_smoke_output = gr.Markdown(value=_initial_stage1_smoke_placeholder(benchmark_csv_path))

        # --- Event wiring -------------------------------------------------
        # Output lists must match the order of the tuples the handlers return.
        run_turn_button.click(
            process_turn,
            inputs=[upload_audio_input, manual_text, asr_backend, tts_backend, tts_voice, rule_table],
            outputs=[recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
        )
        live_audio_input.start_recording(
            start_live_microphone_stream,
            outputs=[auto_reply_timer, live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
            queue=False,
            show_progress="hidden",
        )
        # The stream and timer events are registered with the same
        # concurrency_id and a concurrency_limit of 1.
        stream_event = live_audio_input.stream(
            preview_live_stream,
            inputs=[live_audio_input, live_stream_state, asr_backend, rule_table],
            outputs=[live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
            show_progress="hidden",
            trigger_mode="always_last",
            concurrency_limit=1,
            concurrency_id="live-stream-session",
            stream_every=0.75,
        )
        auto_reply_event = auto_reply_timer.tick(
            auto_reply_live_stream,
            inputs=[live_stream_state, tts_backend, tts_voice],
            outputs=[live_stream_state, tts_status, reply_audio, run_summary],
            show_progress="hidden",
            trigger_mode="always_last",
            concurrency_limit=1,
            concurrency_id="live-stream-session",
        )
        # Stopping and pausing both finalize the turn with the same handler and
        # cancel the pending stream / auto-reply events.
        live_audio_input.stop_recording(
            stop_live_microphone_stream,
            inputs=[live_stream_state, asr_backend, tts_backend, tts_voice, rule_table],
            outputs=[auto_reply_timer, live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
            show_progress="minimal",
            cancels=[stream_event, auto_reply_event],
        )
        live_audio_input.pause_recording(
            stop_live_microphone_stream,
            inputs=[live_stream_state, asr_backend, tts_backend, tts_voice, rule_table],
            outputs=[auto_reply_timer, live_stream_state, recognized_text, partial_text, matched_keyword, matched_rule, reply_text, tts_status, reply_audio, run_summary],
            show_progress="minimal",
            cancels=[stream_event, auto_reply_event],
        )
        apply_button.click(apply_rules, inputs=[rule_table], outputs=[rule_table, rule_status])
        benchmark_button.click(preview_benchmark, inputs=[rule_table], outputs=[benchmark_output])
        stage1_smoke_button.click(
            run_stage1_smoke,
            inputs=[smoke_asr_backend, smoke_tts_backend, smoke_tts_voice, rule_table, script_ids],
            outputs=[stage1_smoke_output],
        )

    return app


def _format_contract(contract: dict) -> str:
    h003 = contract["h003_mos"]
    notes = contract["evaluation_constraints"]
    return "\n".join(
        [
            "## 固定评测约束",
            f"- H-003 MOS 阈值：{h003['threshold']}/{h003['scale_max']}",
            f"- 最少评审人数：{h003['minimum_raters']}",
            f"- H-004 正式验证阶段：{notes['h004_formal_validation_phase']}",
            f"- H-002 适用范围：{notes['h002_precision_scope']}",
        ]
    )


def _format_rule_status(session_rules) -> str:
    return f"当前会话已加载 {len(session_rules)} 条规则，编辑结果会立即在本次会话生效。"


def _initial_benchmark_placeholder(benchmark_csv_path: Path) -> str:
    return "\n".join(
        [
            "### 规则基准已就绪",
            f"- 数据集：{benchmark_csv_path}",
            "- 范围：仅内部基准",
            "- 用途：查看规则与文本基准，不代表真实语音识别表现。",
        ]
    )


def _initial_stage1_smoke_placeholder(benchmark_csv_path: Path) -> str:
    return "\n".join(
        [
            "### 端到端烟测已就绪",
            f"- 数据集：{benchmark_csv_path}",
            "- 输入音频来源：edge-tts 根据脚本文本合成",
            "- 用途：检查真实 ASR/TTS 链路是否跑通。",
        ]
    )


def _initial_live_stream_placeholder() -> str:
    """Return the markdown shown in the run summary before live listening starts."""
    delay_seconds = int(AUTO_REPLY_DELAY_SECONDS)
    return (
        "### 实时监听已就绪\n"
        "- 点击麦克风开始说话，识别文本会持续刷新。\n"
        f"- 命中规则稳定 {delay_seconds} 秒后，会自动播放回复语音。\n"
        "- 上传音频 / 手工输入兜底仍可用。\n"
        f"- 说明：{MINIMAL_UI_NOTICE}"
    )


def _parse_script_ids(raw_value: str | None) -> list[str]:
    if not raw_value:
        return list(DEFAULT_STAGE1_SMOKE_SCRIPT_IDS)
    return [item.strip() for item in raw_value.replace("\n", ",").split(",") if item.strip()]


def _format_tts_status(tts_preview, matched: bool, *, finalized: bool) -> str:
    if tts_preview is None:
        return "录音结束，当前未生成回复语音。" if finalized and not matched else "当前未生成回复语音。"

    status = f"已生成回复语音（{tts_preview.latency_ms} ms）"
    if tts_preview.runtime_note:
        status = f"{status}；{tts_preview.runtime_note}"
    return status


def _format_run_summary(
    *,
    run_id: str,
    input_mode: str,
    asr_backend: str | None,
    asr_latency_ms: int | None,
    rule_id: str | None,
    matched_keyword: str | None,
    tts_status: str,
    runtime_note: str | None,
) -> str:
    """Render the markdown summary of a finished (or failed) turn.

    Missing values are replaced with placeholder text, and the runtime note is
    appended as a final bullet only when it is non-empty.
    """
    if asr_latency_ms is None:
        latency_text = "未知"
    else:
        latency_text = f"{asr_latency_ms} ms"

    # Any input mode other than "audio" is presented as manual text.
    if input_mode == "audio":
        input_mode_text = "音频输入"
    else:
        input_mode_text = "手工文本"

    summary = (
        "### 本轮处理结果\n"
        f"- 运行 ID：{run_id}\n"
        f"- 输入方式：{input_mode_text}\n"
        f"- 识别后端：{asr_backend or '手工文本'}\n"
        f"- 最新识别延迟：{latency_text}\n"
        f"- 命中规则：{rule_id or '未命中'}\n"
        f"- 命中关键词：{matched_keyword or '未命中'}\n"
        f"- 回复语音状态：{tts_status}\n"
        f"- 说明：{MINIMAL_UI_NOTICE}"
    )
    if runtime_note:
        summary = f"{summary}\n- 运行提示：{runtime_note}"
    return summary


def _format_live_stream_status(state, *, asr_result=None, finalized: bool, auto_reply_triggered: bool = False) -> str:
    if finalized:
        status = "录音已结束。"
        if state.current_match_key and (state.auto_reply_key == state.current_match_key or auto_reply_triggered):
            status = "录音已结束，已保留当前自动回复语音。"
        elif state.current_match_key:
            status = "录音已结束，已完成本轮规则匹配。"
        else:
            status = "录音已结束，当前未命中规则。"
    elif state.auto_reply_key == state.current_match_key and state.auto_reply_audio_path:
        status = f"已自动播放规则 {state.matched_rule_id or '未命中'} 的回复语音，继续说话可触发新的规则。"
    elif state.current_match_key:
        status = f"已命中规则 {state.matched_rule_id or '未命中'}，稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒后将自动播放回复语音。"
    elif state.transcript:
        status = "实时转写中，当前尚未命中规则。"
    else:
        status = "实时监听中，请开始说话。"

    effective_asr_result = asr_result
    if effective_asr_result is None and state.asr_latency_ms is not None:
        latency_text = f"最新识别延迟 {state.asr_latency_ms} ms。"
        status = f"{status} {latency_text}"
    elif effective_asr_result is not None:
        status = f"{status} 最新识别延迟 {effective_asr_result.latency_ms} ms。"

    runtime_note = effective_asr_result.runtime_note if effective_asr_result is not None else state.runtime_note
    if runtime_note:
        status = f"{status} {runtime_note}"
    return status


def _format_stream_preview_summary(
    *,
    state,
    error: str | None = None,
    finalized: bool = False,
    auto_reply_triggered: bool = False,
) -> str:
    """Render the markdown "live status" panel for the streaming flow.

    When *error* is non-empty only the chunk count, the error and the notice
    are shown. Otherwise the panel lists the stream fields of *state* plus an
    auto-reply status line.
    """
    heading = "### 实时状态"
    chunk_line = f"- 已接收音频片段：{state.chunk_count}"
    notice_line = f"- 说明：{MINIMAL_UI_NOTICE}"

    if error:
        return "\n".join((heading, chunk_line, f"- 错误：{error}", notice_line))

    # Highest-priority condition first: a finalized match wins over a reply
    # that was just triggered, which wins over the steady-state descriptions.
    if finalized and state.current_match_key:
        auto_reply_status = "录音结束，已完成本轮处理"
    elif auto_reply_triggered:
        auto_reply_status = "刚刚完成自动播放"
    elif state.auto_reply_key == state.current_match_key and state.auto_reply_audio_path:
        auto_reply_status = "已自动播放"
    elif state.current_match_key:
        auto_reply_status = f"等待稳定 {int(AUTO_REPLY_DELAY_SECONDS)} 秒"
    else:
        auto_reply_status = "当前未触发"

    if state.asr_latency_ms is None:
        latency_text = "未知"
    else:
        latency_text = f"{state.asr_latency_ms} ms"

    lines = [
        heading,
        chunk_line,
        f"- 当前采样率：{state.sample_rate or '未知'} Hz",
        f"- 识别后端：{state.asr_backend or '尚未开始'}",
        f"- 最新识别延迟：{latency_text}",
        f"- 当前命中规则：{state.matched_rule_id or '未命中'}",
        f"- 当前命中关键词：{state.matched_keyword or '未命中'}",
        f"- 自动播报状态：{auto_reply_status}",
        notice_line,
    ]
    if state.runtime_note:
        lines.append(f"- 运行提示：{state.runtime_note}")
    return "\n".join(lines)


def _require_gradio():
    try:
        import gradio as gr
    except ImportError as exc:
        raise RuntimeError("未安装 Gradio，请先执行 `pip install -r requirements.txt`。") from exc
    return gr