import json

from fastapi import FastAPI
from fastapi.testclient import TestClient
import httpx

from open_cortex.runtime.events import RuntimeEvent
from open_cortex.runtime.messages import ChatMessage
from open_cortex.runtime.metrics import RuntimeSnapshot
from open_cortex.ui.app import (
    create_app,
    event_to_payload,
    render_index,
    _backend_mode,
    _stream_events,
    _stream_simulated_events,
    _http_error_text,
    _is_context_overflow,
    _trim_for_context_collapse,
)


def test_render_index_uses_original_product_shell():
    """Check the HTML from ``render_index`` for the expected shell markup.

    Every fragment in ``required`` must occur in the page, and none of the
    fragments in ``forbidden`` may occur.
    """
    page = render_index()

    required = (
        'id="app"',
        'class="topbar"',
        "OpenCortex",
        "llama.cpp · local",
        ">Send<",
        'class="workspace"',
        'id="messages"',
        'id="prompt"',
        'id="send"',
        'id="runtime-event"',
        'id="experiment"',
        'id="run-experiment"',
        "Live detected",
        "Sim · Memory pressure",
        "Sim · Context collapse",
        'id="memory-organ"',
        'id="context-used"',
        'id="core-state"',
        "/assets/open_cortex.css?v=",
        "/assets/open_cortex.js?v=",
    )
    forbidden = (
        "EN / 中",
        'id="lang"',
        'id="clear"',
        ">Clear<",
        "Send message",
    )

    missing = [fragment for fragment in required if fragment not in page]
    unexpected = [fragment for fragment in forbidden if fragment in page]

    assert missing == []
    assert unexpected == []


def test_styles_use_dynamic_viewport_height():
    """Check that the stylesheet declares ``100dvh`` heights.

    The asset is decoded as UTF-8 explicitly so the test does not depend on
    the locale encoding of the machine running it.
    """
    css_path = create_app.__globals__["ASSET_DIR"] / "open_cortex.css"
    css = css_path.read_text(encoding="utf-8")

    assert "height: 100dvh" in css
    assert "min-height: 100dvh" in css


def test_frontend_exposes_experiment_and_engine_state_handlers():
    """Check that the frontend script contains the expected source snippets.

    The script is decoded as UTF-8 explicitly so the test does not depend on
    the locale encoding of the machine running it. Each snippet is checked
    individually so a failure names the snippet that is missing.
    """
    js_path = create_app.__globals__["ASSET_DIR"] / "open_cortex.js"
    js = js_path.read_text(encoding="utf-8")

    expected_snippets = (
        "function renderMarkdown",
        "message.role === \"assistant\" ? renderMarkdown",
        "isStreaming",
        'message.role === "assistant" && message.isStreaming',
        "activeAssistant.isStreaming = false",
        "function runExperiment",
        "function runMemoryPressureExperiment",
        "function runSlowDecodeExperiment",
        "function runContextCollapseExperiment",
        "collapseHoldUntil",
        "contextWindowFullPending",
        "function applyContextFullWarning",
        "function applyDeferredContextEjection",
        "CONTEXT WINDOW FULL",
        "hazard-context-full",
        "hazard-loop",
        "runtimeEvent.classList.add(\"hazard\"",
        "Earlier turns fell outside the active context.",
        "engine-strained",
        "Engine recovering",
    )

    for snippet in expected_snippets:
        assert snippet in js, f"missing from open_cortex.js: {snippet!r}"


def test_danger_states_have_decisive_visual_treatment():
    """Check that the stylesheet contains the hazard/critical selectors.

    The asset is decoded as UTF-8 explicitly so the test does not depend on
    the locale encoding of the machine running it.
    """
    css_path = create_app.__globals__["ASSET_DIR"] / "open_cortex.css"
    css = css_path.read_text(encoding="utf-8")

    expected_snippets = (
        ".app.hazard-context-full .runtime-event",
        ".app.hazard-loop .runtime-event",
        ".runtime-event.hazard",
        ".runtime-event.critical",
        "context-full-shock",
        "loop-warning-scan",
        "danger-atmosphere",
    )

    for snippet in expected_snippets:
        assert snippet in css, f"missing from open_cortex.css: {snippet!r}"


def test_context_collapse_styles_pop_out_oldest_message():
    """Check that the stylesheet defines the ``context-pop-out`` animation.

    The asset is decoded as UTF-8 explicitly so the test does not depend on
    the locale encoding of the machine running it.
    """
    css_path = create_app.__globals__["ASSET_DIR"] / "open_cortex.css"
    css = css_path.read_text(encoding="utf-8")

    assert "context-pop-out" in css
    assert "@keyframes context-pop-out" in css


def test_create_app_serves_index():
    """Check that ``create_app`` returns a FastAPI app whose ``/`` serves the UI."""
    application = create_app()

    reply = TestClient(application).get("/")
    body = reply.text

    assert isinstance(application, FastAPI)
    assert reply.status_code == 200
    for fragment in ("OpenCortex", 'id="send"'):
        assert fragment in body


def test_space_defaults_to_simulated_backend(monkeypatch):
    """With ``SPACE_ID`` set and no explicit backend, the mode is ``simulated``."""
    # The two environment tweaks are independent of each other.
    monkeypatch.setenv("SPACE_ID", "build-small-hackathon/open-cortex")
    monkeypatch.delenv("OPEN_CORTEX_BACKEND", raising=False)

    mode = _backend_mode()

    assert mode == "simulated"


def test_explicit_backend_overrides_space_default(monkeypatch):
    """An explicit ``OPEN_CORTEX_BACKEND`` is returned even when ``SPACE_ID`` is set."""
    environment = {
        "OPEN_CORTEX_BACKEND": "llama_cpp",
        "SPACE_ID": "build-small-hackathon/open-cortex",
    }
    for name, value in environment.items():
        monkeypatch.setenv(name, value)

    mode = _backend_mode()

    assert mode == "llama_cpp"


def test_stream_events_can_run_without_llama_cpp_in_simulated_mode(monkeypatch):
    """Stream one user message through ``_stream_events`` with the simulated backend.

    Each line yielded by ``_stream_events`` is parsed as JSON. The test checks
    the kinds of the first, second and last payloads, the concatenated text,
    and the context size and decode rate reported by the last payload.
    """
    for name, value in (
        ("OPEN_CORTEX_BACKEND", "simulated"),
        ("OPEN_CORTEX_SIMULATOR_DELAY_SECONDS", "0"),
    ):
        monkeypatch.setenv(name, value)

    conversation = [ChatMessage(role="user", content="What is OpenCortex?")]

    events = list(map(json.loads, _stream_events(conversation)))

    assert events[0]["kind"] == "request_started"
    assert events[1]["kind"] == "first_token"
    assert events[-1]["kind"] == "request_completed"

    streamed_text = "".join(event["text_delta"] for event in events)
    assert "OpenCortex" in streamed_text

    final = events[-1]
    assert final["context_size"] == 2048
    assert final["decode_tps"] > 0


def test_simulated_story_can_surface_thought_loop(monkeypatch):
    """At least one simulated event for a story prompt must flag repetition."""
    monkeypatch.setenv("OPEN_CORTEX_SIMULATOR_DELAY_SECONDS", "0")

    conversation = [
        ChatMessage(role="user", content="Write a long repeating story.")
    ]

    # Convert every event first, then scan for the first truthy flag.
    payloads = list(map(event_to_payload, _stream_simulated_events(conversation)))

    loop_seen = False
    for payload in payloads:
        if payload["repetition_detected"]:
            loop_seen = True
            break

    assert loop_seen


def test_event_to_payload_includes_live_loop_state():
    """Check that ``event_to_payload`` carries the event's live fields.

    Builds a ``token`` event with a populated snapshot and verifies the
    top-level fields plus the snapshot's slot context values in the payload.
    """
    snapshot = RuntimeSnapshot(
        prompt_tps=88.8,
        decode_tps=21.5,
        requests_processing=1,
        requests_deferred=0,
        active_slots=1,
        slot_context_tokens=(512,),
        slot_context_size=2048,
    )
    event = RuntimeEvent(
        kind="token",
        text_delta="银河系",
        ttft_ms=None,
        snapshot=snapshot,
        generated_tokens=66,
        elapsed_ms=2100.0,
        live_tps=19.7,
        repetition_detected=True,
        context_tokens=578,
        context_size=2048,
        working_memory_percent=28.2,
    )

    payload = event_to_payload(event)

    expected_fields = {
        "kind": "token",
        "text_delta": "银河系",
        "generated_tokens": 66,
        "live_tps": 19.7,
        "context_tokens": 578,
        "context_size": 2048,
        "working_memory_percent": 28.2,
    }
    for field, value in expected_fields.items():
        assert payload[field] == value

    # Identity check: the flag must be the boolean True, not merely truthy.
    assert payload["repetition_detected"] is True

    snapshot_payload = payload["snapshot"]
    # The tuple given to the snapshot is expected as a list in the payload.
    assert snapshot_payload["slot_context_tokens"] == [512]
    assert snapshot_payload["slot_context_size"] == 2048


def test_stream_events_emits_context_collapse_and_retries_trimmed_history(monkeypatch):
    """Check the overflow-then-retry path of ``_stream_events``.

    ``stream_chat_events`` is replaced with a fake whose first call yields a
    ``request_started`` event and then raises an HTTP 400 carrying a
    context-size message, and whose second call yields a ``first_token``
    event. The test expects a ``context_collapse`` payload in between and
    checks which messages were passed to the second call.
    """
    # Pin the backend explicitly so an ambient SPACE_ID or OPEN_CORTEX_BACKEND
    # in the runner's environment cannot change which backend is selected.
    # NOTE(review): assumes _stream_events selects its backend from this
    # variable, as the simulated-mode test in this file does; confirm.
    monkeypatch.setenv("OPEN_CORTEX_BACKEND", "llama_cpp")

    calls: list[list[ChatMessage]] = []
    request = httpx.Request("POST", "http://127.0.0.1:8080/v1/chat/completions")
    overflow = httpx.HTTPStatusError(
        "400 Bad Request",
        request=request,
        response=httpx.Response(
            400,
            request=request,
            text="request (2059 tokens) exceeds the available context size (2048 tokens)",
        ),
    )

    def fake_stream_chat_events(messages: list[ChatMessage]):
        """Fail with the overflow error on the first call, succeed afterwards."""
        calls.append(messages)
        if len(calls) == 1:
            yield RuntimeEvent(
                kind="request_started",
                text_delta="",
                ttft_ms=None,
                snapshot=None,
            )
            raise overflow
        yield RuntimeEvent(
            kind="first_token",
            text_delta="继续",
            ttft_ms=12.3,
            snapshot=None,
            context_tokens=128,
            context_size=2048,
            working_memory_percent=6.2,
        )

    monkeypatch.setattr("open_cortex.ui.app.stream_chat_events", fake_stream_chat_events)
    messages = [
        ChatMessage(role="system", content="You are Cortex."),
        ChatMessage(role="user", content="old question"),
        ChatMessage(role="assistant", content="old long answer"),
        ChatMessage(role="user", content="recent question"),
        ChatMessage(role="assistant", content="recent answer"),
        ChatMessage(role="user", content="继续"),
    ]

    payloads = [json.loads(line) for line in _stream_events(messages)]

    assert [payload["kind"] for payload in payloads] == [
        "request_started",
        "context_collapse",
        "first_token",
    ]
    collapse = payloads[1]
    assert collapse["dropped_messages"] == 2
    assert collapse["retained_messages"] == 4
    assert collapse["context_size"] == 2048
    # The retry must receive the system prompt plus the most recent turns.
    assert [message.content for message in calls[1]] == [
        "You are Cortex.",
        "recent question",
        "recent answer",
        "继续",
    ]


def test_context_overflow_detection_reads_streaming_error_body():
    """``_is_context_overflow`` must return True for a streamed overflow body.

    The 400 response is built from a byte stream rather than ``text=`` so the
    body has not been read when the error is handed to the function.
    """
    body = b"request (2059 tokens) exceeds the available context size (2048 tokens)"
    request = httpx.Request("POST", "http://127.0.0.1:8080/v1/chat/completions")
    error = httpx.HTTPStatusError(
        "400 Bad Request",
        request=request,
        response=httpx.Response(
            400,
            request=request,
            stream=httpx.ByteStream(body),
        ),
    )

    detected = _is_context_overflow(error)

    assert detected is True


def test_http_error_text_handles_closed_stream_without_crashing():
    """``_http_error_text`` must return ``""`` when the response was closed unread."""
    request = httpx.Request("POST", "http://127.0.0.1:8080/v1/chat/completions")
    closed_response = httpx.Response(
        400,
        request=request,
        stream=httpx.ByteStream(b"closed"),
    )
    # Close before the body has been read, then wrap it in the error.
    closed_response.close()
    error = httpx.HTTPStatusError(
        "400 Bad Request",
        request=request,
        response=closed_response,
    )

    extracted = _http_error_text(error)

    assert extracted == ""


def test_trim_for_context_collapse_drops_oversized_assistant_history():
    """Trimming must drop the first exchange and keep the last three messages.

    The history starts with a user prompt and a very large assistant reply.
    The expected result reports two dropped messages and returns the
    remaining three unchanged.
    """
    expected_tail = [
        ChatMessage(role="user", content="what happened next?"),
        ChatMessage(role="assistant", content="recent answer"),
        ChatMessage(role="user", content="继续"),
    ]
    history = [
        ChatMessage(role="user", content="write a long story"),
        ChatMessage(role="assistant", content="故事" * 4000),
        ChatMessage(role="user", content="what happened next?"),
        ChatMessage(role="assistant", content="recent answer"),
        ChatMessage(role="user", content="继续"),
    ]

    kept, removed = _trim_for_context_collapse(history)

    assert removed == 2
    assert kept == expected_tail


def test_trim_for_context_collapse_keeps_recent_tail_of_oversized_answer():
    """Trimming must keep the tail of an oversized assistant answer.

    With only one prior exchange, the expected result drops one message and
    keeps an assistant message containing both the collapse marker and the
    end of the original answer, followed by the final user message unchanged.
    """
    oversized_answer = "A" * 5000 + "recent ending"
    history = [
        ChatMessage(role="user", content="write a long story"),
        ChatMessage(role="assistant", content=oversized_answer),
        ChatMessage(role="user", content="1"),
    ]

    kept, removed = _trim_for_context_collapse(history)

    assert removed == 1
    assert len(kept) == 2

    collapsed = kept[0]
    assert collapsed.role == "assistant"
    for marker in ("Earlier content collapsed", "recent ending"):
        assert marker in collapsed.content

    assert kept[1] == ChatMessage(role="user", content="1")