"""Tests for the OpenCortex web UI: static shell, assets, and event streaming."""

import json

from fastapi import FastAPI
from fastapi.testclient import TestClient
import httpx

from open_cortex.runtime.events import RuntimeEvent
from open_cortex.runtime.messages import ChatMessage
from open_cortex.runtime.metrics import RuntimeSnapshot
from open_cortex.ui.app import (
    create_app,
    event_to_payload,
    render_index,
    _backend_mode,
    _stream_events,
    _stream_simulated_events,
    _http_error_text,
    _is_context_overflow,
    _trim_for_context_collapse,
)


def _read_asset(name: str) -> str:
    """Return the text of the UI asset *name*, decoded as UTF-8.

    ``ASSET_DIR`` is looked up in the globals of the module that defines
    ``create_app``. The encoding is passed explicitly so the read does not
    depend on the platform's default locale encoding.
    """
    return (create_app.__globals__["ASSET_DIR"] / name).read_text(encoding="utf-8")


def test_render_index_uses_original_product_shell() -> None:
    """The rendered index page contains the expected shell elements."""
    html = render_index()

    # Top-level shell and branding.
    assert 'id="app"' in html
    assert 'class="topbar"' in html
    assert "OpenCortex" in html
    assert "llama.cpp · local" in html

    # Controls that must not be present in the shell.
    assert "EN / 中" not in html
    assert 'id="lang"' not in html
    assert 'id="clear"' not in html
    assert ">Clear<" not in html

    # Chat workspace.
    assert ">Send<" in html
    assert "Send message" not in html
    assert 'class="workspace"' in html
    assert 'id="messages"' in html
    assert 'id="prompt"' in html
    assert 'id="send"' in html

    # Runtime panel and experiment selector.
    assert 'id="runtime-event"' in html
    assert 'id="experiment"' in html
    assert 'id="run-experiment"' in html
    assert "Live detected" in html
    assert "Sim · Memory pressure" in html
    assert "Sim · Context collapse" in html
    assert 'id="memory-organ"' in html
    assert 'id="context-used"' in html
    assert 'id="core-state"' in html

    # Asset URLs carry a version query parameter.
    assert "/assets/open_cortex.css?v=" in html
    assert "/assets/open_cortex.js?v=" in html


def test_styles_use_dynamic_viewport_height() -> None:
    """The stylesheet sizes the layout with ``100dvh``."""
    css = _read_asset("open_cortex.css")

    assert "height: 100dvh" in css
    assert "min-height: 100dvh" in css


def test_frontend_exposes_experiment_and_engine_state_handlers() -> None:
    """The script defines the markdown, experiment, and hazard handlers."""
    js = _read_asset("open_cortex.js")

    # Markdown rendering and streaming state of assistant messages.
    assert "function renderMarkdown" in js
    assert "message.role === \"assistant\" ? renderMarkdown" in js
    assert "isStreaming" in js
    assert 'message.role === "assistant" && message.isStreaming' in js
    assert "activeAssistant.isStreaming = false" in js

    # Experiment entry points.
    assert "function runExperiment" in js
    assert "function runMemoryPressureExperiment" in js
    assert "function runSlowDecodeExperiment" in js
    assert "function runContextCollapseExperiment" in js

    # Context-collapse and hazard handling.
    assert "collapseHoldUntil" in js
    assert "contextWindowFullPending" in js
    assert "function applyContextFullWarning" in js
    assert "function applyDeferredContextEjection" in js
    assert "CONTEXT WINDOW FULL" in js
    assert "hazard-context-full" in js
    assert "hazard-loop" in js
    assert "runtimeEvent.classList.add(\"hazard\"" in js
    assert "Earlier turns fell outside the active context." in js
    assert "engine-strained" in js
    assert "Engine recovering" in js


def test_danger_states_have_decisive_visual_treatment() -> None:
    """The stylesheet contains the hazard/critical selectors and animations."""
    css = _read_asset("open_cortex.css")

    assert ".app.hazard-context-full .runtime-event" in css
    assert ".app.hazard-loop .runtime-event" in css
    assert ".runtime-event.hazard" in css
    assert ".runtime-event.critical" in css
    assert "context-full-shock" in css
    assert "loop-warning-scan" in css
    assert "danger-atmosphere" in css


def test_context_collapse_styles_pop_out_oldest_message() -> None:
    """The stylesheet defines the ``context-pop-out`` keyframes."""
    css = _read_asset("open_cortex.css")

    assert "context-pop-out" in css
    assert "@keyframes context-pop-out" in css


def test_create_app_serves_index() -> None:
    """``create_app`` returns a FastAPI app whose root serves the index page."""
    app = create_app()
    client = TestClient(app)

    response = client.get("/")

    assert isinstance(app, FastAPI)
    assert response.status_code == 200
    assert "OpenCortex" in response.text
    assert 'id="send"' in response.text


def test_space_defaults_to_simulated_backend(monkeypatch) -> None:
    """With ``SPACE_ID`` set and no explicit backend, the mode is simulated."""
    monkeypatch.delenv("OPEN_CORTEX_BACKEND", raising=False)
    monkeypatch.setenv("SPACE_ID", "build-small-hackathon/open-cortex")

    assert _backend_mode() == "simulated"


def test_explicit_backend_overrides_space_default(monkeypatch) -> None:
    """An explicit ``OPEN_CORTEX_BACKEND`` wins even when ``SPACE_ID`` is set."""
    monkeypatch.setenv("OPEN_CORTEX_BACKEND", "llama_cpp")
    monkeypatch.setenv("SPACE_ID", "build-small-hackathon/open-cortex")

    assert _backend_mode() == "llama_cpp"


def test_stream_events_can_run_without_llama_cpp_in_simulated_mode(monkeypatch) -> None:
    """In simulated mode ``_stream_events`` yields a full JSON event sequence."""
    monkeypatch.setenv("OPEN_CORTEX_BACKEND", "simulated")
    monkeypatch.setenv("OPEN_CORTEX_SIMULATOR_DELAY_SECONDS", "0")
    messages = [ChatMessage(role="user", content="What is OpenCortex?")]

    payloads = [json.loads(line) for line in _stream_events(messages)]

    assert payloads[0]["kind"] == "request_started"
    assert payloads[1]["kind"] == "first_token"
    assert payloads[-1]["kind"] == "request_completed"
    assert "OpenCortex" in "".join(payload["text_delta"] for payload in payloads)
    assert payloads[-1]["context_size"] == 2048
    assert payloads[-1]["decode_tps"] > 0


def test_simulated_story_can_surface_thought_loop(monkeypatch) -> None:
    """A simulated story prompt produces an event flagged as repetition."""
    monkeypatch.setenv("OPEN_CORTEX_SIMULATOR_DELAY_SECONDS", "0")
    messages = [ChatMessage(role="user", content="Write a long repeating story.")]

    payloads = [
        event_to_payload(event) for event in _stream_simulated_events(messages)
    ]

    assert any(payload["repetition_detected"] for payload in payloads)


def test_event_to_payload_includes_live_loop_state() -> None:
    """``event_to_payload`` carries the live metrics and the snapshot fields."""
    event = RuntimeEvent(
        kind="token",
        text_delta="银河系",
        ttft_ms=None,
        snapshot=RuntimeSnapshot(
            prompt_tps=88.8,
            decode_tps=21.5,
            requests_processing=1,
            requests_deferred=0,
            active_slots=1,
            slot_context_tokens=(512,),
            slot_context_size=2048,
        ),
        generated_tokens=66,
        elapsed_ms=2100.0,
        live_tps=19.7,
        repetition_detected=True,
        context_tokens=578,
        context_size=2048,
        working_memory_percent=28.2,
    )

    payload = event_to_payload(event)

    assert payload["kind"] == "token"
    assert payload["text_delta"] == "银河系"
    assert payload["generated_tokens"] == 66
    assert payload["live_tps"] == 19.7
    assert payload["repetition_detected"] is True
    assert payload["context_tokens"] == 578
    assert payload["context_size"] == 2048
    assert payload["working_memory_percent"] == 28.2
    # The tuple given to the snapshot is expected back as a list.
    assert payload["snapshot"]["slot_context_tokens"] == [512]
    assert payload["snapshot"]["slot_context_size"] == 2048


def test_stream_events_emits_context_collapse_and_retries_trimmed_history(monkeypatch) -> None:
    """A context overflow yields ``context_collapse`` and a retry on less history."""
    # Run with the backend-selection variables unset so an ambient SPACE_ID or
    # OPEN_CORTEX_BACKEND cannot route around the patched stream_chat_events.
    # NOTE(review): assumes _stream_events picks its backend via _backend_mode().
    monkeypatch.delenv("OPEN_CORTEX_BACKEND", raising=False)
    monkeypatch.delenv("SPACE_ID", raising=False)

    calls: list[list[ChatMessage]] = []
    request = httpx.Request("POST", "http://127.0.0.1:8080/v1/chat/completions")
    overflow = httpx.HTTPStatusError(
        "400 Bad Request",
        request=request,
        response=httpx.Response(
            400,
            request=request,
            text="request (2059 tokens) exceeds the available context size (2048 tokens)",
        ),
    )

    def fake_stream_chat_events(messages: list[ChatMessage]):
        """Overflow on the first call; stream one token on later calls."""
        calls.append(messages)
        if len(calls) == 1:
            yield RuntimeEvent(
                kind="request_started",
                text_delta="",
                ttft_ms=None,
                snapshot=None,
            )
            raise overflow
        yield RuntimeEvent(
            kind="first_token",
            text_delta="继续",
            ttft_ms=12.3,
            snapshot=None,
            context_tokens=128,
            context_size=2048,
            working_memory_percent=6.2,
        )

    monkeypatch.setattr("open_cortex.ui.app.stream_chat_events", fake_stream_chat_events)
    messages = [
        ChatMessage(role="system", content="You are Cortex."),
        ChatMessage(role="user", content="old question"),
        ChatMessage(role="assistant", content="old long answer"),
        ChatMessage(role="user", content="recent question"),
        ChatMessage(role="assistant", content="recent answer"),
        ChatMessage(role="user", content="继续"),
    ]

    payloads = [json.loads(line) for line in _stream_events(messages)]

    assert [payload["kind"] for payload in payloads] == [
        "request_started",
        "context_collapse",
        "first_token",
    ]
    assert payloads[1]["dropped_messages"] == 2
    assert payloads[1]["retained_messages"] == 4
    assert payloads[1]["context_size"] == 2048
    # The retry keeps the system prompt and drops the oldest exchange.
    assert [message.content for message in calls[1]] == [
        "You are Cortex.",
        "recent question",
        "recent answer",
        "继续",
    ]


def test_context_overflow_detection_reads_streaming_error_body() -> None:
    """Overflow is detected when the error body is supplied as a byte stream."""
    request = httpx.Request("POST", "http://127.0.0.1:8080/v1/chat/completions")
    response = httpx.Response(
        400,
        request=request,
        stream=httpx.ByteStream(
            b"request (2059 tokens) exceeds the available context size (2048 tokens)"
        ),
    )
    error = httpx.HTTPStatusError("400 Bad Request", request=request, response=response)

    assert _is_context_overflow(error) is True


def test_http_error_text_handles_closed_stream_without_crashing() -> None:
    """``_http_error_text`` returns an empty string for a closed response."""
    request = httpx.Request("POST", "http://127.0.0.1:8080/v1/chat/completions")
    response = httpx.Response(
        400,
        request=request,
        stream=httpx.ByteStream(b"closed"),
    )
    response.close()
    error = httpx.HTTPStatusError("400 Bad Request", request=request, response=response)

    assert _http_error_text(error) == ""


def test_trim_for_context_collapse_drops_oversized_assistant_history() -> None:
    """The oldest exchange with an oversized answer is dropped entirely."""
    messages = [
        ChatMessage(role="user", content="write a long story"),
        ChatMessage(role="assistant", content="故事" * 4000),
        ChatMessage(role="user", content="what happened next?"),
        ChatMessage(role="assistant", content="recent answer"),
        ChatMessage(role="user", content="继续"),
    ]

    trimmed, dropped = _trim_for_context_collapse(messages)

    assert dropped == 2
    assert trimmed == [
        ChatMessage(role="user", content="what happened next?"),
        ChatMessage(role="assistant", content="recent answer"),
        ChatMessage(role="user", content="继续"),
    ]


def test_trim_for_context_collapse_keeps_recent_tail_of_oversized_answer() -> None:
    """An oversized answer is kept as a collapsed message including its tail."""
    messages = [
        ChatMessage(role="user", content="write a long story"),
        ChatMessage(role="assistant", content="A" * 5000 + "recent ending"),
        ChatMessage(role="user", content="1"),
    ]

    trimmed, dropped = _trim_for_context_collapse(messages)

    assert dropped == 1
    assert len(trimmed) == 2
    assert trimmed[0].role == "assistant"
    assert "Earlier content collapsed" in trimmed[0].content
    assert "recent ending" in trimmed[0].content
    assert trimmed[1] == ChatMessage(role="user", content="1")