"""Unit tests for private helpers imported from ``open_cortex.runtime.client``."""

import pytest
import httpx

from open_cortex.runtime.client import (
    _detect_repetition,
    _raise_for_status_with_body,
    _working_memory_percent,
)


def test_detect_repetition_marks_repeated_generation_loop():
    """A passage emitted three times back to back must be flagged as repetition."""
    looped_passage = (
        "艾丽和卡斯一起探索了整个宇宙,他们发现了一个惊人的事实:"
        "整个宇宙的中心并不是银河系,而是整个宇宙的中心。"
    )
    generated = looped_passage * 3
    assert _detect_repetition(generated) is True


def test_detect_repetition_allows_short_normal_text():
    """A single ordinary sentence must not be flagged as repetition."""
    sentence = (
        "KV Cache stores attention keys and values so decode can reuse prior context."
    )
    assert _detect_repetition(sentence) is False


def test_working_memory_percent_is_context_derived_and_capped():
    """Check the proportional, capped and ``None`` results of the helper.

    The arguments are presumably (tokens used, context size) -- confirm against
    the helper's signature.
    """
    context_size = 2048

    # 512 out of 2048 is expected to come back as exactly a quarter.
    assert _working_memory_percent(512, context_size) == 25.0
    # A first argument larger than the second must not yield more than 100.
    assert _working_memory_percent(4096, context_size) == 100.0
    # A missing first argument yields no percentage at all.
    assert _working_memory_percent(None, context_size) is None


def test_raise_for_status_with_body_preserves_streaming_error_body():
    """The error body of a stream-backed 400 response stays readable on the exception."""
    error_body = httpx.ByteStream(
        b"request (2059 tokens) exceeds the available context size (2048 tokens)"
    )
    outgoing = httpx.Request("POST", "http://127.0.0.1:8080/v1/chat/completions")
    # The body is supplied via ``stream=`` rather than ``content=``, matching the
    # streaming case named in the test.
    failed = httpx.Response(400, request=outgoing, stream=error_body)

    with pytest.raises(httpx.HTTPStatusError) as caught:
        _raise_for_status_with_body(failed)

    # Checked after the ``with`` block: a statement placed after the raising
    # call inside it would never run.
    assert "exceeds the available context size" in caught.value.response.text