| """Tests for local provider stream read timeout auto-detection. |
| |
| When a local LLM provider is detected (Ollama, llama.cpp, vLLM, etc.), |
| the httpx stream read timeout should be automatically increased from the |
| default 60s to HERMES_API_TIMEOUT (1800s) to avoid premature connection |
| kills during long prefill phases. |
| """ |
|
|
| import os |
| import pytest |
| from unittest.mock import patch |
|
|
| from agent.model_metadata import is_local_endpoint |
|
|
|
|
| class TestLocalStreamReadTimeout: |
| """Verify stream read timeout auto-detection logic.""" |
|
|
| @pytest.mark.parametrize("base_url", [ |
| "http://localhost:11434", |
| "http://127.0.0.1:8080", |
| "http://0.0.0.0:5000", |
| "http://192.168.1.100:8000", |
| "http://10.0.0.5:1234", |
| "http://host.docker.internal:11434", |
| "http://host.containers.internal:11434", |
| "http://host.lima.internal:11434", |
| ]) |
| def test_local_endpoint_bumps_read_timeout(self, base_url): |
| """Local endpoint + default timeout -> bumps to base_timeout.""" |
| with patch.dict(os.environ, {}, clear=False): |
| os.environ.pop("HERMES_STREAM_READ_TIMEOUT", None) |
| _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) |
| _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) |
| if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url): |
| _stream_read_timeout = _base_timeout |
| assert _stream_read_timeout == 1800.0 |
|
|
| def test_user_override_respected_for_local(self): |
| """User sets HERMES_STREAM_READ_TIMEOUT -> keep their value even for local.""" |
| with patch.dict(os.environ, {"HERMES_STREAM_READ_TIMEOUT": "300"}, clear=False): |
| _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) |
| _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) |
| base_url = "http://localhost:11434" |
| if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url): |
| _stream_read_timeout = _base_timeout |
| assert _stream_read_timeout == 300.0 |
|
|
| @pytest.mark.parametrize("base_url", [ |
| "https://api.openai.com", |
| "https://openrouter.ai/api", |
| "https://api.anthropic.com", |
| ]) |
| def test_remote_endpoint_keeps_default(self, base_url): |
| """Remote endpoint -> keep 120s default.""" |
| with patch.dict(os.environ, {}, clear=False): |
| os.environ.pop("HERMES_STREAM_READ_TIMEOUT", None) |
| _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) |
| _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) |
| if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url): |
| _stream_read_timeout = _base_timeout |
| assert _stream_read_timeout == 120.0 |
|
|
| def test_empty_base_url_keeps_default(self): |
| """No base_url set -> keep 120s default.""" |
| with patch.dict(os.environ, {}, clear=False): |
| os.environ.pop("HERMES_STREAM_READ_TIMEOUT", None) |
| _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) |
| _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) |
| base_url = "" |
| if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url): |
| _stream_read_timeout = _base_timeout |
| assert _stream_read_timeout == 120.0 |
|
|
|
|
| class TestIsLocalEndpoint: |
| """Direct unit tests for is_local_endpoint.""" |
|
|
| @pytest.mark.parametrize("url", [ |
| "http://localhost:11434", |
| "http://127.0.0.1:8080", |
| "http://0.0.0.0:5000", |
| "http://[::1]:11434", |
| "http://192.168.1.100:8000", |
| "http://10.0.0.5:1234", |
| "http://172.17.0.1:11434", |
| ]) |
| def test_classic_local_addresses(self, url): |
| assert is_local_endpoint(url) is True |
|
|
| @pytest.mark.parametrize("url", [ |
| "http://host.docker.internal:11434", |
| "http://host.docker.internal:8080/v1", |
| "http://gateway.docker.internal:11434", |
| "http://host.containers.internal:11434", |
| "http://host.lima.internal:11434", |
| ]) |
| def test_container_dns_names(self, url): |
| assert is_local_endpoint(url) is True |
|
|
| @pytest.mark.parametrize("url", [ |
| "https://api.openai.com", |
| "https://openrouter.ai/api", |
| "https://api.anthropic.com", |
| "https://evil.docker.internal.example.com", |
| ]) |
| def test_remote_endpoints(self, url): |
| assert is_local_endpoint(url) is False |
|
|
| @pytest.mark.parametrize("url", [ |
| "http://100.64.0.0:11434", |
| "http://100.64.0.1:11434/v1", |
| "http://100.77.243.5:11434", |
| "https://100.100.100.100:443", |
| "https://100.127.255.254:443", |
| "http://100.127.255.255:11434", |
| ]) |
| def test_tailscale_cgnat_is_local(self, url): |
| """Tailscale 100.64.0.0/10 should be treated as local for timeout bumps.""" |
| assert is_local_endpoint(url) is True |
|
|
| @pytest.mark.parametrize("url", [ |
| "http://100.63.255.255:11434", |
| "http://100.128.0.1:11434", |
| "http://100.200.0.1:11434", |
| "http://99.64.0.1:11434", |
| ]) |
| def test_near_but_not_cgnat_is_remote(self, url): |
| """Hosts adjacent to but outside 100.64.0.0/10 must not match.""" |
| assert is_local_endpoint(url) is False |
|
|