autoscan / tests /test_security_fixes.py
Chris4K's picture
Initial commit v5.0.0.
5248e3b verified
"""Tests covering the security fixes made in this session.
Each test class is named after the fix it verifies:
- TestB104ConfigHttpsValidator : 0.0.0.0 no longer exempt from HTTPS check
- TestBuildCommentBodyRefactor : list+join rewrite preserves all output
- TestDiscoverDropdownRefactor : list+join rewrite produces correct HTML
- TestRequestsTimeouts : all HTTP calls in hf.py carry explicit timeouts
"""
import importlib
import json
import re
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ── B104: AI_EXPLAINER_BASE_URL validator ────────────────────────────────────
class TestB104ConfigHttpsValidator:
"""AI_EXPLAINER_BASE_URL must reject http:// for non-localhost hosts.
Previously 0.0.0.0 was in the localhost allowlist, which bandit (B104)
flagged as 'binding to all interfaces'. The fix removes 0.0.0.0 from the
allowlist so that http://0.0.0.0:... is treated as a remote URL and
rejected.
"""
def _make_settings(self, url: str):
"""Import Settings fresh so environment doesn't pollute the test."""
import importlib
import sentinel.config as cfg_mod
importlib.reload(cfg_mod)
from pydantic_settings import BaseSettings
return cfg_mod.Settings(
AI_EXPLAINER_BASE_URL=url,
DATABASE_URL="sqlite+aiosqlite:////tmp/test.db",
DATA_DIR="/tmp",
)
def test_localhost_http_allowed(self):
s = self._make_settings("http://localhost:11434")
assert s.AI_EXPLAINER_BASE_URL == "http://localhost:11434"
def test_loopback_ip_http_allowed(self):
s = self._make_settings("http://127.0.0.1:11434")
assert s.AI_EXPLAINER_BASE_URL == "http://127.0.0.1:11434"
def test_ipv6_loopback_http_allowed(self):
s = self._make_settings("http://[::1]:11434")
assert s.AI_EXPLAINER_BASE_URL == "http://[::1]:11434"
def test_https_remote_allowed(self):
s = self._make_settings("https://ollama.example.com")
assert s.AI_EXPLAINER_BASE_URL == "https://ollama.example.com"
def test_http_remote_rejected(self):
from pydantic import ValidationError
with pytest.raises((ValidationError, ValueError)):
self._make_settings("http://ollama.example.com")
def test_bind_all_0000_now_rejected(self):
"""0.0.0.0 is NOT a loopback address β€” it must be rejected for http://."""
from pydantic import ValidationError
with pytest.raises((ValidationError, ValueError)):
self._make_settings("http://0.0.0.0:11434")
# ── _build_comment_body: list+join refactor correctness ─────────────────────
class TestBuildCommentBodyRefactor:
"""Verify the list+join rewrite produces the same output as the old body+=.
The performance fix changed string concatenation inside the per-finding
loop to list accumulation + ''.join() at the end. All existing content
invariants must still hold.
"""
def _finding(self, **kwargs) -> dict:
defaults = dict(
file="app.py", line=10, confidence="likely",
message="Shell injection found", remediation="Use subprocess.run",
)
defaults.update(kwargs)
return defaults
def _body(self, findings):
from core.hf import _build_comment_body
return _build_comment_body(findings)
def test_returns_string(self):
assert isinstance(self._body([self._finding()]), str)
def test_header_present(self):
assert "security scan findings" in self._body([self._finding()])
def test_details_block_present(self):
body = self._body([self._finding()])
assert "<details>" in body
assert "</details>" in body
def test_file_path_in_output(self):
assert "src/auth.py" in self._body([self._finding(file="src/auth.py")])
def test_message_in_output(self):
assert "Hardcoded secret" in self._body([self._finding(message="Hardcoded secret")])
def test_line_number_formatted(self):
assert "L99" in self._body([self._finding(line=99)])
def test_remediation_included(self):
body = self._body([self._finding(remediation="Upgrade to >=2.0")])
assert "Fix:" in body
assert "Upgrade to" in body
def test_empty_remediation_not_rendered(self):
assert "Fix:" not in self._body([self._finding(remediation="")])
def test_confidence_uppercased(self):
assert "CONFIRMED" in self._body([self._finding(confidence="confirmed")])
def test_finding_count_in_summary(self):
f = [self._finding(), self._finding(line=20, file="b.py")]
body = self._body(f)
assert "2" in body
def test_groups_by_file(self):
f = [
self._finding(file="a.py", line=1),
self._finding(file="a.py", line=5),
self._finding(file="b.py", line=3),
]
body = self._body(f)
assert "a.py" in body
assert "b.py" in body
def test_large_finding_list_is_efficient(self):
"""100 findings across 10 files must complete quickly (list+join is O(n))."""
import time
findings = [
self._finding(file=f"file{i % 10}.py", line=i, message=f"msg {i}")
for i in range(100)
]
t0 = time.monotonic()
body = self._body(findings)
elapsed = time.monotonic() - t0
assert elapsed < 1.0, f"_build_comment_body took {elapsed:.3f}s for 100 findings"
assert "file0.py" in body
# ── discover.py quicksearch dropdown: list+join refactor correctness ─────────
class TestDiscoverDropdownRefactor:
"""Verify the quicksearch dropdown HTML builder uses list+join correctly.
The quicksearch_dropdown route built items_html via += in a loop (O(nΒ²)).
The fix extracts the logic into _build_dropdown_html() which collects items
in a list and joins at the end. Content must be correct.
"""
def _html(self, spaces=None, users=None) -> str:
from sentinel.routes.discover import _build_dropdown_html
return _build_dropdown_html(
spaces=spaces or [],
users=users or [],
)
def test_space_id_appears_in_html(self):
html = self._html(spaces=[{"id": "owner/cool-space"}])
assert "owner/cool-space" in html
def test_user_appears_in_html(self):
html = self._html(users=[{"user": "alice"}])
assert "alice" in html
def test_empty_user_entry_skipped(self):
html = self._html(users=[{"user": ""}, {"user": None}, {"user": "bob"}])
assert html.count("<li") == 1
assert "bob" in html
def test_user_fullname_fallback(self):
html = self._html(users=[{"fullname": "Charlie"}])
assert "Charlie" in html
def test_empty_inputs_return_empty_string(self):
assert self._html() == ""
def test_space_li_has_type_label(self):
html = self._html(spaces=[{"id": "a/b"}])
assert "space" in html
def test_user_li_has_type_label(self):
html = self._html(users=[{"user": "alice"}])
assert "user" in html
def test_multiple_spaces_all_included(self):
spaces = [{"id": f"owner/space{i}"} for i in range(4)]
html = self._html(spaces=spaces)
assert html.count("<li") == 4
def test_html_special_chars_escaped(self):
"""Space IDs with HTML special chars must be escaped."""
html = self._html(spaces=[{"id": "<script>alert(1)</script>"}])
assert "<script>" not in html
def test_returns_string(self):
assert isinstance(self._html(spaces=[{"id": "a/b"}]), str)
# ── HTTP requests in hf.py carry explicit timeouts ───────────────────────────
class TestRequestsTimeouts:
"""All requests.get/post calls in core/hf.py must pass an explicit timeout.
These are false positives for the semgrep requests-no-timeout rule because
timeout kwargs are present, but we keep regression tests so the timeouts
are never accidentally removed.
"""
def _mock_resp(self, status=200, data=None):
m = MagicMock()
m.status_code = status
m.json.return_value = data or []
m.text = ""
return m
def test_list_user_spaces_passes_timeout(self):
from core.hf import list_user_spaces
with patch("core.hf.requests.get", return_value=self._mock_resp()) as mock_get:
list_user_spaces("testuser")
kwargs = mock_get.call_args[1]
assert "timeout" in kwargs, "list_user_spaces must pass timeout= to requests.get"
assert isinstance(kwargs["timeout"], (int, float))
def test_comment_on_space_passes_timeout(self):
from core.hf import comment_on_space
finding = dict(
file="f.py", line=1, severity="HIGH", confidence="confirmed",
message="test", remediation="fix",
)
with patch("core.hf.requests.post", return_value=self._mock_resp(201)) as mock_post:
comment_on_space(
"https://huggingface.co/spaces/ns/repo",
"token123",
[finding],
)
kwargs = mock_post.call_args[1]
assert "timeout" in kwargs, "comment_on_space must pass timeout= to requests.post"
assert isinstance(kwargs["timeout"], (int, float))