Spaces:

Chris4K
/

autoscan

Running

App Files Files Community

autoscan / tests /test_security_fixes.py

Chris4K

Initial commit v5.0.0.

5248e3b verified 28 days ago

raw

history blame contribute delete

9.8 kB

	"""Tests covering the security fixes made in this session.

	Each test class is named after the fix it verifies:

	- TestB104ConfigHttpsValidator : 0.0.0.0 no longer exempt from HTTPS check
	- TestBuildCommentBodyRefactor : list+join rewrite preserves all output
	- TestDiscoverDropdownRefactor : list+join rewrite produces correct HTML
	- TestRequestsTimeouts : all HTTP calls in hf.py carry explicit timeouts
	"""
	import importlib
	import json
	import re
	from unittest.mock import AsyncMock, MagicMock, patch

	import pytest


	# ── B104: AI_EXPLAINER_BASE_URL validator ────────────────────────────────────

	class TestB104ConfigHttpsValidator:
	"""AI_EXPLAINER_BASE_URL must reject http:// for non-localhost hosts.

	Previously 0.0.0.0 was in the localhost allowlist, which bandit (B104)
	flagged as 'binding to all interfaces'. The fix removes 0.0.0.0 from the
	allowlist so that http://0.0.0.0:... is treated as a remote URL and
	rejected.
	"""

	def _make_settings(self, url: str):
	"""Import Settings fresh so environment doesn't pollute the test."""
	import importlib
	import sentinel.config as cfg_mod
	importlib.reload(cfg_mod)
	from pydantic_settings import BaseSettings
	return cfg_mod.Settings(
	AI_EXPLAINER_BASE_URL=url,
	DATABASE_URL="sqlite+aiosqlite:////tmp/test.db",
	DATA_DIR="/tmp",
	)

	def test_localhost_http_allowed(self):
	s = self._make_settings("http://localhost:11434")
	assert s.AI_EXPLAINER_BASE_URL == "http://localhost:11434"

	def test_loopback_ip_http_allowed(self):
	s = self._make_settings("http://127.0.0.1:11434")
	assert s.AI_EXPLAINER_BASE_URL == "http://127.0.0.1:11434"

	def test_ipv6_loopback_http_allowed(self):
	s = self._make_settings("http://[::1]:11434")
	assert s.AI_EXPLAINER_BASE_URL == "http://[::1]:11434"

	def test_https_remote_allowed(self):
	s = self._make_settings("https://ollama.example.com")
	assert s.AI_EXPLAINER_BASE_URL == "https://ollama.example.com"

	def test_http_remote_rejected(self):
	from pydantic import ValidationError
	with pytest.raises((ValidationError, ValueError)):
	self._make_settings("http://ollama.example.com")

	def test_bind_all_0000_now_rejected(self):
	"""0.0.0.0 is NOT a loopback address — it must be rejected for http://."""
	from pydantic import ValidationError
	with pytest.raises((ValidationError, ValueError)):
	self._make_settings("http://0.0.0.0:11434")


	# ── _build_comment_body: list+join refactor correctness ─────────────────────

	class TestBuildCommentBodyRefactor:
	"""Verify the list+join rewrite produces the same output as the old body+=.

	The performance fix changed string concatenation inside the per-finding
	loop to list accumulation + ''.join() at the end. All existing content
	invariants must still hold.
	"""

	def _finding(self, **kwargs) -> dict:
	defaults = dict(
	file="app.py", line=10, confidence="likely",
	message="Shell injection found", remediation="Use subprocess.run",
	)
	defaults.update(kwargs)
	return defaults

	def _body(self, findings):
	from core.hf import _build_comment_body
	return _build_comment_body(findings)

	def test_returns_string(self):
	assert isinstance(self._body([self._finding()]), str)

	def test_header_present(self):
	assert "security scan findings" in self._body([self._finding()])

	def test_details_block_present(self):
	body = self._body([self._finding()])
	assert "<details>" in body
	assert "</details>" in body

	def test_file_path_in_output(self):
	assert "src/auth.py" in self._body([self._finding(file="src/auth.py")])

	def test_message_in_output(self):
	assert "Hardcoded secret" in self._body([self._finding(message="Hardcoded secret")])

	def test_line_number_formatted(self):
	assert "L99" in self._body([self._finding(line=99)])

	def test_remediation_included(self):
	body = self._body([self._finding(remediation="Upgrade to >=2.0")])
	assert "Fix:" in body
	assert "Upgrade to" in body

	def test_empty_remediation_not_rendered(self):
	assert "Fix:" not in self._body([self._finding(remediation="")])

	def test_confidence_uppercased(self):
	assert "CONFIRMED" in self._body([self._finding(confidence="confirmed")])

	def test_finding_count_in_summary(self):
	f = [self._finding(), self._finding(line=20, file="b.py")]
	body = self._body(f)
	assert "2" in body

	def test_groups_by_file(self):
	f = [
	self._finding(file="a.py", line=1),
	self._finding(file="a.py", line=5),
	self._finding(file="b.py", line=3),
	]
	body = self._body(f)
	assert "a.py" in body
	assert "b.py" in body

	def test_large_finding_list_is_efficient(self):
	"""100 findings across 10 files must complete quickly (list+join is O(n))."""
	import time
	findings = [
	self._finding(file=f"file{i % 10}.py", line=i, message=f"msg {i}")
	for i in range(100)
	]
	t0 = time.monotonic()
	body = self._body(findings)
	elapsed = time.monotonic() - t0
	assert elapsed < 1.0, f"_build_comment_body took {elapsed:.3f}s for 100 findings"
	assert "file0.py" in body


	# ── discover.py quicksearch dropdown: list+join refactor correctness ─────────

	class TestDiscoverDropdownRefactor:
	"""Verify the quicksearch dropdown HTML builder uses list+join correctly.

	The quicksearch_dropdown route built items_html via += in a loop (O(n²)).
	The fix extracts the logic into _build_dropdown_html() which collects items
	in a list and joins at the end. Content must be correct.
	"""

	def _html(self, spaces=None, users=None) -> str:
	from sentinel.routes.discover import _build_dropdown_html
	return _build_dropdown_html(
	spaces=spaces or [],
	users=users or [],
	)

	def test_space_id_appears_in_html(self):
	html = self._html(spaces=[{"id": "owner/cool-space"}])
	assert "owner/cool-space" in html

	def test_user_appears_in_html(self):
	html = self._html(users=[{"user": "alice"}])
	assert "alice" in html

	def test_empty_user_entry_skipped(self):
	html = self._html(users=[{"user": ""}, {"user": None}, {"user": "bob"}])
	assert html.count("<li") == 1
	assert "bob" in html

	def test_user_fullname_fallback(self):
	html = self._html(users=[{"fullname": "Charlie"}])
	assert "Charlie" in html

	def test_empty_inputs_return_empty_string(self):
	assert self._html() == ""

	def test_space_li_has_type_label(self):
	html = self._html(spaces=[{"id": "a/b"}])
	assert "space" in html

	def test_user_li_has_type_label(self):
	html = self._html(users=[{"user": "alice"}])
	assert "user" in html

	def test_multiple_spaces_all_included(self):
	spaces = [{"id": f"owner/space{i}"} for i in range(4)]
	html = self._html(spaces=spaces)
	assert html.count("<li") == 4

	def test_html_special_chars_escaped(self):
	"""Space IDs with HTML special chars must be escaped."""
	html = self._html(spaces=[{"id": "<script>alert(1)</script>"}])
	assert "<script>" not in html

	def test_returns_string(self):
	assert isinstance(self._html(spaces=[{"id": "a/b"}]), str)


	# ── HTTP requests in hf.py carry explicit timeouts ───────────────────────────

	class TestRequestsTimeouts:
	"""All requests.get/post calls in core/hf.py must pass an explicit timeout.

	These are false positives for the semgrep requests-no-timeout rule because
	timeout kwargs are present, but we keep regression tests so the timeouts
	are never accidentally removed.
	"""

	def _mock_resp(self, status=200, data=None):
	m = MagicMock()
	m.status_code = status
	m.json.return_value = data or []
	m.text = ""
	return m

	def test_list_user_spaces_passes_timeout(self):
	from core.hf import list_user_spaces
	with patch("core.hf.requests.get", return_value=self._mock_resp()) as mock_get:
	list_user_spaces("testuser")
	kwargs = mock_get.call_args[1]
	assert "timeout" in kwargs, "list_user_spaces must pass timeout= to requests.get"
	assert isinstance(kwargs["timeout"], (int, float))

	def test_comment_on_space_passes_timeout(self):
	from core.hf import comment_on_space
	finding = dict(
	file="f.py", line=1, severity="HIGH", confidence="confirmed",
	message="test", remediation="fix",
	)
	with patch("core.hf.requests.post", return_value=self._mock_resp(201)) as mock_post:
	comment_on_space(
	"https://huggingface.co/spaces/ns/repo",
	"token123",
	[finding],
	)
	kwargs = mock_post.call_args[1]
	assert "timeout" in kwargs, "comment_on_space must pass timeout= to requests.post"
	assert isinstance(kwargs["timeout"], (int, float))