Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- HF/test/__init__.py +0 -0
- HF/test/test_llm.py +197 -0
- HF/test/test_regex.py +222 -0
- HF/test/test_routing.py +179 -0
HF/test/__init__.py
ADDED
|
File without changes
|
HF/test/test_llm.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
tests/test_llm.py — Tests for Tier 3: LLM Classifier
|
| 3 |
+
|
| 4 |
+
Tests verify:
|
| 5 |
+
1. Cache hit avoids API call
|
| 6 |
+
2. Retry logic on transient failure
|
| 7 |
+
3. Returns "Unclassified" on all error paths (never crashes pipeline)
|
| 8 |
+
4. Response normalization handles edge cases
|
| 9 |
+
5. No HF_TOKEN → returns Unclassified gracefully
|
| 10 |
+
|
| 11 |
+
Run:
|
| 12 |
+
pytest tests/test_llm.py -v
|
| 13 |
+
"""
|
| 14 |
+
import sys, os
|
| 15 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
| 16 |
+
|
| 17 |
+
import pytest
|
| 18 |
+
from unittest.mock import patch, MagicMock, call
|
| 19 |
+
import processor_llm as llm_module
|
| 20 |
+
from processor_llm import (
|
| 21 |
+
classify_with_llm, get_cache_stats,
|
| 22 |
+
_cache_key, _cache_get, _cache_set, _normalize,
|
| 23 |
+
_RESPONSE_CACHE,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ── Setup / teardown ──────────────────────────────────────────────────────────
|
| 28 |
+
@pytest.fixture(autouse=True)
def clear_cache():
    """Run every test against an empty LLM response cache.

    ``_RESPONSE_CACHE`` is cleared before the test body runs and once more
    after it finishes, so entries stored by one test are not seen by the next.
    """
    _RESPONSE_CACHE.clear()  # setup: start from a clean cache
    yield
    _RESPONSE_CACHE.clear()  # teardown: discard whatever the test stored
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# ── Normalize ─────────────────────────────────────────────────────────────────
|
| 37 |
+
class TestNormalize:
    """Checks that ``_normalize`` turns raw response text into a known label."""

    def test_exact_match(self):
        """A label that is already well-formed is returned unchanged."""
        label = _normalize("Workflow Error")
        assert label == "Workflow Error"

    def test_case_insensitive(self):
        """Lower-case input comes back in the expected capitalisation."""
        label = _normalize("workflow error")
        assert label == "Workflow Error"

    def test_deprecation_warning(self):
        """The 'Deprecation Warning' label is recognised as-is."""
        label = _normalize("Deprecation Warning")
        assert label == "Deprecation Warning"

    def test_random_text_returns_unclassified(self):
        """Text that names no known label yields 'Unclassified'."""
        label = _normalize("I don't know")
        assert label == "Unclassified"

    def test_empty_string_returns_unclassified(self):
        """An empty response yields 'Unclassified'."""
        label = _normalize("")
        assert label == "Unclassified"

    def test_partial_match(self):
        """A label embedded in surrounding text is still extracted."""
        # The model may answer with a prefix such as "Category: ..."; the
        # label inside that answer should still be found.
        label = _normalize("Category: Workflow Error")
        assert label == "Workflow Error"

    def test_strips_quotes(self):
        """Double quotes wrapped around the label are ignored."""
        label = _normalize('"Deprecation Warning"')
        assert label == "Deprecation Warning"
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# ── Cache ─────────────────────────────────────────────────────────────────────
|
| 62 |
+
class TestCache:
    """Checks for the response-cache helpers and their use by ``classify_with_llm``."""

    def test_cache_miss_returns_none(self):
        """Looking up a log that was never stored returns ``None``."""
        cached = _cache_get("totally new log message xyz")
        assert cached is None

    def test_cache_set_and_get(self):
        """A value written with ``_cache_set`` is read back by ``_cache_get``."""
        message = "test log message for caching"
        _cache_set(message, "Workflow Error")
        assert _cache_get(message) == "Workflow Error"

    def test_cache_key_is_deterministic(self):
        """The same log text always produces the same cache key."""
        message = "same log every time"
        first_key = _cache_key(message)
        second_key = _cache_key(message)
        assert first_key == second_key

    def test_different_logs_different_keys(self):
        """Two different log texts produce two different cache keys."""
        key_a = _cache_key("log message A")
        key_b = _cache_key("log message B")
        assert key_a != key_b

    def test_cache_hit_avoids_api_call(self):
        """A log already in the cache is answered without creating a client."""
        message = "Case escalation for ticket 7324 failed."
        _cache_set(message, "Workflow Error")  # seed the cache up front

        with patch("processor_llm.InferenceClient") as client_cls:
            label = classify_with_llm(message)

        client_cls.assert_not_called()
        assert label == "Workflow Error"

    def test_cache_stats_size(self):
        """``get_cache_stats`` reports how many entries have been stored."""
        _cache_set("log1", "Workflow Error")
        _cache_set("log2", "Deprecation Warning")
        assert get_cache_stats()["size"] == 2
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# ── No token ──────────────────────────────────────────────────────────────────
|
| 98 |
+
class TestNoToken:
    """Behaviour of ``classify_with_llm`` when no API token is configured."""

    def test_no_hf_token_returns_unclassified(self, monkeypatch):
        """With ``HF_TOKEN`` set to ``None`` the classifier returns 'Unclassified'."""
        monkeypatch.setattr(llm_module, "HF_TOKEN", None)
        label = classify_with_llm("Case escalation for ticket 1234.")
        assert label == "Unclassified"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# ── Retry logic ───────────────────────────────────────────────────────────────
|
| 106 |
+
class TestRetry:
    """Retry and caching behaviour of ``classify_with_llm`` against a fake client."""

    def _make_mock_client(self, responses):
        """Build a fake client whose ``chat.completions.create`` replays *responses*.

        Args:
            responses: Ordered outcomes, one per expected call. An exception
                instance is raised by that call; a string is returned as
                ``response.choices[0].message.content``.

        Returns:
            A ``MagicMock`` standing in for the inference client.
        """

        def as_outcome(item):
            # Exceptions are handed to ``side_effect`` untouched so the mock
            # raises them; strings are wrapped in a response-shaped mock.
            if isinstance(item, Exception):
                return item
            reply = MagicMock()
            reply.choices[0].message.content = item
            return reply

        mock_client = MagicMock()
        # An iterable side_effect yields one outcome per call and raises any
        # exception members, replacing the hand-rolled call counter.
        mock_client.chat.completions.create.side_effect = [
            as_outcome(item) for item in responses
        ]
        return mock_client

    def test_success_on_first_try(self, monkeypatch):
        """A successful first call returns the label after exactly one request."""
        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)  # no sleep

        client = self._make_mock_client(["Workflow Error"])

        with patch("processor_llm.InferenceClient", return_value=client):
            result = classify_with_llm("Case escalation for ticket 7324.")

        assert result == "Workflow Error"
        assert client.chat.completions.create.call_count == 1

    def test_retry_on_transient_failure(self, monkeypatch):
        """One failed call followed by a success yields the successful label."""
        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
        monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)

        # Fail once, succeed on the second attempt.
        client = self._make_mock_client([
            ConnectionError("timeout"),
            "Deprecation Warning",
        ])

        with patch("processor_llm.InferenceClient", return_value=client), \
                patch("processor_llm.time.sleep"):  # skip actual sleep
            result = classify_with_llm("Module will be retired in v4.")

        assert result == "Deprecation Warning"
        assert client.chat.completions.create.call_count == 2

    def test_all_retries_exhausted_returns_unclassified(self, monkeypatch):
        """When every attempt fails the classifier returns 'Unclassified'."""
        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
        monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)

        client = self._make_mock_client([
            ConnectionError("timeout"),
            ConnectionError("timeout"),
            ConnectionError("timeout"),
        ])

        with patch("processor_llm.InferenceClient", return_value=client), \
                patch("processor_llm.time.sleep"):
            result = classify_with_llm("Something that keeps failing.")

        assert result == "Unclassified"
        assert client.chat.completions.create.call_count == 3  # 1 initial + 2 retries

    def test_successful_result_gets_cached(self, monkeypatch):
        """A label obtained from the client is stored in the response cache."""
        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)

        client = self._make_mock_client(["Workflow Error"])

        log = "Case escalation for unique ticket 99999."
        with patch("processor_llm.InferenceClient", return_value=client):
            result = classify_with_llm(log)

        assert result == "Workflow Error"
        # The same log should now be served from the cache.
        assert _cache_get(log) == "Workflow Error"
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# ── Pipeline safety ───────────────────────────────────────────────────────────
|
| 188 |
+
class TestPipelineSafety:
    """Guards the rule that LLM failures never escape into the pipeline."""

    def test_classify_never_raises(self, monkeypatch):
        """LLM failures must NEVER propagate as exceptions to the pipeline."""
        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)

        # Constructing the client itself blows up.
        broken_client = patch(
            "processor_llm.InferenceClient",
            side_effect=RuntimeError("catastrophic"),
        )
        with broken_client:
            label = classify_with_llm("Any log message here.")

        # No exception reached this point; the fallback label is returned.
        assert label == "Unclassified"
|
HF/test/test_regex.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
tests/test_regex.py — Unit tests for Tier 1: Regex Classifier
|
| 3 |
+
|
| 4 |
+
Tests verify:
|
| 5 |
+
1. Every pattern category has positive matches
|
| 6 |
+
2. No false positives on known non-matching logs
|
| 7 |
+
3. Pattern order doesn't cause mis-labeling
|
| 8 |
+
4. Coverage improvement (should be > 35% on balanced test set)
|
| 9 |
+
|
| 10 |
+
Run:
|
| 11 |
+
pytest tests/ -v
|
| 12 |
+
pytest tests/test_regex.py -v --tb=short
|
| 13 |
+
"""
|
| 14 |
+
import sys, os
|
| 15 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
| 16 |
+
|
| 17 |
+
import pytest
|
| 18 |
+
from processor_regex import classify_with_regex, get_regex_coverage
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# ── Positive cases: must match and return correct label ───────────────────────
|
| 22 |
+
class TestHTTPStatus:
    """HTTP request/response log lines must be labelled 'HTTP Status'."""

    def test_get_request(self):
        """A GET access-log line is labelled 'HTTP Status'."""
        label = classify_with_regex(
            "GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1"
        )
        assert label == "HTTP Status"

    def test_post_request(self):
        """A POST access-log line is labelled 'HTTP Status'."""
        label = classify_with_regex(
            "POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05"
        )
        assert label == "HTTP Status"

    def test_delete_request(self):
        """A DELETE access-log line is labelled 'HTTP Status'."""
        label = classify_with_regex(
            "DELETE /v1/users/123 HTTP/1.1 status: 204 len: 0 time: 0.02"
        )
        assert label == "HTTP Status"

    def test_nova_style(self):
        """An access-log line prefixed with a logger name is still matched."""
        label = classify_with_regex(
            "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19"
        )
        assert label == "HTTP Status"

    def test_status_code_only(self):
        """A sentence that only mentions an HTTP status code is matched."""
        label = classify_with_regex("API call /invoices returned HTTP 500 in 2.1s")
        assert label == "HTTP Status"

    def test_patch_request(self):
        """A PATCH access-log line is labelled 'HTTP Status'."""
        label = classify_with_regex(
            "PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04"
        )
        assert label == "HTTP Status"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class TestSecurityAlert:
    """Security-related log lines must be labelled 'Security Alert'."""

    EXPECTED = "Security Alert"

    def test_login_failures(self):
        """Repeated login failures are a security alert."""
        label = classify_with_regex("Multiple login failures occurred on user 6454 account")
        assert label == self.EXPECTED

    def test_ip_blocked(self):
        """A blocked IP address is a security alert."""
        label = classify_with_regex("IP 192.168.133.114 blocked due to potential attack")
        assert label == self.EXPECTED

    def test_brute_force(self):
        """A brute-force login attempt is a security alert."""
        label = classify_with_regex("Alert: brute force login attempt from 10.0.0.5 detected")
        assert label == self.EXPECTED

    def test_admin_escalation(self):
        """An admin access escalation is a security alert."""
        label = classify_with_regex("Admin access escalation detected for user 9429")
        assert label == self.EXPECTED

    def test_privilege_elevation(self):
        """A privilege elevation is a security alert."""
        label = classify_with_regex("Privilege elevation detected for user Admin99")
        assert label == self.EXPECTED

    def test_ddos(self):
        """A potential DDoS attack is a security alert."""
        label = classify_with_regex("Potential DDoS attack from 1.2.3.4 detected")
        assert label == self.EXPECTED

    def test_suspicious_activity(self):
        """Suspicious login activity is a security alert."""
        label = classify_with_regex("Suspicious login activity detected from 203.0.113.1")
        assert label == self.EXPECTED

    def test_unauthorized_access(self):
        """An unauthorized access attempt is a security alert."""
        label = classify_with_regex("Unauthorized access to data was attempted by User123")
        assert label == self.EXPECTED
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class TestUserAction:
    """Routine user-activity log lines must be labelled 'User Action'."""

    def test_login(self):
        """A login event is a user action."""
        label = classify_with_regex("User User12345 logged in.")
        assert label == "User Action"

    def test_logout(self):
        """A logout event is a user action."""
        label = classify_with_regex("User User99 logged out.")
        assert label == "User Action"

    def test_account_created(self):
        """An account-creation event is a user action."""
        label = classify_with_regex("Account with ID 456 created by Admin.")
        assert label == "User Action"

    def test_password_changed(self):
        """A password change is a user action."""
        label = classify_with_regex("User User42 changed password successfully.")
        assert label == "User Action"

    def test_new_user_registered(self):
        """A new-user registration is a user action."""
        label = classify_with_regex("New user User9999 registered with email u@e.com.")
        assert label == "User Action"
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class TestSystemNotification:
    """Operational status log lines must be labelled 'System Notification'."""

    EXPECTED = "System Notification"

    def test_backup_completed(self):
        """A finished backup is a system notification."""
        label = classify_with_regex("Backup completed successfully.")
        assert label == self.EXPECTED

    def test_backup_started(self):
        """A started backup is a system notification."""
        label = classify_with_regex("Backup started at 2024-01-14 03:00:00.")
        assert label == self.EXPECTED

    def test_system_updated(self):
        """A system version update is a system notification."""
        label = classify_with_regex("System updated to version 4.2.1.")
        assert label == self.EXPECTED

    def test_disk_cleanup(self):
        """A finished disk cleanup is a system notification."""
        label = classify_with_regex("Disk cleanup completed successfully.")
        assert label == self.EXPECTED

    def test_service_restarted(self):
        """A service restart is a system notification."""
        label = classify_with_regex("Service payments restarted successfully.")
        assert label == self.EXPECTED

    def test_cpu_usage(self):
        """A CPU usage report is a system notification."""
        label = classify_with_regex("CPU usage at 98% for the last 10 minutes on node-7")
        assert label == self.EXPECTED

    def test_health_check_passed(self):
        """A passing health check is a system notification."""
        label = classify_with_regex("Health check passed for service auth-api")
        assert label == self.EXPECTED

    def test_cron_executed(self):
        """A completed cron job is a system notification."""
        label = classify_with_regex("Cron job cleanup-tokens executed successfully.")
        assert label == self.EXPECTED

    def test_certificate_renewed(self):
        """A certificate renewal is a system notification."""
        label = classify_with_regex(
            "Certificate renewed successfully for domain api.example.com"
        )
        assert label == self.EXPECTED
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
class TestError:
    """Failure log lines must be labelled 'Error'."""

    def test_system_crashed(self):
        """A system crash is an error."""
        label = classify_with_regex("System crashed due to disk I/O failure on node-3")
        assert label == "Error"

    def test_db_connection_failed(self):
        """A failed database connection is an error."""
        label = classify_with_regex("Database connection failed after 3 retries")
        assert label == "Error"

    def test_service_down(self):
        """A service reported as down is an error."""
        label = classify_with_regex("Service payments-api is down")
        assert label == "Error"

    def test_request_timeout(self):
        """A connection timeout is an error."""
        label = classify_with_regex("Connection timed out after 30s on shard-7")
        assert label == "Error"
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
class TestCriticalError:
    """Severe failure log lines must be labelled 'Critical Error'."""

    def test_critical_prefix(self):
        """A line starting with 'CRITICAL:' is a critical error."""
        label = classify_with_regex("CRITICAL: data corruption detected on shard-14")
        assert label == "Critical Error"

    def test_fatal(self):
        """A line starting with 'FATAL:' is a critical error."""
        # NOTE(review): the "β" below looks like a mis-encoded dash; it is
        # kept exactly as found so the test input is unchanged - confirm.
        label = classify_with_regex("FATAL: kernel panic β system halted")
        assert label == "Critical Error"

    def test_data_loss(self):
        """A data-loss report is a critical error."""
        label = classify_with_regex("data loss detected during write to replica-3")
        assert label == "Critical Error"

    def test_oom(self):
        """An out-of-memory kill is a critical error."""
        label = classify_with_regex("out-of-memory error: process killed (OOM)")
        assert label == "Critical Error"
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# ── Negative cases: must return None (don't mis-classify) ─────────────────────
|
| 145 |
+
class TestNegativeCases:
    """Log lines the regex tier must leave unlabelled (``None``)."""

    @pytest.mark.parametrize(
        "log",
        [
            "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
            "The 'ReportGenerator' module will be retired in version 4.0.",
            "The 'BulkEmailSender' feature will be deprecated in v5.0.",
            "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
            "Hey bro chill ya!",
        ],
    )
    def test_no_false_positives(self, log):
        """Each sample must come back as ``None`` rather than a label."""
        result = classify_with_regex(log)
        failure_note = f"Expected None but got '{result}' for: {log[:80]}"
        assert result is None, failure_note
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# ── Coverage test ─────────────────────────────────────────────────────────────
|
| 159 |
+
class TestCoverage:
    """Coverage checks for the regex tier over a mixed sample of log lines."""

    # 29 lines intended to match a regex category plus 5 that should not.
    BALANCED_SAMPLE = [
        # HTTP (6)
        "GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1",
        "POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05",
        "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 200",
        "DELETE /v1/items/99 HTTP/1.1 status: 204 len: 0 time: 0.01",
        "PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04",
        "API call /invoices returned HTTP 500 in 2.1s",
        # Security (6)
        "Multiple login failures occurred on user 6454 account",
        "IP 10.0.0.5 blocked due to potential attack",
        "Brute force login attempt from 192.168.1.1 detected",
        "Admin access escalation detected for user 9429",
        "Suspicious login activity detected from 1.2.3.4",
        "Potential DDoS attack from 203.0.113.1 detected",
        # User Action (5)
        "User User12345 logged in.",
        "User User99 logged out.",
        "Account with ID 456 created by Admin.",
        "User User42 changed password successfully.",
        "New user User9999 registered with email u@e.com.",
        # System Notification (5)
        "Backup completed successfully.",
        "System updated to version 4.2.1.",
        "Disk cleanup completed successfully.",
        "CPU usage at 98% for the last 10 minutes on node-7",
        "Cron job cleanup-tokens executed successfully.",
        # Error (4)
        "System crashed due to disk I/O failure on node-3",
        "Database connection failed after 3 retries",
        "Service auth-api is down",
        "Connection timed out after 30s",
        # Critical (3)
        "CRITICAL: data corruption detected on shard-14",
        "FATAL: kernel panic β system halted",
        "data loss detected during write to replica-3",
        # LegacyCRM / unmatched (5): should NOT match
        "Case escalation for ticket ID 7324 failed.",
        "The 'BulkEmailSender' feature will be deprecated in v5.0.",
        "Invoice generation aborted for order ID 8910.",
        "Workflow stalled at approval step 3 for case 9021.",
        "SLA breach detected for case ID 7701 (P1 4h breach).",
    ]

    def test_coverage_above_35_percent(self):
        """Regex coverage over the balanced sample must be at least 35%."""
        pct = get_regex_coverage(self.BALANCED_SAMPLE)["coverage_pct"]
        # 29 of the 34 sample lines are expected to match (about 85%); the
        # 5 LegacyCRM lines are expected not to. 35% is the minimum allowed.
        too_low_message = (
            f"Regex coverage {pct}% is below 35% minimum. "
            f"Check pattern additions in processor_regex.py"
        )
        assert pct >= 35.0, too_low_message

    def test_no_false_positive_on_legacy_logs(self):
        """LegacyCRM-style lines must not be labelled by the regex tier."""
        legacy_logs = (
            "Case escalation for ticket ID 7324 failed.",
            "The 'BulkEmailSender' feature will be deprecated in v5.0.",
            "Invoice generation aborted for order ID 8910.",
        )
        for log in legacy_logs:
            result = classify_with_regex(log)
            assert result is None, f"False positive: '{result}' on legacy log: {log}"
|
HF/test/test_routing.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
tests/test_routing.py — Pipeline Routing Tests
|
| 3 |
+
|
| 4 |
+
Tests verify:
|
| 5 |
+
1. LegacyCRM source → LLM tier (always)
|
| 6 |
+
2. Regex match → Regex tier (never reaches BERT)
|
| 7 |
+
3. High-confidence BERT → BERT tier
|
| 8 |
+
4. Unclassified BERT → LLM fallback tier
|
| 9 |
+
5. Result schema is complete (all keys present)
|
| 10 |
+
|
| 11 |
+
Run:
|
| 12 |
+
pytest tests/test_routing.py -v
|
| 13 |
+
"""
|
| 14 |
+
import sys, os
|
| 15 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
| 16 |
+
|
| 17 |
+
import pytest
|
| 18 |
+
from unittest.mock import patch, MagicMock
|
| 19 |
+
from classify import classify_log, classify_logs, pipeline_summary
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# ── Fixtures ──────────────────────────────────────────────────────────────────
|
| 23 |
+
# Shared (source, log_message) pairs used throughout the routing tests.

# Matched by the regex tier.
REGEX_HIT_LOG = (
    "ModernCRM",
    "User User123 logged in.",
)
REGEX_HIT_LOG2 = (
    "BillingSystem",
    "GET /api/v1/invoices HTTP/1.1 status: 200 len: 100 time: 0.1",
)

# Comes from the LegacyCRM source, which the tests expect to go to the LLM.
LEGACY_LOG = (
    "LegacyCRM",
    "Case escalation for ticket 9021 failed.",
)

# Not matched by any regex pattern, so the tests mock the BERT tier for it.
NON_REGEX_LOG = (
    "ModernHR",
    "The inventory sync completed without matching standard patterns.",
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ── Schema completeness ───────────────────────────────────────────────────────
|
| 30 |
+
class TestResultSchema:
    """Shape checks for the dict returned by ``classify_log``."""

    def test_classify_log_has_required_keys(self):
        """The result carries label, tier, confidence and latency_ms."""
        with patch("classify.bert_batch", return_value=[("Error", 0.95)]):
            result = classify_log(*NON_REGEX_LOG)
        for required_key in ("label", "tier", "confidence", "latency_ms"):
            assert required_key in result

    def test_latency_ms_is_positive(self):
        """The reported latency is strictly greater than zero."""
        latency = classify_log(*REGEX_HIT_LOG)["latency_ms"]
        assert latency > 0

    def test_confidence_is_float_or_none(self):
        """Confidence is either absent (``None``) or a float."""
        confidence = classify_log(*REGEX_HIT_LOG)["confidence"]
        assert confidence is None or isinstance(confidence, float)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ── Regex tier routing ────────────────────────────────────────────────────────
|
| 49 |
+
class TestRegexRouting:
    """Logs matched by a regex pattern must be answered by the Regex tier."""

    def test_regex_match_returns_regex_tier(self):
        """A regex hit is reported with tier 'Regex'."""
        outcome = classify_log(*REGEX_HIT_LOG)
        assert outcome["tier"] == "Regex"

    def test_regex_match_has_full_confidence(self):
        """A regex hit is reported with confidence 1.0."""
        outcome = classify_log(*REGEX_HIT_LOG)
        assert outcome["confidence"] == 1.0

    def test_regex_match_http_log(self):
        """An HTTP access-log line is labelled 'HTTP Status' by the Regex tier."""
        outcome = classify_log(*REGEX_HIT_LOG2)
        assert outcome["tier"] == "Regex"
        assert outcome["label"] == "HTTP Status"

    def test_regex_match_skips_bert(self):
        """If regex matches, bert_batch should never be called."""
        with patch("classify.bert_batch") as bert_spy:
            classify_log(*REGEX_HIT_LOG)
        bert_spy.assert_not_called()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# ── LegacyCRM routing ─────────────────────────────────────────────────────────
|
| 71 |
+
class TestLegacyCRMRouting:
    """Logs from the LegacyCRM source must go directly to the LLM tier."""

    def test_legacy_crm_goes_to_llm(self):
        """A LegacyCRM log is classified by the LLM, which is called once."""
        with patch("classify.classify_with_llm", return_value="Workflow Error") as llm_spy:
            outcome = classify_log(*LEGACY_LOG)
        assert outcome["tier"] == "LLM"
        llm_spy.assert_called_once()

    def test_legacy_crm_skips_regex(self):
        """LegacyCRM should skip regex entirely and go straight to the LLM."""
        with patch("classify.classify_with_regex") as regex_spy:
            with patch("classify.classify_with_llm", return_value="Workflow Error"):
                classify_log(*LEGACY_LOG)
        regex_spy.assert_not_called()

    def test_legacy_crm_skips_bert(self):
        """LegacyCRM logs must not reach the BERT tier."""
        with patch("classify.bert_batch") as bert_spy:
            with patch("classify.classify_with_llm", return_value="Workflow Error"):
                classify_log(*LEGACY_LOG)
        bert_spy.assert_not_called()
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# ── BERT routing ──────────────────────────────────────────────────────────────
|
| 93 |
+
class TestBERTRouting:
    """Routing of logs that miss the regex tier and reach the mocked BERT tier."""

    def test_high_confidence_bert_stays_bert(self):
        """A confident BERT prediction is returned as the final answer."""
        with patch("classify.bert_batch", return_value=[("Security Alert", 0.95)]):
            outcome = classify_log(*NON_REGEX_LOG)
        assert outcome["tier"] == "BERT"
        assert outcome["label"] == "Security Alert"
        assert outcome["confidence"] == pytest.approx(0.95)

    def test_low_confidence_bert_falls_back_to_llm(self):
        """BERT returning 'Unclassified' should escalate to LLM."""
        with patch("classify.bert_batch", return_value=[("Unclassified", 0.20)]):
            with patch("classify.classify_with_llm", return_value="Workflow Error") as llm_spy:
                outcome = classify_log(*NON_REGEX_LOG)
        # The tier name only has to mention the LLM; its exact text is not pinned.
        assert "LLM" in outcome["tier"]
        llm_spy.assert_called_once()

    def test_bert_batch_called_for_non_regex_log(self):
        """A log with no regex match triggers exactly one ``bert_batch`` call."""
        with patch("classify.bert_batch", return_value=[("Error", 0.88)]) as bert_spy:
            classify_log(*NON_REGEX_LOG)
        bert_spy.assert_called_once()
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# ── Batch routing ─────────────────────────────────────────────────────────────
|
| 116 |
+
class TestBatchRouting:
    """Checks for ``classify_logs`` on batches and for ``pipeline_summary``."""

    def test_batch_returns_correct_length(self):
        """One result is returned for every input log."""
        batch = [REGEX_HIT_LOG, REGEX_HIT_LOG2, LEGACY_LOG]
        with patch("classify.classify_with_llm", return_value="Workflow Error"):
            outcomes = classify_logs(batch)
        assert len(outcomes) == len(batch)

    def test_batch_mixed_tiers(self):
        """Results keep input order and report the tier that handled each log."""
        batch = [
            REGEX_HIT_LOG,                                   # expected: Regex
            ("ModernCRM", "GET /api HTTP/1.1 status: 200"),  # expected: Regex (HTTP)
            LEGACY_LOG,                                      # expected: LLM
        ]
        with patch("classify.classify_with_llm", return_value="Workflow Error"):
            outcomes = classify_logs(batch)

        tiers = [outcome["tier"] for outcome in outcomes[:3]]
        assert tiers[0] == "Regex"
        assert tiers[1] == "Regex"
        assert tiers[2] == "LLM"

    def test_pipeline_summary_structure(self):
        """The summary exposes total, tier_stats and label_counts."""
        outcomes = classify_logs([REGEX_HIT_LOG, REGEX_HIT_LOG2])
        summary = pipeline_summary(outcomes)

        for expected_key in ("total", "tier_stats", "label_counts"):
            assert expected_key in summary
        assert summary["total"] == 2

    def test_pipeline_summary_tier_pcts_sum_to_100(self):
        """Per-tier percentages add up to 100, within one point."""
        outcomes = classify_logs([REGEX_HIT_LOG, REGEX_HIT_LOG2, REGEX_HIT_LOG])
        summary = pipeline_summary(outcomes)
        total_pct = sum(stats["pct"] for stats in summary["tier_stats"].values())
        assert abs(total_pct - 100.0) < 1.0, f"Tier pcts don't sum to 100: {total_pct}"
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# ── Edge cases ────────────────────────────────────────────────────────────────
|
| 155 |
+
class TestEdgeCases:
    """Boundary conditions for ``classify_logs`` and ``classify_log``."""

    def test_empty_batch_returns_empty(self):
        """An empty batch yields an empty result list."""
        results = classify_logs([])
        assert results == []

    def test_single_log_batch(self):
        """A one-element batch yields exactly one result."""
        with patch("classify.bert_batch", return_value=[("Error", 0.85)]):
            results = classify_logs([NON_REGEX_LOG])
        assert len(results) == 1

    def test_all_regex_batch_never_calls_bert(self):
        """A batch fully handled by regex never invokes ``bert_batch``."""
        logs = [REGEX_HIT_LOG] * 10
        with patch("classify.bert_batch") as mock_bert:
            classify_logs(logs)
        mock_bert.assert_not_called()

    def test_llm_failure_returns_unclassified(self):
        """LLM crashing should return Unclassified, not raise."""
        with patch("classify.classify_with_llm", side_effect=Exception("LLM down")):
            # Only the call under test sits inside the try. The label
            # assertion is kept outside so that an AssertionError is not
            # caught by ``except Exception`` and misreported as a crash.
            try:
                result = classify_log(*LEGACY_LOG)
            except Exception as exc:
                pytest.fail(
                    "classify_log raised an exception - should have returned "
                    f"Unclassified ({exc!r})"
                )
        assert result["label"] == "Unclassified"
|