NOT-OMEGA committed on
Commit
88cdd3e
·
verified ·
1 Parent(s): 4561114

Upload 4 files

Browse files
HF/test/__init__.py ADDED
File without changes
HF/test/test_llm.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tests/test_llm.py — Tests for Tier 3: LLM Classifier
3
+
4
+ Tests verify:
5
+ 1. Cache hit avoids API call
6
+ 2. Retry logic on transient failure
7
+ 3. Returns "Unclassified" on all error paths (never crashes pipeline)
8
+ 4. Response normalization handles edge cases
9
+ 5. No HF_TOKEN → returns Unclassified gracefully
10
+
11
+ Run:
12
+ pytest tests/test_llm.py -v
13
+ """
14
+ import sys, os
15
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
+
17
+ import pytest
18
+ from unittest.mock import patch, MagicMock, call
19
+ import processor_llm as llm_module
20
+ from processor_llm import (
21
+ classify_with_llm, get_cache_stats,
22
+ _cache_key, _cache_get, _cache_set, _normalize,
23
+ _RESPONSE_CACHE,
24
+ )
25
+
26
+
27
+ # ── Setup / teardown ──────────────────────────────────────────────────────────
28
+ @pytest.fixture(autouse=True)
29
+ def clear_cache():
30
+ """Clear LLM cache before each test."""
31
+ _RESPONSE_CACHE.clear()
32
+ yield
33
+ _RESPONSE_CACHE.clear()
34
+
35
+
36
+ # ── Normalize ─────────────────────────────────────────────────────────────────
37
+ class TestNormalize:
38
+ def test_exact_match(self):
39
+ assert _normalize("Workflow Error") == "Workflow Error"
40
+
41
+ def test_case_insensitive(self):
42
+ assert _normalize("workflow error") == "Workflow Error"
43
+
44
+ def test_deprecation_warning(self):
45
+ assert _normalize("Deprecation Warning") == "Deprecation Warning"
46
+
47
+ def test_random_text_returns_unclassified(self):
48
+ assert _normalize("I don't know") == "Unclassified"
49
+
50
+ def test_empty_string_returns_unclassified(self):
51
+ assert _normalize("") == "Unclassified"
52
+
53
+ def test_partial_match(self):
54
+ # Model might return "Category: Workflow Error" → still should match
55
+ assert _normalize("Category: Workflow Error") == "Workflow Error"
56
+
57
+ def test_strips_quotes(self):
58
+ assert _normalize('"Deprecation Warning"') == "Deprecation Warning"
59
+
60
+
61
+ # ── Cache ─────────────────────────────────────────────────────────────────────
62
+ class TestCache:
63
+ def test_cache_miss_returns_none(self):
64
+ assert _cache_get("totally new log message xyz") is None
65
+
66
+ def test_cache_set_and_get(self):
67
+ log = "test log message for caching"
68
+ _cache_set(log, "Workflow Error")
69
+ assert _cache_get(log) == "Workflow Error"
70
+
71
+ def test_cache_key_is_deterministic(self):
72
+ log = "same log every time"
73
+ assert _cache_key(log) == _cache_key(log)
74
+
75
+ def test_different_logs_different_keys(self):
76
+ k1 = _cache_key("log message A")
77
+ k2 = _cache_key("log message B")
78
+ assert k1 != k2
79
+
80
+ def test_cache_hit_avoids_api_call(self):
81
+ log = "Case escalation for ticket 7324 failed."
82
+ _cache_set(log, "Workflow Error") # Pre-populate cache
83
+
84
+ with patch("processor_llm.InferenceClient") as mock_client:
85
+ result = classify_with_llm(log)
86
+
87
+ mock_client.assert_not_called()
88
+ assert result == "Workflow Error"
89
+
90
+ def test_cache_stats_size(self):
91
+ _cache_set("log1", "Workflow Error")
92
+ _cache_set("log2", "Deprecation Warning")
93
+ stats = get_cache_stats()
94
+ assert stats["size"] == 2
95
+
96
+
97
+ # ── No token ──────────────────────────────────────────────────────────────────
98
+ class TestNoToken:
99
+ def test_no_hf_token_returns_unclassified(self, monkeypatch):
100
+ monkeypatch.setattr(llm_module, "HF_TOKEN", None)
101
+ result = classify_with_llm("Case escalation for ticket 1234.")
102
+ assert result == "Unclassified"
103
+
104
+
105
+ # ── Retry logic ───────────────────────────────────────────────────────────────
106
+ class TestRetry:
107
+ def _make_mock_client(self, responses):
108
+ """responses: list of (Exception | str) — raised or returned in order."""
109
+ call_count = [0]
110
+
111
+ def mock_create(**kwargs):
112
+ idx = call_count[0]
113
+ call_count[0] += 1
114
+ if isinstance(responses[idx], Exception):
115
+ raise responses[idx]
116
+ mock_resp = MagicMock()
117
+ mock_resp.choices[0].message.content = responses[idx]
118
+ return mock_resp
119
+
120
+ mock_client = MagicMock()
121
+ mock_client.chat.completions.create.side_effect = mock_create
122
+ return mock_client
123
+
124
+ def test_success_on_first_try(self, monkeypatch):
125
+ monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
126
+ monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0) # no sleep
127
+
128
+ client = self._make_mock_client(["Workflow Error"])
129
+
130
+ with patch("processor_llm.InferenceClient", return_value=client):
131
+ result = classify_with_llm("Case escalation for ticket 7324.")
132
+
133
+ assert result == "Workflow Error"
134
+ assert client.chat.completions.create.call_count == 1
135
+
136
+ def test_retry_on_transient_failure(self, monkeypatch):
137
+ monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
138
+ monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
139
+ monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
140
+
141
+ # Fail once, succeed on second attempt
142
+ client = self._make_mock_client([
143
+ ConnectionError("timeout"),
144
+ "Deprecation Warning",
145
+ ])
146
+
147
+ with patch("processor_llm.InferenceClient", return_value=client), \
148
+ patch("processor_llm.time.sleep"): # skip actual sleep
149
+ result = classify_with_llm("Module will be retired in v4.")
150
+
151
+ assert result == "Deprecation Warning"
152
+ assert client.chat.completions.create.call_count == 2
153
+
154
+ def test_all_retries_exhausted_returns_unclassified(self, monkeypatch):
155
+ monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
156
+ monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
157
+ monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
158
+
159
+ client = self._make_mock_client([
160
+ ConnectionError("timeout"),
161
+ ConnectionError("timeout"),
162
+ ConnectionError("timeout"),
163
+ ])
164
+
165
+ with patch("processor_llm.InferenceClient", return_value=client), \
166
+ patch("processor_llm.time.sleep"):
167
+ result = classify_with_llm("Something that keeps failing.")
168
+
169
+ assert result == "Unclassified"
170
+ assert client.chat.completions.create.call_count == 3 # 1 initial + 2 retries
171
+
172
+ def test_successful_result_gets_cached(self, monkeypatch):
173
+ monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
174
+ monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
175
+
176
+ client = self._make_mock_client(["Workflow Error"])
177
+
178
+ log = "Case escalation for unique ticket 99999."
179
+ with patch("processor_llm.InferenceClient", return_value=client):
180
+ result = classify_with_llm(log)
181
+
182
+ assert result == "Workflow Error"
183
+ # Should now be in cache
184
+ assert _cache_get(log) == "Workflow Error"
185
+
186
+
187
+ # ── Pipeline safety ───────────────────────────────────────────────────────────
188
+ class TestPipelineSafety:
189
+ def test_classify_never_raises(self, monkeypatch):
190
+ """LLM failures must NEVER propagate as exceptions to the pipeline."""
191
+ monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
192
+ monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
193
+
194
+ with patch("processor_llm.InferenceClient", side_effect=RuntimeError("catastrophic")):
195
+ result = classify_with_llm("Any log message here.")
196
+
197
+ assert result == "Unclassified" # Never raises, always returns string
HF/test/test_regex.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tests/test_regex.py — Unit tests for Tier 1: Regex Classifier
3
+
4
+ Tests verify:
5
+ 1. Every pattern category has positive matches
6
+ 2. No false positives on known non-matching logs
7
+ 3. Pattern order doesn't cause mis-labeling
8
+ 4. Coverage improvement (should be > 35% on balanced test set)
9
+
10
+ Run:
11
+ pytest tests/ -v
12
+ pytest tests/test_regex.py -v --tb=short
13
+ """
14
+ import sys, os
15
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
+
17
+ import pytest
18
+ from processor_regex import classify_with_regex, get_regex_coverage
19
+
20
+
21
+ # ── Positive cases: must match and return correct label ───────────────────────
22
+ class TestHTTPStatus:
23
+ def test_get_request(self):
24
+ assert classify_with_regex("GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1") == "HTTP Status"
25
+
26
+ def test_post_request(self):
27
+ assert classify_with_regex("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05") == "HTTP Status"
28
+
29
+ def test_delete_request(self):
30
+ assert classify_with_regex("DELETE /v1/users/123 HTTP/1.1 status: 204 len: 0 time: 0.02") == "HTTP Status"
31
+
32
+ def test_nova_style(self):
33
+ assert classify_with_regex(
34
+ "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19"
35
+ ) == "HTTP Status"
36
+
37
+ def test_status_code_only(self):
38
+ assert classify_with_regex("API call /invoices returned HTTP 500 in 2.1s") == "HTTP Status"
39
+
40
+ def test_patch_request(self):
41
+ assert classify_with_regex("PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04") == "HTTP Status"
42
+
43
+
44
+ class TestSecurityAlert:
45
+ def test_login_failures(self):
46
+ assert classify_with_regex("Multiple login failures occurred on user 6454 account") == "Security Alert"
47
+
48
+ def test_ip_blocked(self):
49
+ assert classify_with_regex("IP 192.168.133.114 blocked due to potential attack") == "Security Alert"
50
+
51
+ def test_brute_force(self):
52
+ assert classify_with_regex("Alert: brute force login attempt from 10.0.0.5 detected") == "Security Alert"
53
+
54
+ def test_admin_escalation(self):
55
+ assert classify_with_regex("Admin access escalation detected for user 9429") == "Security Alert"
56
+
57
+ def test_privilege_elevation(self):
58
+ assert classify_with_regex("Privilege elevation detected for user Admin99") == "Security Alert"
59
+
60
+ def test_ddos(self):
61
+ assert classify_with_regex("Potential DDoS attack from 1.2.3.4 detected") == "Security Alert"
62
+
63
+ def test_suspicious_activity(self):
64
+ assert classify_with_regex("Suspicious login activity detected from 203.0.113.1") == "Security Alert"
65
+
66
+ def test_unauthorized_access(self):
67
+ assert classify_with_regex("Unauthorized access to data was attempted by User123") == "Security Alert"
68
+
69
+
70
+ class TestUserAction:
71
+ def test_login(self):
72
+ assert classify_with_regex("User User12345 logged in.") == "User Action"
73
+
74
+ def test_logout(self):
75
+ assert classify_with_regex("User User99 logged out.") == "User Action"
76
+
77
+ def test_account_created(self):
78
+ assert classify_with_regex("Account with ID 456 created by Admin.") == "User Action"
79
+
80
+ def test_password_changed(self):
81
+ assert classify_with_regex("User User42 changed password successfully.") == "User Action"
82
+
83
+ def test_new_user_registered(self):
84
+ assert classify_with_regex("New user User9999 registered with email u@e.com.") == "User Action"
85
+
86
+
87
+ class TestSystemNotification:
88
+ def test_backup_completed(self):
89
+ assert classify_with_regex("Backup completed successfully.") == "System Notification"
90
+
91
+ def test_backup_started(self):
92
+ assert classify_with_regex("Backup started at 2024-01-14 03:00:00.") == "System Notification"
93
+
94
+ def test_system_updated(self):
95
+ assert classify_with_regex("System updated to version 4.2.1.") == "System Notification"
96
+
97
+ def test_disk_cleanup(self):
98
+ assert classify_with_regex("Disk cleanup completed successfully.") == "System Notification"
99
+
100
+ def test_service_restarted(self):
101
+ assert classify_with_regex("Service payments restarted successfully.") == "System Notification"
102
+
103
+ def test_cpu_usage(self):
104
+ assert classify_with_regex("CPU usage at 98% for the last 10 minutes on node-7") == "System Notification"
105
+
106
+ def test_health_check_passed(self):
107
+ assert classify_with_regex("Health check passed for service auth-api") == "System Notification"
108
+
109
+ def test_cron_executed(self):
110
+ assert classify_with_regex("Cron job cleanup-tokens executed successfully.") == "System Notification"
111
+
112
+ def test_certificate_renewed(self):
113
+ assert classify_with_regex("Certificate renewed successfully for domain api.example.com") == "System Notification"
114
+
115
+
116
+ class TestError:
117
+ def test_system_crashed(self):
118
+ assert classify_with_regex("System crashed due to disk I/O failure on node-3") == "Error"
119
+
120
+ def test_db_connection_failed(self):
121
+ assert classify_with_regex("Database connection failed after 3 retries") == "Error"
122
+
123
+ def test_service_down(self):
124
+ assert classify_with_regex("Service payments-api is down") == "Error"
125
+
126
+ def test_request_timeout(self):
127
+ assert classify_with_regex("Connection timed out after 30s on shard-7") == "Error"
128
+
129
+
130
+ class TestCriticalError:
131
+ def test_critical_prefix(self):
132
+ assert classify_with_regex("CRITICAL: data corruption detected on shard-14") == "Critical Error"
133
+
134
+ def test_fatal(self):
135
+ assert classify_with_regex("FATAL: kernel panic β€” system halted") == "Critical Error"
136
+
137
+ def test_data_loss(self):
138
+ assert classify_with_regex("data loss detected during write to replica-3") == "Critical Error"
139
+
140
+ def test_oom(self):
141
+ assert classify_with_regex("out-of-memory error: process killed (OOM)") == "Critical Error"
142
+
143
+
144
+ # ── Negative cases: must return None (don't mis-classify) ────────────────────
145
+ class TestNegativeCases:
146
+ @pytest.mark.parametrize("log", [
147
+ "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
148
+ "The 'ReportGenerator' module will be retired in version 4.0.",
149
+ "The 'BulkEmailSender' feature will be deprecated in v5.0.",
150
+ "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
151
+ "Hey bro chill ya!",
152
+ ])
153
+ def test_no_false_positives(self, log):
154
+ result = classify_with_regex(log)
155
+ assert result is None, f"Expected None but got '{result}' for: {log[:80]}"
156
+
157
+
158
+ # ── Coverage test ─────────────────────────────────────────────────────────────
159
+ class TestCoverage:
160
+ BALANCED_SAMPLE = [
161
+ # HTTP (6)
162
+ "GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1",
163
+ "POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05",
164
+ "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 200",
165
+ "DELETE /v1/items/99 HTTP/1.1 status: 204 len: 0 time: 0.01",
166
+ "PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04",
167
+ "API call /invoices returned HTTP 500 in 2.1s",
168
+ # Security (6)
169
+ "Multiple login failures occurred on user 6454 account",
170
+ "IP 10.0.0.5 blocked due to potential attack",
171
+ "Brute force login attempt from 192.168.1.1 detected",
172
+ "Admin access escalation detected for user 9429",
173
+ "Suspicious login activity detected from 1.2.3.4",
174
+ "Potential DDoS attack from 203.0.113.1 detected",
175
+ # User Action (5)
176
+ "User User12345 logged in.",
177
+ "User User99 logged out.",
178
+ "Account with ID 456 created by Admin.",
179
+ "User User42 changed password successfully.",
180
+ "New user User9999 registered with email u@e.com.",
181
+ # System Notification (5)
182
+ "Backup completed successfully.",
183
+ "System updated to version 4.2.1.",
184
+ "Disk cleanup completed successfully.",
185
+ "CPU usage at 98% for the last 10 minutes on node-7",
186
+ "Cron job cleanup-tokens executed successfully.",
187
+ # Error (4)
188
+ "System crashed due to disk I/O failure on node-3",
189
+ "Database connection failed after 3 retries",
190
+ "Service auth-api is down",
191
+ "Connection timed out after 30s",
192
+ # Critical (3)
193
+ "CRITICAL: data corruption detected on shard-14",
194
+ "FATAL: kernel panic β€” system halted",
195
+ "data loss detected during write to replica-3",
196
+ # LegacyCRM / unmatched (5) → should NOT match
197
+ "Case escalation for ticket ID 7324 failed.",
198
+ "The 'BulkEmailSender' feature will be deprecated in v5.0.",
199
+ "Invoice generation aborted for order ID 8910.",
200
+ "Workflow stalled at approval step 3 for case 9021.",
201
+ "SLA breach detected for case ID 7701 (P1 4h breach).",
202
+ ]
203
+
204
+ def test_coverage_above_35_percent(self):
205
+ result = get_regex_coverage(self.BALANCED_SAMPLE)
206
+ pct = result["coverage_pct"]
207
+ # 29 of 34 logs should match regex (29/34 = 85%)
208
+ # 5 LegacyCRM logs should NOT match → ~85% expected
209
+ assert pct >= 35.0, (
210
+ f"Regex coverage {pct}% is below 35% minimum. "
211
+ f"Check pattern additions in processor_regex.py"
212
+ )
213
+
214
+ def test_no_false_positive_on_legacy_logs(self):
215
+ legacy_logs = [
216
+ "Case escalation for ticket ID 7324 failed.",
217
+ "The 'BulkEmailSender' feature will be deprecated in v5.0.",
218
+ "Invoice generation aborted for order ID 8910.",
219
+ ]
220
+ for log in legacy_logs:
221
+ result = classify_with_regex(log)
222
+ assert result is None, f"False positive: '{result}' on legacy log: {log}"
HF/test/test_routing.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tests/test_routing.py — Pipeline Routing Tests
3
+
4
+ Tests verify:
5
+ 1. LegacyCRM source → LLM tier (always)
6
+ 2. Regex match → Regex tier (never reaches BERT)
7
+ 3. High-confidence BERT → BERT tier
8
+ 4. Unclassified BERT → LLM fallback tier
9
+ 5. Result schema is complete (all keys present)
10
+
11
+ Run:
12
+ pytest tests/test_routing.py -v
13
+ """
14
+ import sys, os
15
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
+
17
+ import pytest
18
+ from unittest.mock import patch, MagicMock
19
+ from classify import classify_log, classify_logs, pipeline_summary
20
+
21
+
22
+ # ── Fixtures ──────────────────────────────────────────────────────────────────
23
+ REGEX_HIT_LOG = ("ModernCRM", "User User123 logged in.")
24
+ REGEX_HIT_LOG2 = ("BillingSystem", "GET /api/v1/invoices HTTP/1.1 status: 200 len: 100 time: 0.1")
25
+ LEGACY_LOG = ("LegacyCRM", "Case escalation for ticket 9021 failed.")
26
+ NON_REGEX_LOG = ("ModernHR", "The inventory sync completed without matching standard patterns.")
27
+
28
+
29
+ # ── Schema completeness ───────────────────────────────────────────────────────
30
+ class TestResultSchema:
31
+ def test_classify_log_has_required_keys(self):
32
+ with patch("classify.bert_batch", return_value=[("Error", 0.95)]):
33
+ result = classify_log(*NON_REGEX_LOG)
34
+ assert "label" in result
35
+ assert "tier" in result
36
+ assert "confidence" in result
37
+ assert "latency_ms" in result
38
+
39
+ def test_latency_ms_is_positive(self):
40
+ result = classify_log(*REGEX_HIT_LOG)
41
+ assert result["latency_ms"] > 0
42
+
43
+ def test_confidence_is_float_or_none(self):
44
+ result = classify_log(*REGEX_HIT_LOG)
45
+ assert result["confidence"] is None or isinstance(result["confidence"], float)
46
+
47
+
48
+ # ── Regex tier routing ─────────────────────────────────────────────────────────
49
+ class TestRegexRouting:
50
+ def test_regex_match_returns_regex_tier(self):
51
+ result = classify_log(*REGEX_HIT_LOG)
52
+ assert result["tier"] == "Regex"
53
+
54
+ def test_regex_match_has_full_confidence(self):
55
+ result = classify_log(*REGEX_HIT_LOG)
56
+ assert result["confidence"] == 1.0
57
+
58
+ def test_regex_match_http_log(self):
59
+ result = classify_log(*REGEX_HIT_LOG2)
60
+ assert result["tier"] == "Regex"
61
+ assert result["label"] == "HTTP Status"
62
+
63
+ def test_regex_match_skips_bert(self):
64
+ """If regex matches, bert_batch should never be called."""
65
+ with patch("classify.bert_batch") as mock_bert:
66
+ classify_log(*REGEX_HIT_LOG)
67
+ mock_bert.assert_not_called()
68
+
69
+
70
+ # ── LegacyCRM routing ─────────────────────────────────────────────────────────
71
+ class TestLegacyCRMRouting:
72
+ def test_legacy_crm_goes_to_llm(self):
73
+ with patch("classify.classify_with_llm", return_value="Workflow Error") as mock_llm:
74
+ result = classify_log(*LEGACY_LOG)
75
+ assert result["tier"] == "LLM"
76
+ mock_llm.assert_called_once()
77
+
78
+ def test_legacy_crm_skips_regex(self):
79
+ """LegacyCRM should skip regex entirely — go straight to LLM."""
80
+ with patch("classify.classify_with_regex") as mock_regex, \
81
+ patch("classify.classify_with_llm", return_value="Workflow Error"):
82
+ classify_log(*LEGACY_LOG)
83
+ mock_regex.assert_not_called()
84
+
85
+ def test_legacy_crm_skips_bert(self):
86
+ with patch("classify.bert_batch") as mock_bert, \
87
+ patch("classify.classify_with_llm", return_value="Workflow Error"):
88
+ classify_log(*LEGACY_LOG)
89
+ mock_bert.assert_not_called()
90
+
91
+
92
+ # ── BERT routing ──────────────────────────────────────────────────────────────
93
+ class TestBERTRouting:
94
+ def test_high_confidence_bert_stays_bert(self):
95
+ with patch("classify.bert_batch", return_value=[("Security Alert", 0.95)]):
96
+ result = classify_log(*NON_REGEX_LOG)
97
+ assert result["tier"] == "BERT"
98
+ assert result["label"] == "Security Alert"
99
+ assert result["confidence"] == pytest.approx(0.95)
100
+
101
+ def test_low_confidence_bert_falls_back_to_llm(self):
102
+ """BERT returning 'Unclassified' should escalate to LLM."""
103
+ with patch("classify.bert_batch", return_value=[("Unclassified", 0.20)]), \
104
+ patch("classify.classify_with_llm", return_value="Workflow Error") as mock_llm:
105
+ result = classify_log(*NON_REGEX_LOG)
106
+ assert "LLM" in result["tier"]
107
+ mock_llm.assert_called_once()
108
+
109
+ def test_bert_batch_called_for_non_regex_log(self):
110
+ with patch("classify.bert_batch", return_value=[("Error", 0.88)]) as mock_bert:
111
+ classify_log(*NON_REGEX_LOG)
112
+ mock_bert.assert_called_once()
113
+
114
+
115
+ # ── Batch routing ──────────────────────────────────────────────────────────────
116
+ class TestBatchRouting:
117
+ def test_batch_returns_correct_length(self):
118
+ logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2, LEGACY_LOG]
119
+ with patch("classify.classify_with_llm", return_value="Workflow Error"):
120
+ results = classify_logs(logs)
121
+ assert len(results) == len(logs)
122
+
123
+ def test_batch_mixed_tiers(self):
124
+ logs = [
125
+ REGEX_HIT_LOG, # β†’ Regex
126
+ ("ModernCRM", "GET /api HTTP/1.1 status: 200"), # β†’ Regex (HTTP)
127
+ LEGACY_LOG, # β†’ LLM
128
+ ]
129
+ with patch("classify.classify_with_llm", return_value="Workflow Error"):
130
+ results = classify_logs(logs)
131
+
132
+ assert results[0]["tier"] == "Regex"
133
+ assert results[1]["tier"] == "Regex"
134
+ assert results[2]["tier"] == "LLM"
135
+
136
+ def test_pipeline_summary_structure(self):
137
+ logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2]
138
+ results = classify_logs(logs)
139
+ summary = pipeline_summary(results)
140
+
141
+ assert "total" in summary
142
+ assert "tier_stats" in summary
143
+ assert "label_counts" in summary
144
+ assert summary["total"] == 2
145
+
146
+ def test_pipeline_summary_tier_pcts_sum_to_100(self):
147
+ logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2, REGEX_HIT_LOG]
148
+ results = classify_logs(logs)
149
+ summary = pipeline_summary(results)
150
+ total_pct = sum(s["pct"] for s in summary["tier_stats"].values())
151
+ assert abs(total_pct - 100.0) < 1.0, f"Tier pcts don't sum to 100: {total_pct}"
152
+
153
+
154
+ # ── Edge cases ────────────────────────────────────────────────────────────────
155
+ class TestEdgeCases:
156
+ def test_empty_batch_returns_empty(self):
157
+ results = classify_logs([])
158
+ assert results == []
159
+
160
+ def test_single_log_batch(self):
161
+ with patch("classify.bert_batch", return_value=[("Error", 0.85)]):
162
+ results = classify_logs([NON_REGEX_LOG])
163
+ assert len(results) == 1
164
+
165
+ def test_all_regex_batch_never_calls_bert(self):
166
+ logs = [REGEX_HIT_LOG] * 10
167
+ with patch("classify.bert_batch") as mock_bert:
168
+ classify_logs(logs)
169
+ mock_bert.assert_not_called()
170
+
171
+ def test_llm_failure_returns_unclassified(self):
172
+ """LLM crashing should return Unclassified, not raise."""
173
+ with patch("classify.classify_with_llm", side_effect=Exception("LLM down")):
174
+ try:
175
+ result = classify_log(*LEGACY_LOG)
176
+ # If it doesn't raise, Unclassified should be label
177
+ assert result["label"] == "Unclassified"
178
+ except Exception:
179
+ pytest.fail("classify_log raised an exception β€” should have returned Unclassified")