NOT-OMEGA committed on
Commit
238ab41
·
verified ·
1 Parent(s): 0a86214

Delete HF

Browse files
HF/app_gradio.py DELETED
@@ -1,187 +0,0 @@
1
- """
2
- Log Classification System — HuggingFace Spaces
3
- Gradio UI for the 3-tier hybrid log classification pipeline.
4
- """
5
- from __future__ import annotations
6
- import io
7
- import time
8
- import pandas as pd
9
- import gradio as gr
10
- from classify import classify_log, classify_csv
11
-
12
- # ── Source options ──────────────────────────────────────────────────────────
13
- SOURCES = [
14
- "ModernCRM",
15
- "ModernHR",
16
- "BillingSystem",
17
- "AnalyticsEngine",
18
- "ThirdPartyAPI",
19
- "LegacyCRM",
20
- ]
21
-
22
- TIER_COLORS = {
23
- "Regex": "🟢",
24
- "BERT": "🔵",
25
- "LLM": "🟡",
26
- "LLM (fallback)": "🟠",
27
- }
28
-
29
- EXAMPLE_LOGS = [
30
- ["ModernCRM", "User User12345 logged in."],
31
- ["ModernHR", "Multiple login failures occurred on user 6454 account"],
32
- ["BillingSystem", "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
33
- ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
34
- ["LegacyCRM", "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
35
- ["LegacyCRM", "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
36
- ]
37
-
38
-
39
- # ── Single log tab ──────────────────────────────────────────────────────────
40
def classify_single(source: str, log_message: str):
    """Classify one log message and format the result for display.

    Args:
        source: Name of the system the log came from; passed straight through
            to ``classify_log``.
        log_message: Raw log text. ``None``, empty, or whitespace-only input
            short-circuits to placeholder output without calling the pipeline.

    Returns:
        A 4-tuple of display strings:
        ``(label, "<icon> <tier>", confidence, "<latency> ms")``.
        ``confidence`` is rendered as a percentage, or ``"N/A"`` when the
        pipeline reports ``None``.
    """
    # Guard against a missing value as well as blank text: calling
    # ``.strip()`` on ``None`` would raise AttributeError.
    if not log_message or not log_message.strip():
        return "—", "—", "—", "—"

    # Time only the pipeline call so the reported latency excludes formatting.
    t0 = time.perf_counter()
    result = classify_log(source, log_message)
    latency_ms = (time.perf_counter() - t0) * 1000

    label = result["label"]
    tier = result["tier"]
    confidence = f"{result['confidence']:.1%}" if result["confidence"] is not None else "N/A"
    icon = TIER_COLORS.get(tier, "⚪")  # unknown tiers get a neutral marker

    return (
        label,
        f"{icon} {tier}",
        confidence,
        f"{latency_ms:.1f} ms",
    )
59
-
60
-
61
- # ── Batch CSV tab ───────────────────────────────────────────────────────────
62
def classify_batch(file):
    """Classify every row of an uploaded CSV and summarise the outcome.

    Args:
        file: The upload handed over by the UI. Either a path string or an
            object exposing the path as ``.name``; ``None`` means nothing was
            uploaded.

    Returns:
        ``(output_path, stats_text)`` on success, or ``(None, message)`` when
        no file was supplied or classification failed.
    """
    if file is None:
        return None, "⚠️ Please upload a CSV file."

    # Local imports keep this block self-contained.
    import os
    import tempfile

    # Accept both a plain path string and a file-like wrapper with ``.name``.
    input_path = file if isinstance(file, str) else file.name

    # Write into a per-request directory so concurrent requests cannot
    # overwrite each other's results; the download file name is unchanged.
    out_dir = tempfile.mkdtemp(prefix="log_classify_")
    target_path = os.path.join(out_dir, "classified_output.csv")

    try:
        output_path, df = classify_csv(input_path, target_path)
    except ValueError as e:
        # ValueError is surfaced as a user-facing warning rather than an error.
        return None, f"⚠️ {e}"
    except Exception as e:
        # UI boundary: report any other failure instead of crashing the app.
        return None, f"❌ Error: {e}"

    total = len(df)
    tier_counts = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()

    # When ``total`` is 0 both dicts are empty, so ``v/total`` is never evaluated.
    tier_lines = "\n".join(
        f" {TIER_COLORS.get(k,'⚪')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items()
    )
    label_lines = "\n".join(f" • {k}: {v}" for k, v in label_counts.items())

    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}"
    )

    return output_path, stats
87
-
88
-
89
- # ── UI ──────────────────────────────────────────────────────────────────────
90
- with gr.Blocks(title="Log Classification System", theme=gr.themes.Soft()) as demo:
91
-
92
- gr.Markdown("""
93
- # 🔍 Log Classification System
94
- **3-tier hybrid pipeline** → 🟢 Regex · 🔵 BERT + LogReg · 🟡 LLM
95
- Built to mimic production enterprise log monitoring architecture.
96
- """)
97
-
98
- with gr.Tabs():
99
-
100
- # ── Tab 1: Single Log ────────────────────────────────────────────
101
- with gr.Tab("Single Log"):
102
- with gr.Row():
103
- source_input = gr.Dropdown(
104
- choices=SOURCES,
105
- value="ModernCRM",
106
- label="Source System",
107
- )
108
- log_input = gr.Textbox(
109
- label="Log Message",
110
- placeholder="Paste a log message here...",
111
- lines=3,
112
- )
113
-
114
- classify_btn = gr.Button("Classify", variant="primary")
115
-
116
- with gr.Row():
117
- label_out = gr.Textbox(label="🏷️ Predicted Label", interactive=False)
118
- tier_out = gr.Textbox(label="⚙️ Tier Used", interactive=False)
119
- confidence_out = gr.Textbox(label="📈 Confidence", interactive=False)
120
- latency_out = gr.Textbox(label="⏱️ Latency", interactive=False)
121
-
122
- classify_btn.click(
123
- fn=classify_single,
124
- inputs=[source_input, log_input],
125
- outputs=[label_out, tier_out, confidence_out, latency_out],
126
- )
127
-
128
- gr.Examples(
129
- examples=EXAMPLE_LOGS,
130
- inputs=[source_input, log_input],
131
- label="📋 Example Logs (click to try)",
132
- )
133
-
134
- # ── Tab 2: Batch CSV ─────────────────────────────────────────────
135
- with gr.Tab("Batch CSV Upload"):
136
- gr.Markdown("""
137
- Upload a CSV with columns: **`source`**, **`log_message`**
138
- Download the classified CSV with added columns: `predicted_label`, `tier_used`, `confidence`.
139
- """)
140
- with gr.Row():
141
- with gr.Column():
142
- csv_input = gr.File(label="📂 Upload CSV", file_types=[".csv"])
143
- batch_btn = gr.Button("Classify All", variant="primary")
144
- with gr.Column():
145
- csv_output = gr.File(label="📥 Download Classified CSV")
146
- stats_out = gr.Textbox(label="📊 Stats", lines=12, interactive=False)
147
-
148
- batch_btn.click(
149
- fn=classify_batch,
150
- inputs=[csv_input],
151
- outputs=[csv_output, stats_out],
152
- )
153
-
154
- gr.Markdown("""
155
- **Sample CSV format:**
156
- ```
157
- source,log_message
158
- ModernCRM,User User123 logged in.
159
- LegacyCRM,Case escalation for ticket ID 7324 failed.
160
- BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
161
- ```
162
- """)
163
-
164
- # ── Tab 3: Architecture ──────────────────────────────────────────
165
- with gr.Tab("Architecture"):
166
- gr.Markdown("""
167
- ## 🏗️ 3-Tier Hybrid Pipeline
168
-
169
- | Tier | Method | Coverage | Latency | When Used |
170
- |------|--------|----------|---------|-----------|
171
- | 🟢 Regex | Python `re` patterns | ~21% | < 1ms | Fixed patterns (login, backup, etc.) |
172
- | 🔵 BERT | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories with 150+ samples |
173
- | 🟡 LLM | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM logs, rare patterns |
174
-
175
- ## 📊 Model Performance (from training)
176
- - **BERT + LogReg** trained on 2,410 synthetic enterprise logs
177
- - **Confidence threshold**: 0.5 (below → escalate to LLM)
178
- - **Source-aware routing**: `LegacyCRM` bypasses ML entirely (only 7 training samples)
179
-
180
- ## 🔑 Environment Variables
181
- | Secret | Required For |
182
- |--------|-------------|
183
- | `HF_TOKEN` | LLM inference (LegacyCRM logs) |
184
- """)
185
-
186
- if __name__ == "__main__":
187
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/benchmark.py DELETED
@@ -1,214 +0,0 @@
1
- """
2
- benchmark.py — Full Benchmark Harness
3
-
4
- Outputs a CSV with columns:
5
- batch_size, total_logs, elapsed_sec, throughput_logs_sec, p50_ms, p95_ms, p99_ms, mean_ms, cpu_mean_pct, cpu_max_pct, ram_mean_mb, ram_max_mb, tier_regex_pct, tier_bert_pct, tier_llm_pct
6
-
7
- Usage:
8
- python benchmark.py --logs 5000 --output benchmark_results.csv
9
-
10
- What it measures:
11
- - Batch size sweep: 1, 8, 16, 32, 64, 128
12
- - Throughput (logs/sec)
13
- - Latency: p50 / p95 / p99 (per-log)
14
- - CPU and RAM during inference
15
- - Tier distribution (Regex % / BERT % / LLM %)
16
-
17
- Google interview talking point:
18
- "I designed a benchmark harness that sweeps batch sizes and measures
19
- latency percentiles + resource utilization, so I can show the
20
- throughput-latency tradeoff curve empirically."
21
- """
22
- from __future__ import annotations
23
- import argparse
24
- import csv
25
- import os
26
- import random
27
- import sys
28
- import time
29
- import statistics
30
- from pathlib import Path
31
-
32
- import psutil
33
-
34
- # ── Synthetic log generator (no external deps needed) ────────────────────────
35
SOURCES = ["ModernCRM", "ModernHR", "BillingSystem", "AnalyticsEngine", "ThirdPartyAPI"]

# (source, message template) pairs; ``{placeholders}`` are filled by ``_fill``.
_LOG_TEMPLATES = [
    ("ModernCRM", "User User{id} logged in."),
    ("ModernCRM", "IP {ip} blocked due to potential attack"),
    ("ModernHR", "Multiple login failures occurred on user {id} account"),
    ("ModernHR", "Admin access escalation detected for user {id}"),
    ("BillingSystem", "GET /api/v2/invoices HTTP/1.1 status: {code} len: {len} time: {t}"),
    ("BillingSystem", "POST /api/v1/payments HTTP/1.1 status: {code} len: {len} time: {t}"),
    ("AnalyticsEngine", "System crashed due to disk I/O failure on node-{n}"),
    ("AnalyticsEngine", "Backup completed successfully."),
    ("ThirdPartyAPI", "Service payments-api is unreachable after 3 retries"),
    ("ThirdPartyAPI", "CPU usage at {pct}% for the last 10 minutes on node-{n}"),
    ("AnalyticsEngine", "CRITICAL: data corruption detected on shard-{n}"),
    ("ModernCRM", "Health check passed for service {svc}"),
]


def _rand_ip(rng=random):
    """Return a random dotted-quad string.

    Args:
        rng: Object providing ``randint``; defaults to the ``random`` module
            so existing zero-argument callers keep working.
    """
    return f"{rng.randint(10,192)}.{rng.randint(0,255)}.{rng.randint(0,255)}.{rng.randint(1,254)}"


def _fill(template: str, rng=random) -> str:
    """Substitute every known ``{placeholder}`` in *template* with random data.

    A value is drawn for every placeholder kind even when the template does
    not contain it, so the number of random draws per call is constant.

    Args:
        template: Message template from ``_LOG_TEMPLATES``.
        rng: Object providing ``randint``, ``choice`` and ``uniform``;
            defaults to the ``random`` module.
    """
    return (template
            .replace("{id}", str(rng.randint(100, 99999)))
            .replace("{ip}", _rand_ip(rng))
            .replace("{code}", rng.choice(["200", "201", "400", "404", "500", "503"]))
            .replace("{len}", str(rng.randint(100, 9999)))
            .replace("{t}", f"{rng.uniform(0.01, 2.5):.2f}")
            .replace("{n}", str(rng.randint(1, 20)))
            .replace("{pct}", str(rng.randint(60, 99)))
            .replace("{svc}", rng.choice(["auth-api", "billing", "analytics", "events"]))
            )


def generate_logs(n: int, seed: int = 42) -> list[tuple[str, str]]:
    """Generate *n* reproducible synthetic ``(source, log_message)`` pairs.

    Uses a private ``random.Random(seed)`` instead of reseeding the
    process-global generator, so calling this function no longer perturbs
    other users of the ``random`` module.

    Args:
        n: Number of logs to generate.
        seed: Seed for the private generator (default 42).

    Returns:
        A list of ``(source, log_message)`` tuples; identical for identical
        ``(n, seed)``.
    """
    rng = random.Random(seed)
    return [
        (src, _fill(tmpl, rng))
        for src, tmpl in rng.choices(_LOG_TEMPLATES, k=n)
    ]
73
-
74
-
75
- # ── Benchmark runner ─────────────────────────────────────────────────────────
76
def run_benchmark(
    logs: list[tuple[str, str]],
    batch_sizes: list[int],
    output_csv: str,
    warmup_n: int = 50,
) -> list[dict]:
    """Sweep batch sizes over *logs* and record throughput, latency and resources.

    For each batch size the full log list is cut into batches, each batch is
    sent through ``classify_logs``, and one summary row is produced. Rows are
    written to *output_csv* and also returned.

    Args:
        logs: ``(source, log_message)`` pairs to classify.
        batch_sizes: Batch sizes to sweep; non-positive values are skipped.
        output_csv: Destination CSV path; parent directories are created.
        warmup_n: Number of leading logs classified once before timing starts.

    Returns:
        One dict per benchmarked batch size. Empty when nothing could be
        benchmarked, in which case no CSV is written.
    """
    from classify import classify_logs, pipeline_summary

    proc = psutil.Process(os.getpid())
    rows: list[dict] = []

    # Warmup (model load, JIT, etc.)
    print(f"🔥 Warming up with {warmup_n} logs…")
    classify_logs(logs[:warmup_n])

    for bs in batch_sizes:
        if bs <= 0:
            # A zero step would make range() raise; negative sizes yield nothing.
            print(f"⚠️ Skipping invalid batch size {bs} (must be >= 1)")
            continue

        # Slice logs into batches of size `bs`
        batches = [logs[i:i + bs] for i in range(0, len(logs), bs)]
        if not batches:
            continue

        per_log_latencies: list[float] = []
        cpu_samples: list[float] = []
        ram_samples: list[float] = []
        all_results: list[dict] = []

        print(f"\n📐 Batch size = {bs} ({len(batches)} batches × {bs} logs)…")

        # Prime the CPU counter: with interval=None each call reports usage
        # since the previous call, so the first reading of a sweep would
        # otherwise be meaningless.
        proc.cpu_percent(interval=None)

        wall_start = time.perf_counter()

        for batch in batches:
            t0 = time.perf_counter()
            results = classify_logs(batch)
            t1 = time.perf_counter()
            batch_ms = (t1 - t0) * 1000
            # Batch time is attributed evenly to every log in the batch.
            per_log_ms = batch_ms / len(batch)

            per_log_latencies.extend([per_log_ms] * len(batch))
            all_results.extend(results)

            # Resource snapshot
            cpu_samples.append(proc.cpu_percent(interval=None))
            ram_samples.append(proc.memory_info().rss / 1_048_576)  # MB

        wall_elapsed = time.perf_counter() - wall_start
        total_logs = len(logs)
        throughput = round(total_logs / wall_elapsed, 1)

        per_log_latencies.sort()
        n = len(per_log_latencies)

        summary = pipeline_summary(all_results)
        tier_stats = summary["tier_stats"]

        def tier_pct(name):
            # Tiers that never fired are absent from the summary → 0.0
            return tier_stats.get(name, {}).get("pct", 0.0)

        row = {
            "batch_size": bs,
            "total_logs": total_logs,
            "elapsed_sec": round(wall_elapsed, 2),
            "throughput_logs_sec": throughput,
            "p50_ms": round(statistics.median(per_log_latencies), 3),
            # Index-based percentiles, clamped to the last element.
            "p95_ms": round(per_log_latencies[min(int(n * 0.95), n - 1)], 3),
            "p99_ms": round(per_log_latencies[min(int(n * 0.99), n - 1)], 3),
            "mean_ms": round(statistics.mean(per_log_latencies), 3),
            "cpu_mean_pct": round(statistics.mean(cpu_samples), 1) if cpu_samples else 0,
            "cpu_max_pct": round(max(cpu_samples), 1) if cpu_samples else 0,
            "ram_mean_mb": round(statistics.mean(ram_samples), 1) if ram_samples else 0,
            "ram_max_mb": round(max(ram_samples), 1) if ram_samples else 0,
            "tier_regex_pct": tier_pct("Regex"),
            "tier_bert_pct": tier_pct("BERT"),
            "tier_llm_pct": tier_pct("LLM") + tier_pct("LLM (fallback)"),
        }
        rows.append(row)

        print(f" ✅ Throughput: {throughput} logs/sec | "
              f"p50={row['p50_ms']}ms p95={row['p95_ms']}ms p99={row['p99_ms']}ms | "
              f"CPU={row['cpu_mean_pct']}% RAM={row['ram_mean_mb']}MB")
        print(f" 📊 Tiers: Regex={row['tier_regex_pct']}% "
              f"BERT={row['tier_bert_pct']}% "
              f"LLM={row['tier_llm_pct']}%")

    if not rows:
        # Empty log list or no usable batch size: rows[0] below would raise.
        print("\n⚠️ No benchmark rows produced — nothing written.")
        return rows

    # Write CSV
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0]))
        writer.writeheader()
        writer.writerows(rows)

    print(f"\n✅ Benchmark results saved → {output_csv}")
    return rows
168
-
169
-
170
- # ── Scaling stress test ──────────────────────────────────────────────────────
171
def stress_test(sizes: list[int] | None = None) -> None:
    """Quick throughput check at different total log counts.

    Args:
        sizes: Log counts to run, one timed ``classify_logs`` call each.
            Defaults to 5k, 20k, 50k and 100k. Non-positive entries are
            skipped.
    """
    from classify import classify_logs

    # Build the default per call instead of sharing one mutable default list.
    if sizes is None:
        sizes = [5_000, 20_000, 50_000, 100_000]

    print("\n🔥 Stress Test — Scaling")
    print(f"{'N logs':>10} {'Elapsed(s)':>12} {'Throughput':>12} {'p95_ms':>10}")
    print("─" * 50)

    for n in sizes:
        if n <= 0:
            # Nothing to measure, and ``elapsed / n`` would divide by zero.
            continue
        logs = generate_logs(n)
        t0 = time.perf_counter()
        classify_logs(logs)
        elapsed = time.perf_counter() - t0
        tput = n / elapsed
        # Rough p95 approximation: time / n * correction factor
        # (mean per-log time scaled by 1.5; not a measured percentile).
        p95_approx = (elapsed / n * 1000) * 1.5
        print(f"{n:>10,} {elapsed:>12.2f}s {tput:>12.1f}/s {p95_approx:>10.1f}ms")
188
-
189
-
190
- # ── CLI ──────────────────────────────────────────────────────────────────────
191
def main():
    """Command-line entry point: parse options, run the sweep, optionally stress-test."""
    cli = argparse.ArgumentParser(description="Log pipeline benchmark harness")

    # (flags, keyword options) for each supported argument, registered in order.
    option_table = (
        ("--logs", {"type": int, "default": 5_000,
                    "help": "Number of logs to benchmark (default: 5000)"}),
        ("--output", {"default": "benchmark_results.csv",
                      "help": "Output CSV path"}),
        ("--stress", {"action": "store_true",
                      "help": "Run scaling stress test (5k, 20k, 50k, 100k)"}),
        ("--batches", {"default": "1,8,16,32,64,128",
                       "help": "Comma-separated batch sizes to sweep"}),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    opts = cli.parse_args()

    # Parse the sweep first so a malformed --batches fails before any work is done.
    sweep = list(map(int, opts.batches.split(",")))
    corpus = generate_logs(opts.logs)
    print(f"📦 Generated {len(corpus):,} synthetic logs")

    run_benchmark(corpus, sweep, opts.output)

    if opts.stress:
        stress_test()
211
-
212
-
213
- if __name__ == "__main__":
214
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/error_analysis.py DELETED
@@ -1,250 +0,0 @@
1
- """
2
- error_analysis.py — Deep Dive into Unclassified / Misclassified Logs
3
-
4
- This script addresses the 76 unclassified logs from the 20k run.
5
- It answers:
6
- 1. What do these logs look like? (print + group)
7
- 2. Why did the model fail? (pattern analysis)
8
- 3. What should we do? (actionable fix suggestions)
9
-
10
- Google interview talking point:
11
- "I performed structured error analysis on my model's failure cases.
12
- I grouped them by failure type — vocabulary mismatch, ambiguous intent,
13
- formatting noise — and used that to drive targeted improvements."
14
-
15
- Usage:
16
- python error_analysis.py --input output.csv # post-classify CSV
17
- python error_analysis.py --simulate # demo with synthetic data
18
- """
19
- from __future__ import annotations
20
- import argparse
21
- import re
22
- import sys
23
- from collections import Counter, defaultdict
24
- from typing import Optional
25
- import pandas as pd
26
-
27
-
28
- # ── Failure mode taxonomy ────────────────────────────────────────────────────
29
class FailureMode:
    """String constants naming the heuristic reasons a log went unclassified."""

    # domain-specific terms not in training
    RARE_VOCAB = "rare_vocabulary"
    # log could match multiple categories
    AMBIGUOUS = "ambiguous_intent"
    # non-standard / old-school formatting
    LEGACY_FORMAT = "legacy_format"
    # partial / malformed log line
    TRUNCATED = "truncated_or_noisy"
    # ID/code-heavy, no semantic signal
    NUMERIC_ONLY = "mostly_numeric"
    # one line, multiple events
    MULTI_EVENT = "multi_event"
    UNKNOWN = "unknown"


def _detect_failure_mode(log: str) -> str:
    """Guess why *log* was left unclassified.

    Checks run in a fixed priority order and the first match wins:
    truncated → mostly numeric → multi-event → legacy format →
    ambiguous → rare vocabulary → unknown.

    Args:
        log: Raw log message.

    Returns:
        One of the ``FailureMode`` string constants.
    """
    lowered = log.lower()
    size = len(log)

    # Very short lines carry too little text to classify.
    if size < 20:
        return FailureMode.TRUNCATED

    # Share of characters that are digits.
    digit_total = sum(1 for ch in log if ch.isdigit())
    if digit_total / max(size, 1) > 0.40:
        return FailureMode.NUMERIC_ONLY

    # Separators that suggest several events were joined on one line.
    # The " AND " test is case-sensitive, unlike the keyword checks below.
    has_many_semicolons = log.count(";") >= 2
    has_and_joiner = " AND " in log
    has_many_pipes = log.count(" | ") >= 2
    if has_many_semicolons or has_and_joiner or has_many_pipes:
        return FailureMode.MULTI_EVENT

    # Legacy / unusual format keywords (substring match on lower-cased text).
    legacy_markers = ("ticket", "escalation", "crm", "deprecated", "retire",
                      "module will be", "workflow", "assigned agent")
    for marker in legacy_markers:
        if marker in lowered:
            return FailureMode.LEGACY_FORMAT

    # Two or more of these words together: could be error OR security.
    ambiguous_markers = ("failed", "error", "unauthorized", "denied", "blocked")
    hit_count = len([m for m in ambiguous_markers if m in lowered])
    if hit_count >= 2:
        return FailureMode.AMBIGUOUS

    # Rare operational vocabulary.
    rare_markers = ("sla", "oncall", "runbook", "pagerduty", "janitor", "gc ", "eviction")
    for marker in rare_markers:
        if marker in lowered:
            return FailureMode.RARE_VOCAB

    return FailureMode.UNKNOWN


def _suggest_fix(mode: str) -> str:
    """Return the remediation hint for a failure mode.

    Args:
        mode: A ``FailureMode`` constant.

    Returns:
        The hint for *mode*, or ``"Manual review required."`` when *mode* is
        not a known failure mode.
    """
    hints = {
        FailureMode.RARE_VOCAB: "Add 5–10 training examples covering this vocabulary; or add regex rule.",
        FailureMode.AMBIGUOUS: "Use multi-label or add a dedicated 'Ambiguous' class; review confidence threshold.",
        FailureMode.LEGACY_FORMAT: "Route all legacy-format logs to LLM tier; add few-shot examples for LLM prompt.",
        FailureMode.TRUNCATED: "Add input validation: reject/flag logs under 15 chars before classification.",
        FailureMode.NUMERIC_ONLY: "Add regex patterns for structured numeric formats (job IDs, error codes, etc.).",
        FailureMode.MULTI_EVENT: "Pre-process: split multi-event lines on ';' or ' | ' before classifying.",
        FailureMode.UNKNOWN: "Manually review and add to training data or LLM few-shot examples.",
    }
    try:
        return hints[mode]
    except KeyError:
        return "Manual review required."
85
-
86
-
87
- # ── Core analysis ────────────────────────────────────────────────────────────
88
def analyze_unclassified(df: pd.DataFrame, label_col: str = "predicted_label") -> None:
    """Print a multi-section error report for rows labelled ``"Unclassified"``.

    Sections, in order: raw listing, grouping by heuristic failure mode,
    token frequencies, length distribution, per-source counts (only when a
    ``source`` column exists) and a prioritised fix list.

    Args:
        df: Classified logs. The message is read from ``log_message`` when
            that column exists, otherwise from the last column.
        label_col: Column holding the predicted label.
    """
    rule = "=" * 70
    unclassified = df[df[label_col] == "Unclassified"].copy()
    total_unclassified = len(unclassified)

    if total_unclassified == 0:
        print("✅ No unclassified logs found!")
        return

    print(f"\n{rule}")
    print(f"🔍 ERROR ANALYSIS: {total_unclassified} Unclassified Logs")
    print(f"{rule}\n")

    # Collect the message text once; every section below reuses this list.
    log_col = "log_message" if "log_message" in df.columns else df.columns[-1]
    messages = [str(row.get(log_col, "")) for _, row in unclassified.iterrows()]

    # ── Step 1: Print all unclassified logs ─────────────────────────────────
    print(f"{'#':>4} {'Log Message'}")
    print("─" * 80)
    for position, message in enumerate(messages, 1):
        print(f"{position:>4}. {message[:120]}")

    # ── Step 2: Group by failure mode ───────────────────────────────────────
    print(f"\n{rule}")
    print("📂 GROUPING BY FAILURE MODE")
    print("─" * 70)

    groups: dict[str, list[str]] = defaultdict(list)
    for message in messages:
        groups[_detect_failure_mode(message)].append(message)

    # Largest group first; a stable sort keeps first-seen order for ties.
    ranked = sorted(groups.items(), key=lambda pair: len(pair[1]), reverse=True)
    for mode, members in ranked:
        share = len(members) / total_unclassified * 100
        print(f"\n🔹 {mode} — {len(members)} logs ({share:.1f}%)")
        print(f" 💡 Fix: {_suggest_fix(mode)}")
        print(f" Examples:")
        for sample in members[:3]:
            print(f" • {sample[:110]}")

    # ── Step 3: Token frequency analysis ────────────────────────────────────
    print(f"\n{rule}")
    print("📊 COMMON TOKENS IN UNCLASSIFIED LOGS")
    print("─" * 70)

    STOPWORDS = {"the", "a", "an", "is", "in", "on", "for", "to", "of",
                 "and", "or", "by", "at", "with", "has", "was", "be",
                 "this", "that", "it", "not", "are", "from", "as"}

    # Alphabetic runs of 3+ letters, lower-cased, stopwords removed.
    counter = Counter(
        word
        for message in messages
        for word in re.findall(r"[a-z]{3,}", message.lower())
        if word not in STOPWORDS
    )
    print("Top 20 tokens in unclassified logs:")
    for token, count in counter.most_common(20):
        bar = "█" * min(count, 40)  # bar length capped at 40 cells
        print(f" {token:<20} {count:>4} {bar}")

    # ── Step 4: Length distribution ─────────────────────────────────────────
    lengths = unclassified[log_col].apply(lambda value: len(str(value)))
    print(f"\n{rule}")
    print("📏 LOG LENGTH DISTRIBUTION (Unclassified)")
    print(f" Mean: {lengths.mean():.1f} chars")
    print(f" Median: {lengths.median():.1f} chars")
    print(f" Min: {lengths.min()} chars")
    print(f" Max: {lengths.max()} chars")

    short = (lengths < 30).sum()
    if short:
        print(f" ⚠️ {short} logs under 30 chars — likely truncated/noisy")

    # ── Step 5: Source breakdown ─────────────────────────────────────────────
    if "source" in df.columns:
        print(f"\n{rule}")
        print("🏷️ UNCLASSIFIED BY SOURCE")
        for src, cnt in unclassified["source"].value_counts().items():
            bar = "█" * min(cnt, 40)
            print(f" {src:<22} {cnt:>4} {bar}")

    # ── Step 6: Actionable summary ───────────────────────────────────────────
    print(f"\n{rule}")
    print("✅ ACTIONABLE FIXES (Priority Order)")
    print("─" * 70)
    if groups:
        # First-seen mode wins a tie, matching dict iteration order.
        dominant_mode = max(groups, key=lambda name: len(groups[name]))
    else:
        dominant_mode = FailureMode.UNKNOWN
    fixes = [
        (1, "regex", "Add patterns for top unclassified tokens to processor_regex.py"),
        (2, "training", "Add 10–20 examples per failure mode to training data"),
        (3, "llm", "For LEGACY_FORMAT failures: add to LLM few-shot examples"),
        (4, "preproc", "Pre-process: split multi-event logs, reject truncated logs"),
        (5, "threshold", "Tune BERT confidence threshold (currently 0.30 — try 0.40)"),
    ]
    for priority, area, fix in fixes:
        print(f" {priority}. [{area.upper():^10}] {fix}")

    print(f"\n📌 Dominant failure mode: '{dominant_mode}' ({len(groups.get(dominant_mode,[]))} logs)")
    print(f" Start here: {_suggest_fix(dominant_mode)}\n")
189
-
190
-
191
- # ── Simulate 76 unclassified logs for demo ────────────────────────────────────
192
def _simulate_unclassified() -> pd.DataFrame:
    """Build a 76-row demo frame of synthetic unclassified logs.

    The seed messages are repeated cyclically to fill 76 rows. Sources are
    assigned by position (30 ModernCRM, 20 LegacyCRM, 26 AnalyticsEngine)
    and every row is labelled ``"Unclassified"``.

    Returns:
        DataFrame with columns ``source``, ``log_message``, ``predicted_label``.
    """
    row_count = 76
    seed_messages = [
        # Legacy format / CRM
        "Case escalation for ticket ID 9021 failed: agent inactive.",
        "CRM module 'ReportGenerator' will be retired in v4.1.",
        "Workflow for approval chain #4421 stalled at step 3.",
        "SLA breach detected for case ID 7701 (P1, 4h breach).",
        # Ambiguous
        "Service auth-api failed and unauthorized access was logged.",
        "Error: blocked request from 10.0.0.5 — reason unknown.",
        # Truncated / noisy
        "ERR",
        "srv timeout",
        "node-7",
        # Numeric-heavy
        "8821 9001 443 0 0 DROP IN=eth0 OUT= MAC=",
        "16 0 0 1 2024-01-14 03:21:00.001",
        # Multi-event
        "Backup started; disk usage at 92%; health check failed | node-3",
        # Rare vocab
        "PagerDuty alert triggered for on-call rotation P1-incident.",
        "GC eviction: 3.2GB heap compacted in 420ms.",
        "Janitor job completed: 14,000 stale tokens purged.",
        "Runbook auto-remediation triggered for alert ALT-9021.",
    ]

    # Cycle through the seed messages until the target row count is reached.
    cycled = [seed_messages[i % len(seed_messages)] for i in range(row_count)]

    # Source labels are laid out in consecutive runs; the runs sum to 76.
    source_runs = (("ModernCRM", 30), ("LegacyCRM", 20), ("AnalyticsEngine", 26))
    sources = [name for name, repeat in source_runs for _ in range(repeat)]

    columns = {
        "source": sources,
        "log_message": cycled,
        "predicted_label": ["Unclassified" for _ in range(row_count)],
    }
    return pd.DataFrame(columns)
225
-
226
-
227
- # ── CLI ──────────────────────────────────────────────────────────────────────
228
def main():
    """Command-line entry point: load or simulate a classified frame and analyse it.

    Exits with status 1 after printing usage when neither ``--simulate`` nor
    ``--input`` is given. ``--simulate`` takes precedence over ``--input``.
    """
    cli = argparse.ArgumentParser(description="Analyze unclassified/misclassified logs")
    cli.add_argument(
        "--input",
        help="Path to classified CSV from classify_csv()",
    )
    cli.add_argument(
        "--simulate",
        action="store_true",
        help="Run with synthetic unclassified logs (no CSV needed)",
    )
    cli.add_argument(
        "--label-col",
        default="predicted_label",
        help="Column name that holds the predicted label",
    )
    opts = cli.parse_args()

    # Guard clause: nothing to analyse without a data source.
    if not (opts.simulate or opts.input):
        cli.print_help()
        sys.exit(1)

    if opts.simulate:
        frame = _simulate_unclassified()
        print("🎭 Running with SIMULATED 76 unclassified logs…")
    else:
        frame = pd.read_csv(opts.input)

    analyze_unclassified(frame, label_col=opts.label_col)
247
-
248
-
249
- if __name__ == "__main__":
250
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/models/log_classifier.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bfe9c71b71412797de0d426be2255566dbf6cf87b3f2ae5d2cd1fd69a98d18d
3
- size 23997
 
 
 
 
HF/onnx_model/config.json DELETED
@@ -1,24 +0,0 @@
1
- {
2
- "architectures": [
3
- "BertModel"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "classifier_dropout": null,
7
- "gradient_checkpointing": false,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 384,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 1536,
13
- "layer_norm_eps": 1e-12,
14
- "max_position_embeddings": 512,
15
- "model_type": "bert",
16
- "num_attention_heads": 12,
17
- "num_hidden_layers": 6,
18
- "pad_token_id": 0,
19
- "position_embedding_type": "absolute",
20
- "transformers_version": "4.57.6",
21
- "type_vocab_size": 2,
22
- "use_cache": true,
23
- "vocab_size": 30522
24
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/onnx_model/special_tokens_map.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "cls_token": {
3
- "content": "[CLS]",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "mask_token": {
10
- "content": "[MASK]",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "[PAD]",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "sep_token": {
24
- "content": "[SEP]",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "unk_token": {
31
- "content": "[UNK]",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- }
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/onnx_model/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
HF/onnx_model/tokenizer_config.json DELETED
@@ -1,65 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "100": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "101": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "102": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "103": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "clean_up_tokenization_spaces": false,
45
- "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
- "do_lower_case": true,
48
- "extra_special_tokens": {},
49
- "mask_token": "[MASK]",
50
- "max_length": 128,
51
- "model_max_length": 512,
52
- "never_split": null,
53
- "pad_to_multiple_of": null,
54
- "pad_token": "[PAD]",
55
- "pad_token_type_id": 0,
56
- "padding_side": "right",
57
- "sep_token": "[SEP]",
58
- "stride": 0,
59
- "strip_accents": null,
60
- "tokenize_chinese_chars": true,
61
- "tokenizer_class": "BertTokenizer",
62
- "truncation_side": "right",
63
- "truncation_strategy": "longest_first",
64
- "unk_token": "[UNK]"
65
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/onnx_model/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
HF/processor_llm.py DELETED
@@ -1,192 +0,0 @@
1
- """
2
- processor_llm.py — Tier 3: LLM-based Classifier
3
-
4
- Used for:
5
- - LegacyCRM logs (Workflow Error, Deprecation Warning)
6
- - BERT fallback when confidence < threshold
7
-
8
- Production hardening in V3:
9
- - Timeout (configurable, default 5s)
10
- - Retry with exponential backoff (max 2 retries)
11
- - Explicit failure modes: returns "Unclassified" on all error paths
12
- - Caching for repeated log patterns (hash-based, in-memory)
13
- - Token budget enforcement (max_tokens=15)
14
- """
15
- from __future__ import annotations
16
- import os
17
- import re
18
- import time
19
- import hashlib
20
- import logging
21
- from functools import lru_cache
22
- from typing import Optional
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
- # ── Config ─────────────────────────────────────────────────────────────────
27
- HF_TOKEN = os.getenv("HF_TOKEN")
28
- LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
29
-
30
- VALID_CATEGORIES = ["Workflow Error", "Deprecation Warning"]
31
-
32
- # Retry / timeout config
33
- MAX_RETRIES = 2
34
- RETRY_DELAY_SEC = 1.0 # doubles on each retry (exponential backoff)
35
- REQUEST_TIMEOUT = 5 # seconds — fail fast, do not hang pipeline
36
-
37
- # In-memory cache to avoid redundant LLM calls for repeated logs
38
- _RESPONSE_CACHE: dict[str, str] = {}
39
- MAX_CACHE_SIZE = 1000 # evict oldest when full (simple FIFO)
40
-
41
- SYSTEM_PROMPT = (
42
- "You are an enterprise log classifier. "
43
- "Classify log messages into exactly one category. "
44
- "Return ONLY the category name — no explanation, no punctuation."
45
- )
46
-
47
- FEW_SHOT_EXAMPLES = [
48
- {
49
- "log": "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
50
- "label": "Workflow Error",
51
- },
52
- {
53
- "log": "The 'BulkEmailSender' feature is no longer supported. Use 'EmailCampaignManager' instead.",
54
- "label": "Deprecation Warning",
55
- },
56
- {
57
- "log": "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
58
- "label": "Workflow Error",
59
- },
60
- ]
61
-
62
-
63
- # ── Cache helpers ────────────────────────────────────────────────────────────
64
- def _cache_key(log_msg: str) -> str:
65
- return hashlib.md5(log_msg.strip().encode()).hexdigest()
66
-
67
-
68
- def _cache_get(log_msg: str) -> Optional[str]:
69
- return _RESPONSE_CACHE.get(_cache_key(log_msg))
70
-
71
-
72
- def _cache_set(log_msg: str, label: str) -> None:
73
- key = _cache_key(log_msg)
74
- if len(_RESPONSE_CACHE) >= MAX_CACHE_SIZE:
75
- # Evict oldest (first inserted) key
76
- oldest = next(iter(_RESPONSE_CACHE))
77
- del _RESPONSE_CACHE[oldest]
78
- _RESPONSE_CACHE[key] = label
79
-
80
-
81
- def get_cache_stats() -> dict:
82
- return {"size": len(_RESPONSE_CACHE), "max_size": MAX_CACHE_SIZE}
83
-
84
-
85
- # ── Prompt builder ───────────────────────────────────────────────────────────
86
- def _build_messages(log_msg: str) -> list[dict]:
87
- categories_str = ", ".join(f'"{c}"' for c in VALID_CATEGORIES)
88
- user_content = (
89
- f'Classify the following log into one of these categories: {categories_str}.\n'
90
- 'If none fits, return "Unclassified".\n\n'
91
- )
92
- for ex in FEW_SHOT_EXAMPLES:
93
- user_content += f'Log: {ex["log"]}\nCategory: {ex["label"]}\n\n'
94
- user_content += f"Log: {log_msg}\nCategory:"
95
-
96
- return [
97
- {"role": "system", "content": SYSTEM_PROMPT},
98
- {"role": "user", "content": user_content},
99
- ]
100
-
101
-
102
- # ── Normalize raw LLM output ─────────────────────────────────────────────────
103
- def _normalize(raw: str) -> str:
104
- """Map raw LLM output to a valid category or 'Unclassified'."""
105
- raw = raw.strip().strip('"').strip("'")
106
- for cat in VALID_CATEGORIES:
107
- if cat.lower() in raw.lower():
108
- return cat
109
- return "Unclassified"
110
-
111
-
112
- # ── Main classify function ────────────────────────────────────────────────────
113
- def classify_with_llm(log_msg: str) -> str:
114
- """
115
- Tier 3 LLM classifier with:
116
- - In-memory cache (avoids duplicate API calls)
117
- - Timeout (REQUEST_TIMEOUT seconds)
118
- - Retry with exponential backoff (MAX_RETRIES attempts)
119
- - Explicit fallback to "Unclassified" on all error paths
120
-
121
- Latency: 500–2000ms on cache miss; ~0ms on cache hit.
122
- """
123
- # ── Cache hit ────────────────────────────────────────────────────────────
124
- cached = _cache_get(log_msg)
125
- if cached is not None:
126
- logger.debug(f"[LLM] Cache hit for: {log_msg[:60]}")
127
- return cached
128
-
129
- # ── Inference with retry ─────────────────────────────────────────────────
130
- if not HF_TOKEN:
131
- logger.warning("[LLM] HF_TOKEN not set — returning Unclassified")
132
- return "Unclassified"
133
-
134
- from huggingface_hub import InferenceClient
135
-
136
- client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
137
- delay = RETRY_DELAY_SEC
138
- last_err: Optional[Exception] = None
139
-
140
- for attempt in range(1, MAX_RETRIES + 2): # +2: initial + MAX_RETRIES
141
- try:
142
- response = client.chat.completions.create(
143
- model=LLM_MODEL,
144
- messages=_build_messages(log_msg),
145
- max_tokens=15,
146
- temperature=0.1,
147
- )
148
- raw = response.choices[0].message.content
149
- label = _normalize(raw)
150
-
151
- _cache_set(log_msg, label)
152
- logger.debug(f"[LLM] Attempt {attempt}: '{raw.strip()}' → '{label}'")
153
- return label
154
-
155
- except Exception as e:
156
- last_err = e
157
- if attempt <= MAX_RETRIES:
158
- logger.warning(f"[LLM] Attempt {attempt} failed ({e}), retrying in {delay:.1f}s…")
159
- time.sleep(delay)
160
- delay *= 2 # exponential backoff
161
- else:
162
- logger.error(f"[LLM] All {MAX_RETRIES + 1} attempts failed. Last error: {e}")
163
-
164
- return "Unclassified"
165
-
166
-
167
- # ── Batch classify (serial — LLM is already rate-limited) ────────────────────
168
- def classify_batch_llm(log_msgs: list[str]) -> list[str]:
169
- """Classify multiple logs through LLM. Each call is sequential to respect rate limits."""
170
- return [classify_with_llm(msg) for msg in log_msgs]
171
-
172
-
173
- # ── CLI test ─────────────────────────────────────────────────────────────────
174
- if __name__ == "__main__":
175
- logging.basicConfig(level=logging.INFO)
176
-
177
- test_logs = [
178
- "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
179
- "The 'ReportGenerator' module will be retired in version 4.0. Migrate to 'AdvancedAnalyticsSuite'.",
180
- "System reboot initiated by user 12345.", # should be Unclassified
181
- ]
182
- for log in test_logs:
183
- result = classify_with_llm(log)
184
- print(f"{result:25s} | {log[:80]}")
185
-
186
- # Cache hit test
187
- print("\n── Cache hit test ──")
188
- t0 = time.perf_counter()
189
- classify_with_llm(test_logs[0])
190
- t1 = time.perf_counter()
191
- print(f"Cache hit latency: {(t1-t0)*1000:.2f}ms")
192
- print(f"Cache stats: {get_cache_stats()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/processor_regex.py DELETED
@@ -1,220 +0,0 @@
1
- """
2
- processor_regex.py — Tier 1: Rule-based Classifier
3
-
4
- Target coverage: 40%+ (up from 15%)
5
- Latency: sub-millisecond per log
6
-
7
- New pattern groups added:
8
- - HTTP request/response logs (was completely missing!)
9
- - Auth / credential events (login failures, MFA, lockouts)
10
- - System/infra events (disk, CPU, memory, cron)
11
- - Network / firewall events (IP block, port scan)
12
- - Structured error codes (ERROR, CRITICAL prefix logs)
13
- """
14
- from __future__ import annotations
15
- import re
16
- import time
17
- from typing import Optional
18
-
19
- # ---------------------------------------------------------------------------
20
- # Pattern registry: (compiled_pattern, label)
21
- # Order matters — more specific patterns FIRST to avoid mis-labeling.
22
- # ---------------------------------------------------------------------------
23
- _RAW_PATTERNS: list[tuple[str, str]] = [
24
-
25
- # ── HTTP Status ─────────────────────────────────────────────────────────
26
- # Covers: GET/POST/PUT/DELETE/PATCH + status code in request line
27
- (r"\b(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s+\S+\s+HTTP/\d", "HTTP Status"),
28
- # Nova / OpenStack style
29
- (r"nova\.\S+\s+(GET|POST|PUT|DELETE)\s+\S+\s+HTTP/\d", "HTTP Status"),
30
- # Status code only style: "returned HTTP 200" or "status: 404"
31
- (r"\bstatus[:\s]+\d{3}\b", "HTTP Status"),
32
- (r"\breturned\s+HTTP\s+\d{3}\b", "HTTP Status"),
33
- (r"\bHTTP\s+status\s+code\s*[:-]?\s*\d{3}\b", "HTTP Status"),
34
- # API response style
35
- (r"\bAPI\s+(call|request)\s+\S+\s+completed\s+with\s+status\s+\d{3}", "HTTP Status"),
36
- (r"\bEndpoint\s+\S+\s+responded\s+with\s+code\s+\d{3}", "HTTP Status"),
37
-
38
- # ── Security Alert ──────────────────────────────────────────────────────
39
- # Brute force / login failures
40
- (r"(multiple\s+)?(bad\s+|failed?\s+)?login\s+(failure|attempt|failures)", "Security Alert"),
41
- (r"brute[\s_-]force\s+(login|attack|attempt)", "Security Alert"),
42
- # Unauthorized access
43
- (r"unauthorized\s+(access|admin|privilege|attempt)", "Security Alert"),
44
- (r"access\s+denied\s+(for|to)\s+(user|ip|host)", "Security Alert"),
45
- # Privilege escalation
46
- (r"(admin\s+)?access\s+escalation\s+detected", "Security Alert"),
47
- (r"privilege\s+(elev|escalat)", "Security Alert"),
48
- # IP blocking / suspicious traffic
49
- (r"IP\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+blocked", "Security Alert"),
50
- (r"(suspicious|anomalous)\s+(login|traffic|activity|request)", "Security Alert"),
51
- (r"potential\s+(DDoS|attack|breach|intrusion)", "Security Alert"),
52
- (r"security\s+breach\s+suspected", "Security Alert"),
53
- (r"(API\s+security\s+breach|bypass\s+API\s+security)", "Security Alert"),
54
- (r"port\s+scan\s+(detected|attempt)", "Security Alert"),
55
-
56
- # ── User Action ─────────────────────────────────────────────────────────
57
- (r"User\s+\w+\d*\s+logged\s+(in|out)", "User Action"),
58
- (r"Account\s+(with\s+)?ID\s+\S+\s+created\s+by", "User Action"),
59
- (r"User\s+\w+\d*\s+(updated\s+profile|changed\s+password|enabled\s+two|downloaded|exported)", "User Action"),
60
- (r"(New\s+user|user\s+\w+\d*)\s+registered", "User Action"),
61
- (r"Account\s+\S+\s+deleted\s+by\s+(administrator|admin)", "User Action"),
62
- (r"User\s+\w+\d*\s+(tried|attempted)", "User Action"),
63
-
64
- # ── System Notification ─────────────────────────────────────────────────
65
- # Backup events
66
- (r"Backup\s+(started|ended|completed\s+successfully|failed|aborted)", "System Notification"),
67
- (r"System\s+updated\s+to\s+version", "System Notification"),
68
- (r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user", "System Notification"),
69
- (r"Disk\s+cleanup\s+completed\s+successfully", "System Notification"),
70
- (r"System\s+reboot\s+initiated\s+by\s+user", "System Notification"),
71
- (r"Scheduled\s+maintenance\s+(started|completed)", "System Notification"),
72
- (r"Service\s+\w+\s+restarted\s+successfully", "System Notification"),
73
- # NEW: cache, cron, health check, cert, log rotation
74
- (r"Cache\s+cleared\s+successfully", "System Notification"),
75
- (r"Log\s+rotation\s+completed", "System Notification"),
76
- (r"Health\s+check\s+(passed|failed)\s+for\s+service", "System Notification"),
77
- (r"Certificate\s+(renewed|expired|revoked)\s+successfully", "System Notification"),
78
- (r"Cron\s+job\s+\S+\s+(executed|failed|completed)\s+successfully", "System Notification"),
79
- (r"(Disk|Storage)\s+(usage|space)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
80
- (r"CPU\s+usage\s+at\s+\d+%", "System Notification"),
81
- (r"Memory\s+(usage|limit)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
82
- # Deployment / config
83
- (r"Deployment\s+(of|for)\s+\S+\s+(completed|failed|started)", "System Notification"),
84
- (r"Configuration\s+(reloaded|updated|applied)\s+successfully", "System Notification"),
85
-
86
- # ── Error ───────────────────────────────────────────────────────────────
87
- (r"\bERROR\b.*\b(exception|failed|failure|crash|timeout|unavailable)\b", "Error"),
88
- (r"System\s+crashed\s+due\s+to", "Error"),
89
- (r"(connection|request|task|job)\s+(timed?\s*out|timeout)", "Error"),
90
- (r"service\s+\S+\s+(is\s+down|unavailable|unreachable)", "Error"),
91
- (r"database\s+connection\s+(failed|refused|lost|dropped)", "Error"),
92
- (r"disk\s+(I/O\s+)?failure", "Error"),
93
- (r"driver\s+error(s)?\s+(when|during|on)", "Error"),
94
- (r"(replication|sync)\s+task\s+(did\s+not\s+complete|failed)", "Error"),
95
- (r"null\s+pointer|segmentation\s+fault|stack\s+overflow", "Error"),
96
-
97
- # ── Critical Error ──────────────────────────────────────────────────────
98
- (r"\bCRITICAL\b", "Critical Error"),
99
- (r"(FATAL|PANIC)\b", "Critical Error"),
100
- (r"(data\s+loss|data\s+corruption)\s+(detected|occurred)", "Critical Error"),
101
- (r"(cluster|node|shard)\s+(failure|crashed|went\s+down)", "Critical Error"),
102
- (r"(catastrophic|unrecoverable)\s+(failure|error)", "Critical Error"),
103
- (r"kernel\s+panic", "Critical Error"),
104
- (r"out[\s-]of[\s-](memory|disk)\s+(error|killed|OOM)", "Critical Error"),
105
- ]
106
-
107
- # Pre-compile all patterns at import time (not per-call)
108
- REGEX_PATTERNS: list[tuple[re.Pattern, str]] = [
109
- (re.compile(pat, re.IGNORECASE), label)
110
- for pat, label in _RAW_PATTERNS
111
- ]
112
-
113
-
114
- def classify_with_regex(log_message: str) -> Optional[str]:
115
- """
116
- Tier 1: Rule-based classifier.
117
- Returns category label, or None if no pattern matches.
118
- Latency: sub-millisecond (patterns pre-compiled at import).
119
- """
120
- for pattern, label in REGEX_PATTERNS:
121
- if pattern.search(log_message):
122
- return label
123
- return None
124
-
125
-
126
- def get_regex_coverage(log_messages: list[str]) -> dict:
127
- """Measure regex tier coverage and per-label breakdown."""
128
- label_counts: dict[str, int] = {}
129
- missed = 0
130
-
131
- for msg in log_messages:
132
- label = classify_with_regex(msg)
133
- if label:
134
- label_counts[label] = label_counts.get(label, 0) + 1
135
- else:
136
- missed += 1
137
-
138
- total = len(log_messages)
139
- matched = total - missed
140
-
141
- return {
142
- "total": total,
143
- "matched": matched,
144
- "missed": missed,
145
- "coverage_pct": round(matched / total * 100, 2) if total else 0.0,
146
- "label_breakdown": label_counts,
147
- }
148
-
149
-
150
- def benchmark_regex(log_messages: list[str], runs: int = 3) -> dict:
151
- """Measure regex tier latency (p50 / p95 / p99) over multiple runs."""
152
- import statistics
153
- per_log_ms: list[float] = []
154
-
155
- for _ in range(runs):
156
- for msg in log_messages:
157
- t0 = time.perf_counter()
158
- classify_with_regex(msg)
159
- per_log_ms.append((time.perf_counter() - t0) * 1000)
160
-
161
- per_log_ms.sort()
162
- return {
163
- "p50_ms": round(statistics.median(per_log_ms), 4),
164
- "p95_ms": round(per_log_ms[int(len(per_log_ms) * 0.95)], 4),
165
- "p99_ms": round(per_log_ms[int(len(per_log_ms) * 0.99)], 4),
166
- "mean_ms": round(statistics.mean(per_log_ms), 4),
167
- }
168
-
169
-
170
- # ── CLI self-test ────────────────────────────────────────────────────────────
171
- if __name__ == "__main__":
172
- test_cases: list[tuple[str, str]] = [
173
- # HTTP
174
- ("GET /api/v2/resource HTTP/1.1 status: 200 len: 1583 time: 0.19", "HTTP Status"),
175
- ("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05", "HTTP Status"),
176
- ("nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404", "HTTP Status"),
177
- # Security
178
- ("Multiple login failures occurred on user 6454 account", "Security Alert"),
179
- ("IP 192.168.133.114 blocked due to potential attack", "Security Alert"),
180
- ("Brute force login attempt from 10.0.0.5 detected", "Security Alert"),
181
- ("Admin access escalation detected for user 9429", "Security Alert"),
182
- # User Action
183
- ("User User12345 logged in.", "User Action"),
184
- ("Account with ID 456 created by Admin.", "User Action"),
185
- # System Notification
186
- ("Backup completed successfully.", "System Notification"),
187
- ("CPU usage at 98% for the last 10 minutes on node-7", "System Notification"),
188
- ("Health check passed for service payments-api", "System Notification"),
189
- # Error
190
- ("System crashed due to disk I/O failure on node-3", "Error"),
191
- ("Database connection failed after 3 retries", "Error"),
192
- # Critical
193
- ("CRITICAL: data corruption detected on shard-14", "Critical Error"),
194
- ("kernel panic: not syncing: VFS: unable to mount root fs", "Critical Error"),
195
- # Should be None (unmatched)
196
- ("The 'BulkEmailSender' feature will be deprecated in v5.0.", None),
197
- ("Case escalation for ticket 7324 failed.", None),
198
- ]
199
-
200
- correct = 0
201
- print(f"{'Expected':<22} {'Got':<22} {'✓/✗'} | Log")
202
- print("─" * 100)
203
- for log, expected in test_cases:
204
- got = classify_with_regex(log)
205
- ok = got == expected
206
- correct += ok
207
- icon = "✓" if ok else "✗"
208
- print(f"{str(expected):<22} {str(got):<22} {icon} | {log[:55]}")
209
-
210
- print(f"\n{correct}/{len(test_cases)} correct")
211
-
212
- # Coverage demo
213
- all_logs = [log for log, _ in test_cases]
214
- cov = get_regex_coverage(all_logs)
215
- print(f"\nCoverage: {cov['coverage_pct']}% ({cov['matched']}/{cov['total']} matched)")
216
- print("Label breakdown:", cov["label_breakdown"])
217
-
218
- # Latency benchmark
219
- lat = benchmark_regex(all_logs * 100)
220
- print(f"\nLatency (p50/p95/p99): {lat['p50_ms']}ms / {lat['p95_ms']}ms / {lat['p99_ms']}ms")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/test/__init__.py DELETED
File without changes
HF/test/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (139 Bytes)
 
HF/test/__pycache__/test_regex.cpython-312-pytest-9.0.3.pyc DELETED
Binary file (36.5 kB)
 
HF/test/test_llm.py DELETED
@@ -1,197 +0,0 @@
1
- """
2
- tests/test_llm.py — Tests for Tier 3: LLM Classifier
3
-
4
- Tests verify:
5
- 1. Cache hit avoids API call
6
- 2. Retry logic on transient failure
7
- 3. Returns "Unclassified" on all error paths (never crashes pipeline)
8
- 4. Response normalization handles edge cases
9
- 5. No HF_TOKEN → returns Unclassified gracefully
10
-
11
- Run:
12
- pytest tests/test_llm.py -v
13
- """
14
- import sys, os
15
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
-
17
- import pytest
18
- from unittest.mock import patch, MagicMock, call
19
- import processor_llm as llm_module
20
- from processor_llm import (
21
- classify_with_llm, get_cache_stats,
22
- _cache_key, _cache_get, _cache_set, _normalize,
23
- _RESPONSE_CACHE,
24
- )
25
-
26
-
27
- # ── Setup / teardown ──────────────────────────────────────────────────────────
28
- @pytest.fixture(autouse=True)
29
- def clear_cache():
30
- """Clear LLM cache before each test."""
31
- _RESPONSE_CACHE.clear()
32
- yield
33
- _RESPONSE_CACHE.clear()
34
-
35
-
36
- # ── Normalize ─────────────────────────────────────────────────────────────────
37
- class TestNormalize:
38
- def test_exact_match(self):
39
- assert _normalize("Workflow Error") == "Workflow Error"
40
-
41
- def test_case_insensitive(self):
42
- assert _normalize("workflow error") == "Workflow Error"
43
-
44
- def test_deprecation_warning(self):
45
- assert _normalize("Deprecation Warning") == "Deprecation Warning"
46
-
47
- def test_random_text_returns_unclassified(self):
48
- assert _normalize("I don't know") == "Unclassified"
49
-
50
- def test_empty_string_returns_unclassified(self):
51
- assert _normalize("") == "Unclassified"
52
-
53
- def test_partial_match(self):
54
- # Model might return "Category: Workflow Error" → still should match
55
- assert _normalize("Category: Workflow Error") == "Workflow Error"
56
-
57
- def test_strips_quotes(self):
58
- assert _normalize('"Deprecation Warning"') == "Deprecation Warning"
59
-
60
-
61
- # ── Cache ─────────────────────────────────────────────────────────────────────
62
- class TestCache:
63
- def test_cache_miss_returns_none(self):
64
- assert _cache_get("totally new log message xyz") is None
65
-
66
- def test_cache_set_and_get(self):
67
- log = "test log message for caching"
68
- _cache_set(log, "Workflow Error")
69
- assert _cache_get(log) == "Workflow Error"
70
-
71
- def test_cache_key_is_deterministic(self):
72
- log = "same log every time"
73
- assert _cache_key(log) == _cache_key(log)
74
-
75
- def test_different_logs_different_keys(self):
76
- k1 = _cache_key("log message A")
77
- k2 = _cache_key("log message B")
78
- assert k1 != k2
79
-
80
- def test_cache_hit_avoids_api_call(self):
81
- log = "Case escalation for ticket 7324 failed."
82
- _cache_set(log, "Workflow Error") # Pre-populate cache
83
-
84
- with patch("processor_llm.InferenceClient") as mock_client:
85
- result = classify_with_llm(log)
86
-
87
- mock_client.assert_not_called()
88
- assert result == "Workflow Error"
89
-
90
- def test_cache_stats_size(self):
91
- _cache_set("log1", "Workflow Error")
92
- _cache_set("log2", "Deprecation Warning")
93
- stats = get_cache_stats()
94
- assert stats["size"] == 2
95
-
96
-
97
- # ── No token ──────────────────────────────────────────────────────────────────
98
- class TestNoToken:
99
- def test_no_hf_token_returns_unclassified(self, monkeypatch):
100
- monkeypatch.setattr(llm_module, "HF_TOKEN", None)
101
- result = classify_with_llm("Case escalation for ticket 1234.")
102
- assert result == "Unclassified"
103
-
104
-
105
- # ── Retry logic ───────────────────────────────────────────────────────────────
106
- class TestRetry:
107
- def _make_mock_client(self, responses):
108
- """responses: list of (Exception | str) — raised or returned in order."""
109
- call_count = [0]
110
-
111
- def mock_create(**kwargs):
112
- idx = call_count[0]
113
- call_count[0] += 1
114
- if isinstance(responses[idx], Exception):
115
- raise responses[idx]
116
- mock_resp = MagicMock()
117
- mock_resp.choices[0].message.content = responses[idx]
118
- return mock_resp
119
-
120
- mock_client = MagicMock()
121
- mock_client.chat.completions.create.side_effect = mock_create
122
- return mock_client
123
-
124
- def test_success_on_first_try(self, monkeypatch):
125
- monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
126
- monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0) # no sleep
127
-
128
- client = self._make_mock_client(["Workflow Error"])
129
-
130
- with patch("processor_llm.InferenceClient", return_value=client):
131
- result = classify_with_llm("Case escalation for ticket 7324.")
132
-
133
- assert result == "Workflow Error"
134
- assert client.chat.completions.create.call_count == 1
135
-
136
- def test_retry_on_transient_failure(self, monkeypatch):
137
- monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
138
- monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
139
- monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
140
-
141
- # Fail once, succeed on second attempt
142
- client = self._make_mock_client([
143
- ConnectionError("timeout"),
144
- "Deprecation Warning",
145
- ])
146
-
147
- with patch("processor_llm.InferenceClient", return_value=client), \
148
- patch("processor_llm.time.sleep"): # skip actual sleep
149
- result = classify_with_llm("Module will be retired in v4.")
150
-
151
- assert result == "Deprecation Warning"
152
- assert client.chat.completions.create.call_count == 2
153
-
154
- def test_all_retries_exhausted_returns_unclassified(self, monkeypatch):
155
- monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
156
- monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
157
- monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
158
-
159
- client = self._make_mock_client([
160
- ConnectionError("timeout"),
161
- ConnectionError("timeout"),
162
- ConnectionError("timeout"),
163
- ])
164
-
165
- with patch("processor_llm.InferenceClient", return_value=client), \
166
- patch("processor_llm.time.sleep"):
167
- result = classify_with_llm("Something that keeps failing.")
168
-
169
- assert result == "Unclassified"
170
- assert client.chat.completions.create.call_count == 3 # 1 initial + 2 retries
171
-
172
- def test_successful_result_gets_cached(self, monkeypatch):
173
- monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
174
- monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
175
-
176
- client = self._make_mock_client(["Workflow Error"])
177
-
178
- log = "Case escalation for unique ticket 99999."
179
- with patch("processor_llm.InferenceClient", return_value=client):
180
- result = classify_with_llm(log)
181
-
182
- assert result == "Workflow Error"
183
- # Should now be in cache
184
- assert _cache_get(log) == "Workflow Error"
185
-
186
-
187
- # ── Pipeline safety ───────────────────────────────────────────────────────────
188
- class TestPipelineSafety:
189
- def test_classify_never_raises(self, monkeypatch):
190
- """LLM failures must NEVER propagate as exceptions to the pipeline."""
191
- monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
192
- monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
193
-
194
- with patch("processor_llm.InferenceClient", side_effect=RuntimeError("catastrophic")):
195
- result = classify_with_llm("Any log message here.")
196
-
197
- assert result == "Unclassified" # Never raises, always returns string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/test/test_regex.py DELETED
@@ -1,222 +0,0 @@
1
- """
2
- tests/test_regex.py — Unit tests for Tier 1: Regex Classifier
3
-
4
- Tests verify:
5
- 1. Every pattern category has positive matches
6
- 2. No false positives on known non-matching logs
7
- 3. Pattern order doesn't cause mis-labeling
8
- 4. Coverage improvement (should be > 35% on balanced test set)
9
-
10
- Run:
11
- pytest tests/ -v
12
- pytest tests/test_regex.py -v --tb=short
13
- """
14
- import sys, os
15
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
-
17
- import pytest
18
- from processor_regex import classify_with_regex, get_regex_coverage
19
-
20
-
21
- # ── Positive cases: must match and return correct label ───────────────────────
22
- class TestHTTPStatus:
23
- def test_get_request(self):
24
- assert classify_with_regex("GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1") == "HTTP Status"
25
-
26
- def test_post_request(self):
27
- assert classify_with_regex("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05") == "HTTP Status"
28
-
29
- def test_delete_request(self):
30
- assert classify_with_regex("DELETE /v1/users/123 HTTP/1.1 status: 204 len: 0 time: 0.02") == "HTTP Status"
31
-
32
- def test_nova_style(self):
33
- assert classify_with_regex(
34
- "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19"
35
- ) == "HTTP Status"
36
-
37
- def test_status_code_only(self):
38
- assert classify_with_regex("API call /invoices returned HTTP 500 in 2.1s") == "HTTP Status"
39
-
40
- def test_patch_request(self):
41
- assert classify_with_regex("PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04") == "HTTP Status"
42
-
43
-
44
- class TestSecurityAlert:
45
- def test_login_failures(self):
46
- assert classify_with_regex("Multiple login failures occurred on user 6454 account") == "Security Alert"
47
-
48
- def test_ip_blocked(self):
49
- assert classify_with_regex("IP 192.168.133.114 blocked due to potential attack") == "Security Alert"
50
-
51
- def test_brute_force(self):
52
- assert classify_with_regex("Alert: brute force login attempt from 10.0.0.5 detected") == "Security Alert"
53
-
54
- def test_admin_escalation(self):
55
- assert classify_with_regex("Admin access escalation detected for user 9429") == "Security Alert"
56
-
57
- def test_privilege_elevation(self):
58
- assert classify_with_regex("Privilege elevation detected for user Admin99") == "Security Alert"
59
-
60
- def test_ddos(self):
61
- assert classify_with_regex("Potential DDoS attack from 1.2.3.4 detected") == "Security Alert"
62
-
63
- def test_suspicious_activity(self):
64
- assert classify_with_regex("Suspicious login activity detected from 203.0.113.1") == "Security Alert"
65
-
66
- def test_unauthorized_access(self):
67
- assert classify_with_regex("Unauthorized access to data was attempted by User123") == "Security Alert"
68
-
69
-
70
- class TestUserAction:
71
- def test_login(self):
72
- assert classify_with_regex("User User12345 logged in.") == "User Action"
73
-
74
- def test_logout(self):
75
- assert classify_with_regex("User User99 logged out.") == "User Action"
76
-
77
- def test_account_created(self):
78
- assert classify_with_regex("Account with ID 456 created by Admin.") == "User Action"
79
-
80
- def test_password_changed(self):
81
- assert classify_with_regex("User User42 changed password successfully.") == "User Action"
82
-
83
- def test_new_user_registered(self):
84
- assert classify_with_regex("New user User9999 registered with email u@e.com.") == "User Action"
85
-
86
-
87
- class TestSystemNotification:
88
- def test_backup_completed(self):
89
- assert classify_with_regex("Backup completed successfully.") == "System Notification"
90
-
91
- def test_backup_started(self):
92
- assert classify_with_regex("Backup started at 2024-01-14 03:00:00.") == "System Notification"
93
-
94
- def test_system_updated(self):
95
- assert classify_with_regex("System updated to version 4.2.1.") == "System Notification"
96
-
97
- def test_disk_cleanup(self):
98
- assert classify_with_regex("Disk cleanup completed successfully.") == "System Notification"
99
-
100
- def test_service_restarted(self):
101
- assert classify_with_regex("Service payments restarted successfully.") == "System Notification"
102
-
103
- def test_cpu_usage(self):
104
- assert classify_with_regex("CPU usage at 98% for the last 10 minutes on node-7") == "System Notification"
105
-
106
- def test_health_check_passed(self):
107
- assert classify_with_regex("Health check passed for service auth-api") == "System Notification"
108
-
109
- def test_cron_executed(self):
110
- assert classify_with_regex("Cron job cleanup-tokens executed successfully.") == "System Notification"
111
-
112
- def test_certificate_renewed(self):
113
- assert classify_with_regex("Certificate renewed successfully for domain api.example.com") == "System Notification"
114
-
115
-
116
- class TestError:
117
- def test_system_crashed(self):
118
- assert classify_with_regex("System crashed due to disk I/O failure on node-3") == "Error"
119
-
120
- def test_db_connection_failed(self):
121
- assert classify_with_regex("Database connection failed after 3 retries") == "Error"
122
-
123
- def test_service_down(self):
124
- assert classify_with_regex("Service payments-api is down") == "Error"
125
-
126
- def test_request_timeout(self):
127
- assert classify_with_regex("Connection timed out after 30s on shard-7") == "Error"
128
-
129
-
130
- class TestCriticalError:
131
- def test_critical_prefix(self):
132
- assert classify_with_regex("CRITICAL: data corruption detected on shard-14") == "Critical Error"
133
-
134
- def test_fatal(self):
135
- assert classify_with_regex("FATAL: kernel panic — system halted") == "Critical Error"
136
-
137
- def test_data_loss(self):
138
- assert classify_with_regex("data loss detected during write to replica-3") == "Critical Error"
139
-
140
- def test_oom(self):
141
- assert classify_with_regex("out-of-memory error: process killed (OOM)") == "Critical Error"
142
-
143
-
144
- # ── Negative cases: must return None (don't mis-classify) ────────────────────
145
- class TestNegativeCases:
146
- @pytest.mark.parametrize("log", [
147
- "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
148
- "The 'ReportGenerator' module will be retired in version 4.0.",
149
- "The 'BulkEmailSender' feature will be deprecated in v5.0.",
150
- "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
151
- "Hey bro chill ya!",
152
- ])
153
- def test_no_false_positives(self, log):
154
- result = classify_with_regex(log)
155
- assert result is None, f"Expected None but got '{result}' for: {log[:80]}"
156
-
157
-
158
- # ── Coverage test ─────────────────────────────────────────────────────────────
159
- class TestCoverage:
160
- BALANCED_SAMPLE = [
161
- # HTTP (6)
162
- "GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1",
163
- "POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05",
164
- "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 200",
165
- "DELETE /v1/items/99 HTTP/1.1 status: 204 len: 0 time: 0.01",
166
- "PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04",
167
- "API call /invoices returned HTTP 500 in 2.1s",
168
- # Security (6)
169
- "Multiple login failures occurred on user 6454 account",
170
- "IP 10.0.0.5 blocked due to potential attack",
171
- "Brute force login attempt from 192.168.1.1 detected",
172
- "Admin access escalation detected for user 9429",
173
- "Suspicious login activity detected from 1.2.3.4",
174
- "Potential DDoS attack from 203.0.113.1 detected",
175
- # User Action (5)
176
- "User User12345 logged in.",
177
- "User User99 logged out.",
178
- "Account with ID 456 created by Admin.",
179
- "User User42 changed password successfully.",
180
- "New user User9999 registered with email u@e.com.",
181
- # System Notification (5)
182
- "Backup completed successfully.",
183
- "System updated to version 4.2.1.",
184
- "Disk cleanup completed successfully.",
185
- "CPU usage at 98% for the last 10 minutes on node-7",
186
- "Cron job cleanup-tokens executed successfully.",
187
- # Error (4)
188
- "System crashed due to disk I/O failure on node-3",
189
- "Database connection failed after 3 retries",
190
- "Service auth-api is down",
191
- "Connection timed out after 30s",
192
- # Critical (3)
193
- "CRITICAL: data corruption detected on shard-14",
194
- "FATAL: kernel panic — system halted",
195
- "data loss detected during write to replica-3",
196
- # LegacyCRM / unmatched (5) → should NOT match
197
- "Case escalation for ticket ID 7324 failed.",
198
- "The 'BulkEmailSender' feature will be deprecated in v5.0.",
199
- "Invoice generation aborted for order ID 8910.",
200
- "Workflow stalled at approval step 3 for case 9021.",
201
- "SLA breach detected for case ID 7701 (P1 4h breach).",
202
- ]
203
-
204
- def test_coverage_above_35_percent(self):
205
- result = get_regex_coverage(self.BALANCED_SAMPLE)
206
- pct = result["coverage_pct"]
207
- # 29 of 34 logs should match regex (29/34 = 85%)
208
- # 5 LegacyCRM logs should NOT match → ~85% expected
209
- assert pct >= 35.0, (
210
- f"Regex coverage {pct}% is below 35% minimum. "
211
- f"Check pattern additions in processor_regex.py"
212
- )
213
-
214
- def test_no_false_positive_on_legacy_logs(self):
215
- legacy_logs = [
216
- "Case escalation for ticket ID 7324 failed.",
217
- "The 'BulkEmailSender' feature will be deprecated in v5.0.",
218
- "Invoice generation aborted for order ID 8910.",
219
- ]
220
- for log in legacy_logs:
221
- result = classify_with_regex(log)
222
- assert result is None, f"False positive: '{result}' on legacy log: {log}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HF/test/test_routing.py DELETED
@@ -1,179 +0,0 @@
1
- """
2
- tests/test_routing.py — Pipeline Routing Tests
3
-
4
- Tests verify:
5
- 1. LegacyCRM source → LLM tier (always)
6
- 2. Regex match → Regex tier (never reaches BERT)
7
- 3. High-confidence BERT → BERT tier
8
- 4. Unclassified BERT → LLM fallback tier
9
- 5. Result schema is complete (all keys present)
10
-
11
- Run:
12
- pytest tests/test_routing.py -v
13
- """
14
- import sys, os
15
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
-
17
- import pytest
18
- from unittest.mock import patch, MagicMock
19
- from classify import classify_log, classify_logs, pipeline_summary
20
-
21
-
22
- # ── Fixtures ──────────────────────────────────────────────────────────────────
23
- REGEX_HIT_LOG = ("ModernCRM", "User User123 logged in.")
24
- REGEX_HIT_LOG2 = ("BillingSystem", "GET /api/v1/invoices HTTP/1.1 status: 200 len: 100 time: 0.1")
25
- LEGACY_LOG = ("LegacyCRM", "Case escalation for ticket 9021 failed.")
26
- NON_REGEX_LOG = ("ModernHR", "The inventory sync completed without matching standard patterns.")
27
-
28
-
29
- # ── Schema completeness ───────────────────────────────────────────────────────
30
- class TestResultSchema:
31
- def test_classify_log_has_required_keys(self):
32
- with patch("classify.bert_batch", return_value=[("Error", 0.95)]):
33
- result = classify_log(*NON_REGEX_LOG)
34
- assert "label" in result
35
- assert "tier" in result
36
- assert "confidence" in result
37
- assert "latency_ms" in result
38
-
39
- def test_latency_ms_is_positive(self):
40
- result = classify_log(*REGEX_HIT_LOG)
41
- assert result["latency_ms"] > 0
42
-
43
- def test_confidence_is_float_or_none(self):
44
- result = classify_log(*REGEX_HIT_LOG)
45
- assert result["confidence"] is None or isinstance(result["confidence"], float)
46
-
47
-
48
- # ── Regex tier routing ─────────────────────────────────────────────────────────
49
- class TestRegexRouting:
50
- def test_regex_match_returns_regex_tier(self):
51
- result = classify_log(*REGEX_HIT_LOG)
52
- assert result["tier"] == "Regex"
53
-
54
- def test_regex_match_has_full_confidence(self):
55
- result = classify_log(*REGEX_HIT_LOG)
56
- assert result["confidence"] == 1.0
57
-
58
- def test_regex_match_http_log(self):
59
- result = classify_log(*REGEX_HIT_LOG2)
60
- assert result["tier"] == "Regex"
61
- assert result["label"] == "HTTP Status"
62
-
63
- def test_regex_match_skips_bert(self):
64
- """If regex matches, bert_batch should never be called."""
65
- with patch("classify.bert_batch") as mock_bert:
66
- classify_log(*REGEX_HIT_LOG)
67
- mock_bert.assert_not_called()
68
-
69
-
70
- # ── LegacyCRM routing ─────────────────────────────────────────────────────────
71
- class TestLegacyCRMRouting:
72
- def test_legacy_crm_goes_to_llm(self):
73
- with patch("classify.classify_with_llm", return_value="Workflow Error") as mock_llm:
74
- result = classify_log(*LEGACY_LOG)
75
- assert result["tier"] == "LLM"
76
- mock_llm.assert_called_once()
77
-
78
- def test_legacy_crm_skips_regex(self):
79
- """LegacyCRM should skip regex entirely — go straight to LLM."""
80
- with patch("classify.classify_with_regex") as mock_regex, \
81
- patch("classify.classify_with_llm", return_value="Workflow Error"):
82
- classify_log(*LEGACY_LOG)
83
- mock_regex.assert_not_called()
84
-
85
- def test_legacy_crm_skips_bert(self):
86
- with patch("classify.bert_batch") as mock_bert, \
87
- patch("classify.classify_with_llm", return_value="Workflow Error"):
88
- classify_log(*LEGACY_LOG)
89
- mock_bert.assert_not_called()
90
-
91
-
92
- # ── BERT routing ──────────────────────────────────────────────────────────────
93
- class TestBERTRouting:
94
- def test_high_confidence_bert_stays_bert(self):
95
- with patch("classify.bert_batch", return_value=[("Security Alert", 0.95)]):
96
- result = classify_log(*NON_REGEX_LOG)
97
- assert result["tier"] == "BERT"
98
- assert result["label"] == "Security Alert"
99
- assert result["confidence"] == pytest.approx(0.95)
100
-
101
- def test_low_confidence_bert_falls_back_to_llm(self):
102
- """BERT returning 'Unclassified' should escalate to LLM."""
103
- with patch("classify.bert_batch", return_value=[("Unclassified", 0.20)]), \
104
- patch("classify.classify_with_llm", return_value="Workflow Error") as mock_llm:
105
- result = classify_log(*NON_REGEX_LOG)
106
- assert "LLM" in result["tier"]
107
- mock_llm.assert_called_once()
108
-
109
- def test_bert_batch_called_for_non_regex_log(self):
110
- with patch("classify.bert_batch", return_value=[("Error", 0.88)]) as mock_bert:
111
- classify_log(*NON_REGEX_LOG)
112
- mock_bert.assert_called_once()
113
-
114
-
115
- # ── Batch routing ──────────────────────────────────────────────────────────────
116
- class TestBatchRouting:
117
- def test_batch_returns_correct_length(self):
118
- logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2, LEGACY_LOG]
119
- with patch("classify.classify_with_llm", return_value="Workflow Error"):
120
- results = classify_logs(logs)
121
- assert len(results) == len(logs)
122
-
123
- def test_batch_mixed_tiers(self):
124
- logs = [
125
- REGEX_HIT_LOG, # → Regex
126
- ("ModernCRM", "GET /api HTTP/1.1 status: 200"), # → Regex (HTTP)
127
- LEGACY_LOG, # → LLM
128
- ]
129
- with patch("classify.classify_with_llm", return_value="Workflow Error"):
130
- results = classify_logs(logs)
131
-
132
- assert results[0]["tier"] == "Regex"
133
- assert results[1]["tier"] == "Regex"
134
- assert results[2]["tier"] == "LLM"
135
-
136
- def test_pipeline_summary_structure(self):
137
- logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2]
138
- results = classify_logs(logs)
139
- summary = pipeline_summary(results)
140
-
141
- assert "total" in summary
142
- assert "tier_stats" in summary
143
- assert "label_counts" in summary
144
- assert summary["total"] == 2
145
-
146
- def test_pipeline_summary_tier_pcts_sum_to_100(self):
147
- logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2, REGEX_HIT_LOG]
148
- results = classify_logs(logs)
149
- summary = pipeline_summary(results)
150
- total_pct = sum(s["pct"] for s in summary["tier_stats"].values())
151
- assert abs(total_pct - 100.0) < 1.0, f"Tier pcts don't sum to 100: {total_pct}"
152
-
153
-
154
- # ── Edge cases ────────────────────────────────────────────────────────────────
155
- class TestEdgeCases:
156
- def test_empty_batch_returns_empty(self):
157
- results = classify_logs([])
158
- assert results == []
159
-
160
- def test_single_log_batch(self):
161
- with patch("classify.bert_batch", return_value=[("Error", 0.85)]):
162
- results = classify_logs([NON_REGEX_LOG])
163
- assert len(results) == 1
164
-
165
- def test_all_regex_batch_never_calls_bert(self):
166
- logs = [REGEX_HIT_LOG] * 10
167
- with patch("classify.bert_batch") as mock_bert:
168
- classify_logs(logs)
169
- mock_bert.assert_not_called()
170
-
171
- def test_llm_failure_returns_unclassified(self):
172
- """LLM crashing should return Unclassified, not raise."""
173
- with patch("classify.classify_with_llm", side_effect=Exception("LLM down")):
174
- try:
175
- result = classify_log(*LEGACY_LOG)
176
- # If it doesn't raise, Unclassified should be label
177
- assert result["label"] == "Unclassified"
178
- except Exception:
179
- pytest.fail("classify_log raised an exception — should have returned Unclassified")