Spaces:
Sleeping
Sleeping
Delete HF
Browse files- HF/app_gradio.py +0 -187
- HF/benchmark.py +0 -214
- HF/error_analysis.py +0 -250
- HF/models/log_classifier.joblib +0 -3
- HF/onnx_model/config.json +0 -24
- HF/onnx_model/special_tokens_map.json +0 -37
- HF/onnx_model/tokenizer.json +0 -0
- HF/onnx_model/tokenizer_config.json +0 -65
- HF/onnx_model/vocab.txt +0 -0
- HF/processor_llm.py +0 -192
- HF/processor_regex.py +0 -220
- HF/test/__init__.py +0 -0
- HF/test/__pycache__/__init__.cpython-312.pyc +0 -0
- HF/test/__pycache__/test_regex.cpython-312-pytest-9.0.3.pyc +0 -0
- HF/test/test_llm.py +0 -197
- HF/test/test_regex.py +0 -222
- HF/test/test_routing.py +0 -179
HF/app_gradio.py
DELETED
|
@@ -1,187 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Log Classification System — HuggingFace Spaces
|
| 3 |
-
Gradio UI for the 3-tier hybrid log classification pipeline.
|
| 4 |
-
"""
|
| 5 |
-
from __future__ import annotations
|
| 6 |
-
import io
|
| 7 |
-
import time
|
| 8 |
-
import pandas as pd
|
| 9 |
-
import gradio as gr
|
| 10 |
-
from classify import classify_log, classify_csv
|
| 11 |
-
|
| 12 |
-
# ── Source options ──────────────────────────────────────────────────────────
|
| 13 |
-
# Source systems offered in the "Source System" dropdown.
SOURCES = [
    "ModernCRM",
    "ModernHR",
    "BillingSystem",
    "AnalyticsEngine",
    "ThirdPartyAPI",
    "LegacyCRM",
]

# Emoji marker displayed in front of each pipeline tier name.
TIER_COLORS = {
    "Regex": "🟢",
    "BERT": "🔵",
    "LLM": "🟡",
    "LLM (fallback)": "🟠",
}

# [source, log message] pairs used to pre-fill the single-log form.
EXAMPLE_LOGS = [
    ["ModernCRM", "User User12345 logged in."],
    ["ModernHR", "Multiple login failures occurred on user 6454 account"],
    ["BillingSystem", "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
    ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
    ["LegacyCRM", "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
    ["LegacyCRM", "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
]
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
# ── Single log tab ──────────────────────────────────────────────────────────
|
| 40 |
-
def classify_single(source: str, log_message: str):
    """Classify one log message and format the result for display.

    Args:
        source: Name of the system that emitted the log.
        log_message: Raw log text. ``None`` or whitespace-only text is
            treated as "nothing to classify".

    Returns:
        A 4-tuple of strings ``(label, tier, confidence, latency)``. Every
        field is ``"—"`` when there is nothing to classify.
    """
    # Guard against a missing value as well as blank text, so the handler
    # never fails with AttributeError on None.
    if log_message is None or not log_message.strip():
        return "—", "—", "—", "—"

    started = time.perf_counter()
    result = classify_log(source, log_message)
    latency_ms = (time.perf_counter() - started) * 1000

    tier = result["tier"]
    raw_confidence = result["confidence"]
    # A confidence of None is rendered as "N/A" rather than a percentage.
    confidence = f"{raw_confidence:.1%}" if raw_confidence is not None else "N/A"
    icon = TIER_COLORS.get(tier, "⚪")

    return (
        result["label"],
        f"{icon} {tier}",
        confidence,
        f"{latency_ms:.1f} ms",
    )
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
# ── Batch CSV tab ───────────────────────────────────────────────────────────
|
| 62 |
-
def classify_batch(file):
    """Classify every row of an uploaded CSV and summarise the outcome.

    Args:
        file: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        ``(output_path, stats)``: the path of the classified CSV (``None`` on
        failure) and a human-readable summary or error message.
    """
    if file is None:
        return None, "⚠️ Please upload a CSV file."

    # Function-scope imports keep this block self-contained.
    import tempfile
    from pathlib import Path

    # Accept both file-like wrappers (path in .name) and bare path strings.
    input_path = getattr(file, "name", file)

    try:
        # Each request writes into its own directory so that concurrent
        # uploads cannot overwrite one another's result file.
        output_target = str(
            Path(tempfile.mkdtemp(prefix="log_classify_")) / "classified_output.csv"
        )
        output_path, df = classify_csv(input_path, output_target)
    except ValueError as e:
        return None, f"⚠️ {e}"
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return None, f"❌ Error: {e}"

    total = len(df)
    tier_counts = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()

    tier_lines = "\n".join(
        f" {TIER_COLORS.get(k,'⚪')} {k}: {v} ({v/total:.0%})"
        for k, v in tier_counts.items()
    )
    label_lines = "\n".join(f" • {k}: {v}" for k, v in label_counts.items())

    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}"
    )

    return output_path, stats
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
# ── UI ──────────────────────────────────────────────────────────────────────
|
| 90 |
-
# NOTE(review): nesting and markdown indentation were reconstructed from a
# whitespace-stripped listing — confirm the layout against the running app.
with gr.Blocks(title="Log Classification System", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🔍 Log Classification System
    **3-tier hybrid pipeline** → 🟢 Regex · 🔵 BERT + LogReg · 🟡 LLM
    Built to mimic production enterprise log monitoring architecture.
    """)

    with gr.Tabs():

        # Tab 1: classify a single log message typed into the form.
        with gr.Tab("Single Log"):
            with gr.Row():
                source_input = gr.Dropdown(
                    choices=SOURCES,
                    value="ModernCRM",
                    label="Source System",
                )
                log_input = gr.Textbox(
                    label="Log Message",
                    placeholder="Paste a log message here...",
                    lines=3,
                )

            classify_btn = gr.Button("Classify", variant="primary")

            with gr.Row():
                label_out = gr.Textbox(label="🏷️ Predicted Label", interactive=False)
                tier_out = gr.Textbox(label="⚙️ Tier Used", interactive=False)
                confidence_out = gr.Textbox(label="📈 Confidence", interactive=False)
                latency_out = gr.Textbox(label="⏱️ Latency", interactive=False)

            # The four outputs line up with the tuple returned by classify_single.
            classify_btn.click(
                fn=classify_single,
                inputs=[source_input, log_input],
                outputs=[label_out, tier_out, confidence_out, latency_out],
            )

            gr.Examples(
                examples=EXAMPLE_LOGS,
                inputs=[source_input, log_input],
                label="📋 Example Logs (click to try)",
            )

        # Tab 2: classify a whole CSV and offer the result for download.
        with gr.Tab("Batch CSV Upload"):
            gr.Markdown("""
            Upload a CSV with columns: **`source`**, **`log_message`**
            Download the classified CSV with added columns: `predicted_label`, `tier_used`, `confidence`.
            """)
            with gr.Row():
                with gr.Column():
                    csv_input = gr.File(label="📂 Upload CSV", file_types=[".csv"])
                    batch_btn = gr.Button("Classify All", variant="primary")
                with gr.Column():
                    csv_output = gr.File(label="📥 Download Classified CSV")
                    stats_out = gr.Textbox(label="📊 Stats", lines=12, interactive=False)

            # The two outputs line up with the tuple returned by classify_batch.
            batch_btn.click(
                fn=classify_batch,
                inputs=[csv_input],
                outputs=[csv_output, stats_out],
            )

            gr.Markdown("""
            **Sample CSV format:**
            ```
            source,log_message
            ModernCRM,User User123 logged in.
            LegacyCRM,Case escalation for ticket ID 7324 failed.
            BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
            ```
            """)

        # Tab 3: static description of the pipeline.
        with gr.Tab("Architecture"):
            gr.Markdown("""
            ## 🏗️ 3-Tier Hybrid Pipeline

            | Tier | Method | Coverage | Latency | When Used |
            |------|--------|----------|---------|-----------|
            | 🟢 Regex | Python `re` patterns | ~21% | < 1ms | Fixed patterns (login, backup, etc.) |
            | 🔵 BERT | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories with 150+ samples |
            | 🟡 LLM | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM logs, rare patterns |

            ## 📊 Model Performance (from training)
            - **BERT + LogReg** trained on 2,410 synthetic enterprise logs
            - **Confidence threshold**: 0.5 (below → escalate to LLM)
            - **Source-aware routing**: `LegacyCRM` bypasses ML entirely (only 7 training samples)

            ## 🔑 Environment Variables
            | Secret | Required For |
            |--------|-------------|
            | `HF_TOKEN` | LLM inference (LegacyCRM logs) |
            """)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/benchmark.py
DELETED
|
@@ -1,214 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
benchmark.py — Full Benchmark Harness
|
| 3 |
-
|
| 4 |
-
Outputs a CSV with columns:
|
| 5 |
-
batch_size, mode, throughput_logs_sec, p50_ms, p95_ms, p99_ms, cpu_pct, ram_mb, tier_regex_pct, tier_bert_pct, tier_llm_pct
|
| 6 |
-
|
| 7 |
-
Usage:
|
| 8 |
-
python benchmark.py --logs 5000 --output benchmark_results.csv
|
| 9 |
-
|
| 10 |
-
What it measures:
|
| 11 |
-
- Batch size sweep: 1, 8, 16, 32, 64, 128
|
| 12 |
-
- Throughput (logs/sec)
|
| 13 |
-
- Latency: p50 / p95 / p99 (per-log)
|
| 14 |
-
- CPU and RAM during inference
|
| 15 |
-
- Tier distribution (Regex % / BERT % / LLM %)
|
| 16 |
-
|
| 17 |
-
Google interview talking point:
|
| 18 |
-
"I designed a benchmark harness that sweeps batch sizes and measures
|
| 19 |
-
latency percentiles + resource utilization, so I can show the
|
| 20 |
-
throughput-latency tradeoff curve empirically."
|
| 21 |
-
"""
|
| 22 |
-
from __future__ import annotations
|
| 23 |
-
import argparse
|
| 24 |
-
import csv
|
| 25 |
-
import os
|
| 26 |
-
import random
|
| 27 |
-
import sys
|
| 28 |
-
import time
|
| 29 |
-
import statistics
|
| 30 |
-
from pathlib import Path
|
| 31 |
-
|
| 32 |
-
import psutil
|
| 33 |
-
|
| 34 |
-
# ── Synthetic log generator (no external deps needed) ────────────────────────
|
| 35 |
-
# Source systems that appear in the synthetic benchmark data.
SOURCES = [
    "ModernCRM",
    "ModernHR",
    "BillingSystem",
    "AnalyticsEngine",
    "ThirdPartyAPI",
]

# (source, message template) pairs; the {placeholders} are substituted by _fill().
_LOG_TEMPLATES = [
    ("ModernCRM", "User User{id} logged in."),
    ("ModernCRM", "IP {ip} blocked due to potential attack"),
    ("ModernHR", "Multiple login failures occurred on user {id} account"),
    ("ModernHR", "Admin access escalation detected for user {id}"),
    ("BillingSystem", "GET /api/v2/invoices HTTP/1.1 status: {code} len: {len} time: {t}"),
    ("BillingSystem", "POST /api/v1/payments HTTP/1.1 status: {code} len: {len} time: {t}"),
    ("AnalyticsEngine", "System crashed due to disk I/O failure on node-{n}"),
    ("AnalyticsEngine", "Backup completed successfully."),
    ("ThirdPartyAPI", "Service payments-api is unreachable after 3 retries"),
    ("ThirdPartyAPI", "CPU usage at {pct}% for the last 10 minutes on node-{n}"),
    ("AnalyticsEngine", "CRITICAL: data corruption detected on shard-{n}"),
    ("ModernCRM", "Health check passed for service {svc}"),
]
|
| 51 |
-
|
| 52 |
-
def _rand_ip():
    """Return a random dotted-quad IPv4-style string.

    The octets are drawn in order from the ranges 10-192, 0-255, 0-255 and
    1-254 using the module-level ``random`` generator.
    """
    # A list literal evaluates left to right, so the draw order is fixed.
    octets = [
        random.randint(10, 192),
        random.randint(0, 255),
        random.randint(0, 255),
        random.randint(1, 254),
    ]
    return ".".join(str(octet) for octet in octets)
|
| 54 |
-
|
| 55 |
-
def _fill(template: str) -> str:
    """Substitute random values for the ``{placeholder}`` tokens in ``template``.

    A value is drawn for every known placeholder on each call, whether or not
    the template contains it, so the random stream advances by the same
    amount for every template.
    """
    # Dict values are evaluated top to bottom, fixing the order of the draws.
    substitutions = {
        "{id}": str(random.randint(100, 99999)),
        "{ip}": _rand_ip(),
        "{code}": random.choice(["200", "201", "400", "404", "500", "503"]),
        "{len}": str(random.randint(100, 9999)),
        "{t}": f"{random.uniform(0.01, 2.5):.2f}",
        "{n}": str(random.randint(1, 20)),
        "{pct}": str(random.randint(60, 99)),
        "{svc}": random.choice(["auth-api", "billing", "analytics", "events"]),
    }
    filled = template
    for placeholder, value in substitutions.items():
        filled = filled.replace(placeholder, value)
    return filled
|
| 66 |
-
|
| 67 |
-
def generate_logs(n: int) -> list[tuple[str, str]]:
    """Build ``n`` synthetic ``(source, message)`` pairs.

    The module-level random generator is re-seeded with 42 on every call, so
    the same ``n`` yields the same logs each time.
    """
    random.seed(42)
    # All templates are picked first; placeholders are filled afterwards.
    picks = random.choices(_LOG_TEMPLATES, k=n)
    generated = []
    for source, template in picks:
        generated.append((source, _fill(template)))
    return generated
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
# ── Benchmark runner ─────────────────────────────────────────────────────────
|
| 76 |
-
def run_benchmark(
    logs: list[tuple[str, str]],
    batch_sizes: list[int],
    output_csv: str,
    warmup_n: int = 50,
) -> list[dict]:
    """Sweep batch sizes over ``logs`` and record throughput, latency and resources.

    Args:
        logs: ``(source, message)`` pairs to classify.
        batch_sizes: Batch sizes to try; each one processes all of ``logs``.
        output_csv: Destination path for the result rows (parent directories
            are created as needed).
        warmup_n: Number of leading logs classified once before timing starts.

    Returns:
        One dict per batch size that produced at least one batch. The list is
        empty when nothing could be measured; no CSV is written in that case.
    """
    from classify import classify_logs, pipeline_summary

    proc = psutil.Process(os.getpid())
    rows: list[dict] = []

    # Warmup (model load, JIT, etc.)
    print(f"🔥 Warming up with {warmup_n} logs…")
    classify_logs(logs[:warmup_n])

    # cpu_percent(interval=None) measures since the previous call and returns
    # a meaningless 0.0 the first time; prime it so that value is not sampled.
    proc.cpu_percent(interval=None)

    for bs in batch_sizes:
        # Slice logs into batches of size `bs`
        batches = [logs[i:i + bs] for i in range(0, len(logs), bs)]
        if not batches:
            continue

        per_log_latencies: list[float] = []
        cpu_samples: list[float] = []
        ram_samples: list[float] = []
        all_results: list[dict] = []

        print(f"\n📐 Batch size = {bs} ({len(batches)} batches × {bs} logs)…")

        wall_start = time.perf_counter()

        for batch in batches:
            t0 = time.perf_counter()
            results = classify_logs(batch)
            t1 = time.perf_counter()

            # Per-log latency is the batch time spread evenly over its logs.
            per_log_ms = (t1 - t0) * 1000 / len(batch)
            per_log_latencies.extend([per_log_ms] * len(batch))
            all_results.extend(results)

            # Resource snapshot
            cpu_samples.append(proc.cpu_percent(interval=None))
            ram_samples.append(proc.memory_info().rss / 1_048_576)  # MB

        wall_elapsed = time.perf_counter() - wall_start
        total_logs = len(logs)
        throughput = round(total_logs / wall_elapsed, 1)

        per_log_latencies.sort()
        n = len(per_log_latencies)

        tier_stats = pipeline_summary(all_results)["tier_stats"]

        def tier_pct(name):
            # Tiers absent from the summary count as 0%.
            return tier_stats.get(name, {}).get("pct", 0.0)

        row = {
            "batch_size": bs,
            "total_logs": total_logs,
            "elapsed_sec": round(wall_elapsed, 2),
            "throughput_logs_sec": throughput,
            "p50_ms": round(statistics.median(per_log_latencies), 3),
            "p95_ms": round(per_log_latencies[min(int(n * 0.95), n - 1)], 3),
            "p99_ms": round(per_log_latencies[min(int(n * 0.99), n - 1)], 3),
            "mean_ms": round(statistics.mean(per_log_latencies), 3),
            "cpu_mean_pct": round(statistics.mean(cpu_samples), 1) if cpu_samples else 0,
            "cpu_max_pct": round(max(cpu_samples), 1) if cpu_samples else 0,
            "ram_mean_mb": round(statistics.mean(ram_samples), 1) if ram_samples else 0,
            "ram_max_mb": round(max(ram_samples), 1) if ram_samples else 0,
            "tier_regex_pct": tier_pct("Regex"),
            "tier_bert_pct": tier_pct("BERT"),
            "tier_llm_pct": tier_pct("LLM") + tier_pct("LLM (fallback)"),
        }
        rows.append(row)

        print(f" ✅ Throughput: {throughput} logs/sec | "
              f"p50={row['p50_ms']}ms p95={row['p95_ms']}ms p99={row['p99_ms']}ms | "
              f"CPU={row['cpu_mean_pct']}% RAM={row['ram_mean_mb']}MB")
        print(f" 📊 Tiers: Regex={row['tier_regex_pct']}% "
              f"BERT={row['tier_bert_pct']}% "
              f"LLM={row['tier_llm_pct']}%")

    # With no rows there is no header to derive; skip the file instead of
    # failing on rows[0].
    if not rows:
        print("\n⚠️ No benchmark rows produced (no logs or no usable batch sizes); nothing written.")
        return rows

    # Write CSV
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=rows[0].keys())
        writer.writeheader()
        writer.writerows(rows)

    print(f"\n✅ Benchmark results saved → {output_csv}")
    return rows
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
# ── Scaling stress test ──────────────────────────────────────────────────────
|
| 171 |
-
def stress_test(sizes: list[int] | None = None) -> None:
    """Quick throughput check at different total log counts.

    Args:
        sizes: Total log counts to time. Defaults to 5k, 20k, 50k and 100k.
            Non-positive entries are skipped.
    """
    from classify import classify_logs

    # A None sentinel replaces the shared mutable list default.
    if sizes is None:
        sizes = [5_000, 20_000, 50_000, 100_000]

    print("\n🔥 Stress Test — Scaling")
    print(f"{'N logs':>10} {'Elapsed(s)':>12} {'Throughput':>12} {'p95_ms':>10}")
    print("─" * 50)

    for n in sizes:
        if n <= 0:
            # Nothing to time, and n == 0 would divide by zero below.
            continue
        logs = generate_logs(n)
        t0 = time.perf_counter()
        classify_logs(logs)
        elapsed = time.perf_counter() - t0
        tput = n / elapsed
        # Rough p95 approximation: mean per-log time scaled by 1.5. This is
        # a heuristic, not a measured percentile.
        p95_approx = (elapsed / n * 1000) * 1.5
        print(f"{n:>10,} {elapsed:>12.2f}s {tput:>12.1f}/s {p95_approx:>10.1f}ms")
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
# ── CLI ──────────────────────────────────────────────────────────────────────
|
| 191 |
-
def main():
    """Parse command-line options, run the batch-size sweep, then the optional stress test."""
    parser = argparse.ArgumentParser(description="Log pipeline benchmark harness")
    parser.add_argument("--logs", type=int, default=5_000,
                        help="Number of logs to benchmark (default: 5000)")
    parser.add_argument("--output", default="benchmark_results.csv",
                        help="Output CSV path")
    parser.add_argument("--stress", action="store_true",
                        help="Run scaling stress test (5k, 20k, 50k, 100k)")
    parser.add_argument("--batches", default="1,8,16,32,64,128",
                        help="Comma-separated batch sizes to sweep")
    args = parser.parse_args()

    # Empty tokens (e.g. a trailing comma in "1,8,") are ignored rather than
    # crashing in int("").
    try:
        batch_sizes = [int(tok) for tok in args.batches.split(",") if tok.strip()]
    except ValueError:
        parser.error(f"--batches must be comma-separated integers, got {args.batches!r}")
    # A zero batch size would fail later inside range(); reject it up front.
    if not batch_sizes or any(bs <= 0 for bs in batch_sizes):
        parser.error("--batches needs at least one batch size, all of them positive")
    if args.logs <= 0:
        parser.error("--logs must be a positive integer")

    logs = generate_logs(args.logs)
    print(f"📦 Generated {len(logs):,} synthetic logs")

    run_benchmark(logs, batch_sizes, args.output)

    if args.stress:
        stress_test()


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/error_analysis.py
DELETED
|
@@ -1,250 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
error_analysis.py — Deep Dive into Unclassified / Misclassified Logs
|
| 3 |
-
|
| 4 |
-
This script addresses the 76 unclassified logs from the 20k run.
|
| 5 |
-
It answers:
|
| 6 |
-
1. What do these logs look like? (print + group)
|
| 7 |
-
2. Why did the model fail? (pattern analysis)
|
| 8 |
-
3. What should we do? (actionable fix suggestions)
|
| 9 |
-
|
| 10 |
-
Google interview talking point:
|
| 11 |
-
"I performed structured error analysis on my model's failure cases.
|
| 12 |
-
I grouped them by failure type — vocabulary mismatch, ambiguous intent,
|
| 13 |
-
formatting noise — and used that to drive targeted improvements."
|
| 14 |
-
|
| 15 |
-
Usage:
|
| 16 |
-
python error_analysis.py --input output.csv # post-classify CSV
|
| 17 |
-
python error_analysis.py --simulate # demo with synthetic data
|
| 18 |
-
"""
|
| 19 |
-
from __future__ import annotations
|
| 20 |
-
import argparse
|
| 21 |
-
import re
|
| 22 |
-
import sys
|
| 23 |
-
from collections import Counter, defaultdict
|
| 24 |
-
from typing import Optional
|
| 25 |
-
import pandas as pd
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
# ── Failure mode taxonomy ────────────────────────────────────────────────────
|
| 29 |
-
class FailureMode:
    """String tags for the suspected reason a log went unclassified."""

    # domain-specific terms not in training
    RARE_VOCAB = "rare_vocabulary"
    # log could match multiple categories
    AMBIGUOUS = "ambiguous_intent"
    # non-standard / old-school formatting
    LEGACY_FORMAT = "legacy_format"
    # partial / malformed log line
    TRUNCATED = "truncated_or_noisy"
    # ID/code-heavy, no semantic signal
    NUMERIC_ONLY = "mostly_numeric"
    # one line, multiple events
    MULTI_EVENT = "multi_event"
    # none of the heuristics matched
    UNKNOWN = "unknown"
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
def _detect_failure_mode(log: str) -> str:
    """Guess why ``log`` was left unclassified, using ordered heuristics.

    The checks run in a fixed order and the first match wins: very short
    text, digit-heavy text, several joined events, legacy keywords, two or
    more ambiguous keywords, rare vocabulary. ``FailureMode.UNKNOWN`` is
    returned when none apply.
    """
    lowered = log.lower()

    if len(log) < 20:
        return FailureMode.TRUNCATED

    # Share of characters that are digits.
    digit_count = sum(1 for ch in log if ch.isdigit())
    if digit_count / max(len(log), 1) > 0.40:
        return FailureMode.NUMERIC_ONLY

    # Separators suggesting that several events were joined on one line.
    joined_events = (
        log.count(";") >= 2
        or log.count(" AND ") >= 1
        or log.count(" | ") >= 2
    )
    if joined_events:
        return FailureMode.MULTI_EVENT

    # Legacy / unusual format signals
    legacy_signals = (
        "ticket", "escalation", "crm", "deprecated", "retire",
        "module will be", "workflow", "assigned agent",
    )
    for signal in legacy_signals:
        if signal in lowered:
            return FailureMode.LEGACY_FORMAT

    # Ambiguity signals: the text could describe an error OR a security event.
    ambiguous_signals = ("failed", "error", "unauthorized", "denied", "blocked")
    matched = [signal for signal in ambiguous_signals if signal in lowered]
    if len(matched) >= 2:
        return FailureMode.AMBIGUOUS

    # Rare vocabulary
    rare_signals = ("sla", "oncall", "runbook", "pagerduty", "janitor", "gc ", "eviction")
    for signal in rare_signals:
        if signal in lowered:
            return FailureMode.RARE_VOCAB

    return FailureMode.UNKNOWN
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
def _suggest_fix(mode: str) -> str:
    """Return the remediation hint for a failure mode.

    Modes without a dedicated hint yield ``"Manual review required."``.
    """
    remedies = dict([
        (FailureMode.RARE_VOCAB, "Add 5–10 training examples covering this vocabulary; or add regex rule."),
        (FailureMode.AMBIGUOUS, "Use multi-label or add a dedicated 'Ambiguous' class; review confidence threshold."),
        (FailureMode.LEGACY_FORMAT, "Route all legacy-format logs to LLM tier; add few-shot examples for LLM prompt."),
        (FailureMode.TRUNCATED, "Add input validation: reject/flag logs under 15 chars before classification."),
        (FailureMode.NUMERIC_ONLY, "Add regex patterns for structured numeric formats (job IDs, error codes, etc.)."),
        (FailureMode.MULTI_EVENT, "Pre-process: split multi-event lines on ';' or ' | ' before classifying."),
        (FailureMode.UNKNOWN, "Manually review and add to training data or LLM few-shot examples."),
    ])
    try:
        return remedies[mode]
    except KeyError:
        return "Manual review required."
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
# ── Core analysis ────────────────────────────────────────────────────────────
|
| 88 |
-
def analyze_unclassified(df: pd.DataFrame, label_col: str = "predicted_label") -> None:
    """Print a structured error analysis of the rows labelled "Unclassified".

    The report lists the logs, groups them by heuristic failure mode, shows
    the most common tokens, the length distribution, the per-source
    breakdown (when a ``source`` column exists) and a prioritised fix list.

    Args:
        df: Classified logs. Messages are read from ``log_message`` when
            present, otherwise from the last column.
        label_col: Name of the column holding the predicted label.

    Raises:
        KeyError: If ``label_col`` is not a column of ``df``.
    """
    if label_col not in df.columns:
        raise KeyError(
            f"label column {label_col!r} not found; available columns: {list(df.columns)}"
        )

    unclassified = df[df[label_col] == "Unclassified"].copy()
    total_unclassified = len(unclassified)

    if total_unclassified == 0:
        print("✅ No unclassified logs found!")
        return

    print(f"\n{'='*70}")
    print(f"🔍 ERROR ANALYSIS: {total_unclassified} Unclassified Logs")
    print(f"{'='*70}\n")

    log_col = "log_message" if "log_message" in df.columns else df.columns[-1]
    # Read the message column once; every step below reuses this list
    # instead of re-iterating the frame row by row.
    messages = [str(value) for value in unclassified[log_col]]

    # ── Step 1: Print all unclassified logs ─────────────────────────────────
    print(f"{'#':>4} {'Log Message'}")
    print("─" * 80)
    for i, log in enumerate(messages, 1):
        print(f"{i:>4}. {log[:120]}")

    # ── Step 2: Group by failure mode ───────────────────────────────────────
    print(f"\n{'='*70}")
    print("📂 GROUPING BY FAILURE MODE")
    print("─" * 70)

    groups: dict[str, list[str]] = defaultdict(list)
    for log in messages:
        groups[_detect_failure_mode(log)].append(log)

    # Largest group first; the sort is stable, so ties keep first-seen order.
    ranked = sorted(groups.items(), key=lambda item: -len(item[1]))
    for mode, logs in ranked:
        pct = len(logs) / total_unclassified * 100
        print(f"\n🔹 {mode} — {len(logs)} logs ({pct:.1f}%)")
        print(f" 💡 Fix: {_suggest_fix(mode)}")
        print(" Examples:")
        for log in logs[:3]:
            print(f" • {log[:110]}")

    # ── Step 3: Token frequency analysis ────────────────────────────────────
    print(f"\n{'='*70}")
    print("📊 COMMON TOKENS IN UNCLASSIFIED LOGS")
    print("─" * 70)

    STOPWORDS = {"the", "a", "an", "is", "in", "on", "for", "to", "of",
                 "and", "or", "by", "at", "with", "has", "was", "be",
                 "this", "that", "it", "not", "are", "from", "as"}

    # Tokens are runs of three or more ASCII letters in the lower-cased text.
    counter = Counter(
        token
        for log in messages
        for token in re.findall(r"[a-z]{3,}", log.lower())
        if token not in STOPWORDS
    )
    print("Top 20 tokens in unclassified logs:")
    for token, count in counter.most_common(20):
        bar = "█" * min(count, 40)
        print(f" {token:<20} {count:>4} {bar}")

    # ── Step 4: Length distribution ─────────────────────────────────────────
    lengths = unclassified[log_col].apply(lambda x: len(str(x)))
    print(f"\n{'='*70}")
    print("📏 LOG LENGTH DISTRIBUTION (Unclassified)")
    print(f" Mean: {lengths.mean():.1f} chars")
    print(f" Median: {lengths.median():.1f} chars")
    print(f" Min: {lengths.min()} chars")
    print(f" Max: {lengths.max()} chars")

    short = (lengths < 30).sum()
    if short:
        print(f" ⚠️ {short} logs under 30 chars — likely truncated/noisy")

    # ── Step 5: Source breakdown ─────────────────────────────────────────────
    if "source" in df.columns:
        print(f"\n{'='*70}")
        print("🏷️ UNCLASSIFIED BY SOURCE")
        src_counts = unclassified["source"].value_counts()
        for src, cnt in src_counts.items():
            bar = "█" * min(cnt, 40)
            print(f" {src:<22} {cnt:>4} {bar}")

    # ── Step 6: Actionable summary ───────────────────────────────────────────
    print(f"\n{'='*70}")
    print("✅ ACTIONABLE FIXES (Priority Order)")
    print("─" * 70)
    # `ranked` is never empty here because at least one log was grouped.
    dominant_mode = ranked[0][0] if ranked else FailureMode.UNKNOWN
    fixes = [
        (1, "regex", "Add patterns for top unclassified tokens to processor_regex.py"),
        (2, "training", "Add 10–20 examples per failure mode to training data"),
        (3, "llm", "For LEGACY_FORMAT failures: add to LLM few-shot examples"),
        (4, "preproc", "Pre-process: split multi-event logs, reject truncated logs"),
        (5, "threshold", "Tune BERT confidence threshold (currently 0.30 — try 0.40)"),
    ]
    for priority, area, fix in fixes:
        print(f" {priority}. [{area.upper():^10}] {fix}")

    print(f"\n📌 Dominant failure mode: '{dominant_mode}' ({len(groups.get(dominant_mode,[]))} logs)")
    print(f" Start here: {_suggest_fix(dominant_mode)}\n")
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
# ── Simulate 76 unclassified logs for demo ────────────────────────────────────
|
| 192 |
-
def _simulate_unclassified() -> pd.DataFrame:
    """Build a 76-row demo frame of synthetic logs, all labelled "Unclassified".

    Sixteen hand-written messages are repeated cyclically to fill the rows.
    Sources are assigned in three consecutive runs of 30, 20 and 26 rows.
    """
    row_count = 76
    seed_logs = [
        # Legacy format / CRM
        "Case escalation for ticket ID 9021 failed: agent inactive.",
        "CRM module 'ReportGenerator' will be retired in v4.1.",
        "Workflow for approval chain #4421 stalled at step 3.",
        "SLA breach detected for case ID 7701 (P1, 4h breach).",
        # Ambiguous
        "Service auth-api failed and unauthorized access was logged.",
        "Error: blocked request from 10.0.0.5 — reason unknown.",
        # Truncated / noisy
        "ERR",
        "srv timeout",
        "node-7",
        # Numeric-heavy
        "8821 9001 443 0 0 DROP IN=eth0 OUT= MAC=",
        "16 0 0 1 2024-01-14 03:21:00.001",
        # Multi-event
        "Backup started; disk usage at 92%; health check failed | node-3",
        # Rare vocab
        "PagerDuty alert triggered for on-call rotation P1-incident.",
        "GC eviction: 3.2GB heap compacted in 420ms.",
        "Janitor job completed: 14,000 stale tokens purged.",
        "Runbook auto-remediation triggered for alert ALT-9021.",
    ]
    # Cycle through the seed messages until every row has one.
    messages = [seed_logs[i % len(seed_logs)] for i in range(row_count)]

    sources = []
    for name, repeat in (("ModernCRM", 30), ("LegacyCRM", 20), ("AnalyticsEngine", 26)):
        sources.extend([name] * repeat)

    return pd.DataFrame({
        "source": sources,
        "log_message": messages,
        "predicted_label": ["Unclassified"] * row_count,
    })
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
# ── CLI ──────────────────────────────────────────────────────────────────────
|
| 228 |
-
def main():
    """Command-line entry point for the unclassified-log analysis.

    Flags:
        --input      path to a CSV that is loaded with ``pd.read_csv``
        --simulate   use the synthetic frame from ``_simulate_unclassified``
        --label-col  name of the predicted-label column
                     (default ``predicted_label``)

    ``--simulate`` takes precedence over ``--input``.  When neither is given
    the help text is printed and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="Analyze unclassified/misclassified logs")
    cli.add_argument("--input", help="Path to classified CSV from classify_csv()")
    cli.add_argument(
        "--simulate",
        action="store_true",
        help="Run with synthetic unclassified logs (no CSV needed)",
    )
    cli.add_argument(
        "--label-col",
        default="predicted_label",
        help="Column name that holds the predicted label",
    )
    options = cli.parse_args()

    # Guard clause: nothing to analyze without a data source.
    if not options.simulate and not options.input:
        cli.print_help()
        sys.exit(1)

    if options.simulate:
        frame = _simulate_unclassified()
        print("🎭 Running with SIMULATED 76 unclassified logs…")
    else:
        frame = pd.read_csv(options.input)

    analyze_unclassified(frame, label_col=options.label_col)


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/models/log_classifier.joblib
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:9bfe9c71b71412797de0d426be2255566dbf6cf87b3f2ae5d2cd1fd69a98d18d
|
| 3 |
-
size 23997
|
|
|
|
|
|
|
|
|
|
|
|
HF/onnx_model/config.json
DELETED
|
@@ -1,24 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"architectures": [
|
| 3 |
-
"BertModel"
|
| 4 |
-
],
|
| 5 |
-
"attention_probs_dropout_prob": 0.1,
|
| 6 |
-
"classifier_dropout": null,
|
| 7 |
-
"gradient_checkpointing": false,
|
| 8 |
-
"hidden_act": "gelu",
|
| 9 |
-
"hidden_dropout_prob": 0.1,
|
| 10 |
-
"hidden_size": 384,
|
| 11 |
-
"initializer_range": 0.02,
|
| 12 |
-
"intermediate_size": 1536,
|
| 13 |
-
"layer_norm_eps": 1e-12,
|
| 14 |
-
"max_position_embeddings": 512,
|
| 15 |
-
"model_type": "bert",
|
| 16 |
-
"num_attention_heads": 12,
|
| 17 |
-
"num_hidden_layers": 6,
|
| 18 |
-
"pad_token_id": 0,
|
| 19 |
-
"position_embedding_type": "absolute",
|
| 20 |
-
"transformers_version": "4.57.6",
|
| 21 |
-
"type_vocab_size": 2,
|
| 22 |
-
"use_cache": true,
|
| 23 |
-
"vocab_size": 30522
|
| 24 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/onnx_model/special_tokens_map.json
DELETED
|
@@ -1,37 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cls_token": {
|
| 3 |
-
"content": "[CLS]",
|
| 4 |
-
"lstrip": false,
|
| 5 |
-
"normalized": false,
|
| 6 |
-
"rstrip": false,
|
| 7 |
-
"single_word": false
|
| 8 |
-
},
|
| 9 |
-
"mask_token": {
|
| 10 |
-
"content": "[MASK]",
|
| 11 |
-
"lstrip": false,
|
| 12 |
-
"normalized": false,
|
| 13 |
-
"rstrip": false,
|
| 14 |
-
"single_word": false
|
| 15 |
-
},
|
| 16 |
-
"pad_token": {
|
| 17 |
-
"content": "[PAD]",
|
| 18 |
-
"lstrip": false,
|
| 19 |
-
"normalized": false,
|
| 20 |
-
"rstrip": false,
|
| 21 |
-
"single_word": false
|
| 22 |
-
},
|
| 23 |
-
"sep_token": {
|
| 24 |
-
"content": "[SEP]",
|
| 25 |
-
"lstrip": false,
|
| 26 |
-
"normalized": false,
|
| 27 |
-
"rstrip": false,
|
| 28 |
-
"single_word": false
|
| 29 |
-
},
|
| 30 |
-
"unk_token": {
|
| 31 |
-
"content": "[UNK]",
|
| 32 |
-
"lstrip": false,
|
| 33 |
-
"normalized": false,
|
| 34 |
-
"rstrip": false,
|
| 35 |
-
"single_word": false
|
| 36 |
-
}
|
| 37 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/onnx_model/tokenizer.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
HF/onnx_model/tokenizer_config.json
DELETED
|
@@ -1,65 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"added_tokens_decoder": {
|
| 3 |
-
"0": {
|
| 4 |
-
"content": "[PAD]",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false,
|
| 9 |
-
"special": true
|
| 10 |
-
},
|
| 11 |
-
"100": {
|
| 12 |
-
"content": "[UNK]",
|
| 13 |
-
"lstrip": false,
|
| 14 |
-
"normalized": false,
|
| 15 |
-
"rstrip": false,
|
| 16 |
-
"single_word": false,
|
| 17 |
-
"special": true
|
| 18 |
-
},
|
| 19 |
-
"101": {
|
| 20 |
-
"content": "[CLS]",
|
| 21 |
-
"lstrip": false,
|
| 22 |
-
"normalized": false,
|
| 23 |
-
"rstrip": false,
|
| 24 |
-
"single_word": false,
|
| 25 |
-
"special": true
|
| 26 |
-
},
|
| 27 |
-
"102": {
|
| 28 |
-
"content": "[SEP]",
|
| 29 |
-
"lstrip": false,
|
| 30 |
-
"normalized": false,
|
| 31 |
-
"rstrip": false,
|
| 32 |
-
"single_word": false,
|
| 33 |
-
"special": true
|
| 34 |
-
},
|
| 35 |
-
"103": {
|
| 36 |
-
"content": "[MASK]",
|
| 37 |
-
"lstrip": false,
|
| 38 |
-
"normalized": false,
|
| 39 |
-
"rstrip": false,
|
| 40 |
-
"single_word": false,
|
| 41 |
-
"special": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"clean_up_tokenization_spaces": false,
|
| 45 |
-
"cls_token": "[CLS]",
|
| 46 |
-
"do_basic_tokenize": true,
|
| 47 |
-
"do_lower_case": true,
|
| 48 |
-
"extra_special_tokens": {},
|
| 49 |
-
"mask_token": "[MASK]",
|
| 50 |
-
"max_length": 128,
|
| 51 |
-
"model_max_length": 512,
|
| 52 |
-
"never_split": null,
|
| 53 |
-
"pad_to_multiple_of": null,
|
| 54 |
-
"pad_token": "[PAD]",
|
| 55 |
-
"pad_token_type_id": 0,
|
| 56 |
-
"padding_side": "right",
|
| 57 |
-
"sep_token": "[SEP]",
|
| 58 |
-
"stride": 0,
|
| 59 |
-
"strip_accents": null,
|
| 60 |
-
"tokenize_chinese_chars": true,
|
| 61 |
-
"tokenizer_class": "BertTokenizer",
|
| 62 |
-
"truncation_side": "right",
|
| 63 |
-
"truncation_strategy": "longest_first",
|
| 64 |
-
"unk_token": "[UNK]"
|
| 65 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/onnx_model/vocab.txt
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
HF/processor_llm.py
DELETED
|
@@ -1,192 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
processor_llm.py — Tier 3: LLM-based Classifier
|
| 3 |
-
|
| 4 |
-
Used for:
|
| 5 |
-
- LegacyCRM logs (Workflow Error, Deprecation Warning)
|
| 6 |
-
- BERT fallback when confidence < threshold
|
| 7 |
-
|
| 8 |
-
Production hardening in V3:
|
| 9 |
-
- Timeout (configurable, default 5s)
|
| 10 |
-
- Retry with exponential backoff (max 2 retries)
|
| 11 |
-
- Explicit failure modes: returns "Unclassified" on all error paths
|
| 12 |
-
- Caching for repeated log patterns (hash-based, in-memory)
|
| 13 |
-
- Token budget enforcement (max_tokens=15)
|
| 14 |
-
"""
|
| 15 |
-
from __future__ import annotations
|
| 16 |
-
import os
|
| 17 |
-
import re
|
| 18 |
-
import time
|
| 19 |
-
import hashlib
|
| 20 |
-
import logging
|
| 21 |
-
from functools import lru_cache
|
| 22 |
-
from typing import Optional
|
| 23 |
-
|
| 24 |
-
logger = logging.getLogger(__name__)

# ── Config ─────────────────────────────────────────────────────────────────
# API token is read once, at import time; an unset variable yields None.
HF_TOKEN = os.environ.get("HF_TOKEN")
LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

# Labels this tier is allowed to emit (anything else becomes "Unclassified").
VALID_CATEGORIES = ["Workflow Error", "Deprecation Warning"]

# Retry / timeout config
MAX_RETRIES = 2          # retries after the initial attempt
RETRY_DELAY_SEC = 1.0    # first back-off delay; doubled after each failure
REQUEST_TIMEOUT = 5      # seconds — fail fast, do not hang pipeline

# In-memory cache to avoid redundant LLM calls for repeated logs.
# Keys are produced by _cache_key(); values are the normalized labels.
_RESPONSE_CACHE: dict[str, str] = {}
MAX_CACHE_SIZE = 1000    # oldest-inserted entry is evicted when full (FIFO)

SYSTEM_PROMPT = " ".join(
    (
        "You are an enterprise log classifier.",
        "Classify log messages into exactly one category.",
        "Return ONLY the category name — no explanation, no punctuation.",
    )
)

# (log, label) pairs rendered into the prompt as few-shot demonstrations.
FEW_SHOT_EXAMPLES = [
    {"log": example_log, "label": example_label}
    for example_log, example_label in (
        (
            "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
            "Workflow Error",
        ),
        (
            "The 'BulkEmailSender' feature is no longer supported. Use 'EmailCampaignManager' instead.",
            "Deprecation Warning",
        ),
        (
            "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
            "Workflow Error",
        ),
    )
]
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
# ── Cache helpers ────────────────────────────────────────────────────────────
|
| 64 |
-
def _cache_key(log_msg: str) -> str:
    """Return the cache key for *log_msg*.

    The key is the hex MD5 digest of the message with surrounding whitespace
    removed, so messages differing only in leading/trailing whitespace share
    one key.  MD5 is used here as a fingerprint, not for security.
    """
    trimmed = log_msg.strip()
    digest = hashlib.md5(trimmed.encode())
    return digest.hexdigest()
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
def _cache_get(log_msg: str) -> Optional[str]:
    """Look up the cached label for *log_msg*; ``None`` means cache miss."""
    key = _cache_key(log_msg)
    return _RESPONSE_CACHE.get(key)
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
def _cache_set(log_msg: str, label: str) -> None:
    """Store *label* for *log_msg* in the in-memory response cache.

    When the cache already holds ``MAX_CACHE_SIZE`` entries and *log_msg* is
    not yet cached, the oldest-inserted entry is evicted first (FIFO; relies
    on dict insertion order).  Overwriting an existing entry never evicts:
    the cache does not grow in that case, so dropping another entry would
    only lose a valid result.
    """
    key = _cache_key(log_msg)
    is_new_key = key not in _RESPONSE_CACHE
    if is_new_key and len(_RESPONSE_CACHE) >= MAX_CACHE_SIZE:
        # Evict oldest (first inserted) key
        oldest = next(iter(_RESPONSE_CACHE))
        del _RESPONSE_CACHE[oldest]
    _RESPONSE_CACHE[key] = label
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
def get_cache_stats() -> dict:
    """Report current cache occupancy and the configured capacity.

    Returns:
        ``{"size": <entries currently cached>, "max_size": MAX_CACHE_SIZE}``
    """
    current_size = len(_RESPONSE_CACHE)
    return dict(size=current_size, max_size=MAX_CACHE_SIZE)
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
# ── Prompt builder ───────────────────────────────────────────────────────────
|
| 86 |
-
def _build_messages(log_msg: str) -> list[dict]:
    """Assemble the chat messages sent to the LLM for one log line.

    The user message has three parts, in order: the instruction listing the
    allowed categories, one ``Log:/Category:`` pair per few-shot example, and
    finally the log to classify followed by an open ``Category:`` slot.

    Returns:
        A two-element list: the system message, then the user message.
    """
    quoted_categories = ", ".join('"' + name + '"' for name in VALID_CATEGORIES)

    instruction = (
        f"Classify the following log into one of these categories: {quoted_categories}.\n"
        'If none fits, return "Unclassified".\n\n'
    )
    demonstrations = "".join(
        f'Log: {shot["log"]}\nCategory: {shot["label"]}\n\n'
        for shot in FEW_SHOT_EXAMPLES
    )
    query = f"Log: {log_msg}\nCategory:"

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": instruction + demonstrations + query}
    return [system_message, user_message]
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
# ── Normalize raw LLM output ─────────────────────────────────────────────────
|
| 103 |
-
def _normalize(raw: Optional[str]) -> str:
    """Map raw LLM output to a valid category or 'Unclassified'.

    Matching is a case-insensitive substring search, so wrapped answers such
    as ``'Category: Workflow Error'`` or ``'"Deprecation Warning"'`` still
    resolve.  If several categories occur in the text, the first one in
    ``VALID_CATEGORIES`` order wins.

    Args:
        raw: Text returned by the model.  ``None`` and the empty string are
            accepted and treated as "no answer".

    Returns:
        A member of ``VALID_CATEGORIES``, or ``"Unclassified"``.
    """
    # A chat completion may carry no text at all; treat that as "no answer"
    # instead of raising AttributeError on .strip().
    if not raw:
        return "Unclassified"

    # Lower-case once, outside the loop.
    cleaned = raw.strip().strip('"').strip("'").lower()
    for cat in VALID_CATEGORIES:
        if cat.lower() in cleaned:
            return cat
    return "Unclassified"
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
# ── Main classify function ────────────────────────────────────────────────────
|
| 113 |
-
def classify_with_llm(log_msg: str) -> str:
    """
    Tier 3 LLM classifier with:
      - In-memory cache (avoids duplicate API calls)
      - Timeout (REQUEST_TIMEOUT seconds)
      - Retry with exponential backoff (initial attempt + MAX_RETRIES retries)
      - Explicit fallback to "Unclassified" on all error paths

    Args:
        log_msg: The log line to classify.

    Returns:
        One of VALID_CATEGORIES, or "Unclassified" when the model's answer
        matches no category, when HF_TOKEN is unset, or when every attempt
        fails.  Successful answers (including "Unclassified") are cached;
        failures are not.
    """
    # ── Cache hit ────────────────────────────────────────────────────────────
    cached = _cache_get(log_msg)
    if cached is not None:
        logger.debug("[LLM] Cache hit for: %s", log_msg[:60])
        return cached

    # ── Inference with retry ─────────────────────────────────────────────────
    if not HF_TOKEN:
        logger.warning("[LLM] HF_TOKEN not set — returning Unclassified")
        return "Unclassified"

    # Imported lazily so the module can be loaded without huggingface_hub
    # when no token is configured.
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
    delay = RETRY_DELAY_SEC
    total_attempts = MAX_RETRIES + 1  # initial attempt + MAX_RETRIES retries

    for attempt in range(1, total_attempts + 1):
        try:
            response = client.chat.completions.create(
                model=LLM_MODEL,
                messages=_build_messages(log_msg),
                max_tokens=15,
                temperature=0.1,
            )
            # The message content may be None; coerce to "" so an empty
            # answer becomes "Unclassified" instead of raising and burning
            # the remaining retries.
            raw = response.choices[0].message.content or ""
            label = _normalize(raw)

            _cache_set(log_msg, label)
            logger.debug("[LLM] Attempt %d: '%s' → '%s'", attempt, raw.strip(), label)
            return label

        except Exception as e:
            # Deliberately broad: this tier must never crash the pipeline.
            if attempt <= MAX_RETRIES:
                logger.warning(
                    "[LLM] Attempt %d failed (%s), retrying in %.1fs…",
                    attempt, e, delay,
                )
                time.sleep(delay)
                delay *= 2  # exponential backoff
            else:
                logger.error(
                    "[LLM] All %d attempts failed. Last error: %s",
                    total_attempts, e,
                )

    return "Unclassified"
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
# ── Batch classify (serial — LLM is already rate-limited) ────────────────────
|
| 168 |
-
def classify_batch_llm(log_msgs: list[str]) -> list[str]:
    """Classify each message with ``classify_with_llm``, one at a time.

    Calls are made sequentially, in input order; the returned labels line up
    index-for-index with *log_msgs*.
    """
    labels = []
    for message in log_msgs:
        labels.append(classify_with_llm(message))
    return labels
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
# ── CLI test ─────────────────────────────────────────────────────────────────
|
| 174 |
-
if __name__ == "__main__":
    # Manual smoke test: classify a few samples, then time a repeat call.
    logging.basicConfig(level=logging.INFO)

    sample_logs = [
        "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
        "The 'ReportGenerator' module will be retired in version 4.0. Migrate to 'AdvancedAnalyticsSuite'.",
        "System reboot initiated by user 12345.",  # should be Unclassified
    ]
    for sample in sample_logs:
        label = classify_with_llm(sample)
        print(f"{label:25s} | {sample[:80]}")

    # Cache hit test: the first sample was classified above, so this call is
    # expected to be answered from the cache when that call succeeded.
    print("\n── Cache hit test ──")
    started = time.perf_counter()
    classify_with_llm(sample_logs[0])
    finished = time.perf_counter()
    elapsed_ms = (finished - started) * 1000
    print(f"Cache hit latency: {elapsed_ms:.2f}ms")
    print(f"Cache stats: {get_cache_stats()}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/processor_regex.py
DELETED
|
@@ -1,220 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
processor_regex.py — Tier 1: Rule-based Classifier
|
| 3 |
-
|
| 4 |
-
Target coverage: 40%+ (up from 15%)
|
| 5 |
-
Latency: sub-millisecond per log
|
| 6 |
-
|
| 7 |
-
New pattern groups added:
|
| 8 |
-
- HTTP request/response logs (was completely missing!)
|
| 9 |
-
- Auth / credential events (login failures, MFA, lockouts)
|
| 10 |
-
- System/infra events (disk, CPU, memory, cron)
|
| 11 |
-
- Network / firewall events (IP block, port scan)
|
| 12 |
-
- Structured error codes (ERROR, CRITICAL prefix logs)
|
| 13 |
-
"""
|
| 14 |
-
from __future__ import annotations
|
| 15 |
-
import re
|
| 16 |
-
import time
|
| 17 |
-
from typing import Optional
|
| 18 |
-
|
| 19 |
-
# ---------------------------------------------------------------------------
|
| 20 |
-
# Pattern registry: (compiled_pattern, label)
|
| 21 |
-
# Order matters — more specific patterns FIRST to avoid mis-labeling.
|
| 22 |
-
# ---------------------------------------------------------------------------
|
| 23 |
-
# First match wins (see classify_with_regex), so list order is part of the
# behaviour.  The "Critical Error" group is placed AHEAD of the generic
# "Error" group: a line such as "CRITICAL ERROR: database connection failed"
# or "FATAL: request timeout" would otherwise be caught by a generic Error
# pattern and lose its severity.
_RAW_PATTERNS: list[tuple[str, str]] = [

    # ── HTTP Status ─────────────────────────────────────────────────────────
    # Covers: GET/POST/PUT/DELETE/PATCH + status code in request line
    (r"\b(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s+\S+\s+HTTP/\d", "HTTP Status"),
    # Nova / OpenStack style
    (r"nova\.\S+\s+(GET|POST|PUT|DELETE)\s+\S+\s+HTTP/\d", "HTTP Status"),
    # Status code only style: "returned HTTP 200" or "status: 404"
    (r"\bstatus[:\s]+\d{3}\b", "HTTP Status"),
    (r"\breturned\s+HTTP\s+\d{3}\b", "HTTP Status"),
    (r"\bHTTP\s+status\s+code\s*[:-]?\s*\d{3}\b", "HTTP Status"),
    # API response style
    (r"\bAPI\s+(call|request)\s+\S+\s+completed\s+with\s+status\s+\d{3}", "HTTP Status"),
    (r"\bEndpoint\s+\S+\s+responded\s+with\s+code\s+\d{3}", "HTTP Status"),

    # ── Security Alert ──────────────────────────────────────────────────────
    # Brute force / login failures
    (r"(multiple\s+)?(bad\s+|failed?\s+)?login\s+(failure|attempt|failures)", "Security Alert"),
    (r"brute[\s_-]force\s+(login|attack|attempt)", "Security Alert"),
    # Unauthorized access
    (r"unauthorized\s+(access|admin|privilege|attempt)", "Security Alert"),
    (r"access\s+denied\s+(for|to)\s+(user|ip|host)", "Security Alert"),
    # Privilege escalation
    (r"(admin\s+)?access\s+escalation\s+detected", "Security Alert"),
    (r"privilege\s+(elev|escalat)", "Security Alert"),
    # IP blocking / suspicious traffic
    (r"IP\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+blocked", "Security Alert"),
    (r"(suspicious|anomalous)\s+(login|traffic|activity|request)", "Security Alert"),
    (r"potential\s+(DDoS|attack|breach|intrusion)", "Security Alert"),
    (r"security\s+breach\s+suspected", "Security Alert"),
    (r"(API\s+security\s+breach|bypass\s+API\s+security)", "Security Alert"),
    (r"port\s+scan\s+(detected|attempt)", "Security Alert"),

    # ── User Action ─────────────────────────────────────────────────────────
    (r"User\s+\w+\d*\s+logged\s+(in|out)", "User Action"),
    (r"Account\s+(with\s+)?ID\s+\S+\s+created\s+by", "User Action"),
    (r"User\s+\w+\d*\s+(updated\s+profile|changed\s+password|enabled\s+two|downloaded|exported)", "User Action"),
    (r"(New\s+user|user\s+\w+\d*)\s+registered", "User Action"),
    (r"Account\s+\S+\s+deleted\s+by\s+(administrator|admin)", "User Action"),
    (r"User\s+\w+\d*\s+(tried|attempted)", "User Action"),

    # ── System Notification ─────────────────────────────────────────────────
    # Backup events
    (r"Backup\s+(started|ended|completed\s+successfully|failed|aborted)", "System Notification"),
    (r"System\s+updated\s+to\s+version", "System Notification"),
    (r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user", "System Notification"),
    (r"Disk\s+cleanup\s+completed\s+successfully", "System Notification"),
    (r"System\s+reboot\s+initiated\s+by\s+user", "System Notification"),
    (r"Scheduled\s+maintenance\s+(started|completed)", "System Notification"),
    (r"Service\s+\w+\s+restarted\s+successfully", "System Notification"),
    # NEW: cache, cron, health check, cert, log rotation
    (r"Cache\s+cleared\s+successfully", "System Notification"),
    (r"Log\s+rotation\s+completed", "System Notification"),
    (r"Health\s+check\s+(passed|failed)\s+for\s+service", "System Notification"),
    (r"Certificate\s+(renewed|expired|revoked)\s+successfully", "System Notification"),
    (r"Cron\s+job\s+\S+\s+(executed|failed|completed)\s+successfully", "System Notification"),
    (r"(Disk|Storage)\s+(usage|space)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
    (r"CPU\s+usage\s+at\s+\d+%", "System Notification"),
    (r"Memory\s+(usage|limit)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
    # Deployment / config
    (r"Deployment\s+(of|for)\s+\S+\s+(completed|failed|started)", "System Notification"),
    (r"Configuration\s+(reloaded|updated|applied)\s+successfully", "System Notification"),

    # ── Critical Error (checked before the generic Error group) ─────────────
    (r"\bCRITICAL\b", "Critical Error"),
    # Leading \b keeps words that merely end in "fatal"/"panic" (e.g.
    # "nonfatal") from being escalated.
    (r"\b(FATAL|PANIC)\b", "Critical Error"),
    (r"(data\s+loss|data\s+corruption)\s+(detected|occurred)", "Critical Error"),
    (r"(cluster|node|shard)\s+(failure|crashed|went\s+down)", "Critical Error"),
    (r"(catastrophic|unrecoverable)\s+(failure|error)", "Critical Error"),
    (r"kernel\s+panic", "Critical Error"),
    (r"out[\s-]of[\s-](memory|disk)\s+(error|killed|OOM)", "Critical Error"),

    # ── Error ───────────────────────────────────────────────────────────────
    (r"\bERROR\b.*\b(exception|failed|failure|crash|timeout|unavailable)\b", "Error"),
    (r"System\s+crashed\s+due\s+to", "Error"),
    (r"(connection|request|task|job)\s+(timed?\s*out|timeout)", "Error"),
    (r"service\s+\S+\s+(is\s+down|unavailable|unreachable)", "Error"),
    (r"database\s+connection\s+(failed|refused|lost|dropped)", "Error"),
    (r"disk\s+(I/O\s+)?failure", "Error"),
    (r"driver\s+error(s)?\s+(when|during|on)", "Error"),
    (r"(replication|sync)\s+task\s+(did\s+not\s+complete|failed)", "Error"),
    (r"null\s+pointer|segmentation\s+fault|stack\s+overflow", "Error"),
]

# Pre-compile all patterns at import time (not per-call).
# Every pattern is compiled case-insensitively.
REGEX_PATTERNS: list[tuple[re.Pattern, str]] = [
    (re.compile(pat, re.IGNORECASE), label)
    for pat, label in _RAW_PATTERNS
]
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
def classify_with_regex(log_message: str) -> Optional[str]:
    """
    Tier 1: rule-based classifier.

    Scans REGEX_PATTERNS in list order and returns the label of the first
    pattern found anywhere in *log_message* (``search``, not ``match``).
    Returns None when no pattern applies.
    """
    hits = (
        label
        for compiled, label in REGEX_PATTERNS
        if compiled.search(log_message)
    )
    # The generator is lazy, so scanning stops at the first hit.
    return next(hits, None)
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
def get_regex_coverage(log_messages: list[str]) -> dict:
    """Measure how many messages the regex tier can label.

    Returns:
        A dict with ``total``, ``matched``, ``missed``, ``coverage_pct``
        (matched / total as a percentage rounded to 2 decimals, 0.0 for
        empty input) and ``label_breakdown`` (label -> count).
    """
    assigned = [classify_with_regex(message) for message in log_messages]
    hits = [label for label in assigned if label]

    breakdown: dict[str, int] = {}
    for label in hits:
        breakdown[label] = breakdown.get(label, 0) + 1

    total = len(log_messages)
    matched = len(hits)
    coverage = round(matched / total * 100, 2) if total else 0.0

    return {
        "total": total,
        "matched": matched,
        "missed": total - matched,
        "coverage_pct": coverage,
        "label_breakdown": breakdown,
    }
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
def benchmark_regex(log_messages: list[str], runs: int = 3) -> dict:
    """Measure regex tier latency (p50 / p95 / p99 / mean) over multiple runs.

    Every message is timed individually with ``time.perf_counter`` on each of
    the *runs* passes; all samples are pooled before the statistics are
    computed.

    Args:
        log_messages: Messages to classify.
        runs: Number of passes over *log_messages*.

    Returns:
        Dict with ``p50_ms``, ``p95_ms``, ``p99_ms`` and ``mean_ms``, each in
        milliseconds rounded to 4 decimals.  When there is nothing to time
        (empty *log_messages* or ``runs <= 0``) all four values are 0.0.
    """
    import statistics

    per_log_ms: list[float] = []

    for _ in range(runs):
        for msg in log_messages:
            t0 = time.perf_counter()
            classify_with_regex(msg)
            per_log_ms.append((time.perf_counter() - t0) * 1000)

    # statistics.median/mean raise StatisticsError on empty data; report
    # zeros instead, mirroring get_regex_coverage's handling of empty input.
    if not per_log_ms:
        return {"p50_ms": 0.0, "p95_ms": 0.0, "p99_ms": 0.0, "mean_ms": 0.0}

    per_log_ms.sort()
    sample_count = len(per_log_ms)
    return {
        "p50_ms": round(statistics.median(per_log_ms), 4),
        # int(n * q) < n for q < 1, so the index is always in range.
        "p95_ms": round(per_log_ms[int(sample_count * 0.95)], 4),
        "p99_ms": round(per_log_ms[int(sample_count * 0.99)], 4),
        "mean_ms": round(statistics.mean(per_log_ms), 4),
    }
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
# ── CLI self-test ────────────────────────────────────────────────────────────
|
| 171 |
-
if __name__ == "__main__":
    # Self-test table: (log line, expected label).  None means the regex tier
    # is expected to leave the line for a later tier.
    test_cases: list[tuple[str, Optional[str]]] = [
        # HTTP
        ("GET /api/v2/resource HTTP/1.1 status: 200 len: 1583 time: 0.19", "HTTP Status"),
        ("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05", "HTTP Status"),
        ("nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404", "HTTP Status"),
        # Security
        ("Multiple login failures occurred on user 6454 account", "Security Alert"),
        ("IP 192.168.133.114 blocked due to potential attack", "Security Alert"),
        ("Brute force login attempt from 10.0.0.5 detected", "Security Alert"),
        ("Admin access escalation detected for user 9429", "Security Alert"),
        # User Action
        ("User User12345 logged in.", "User Action"),
        ("Account with ID 456 created by Admin.", "User Action"),
        # System Notification
        ("Backup completed successfully.", "System Notification"),
        ("CPU usage at 98% for the last 10 minutes on node-7", "System Notification"),
        ("Health check passed for service payments-api", "System Notification"),
        # Error
        ("System crashed due to disk I/O failure on node-3", "Error"),
        ("Database connection failed after 3 retries", "Error"),
        # Critical
        ("CRITICAL: data corruption detected on shard-14", "Critical Error"),
        ("kernel panic: not syncing: VFS: unable to mount root fs", "Critical Error"),
        # Should be None (unmatched)
        ("The 'BulkEmailSender' feature will be deprecated in v5.0.", None),
        ("Case escalation for ticket 7324 failed.", None),
    ]

    print(f"{'Expected':<22} {'Got':<22} {'✓/✗'} | Log")
    print("─" * 100)

    correct = 0
    for message, wanted in test_cases:
        actual = classify_with_regex(message)
        if actual == wanted:
            correct += 1
            mark = "✓"
        else:
            mark = "✗"
        print(f"{str(wanted):<22} {str(actual):<22} {mark} | {message[:55]}")

    print(f"\n{correct}/{len(test_cases)} correct")

    # Coverage demo
    all_logs = [message for message, _ in test_cases]
    cov = get_regex_coverage(all_logs)
    print(f"\nCoverage: {cov['coverage_pct']}% ({cov['matched']}/{cov['total']} matched)")
    print("Label breakdown:", cov["label_breakdown"])

    # Latency benchmark: repeat the sample set 100x to pool more samples.
    lat = benchmark_regex(all_logs * 100)
    print(f"\nLatency (p50/p95/p99): {lat['p50_ms']}ms / {lat['p95_ms']}ms / {lat['p99_ms']}ms")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/test/__init__.py
DELETED
|
File without changes
|
HF/test/__pycache__/__init__.cpython-312.pyc
DELETED
|
Binary file (139 Bytes)
|
|
|
HF/test/__pycache__/test_regex.cpython-312-pytest-9.0.3.pyc
DELETED
|
Binary file (36.5 kB)
|
|
|
HF/test/test_llm.py
DELETED
|
@@ -1,197 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
test/test_llm.py — Tests for Tier 3: LLM Classifier
|
| 3 |
-
|
| 4 |
-
Tests verify:
|
| 5 |
-
1. Cache hit avoids API call
|
| 6 |
-
2. Retry logic on transient failure
|
| 7 |
-
3. Returns "Unclassified" on all error paths (never crashes pipeline)
|
| 8 |
-
4. Response normalization handles edge cases
|
| 9 |
-
5. No HF_TOKEN → returns Unclassified gracefully
|
| 10 |
-
|
| 11 |
-
Run:
|
| 12 |
-
    pytest test/test_llm.py -v
|
| 13 |
-
"""
|
| 14 |
-
import sys, os
|
| 15 |
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
| 16 |
-
|
| 17 |
-
import pytest
|
| 18 |
-
from unittest.mock import patch, MagicMock, call
|
| 19 |
-
import processor_llm as llm_module
|
| 20 |
-
from processor_llm import (
|
| 21 |
-
classify_with_llm, get_cache_stats,
|
| 22 |
-
_cache_key, _cache_get, _cache_set, _normalize,
|
| 23 |
-
_RESPONSE_CACHE,
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
# ── Setup / teardown ──────────────────────────────────────────────────────────
|
| 28 |
-
@pytest.fixture(autouse=True)
def clear_cache():
    """Give every test an empty LLM response cache.

    Autouse, so it wraps each test in this module: the shared
    ``_RESPONSE_CACHE`` dict is emptied before the test body runs and again
    afterwards, so cached labels never leak between tests.
    """
    # Setup: start from an empty cache.
    _RESPONSE_CACHE.clear()
    yield
    # Teardown: drop whatever the test stored.
    _RESPONSE_CACHE.clear()
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
# ── Normalize ─────────────────────────────────────────────────────────────────
|
| 37 |
-
class TestNormalize:
    """Checks that ``_normalize`` maps raw model text onto a known label."""

    def test_exact_match(self):
        label = _normalize("Workflow Error")
        assert label == "Workflow Error"

    def test_case_insensitive(self):
        # Lower-case output must come back in canonical capitalisation.
        label = _normalize("workflow error")
        assert label == "Workflow Error"

    def test_deprecation_warning(self):
        label = _normalize("Deprecation Warning")
        assert label == "Deprecation Warning"

    def test_random_text_returns_unclassified(self):
        label = _normalize("I don't know")
        assert label == "Unclassified"

    def test_empty_string_returns_unclassified(self):
        label = _normalize("")
        assert label == "Unclassified"

    def test_partial_match(self):
        # Model might return "Category: Workflow Error" → still should match
        label = _normalize("Category: Workflow Error")
        assert label == "Workflow Error"

    def test_strips_quotes(self):
        label = _normalize('"Deprecation Warning"')
        assert label == "Deprecation Warning"
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
# ── Cache ─────────────────────────────────────────────────────────────────────
|
| 62 |
-
class TestCache:
|
| 63 |
-
def test_cache_miss_returns_none(self):
|
| 64 |
-
assert _cache_get("totally new log message xyz") is None
|
| 65 |
-
|
| 66 |
-
def test_cache_set_and_get(self):
|
| 67 |
-
log = "test log message for caching"
|
| 68 |
-
_cache_set(log, "Workflow Error")
|
| 69 |
-
assert _cache_get(log) == "Workflow Error"
|
| 70 |
-
|
| 71 |
-
def test_cache_key_is_deterministic(self):
|
| 72 |
-
log = "same log every time"
|
| 73 |
-
assert _cache_key(log) == _cache_key(log)
|
| 74 |
-
|
| 75 |
-
def test_different_logs_different_keys(self):
|
| 76 |
-
k1 = _cache_key("log message A")
|
| 77 |
-
k2 = _cache_key("log message B")
|
| 78 |
-
assert k1 != k2
|
| 79 |
-
|
| 80 |
-
def test_cache_hit_avoids_api_call(self):
|
| 81 |
-
log = "Case escalation for ticket 7324 failed."
|
| 82 |
-
_cache_set(log, "Workflow Error") # Pre-populate cache
|
| 83 |
-
|
| 84 |
-
with patch("processor_llm.InferenceClient") as mock_client:
|
| 85 |
-
result = classify_with_llm(log)
|
| 86 |
-
|
| 87 |
-
mock_client.assert_not_called()
|
| 88 |
-
assert result == "Workflow Error"
|
| 89 |
-
|
| 90 |
-
def test_cache_stats_size(self):
|
| 91 |
-
_cache_set("log1", "Workflow Error")
|
| 92 |
-
_cache_set("log2", "Deprecation Warning")
|
| 93 |
-
stats = get_cache_stats()
|
| 94 |
-
assert stats["size"] == 2
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
# ── No token ──────────────────────────────────────────────────────────────────
|
| 98 |
-
class TestNoToken:
|
| 99 |
-
def test_no_hf_token_returns_unclassified(self, monkeypatch):
|
| 100 |
-
monkeypatch.setattr(llm_module, "HF_TOKEN", None)
|
| 101 |
-
result = classify_with_llm("Case escalation for ticket 1234.")
|
| 102 |
-
assert result == "Unclassified"
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
# ── Retry logic ───────────────────────────────────────────────────────────────
|
| 106 |
-
class TestRetry:
|
| 107 |
-
def _make_mock_client(self, responses):
|
| 108 |
-
"""responses: list of (Exception | str) — raised or returned in order."""
|
| 109 |
-
call_count = [0]
|
| 110 |
-
|
| 111 |
-
def mock_create(**kwargs):
|
| 112 |
-
idx = call_count[0]
|
| 113 |
-
call_count[0] += 1
|
| 114 |
-
if isinstance(responses[idx], Exception):
|
| 115 |
-
raise responses[idx]
|
| 116 |
-
mock_resp = MagicMock()
|
| 117 |
-
mock_resp.choices[0].message.content = responses[idx]
|
| 118 |
-
return mock_resp
|
| 119 |
-
|
| 120 |
-
mock_client = MagicMock()
|
| 121 |
-
mock_client.chat.completions.create.side_effect = mock_create
|
| 122 |
-
return mock_client
|
| 123 |
-
|
| 124 |
-
def test_success_on_first_try(self, monkeypatch):
|
| 125 |
-
monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
|
| 126 |
-
monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0) # no sleep
|
| 127 |
-
|
| 128 |
-
client = self._make_mock_client(["Workflow Error"])
|
| 129 |
-
|
| 130 |
-
with patch("processor_llm.InferenceClient", return_value=client):
|
| 131 |
-
result = classify_with_llm("Case escalation for ticket 7324.")
|
| 132 |
-
|
| 133 |
-
assert result == "Workflow Error"
|
| 134 |
-
assert client.chat.completions.create.call_count == 1
|
| 135 |
-
|
| 136 |
-
def test_retry_on_transient_failure(self, monkeypatch):
|
| 137 |
-
monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
|
| 138 |
-
monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
|
| 139 |
-
monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
|
| 140 |
-
|
| 141 |
-
# Fail once, succeed on second attempt
|
| 142 |
-
client = self._make_mock_client([
|
| 143 |
-
ConnectionError("timeout"),
|
| 144 |
-
"Deprecation Warning",
|
| 145 |
-
])
|
| 146 |
-
|
| 147 |
-
with patch("processor_llm.InferenceClient", return_value=client), \
|
| 148 |
-
patch("processor_llm.time.sleep"): # skip actual sleep
|
| 149 |
-
result = classify_with_llm("Module will be retired in v4.")
|
| 150 |
-
|
| 151 |
-
assert result == "Deprecation Warning"
|
| 152 |
-
assert client.chat.completions.create.call_count == 2
|
| 153 |
-
|
| 154 |
-
def test_all_retries_exhausted_returns_unclassified(self, monkeypatch):
|
| 155 |
-
monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
|
| 156 |
-
monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
|
| 157 |
-
monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
|
| 158 |
-
|
| 159 |
-
client = self._make_mock_client([
|
| 160 |
-
ConnectionError("timeout"),
|
| 161 |
-
ConnectionError("timeout"),
|
| 162 |
-
ConnectionError("timeout"),
|
| 163 |
-
])
|
| 164 |
-
|
| 165 |
-
with patch("processor_llm.InferenceClient", return_value=client), \
|
| 166 |
-
patch("processor_llm.time.sleep"):
|
| 167 |
-
result = classify_with_llm("Something that keeps failing.")
|
| 168 |
-
|
| 169 |
-
assert result == "Unclassified"
|
| 170 |
-
assert client.chat.completions.create.call_count == 3 # 1 initial + 2 retries
|
| 171 |
-
|
| 172 |
-
def test_successful_result_gets_cached(self, monkeypatch):
|
| 173 |
-
monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
|
| 174 |
-
monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
|
| 175 |
-
|
| 176 |
-
client = self._make_mock_client(["Workflow Error"])
|
| 177 |
-
|
| 178 |
-
log = "Case escalation for unique ticket 99999."
|
| 179 |
-
with patch("processor_llm.InferenceClient", return_value=client):
|
| 180 |
-
result = classify_with_llm(log)
|
| 181 |
-
|
| 182 |
-
assert result == "Workflow Error"
|
| 183 |
-
# Should now be in cache
|
| 184 |
-
assert _cache_get(log) == "Workflow Error"
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
# ── Pipeline safety ───────────────────────────────────────────────────────────
|
| 188 |
-
class TestPipelineSafety:
|
| 189 |
-
def test_classify_never_raises(self, monkeypatch):
|
| 190 |
-
"""LLM failures must NEVER propagate as exceptions to the pipeline."""
|
| 191 |
-
monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
|
| 192 |
-
monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
|
| 193 |
-
|
| 194 |
-
with patch("processor_llm.InferenceClient", side_effect=RuntimeError("catastrophic")):
|
| 195 |
-
result = classify_with_llm("Any log message here.")
|
| 196 |
-
|
| 197 |
-
assert result == "Unclassified" # Never raises, always returns string
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/test/test_regex.py
DELETED
|
@@ -1,222 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
tests/test_regex.py — Unit tests for Tier 1: Regex Classifier
|
| 3 |
-
|
| 4 |
-
Tests verify:
|
| 5 |
-
1. Every pattern category has positive matches
|
| 6 |
-
2. No false positives on known non-matching logs
|
| 7 |
-
3. Pattern order doesn't cause mis-labeling
|
| 8 |
-
4. Coverage improvement (should be > 35% on balanced test set)
|
| 9 |
-
|
| 10 |
-
Run:
|
| 11 |
-
pytest tests/ -v
|
| 12 |
-
pytest tests/test_regex.py -v --tb=short
|
| 13 |
-
"""
|
| 14 |
-
import sys, os
|
| 15 |
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
| 16 |
-
|
| 17 |
-
import pytest
|
| 18 |
-
from processor_regex import classify_with_regex, get_regex_coverage
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
# ── Positive cases: must match and return correct label ───────────────────────
|
| 22 |
-
class TestHTTPStatus:
|
| 23 |
-
def test_get_request(self):
|
| 24 |
-
assert classify_with_regex("GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1") == "HTTP Status"
|
| 25 |
-
|
| 26 |
-
def test_post_request(self):
|
| 27 |
-
assert classify_with_regex("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05") == "HTTP Status"
|
| 28 |
-
|
| 29 |
-
def test_delete_request(self):
|
| 30 |
-
assert classify_with_regex("DELETE /v1/users/123 HTTP/1.1 status: 204 len: 0 time: 0.02") == "HTTP Status"
|
| 31 |
-
|
| 32 |
-
def test_nova_style(self):
|
| 33 |
-
assert classify_with_regex(
|
| 34 |
-
"nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19"
|
| 35 |
-
) == "HTTP Status"
|
| 36 |
-
|
| 37 |
-
def test_status_code_only(self):
|
| 38 |
-
assert classify_with_regex("API call /invoices returned HTTP 500 in 2.1s") == "HTTP Status"
|
| 39 |
-
|
| 40 |
-
def test_patch_request(self):
|
| 41 |
-
assert classify_with_regex("PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04") == "HTTP Status"
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
class TestSecurityAlert:
|
| 45 |
-
def test_login_failures(self):
|
| 46 |
-
assert classify_with_regex("Multiple login failures occurred on user 6454 account") == "Security Alert"
|
| 47 |
-
|
| 48 |
-
def test_ip_blocked(self):
|
| 49 |
-
assert classify_with_regex("IP 192.168.133.114 blocked due to potential attack") == "Security Alert"
|
| 50 |
-
|
| 51 |
-
def test_brute_force(self):
|
| 52 |
-
assert classify_with_regex("Alert: brute force login attempt from 10.0.0.5 detected") == "Security Alert"
|
| 53 |
-
|
| 54 |
-
def test_admin_escalation(self):
|
| 55 |
-
assert classify_with_regex("Admin access escalation detected for user 9429") == "Security Alert"
|
| 56 |
-
|
| 57 |
-
def test_privilege_elevation(self):
|
| 58 |
-
assert classify_with_regex("Privilege elevation detected for user Admin99") == "Security Alert"
|
| 59 |
-
|
| 60 |
-
def test_ddos(self):
|
| 61 |
-
assert classify_with_regex("Potential DDoS attack from 1.2.3.4 detected") == "Security Alert"
|
| 62 |
-
|
| 63 |
-
def test_suspicious_activity(self):
|
| 64 |
-
assert classify_with_regex("Suspicious login activity detected from 203.0.113.1") == "Security Alert"
|
| 65 |
-
|
| 66 |
-
def test_unauthorized_access(self):
|
| 67 |
-
assert classify_with_regex("Unauthorized access to data was attempted by User123") == "Security Alert"
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
class TestUserAction:
|
| 71 |
-
def test_login(self):
|
| 72 |
-
assert classify_with_regex("User User12345 logged in.") == "User Action"
|
| 73 |
-
|
| 74 |
-
def test_logout(self):
|
| 75 |
-
assert classify_with_regex("User User99 logged out.") == "User Action"
|
| 76 |
-
|
| 77 |
-
def test_account_created(self):
|
| 78 |
-
assert classify_with_regex("Account with ID 456 created by Admin.") == "User Action"
|
| 79 |
-
|
| 80 |
-
def test_password_changed(self):
|
| 81 |
-
assert classify_with_regex("User User42 changed password successfully.") == "User Action"
|
| 82 |
-
|
| 83 |
-
def test_new_user_registered(self):
|
| 84 |
-
assert classify_with_regex("New user User9999 registered with email u@e.com.") == "User Action"
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
class TestSystemNotification:
|
| 88 |
-
def test_backup_completed(self):
|
| 89 |
-
assert classify_with_regex("Backup completed successfully.") == "System Notification"
|
| 90 |
-
|
| 91 |
-
def test_backup_started(self):
|
| 92 |
-
assert classify_with_regex("Backup started at 2024-01-14 03:00:00.") == "System Notification"
|
| 93 |
-
|
| 94 |
-
def test_system_updated(self):
|
| 95 |
-
assert classify_with_regex("System updated to version 4.2.1.") == "System Notification"
|
| 96 |
-
|
| 97 |
-
def test_disk_cleanup(self):
|
| 98 |
-
assert classify_with_regex("Disk cleanup completed successfully.") == "System Notification"
|
| 99 |
-
|
| 100 |
-
def test_service_restarted(self):
|
| 101 |
-
assert classify_with_regex("Service payments restarted successfully.") == "System Notification"
|
| 102 |
-
|
| 103 |
-
def test_cpu_usage(self):
|
| 104 |
-
assert classify_with_regex("CPU usage at 98% for the last 10 minutes on node-7") == "System Notification"
|
| 105 |
-
|
| 106 |
-
def test_health_check_passed(self):
|
| 107 |
-
assert classify_with_regex("Health check passed for service auth-api") == "System Notification"
|
| 108 |
-
|
| 109 |
-
def test_cron_executed(self):
|
| 110 |
-
assert classify_with_regex("Cron job cleanup-tokens executed successfully.") == "System Notification"
|
| 111 |
-
|
| 112 |
-
def test_certificate_renewed(self):
|
| 113 |
-
assert classify_with_regex("Certificate renewed successfully for domain api.example.com") == "System Notification"
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
class TestError:
|
| 117 |
-
def test_system_crashed(self):
|
| 118 |
-
assert classify_with_regex("System crashed due to disk I/O failure on node-3") == "Error"
|
| 119 |
-
|
| 120 |
-
def test_db_connection_failed(self):
|
| 121 |
-
assert classify_with_regex("Database connection failed after 3 retries") == "Error"
|
| 122 |
-
|
| 123 |
-
def test_service_down(self):
|
| 124 |
-
assert classify_with_regex("Service payments-api is down") == "Error"
|
| 125 |
-
|
| 126 |
-
def test_request_timeout(self):
|
| 127 |
-
assert classify_with_regex("Connection timed out after 30s on shard-7") == "Error"
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
class TestCriticalError:
|
| 131 |
-
def test_critical_prefix(self):
|
| 132 |
-
assert classify_with_regex("CRITICAL: data corruption detected on shard-14") == "Critical Error"
|
| 133 |
-
|
| 134 |
-
def test_fatal(self):
|
| 135 |
-
assert classify_with_regex("FATAL: kernel panic — system halted") == "Critical Error"
|
| 136 |
-
|
| 137 |
-
def test_data_loss(self):
|
| 138 |
-
assert classify_with_regex("data loss detected during write to replica-3") == "Critical Error"
|
| 139 |
-
|
| 140 |
-
def test_oom(self):
|
| 141 |
-
assert classify_with_regex("out-of-memory error: process killed (OOM)") == "Critical Error"
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
# ── Negative cases: must return None (don't mis-classify) ────────────────────
|
| 145 |
-
class TestNegativeCases:
|
| 146 |
-
@pytest.mark.parametrize("log", [
|
| 147 |
-
"Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
|
| 148 |
-
"The 'ReportGenerator' module will be retired in version 4.0.",
|
| 149 |
-
"The 'BulkEmailSender' feature will be deprecated in v5.0.",
|
| 150 |
-
"Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
|
| 151 |
-
"Hey bro chill ya!",
|
| 152 |
-
])
|
| 153 |
-
def test_no_false_positives(self, log):
|
| 154 |
-
result = classify_with_regex(log)
|
| 155 |
-
assert result is None, f"Expected None but got '{result}' for: {log[:80]}"
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
# ── Coverage test ─────────────────────────────────────────────────────────────
|
| 159 |
-
class TestCoverage:
|
| 160 |
-
BALANCED_SAMPLE = [
|
| 161 |
-
# HTTP (6)
|
| 162 |
-
"GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1",
|
| 163 |
-
"POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05",
|
| 164 |
-
"nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 200",
|
| 165 |
-
"DELETE /v1/items/99 HTTP/1.1 status: 204 len: 0 time: 0.01",
|
| 166 |
-
"PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04",
|
| 167 |
-
"API call /invoices returned HTTP 500 in 2.1s",
|
| 168 |
-
# Security (6)
|
| 169 |
-
"Multiple login failures occurred on user 6454 account",
|
| 170 |
-
"IP 10.0.0.5 blocked due to potential attack",
|
| 171 |
-
"Brute force login attempt from 192.168.1.1 detected",
|
| 172 |
-
"Admin access escalation detected for user 9429",
|
| 173 |
-
"Suspicious login activity detected from 1.2.3.4",
|
| 174 |
-
"Potential DDoS attack from 203.0.113.1 detected",
|
| 175 |
-
# User Action (5)
|
| 176 |
-
"User User12345 logged in.",
|
| 177 |
-
"User User99 logged out.",
|
| 178 |
-
"Account with ID 456 created by Admin.",
|
| 179 |
-
"User User42 changed password successfully.",
|
| 180 |
-
"New user User9999 registered with email u@e.com.",
|
| 181 |
-
# System Notification (5)
|
| 182 |
-
"Backup completed successfully.",
|
| 183 |
-
"System updated to version 4.2.1.",
|
| 184 |
-
"Disk cleanup completed successfully.",
|
| 185 |
-
"CPU usage at 98% for the last 10 minutes on node-7",
|
| 186 |
-
"Cron job cleanup-tokens executed successfully.",
|
| 187 |
-
# Error (4)
|
| 188 |
-
"System crashed due to disk I/O failure on node-3",
|
| 189 |
-
"Database connection failed after 3 retries",
|
| 190 |
-
"Service auth-api is down",
|
| 191 |
-
"Connection timed out after 30s",
|
| 192 |
-
# Critical (3)
|
| 193 |
-
"CRITICAL: data corruption detected on shard-14",
|
| 194 |
-
"FATAL: kernel panic — system halted",
|
| 195 |
-
"data loss detected during write to replica-3",
|
| 196 |
-
# LegacyCRM / unmatched (5) → should NOT match
|
| 197 |
-
"Case escalation for ticket ID 7324 failed.",
|
| 198 |
-
"The 'BulkEmailSender' feature will be deprecated in v5.0.",
|
| 199 |
-
"Invoice generation aborted for order ID 8910.",
|
| 200 |
-
"Workflow stalled at approval step 3 for case 9021.",
|
| 201 |
-
"SLA breach detected for case ID 7701 (P1 4h breach).",
|
| 202 |
-
]
|
| 203 |
-
|
| 204 |
-
def test_coverage_above_35_percent(self):
|
| 205 |
-
result = get_regex_coverage(self.BALANCED_SAMPLE)
|
| 206 |
-
pct = result["coverage_pct"]
|
| 207 |
-
# 29 of 34 logs should match regex (29/34 = 85%)
|
| 208 |
-
# 5 LegacyCRM logs should NOT match → ~85% expected
|
| 209 |
-
assert pct >= 35.0, (
|
| 210 |
-
f"Regex coverage {pct}% is below 35% minimum. "
|
| 211 |
-
f"Check pattern additions in processor_regex.py"
|
| 212 |
-
)
|
| 213 |
-
|
| 214 |
-
def test_no_false_positive_on_legacy_logs(self):
|
| 215 |
-
legacy_logs = [
|
| 216 |
-
"Case escalation for ticket ID 7324 failed.",
|
| 217 |
-
"The 'BulkEmailSender' feature will be deprecated in v5.0.",
|
| 218 |
-
"Invoice generation aborted for order ID 8910.",
|
| 219 |
-
]
|
| 220 |
-
for log in legacy_logs:
|
| 221 |
-
result = classify_with_regex(log)
|
| 222 |
-
assert result is None, f"False positive: '{result}' on legacy log: {log}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF/test/test_routing.py
DELETED
|
@@ -1,179 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
tests/test_routing.py — Pipeline Routing Tests
|
| 3 |
-
|
| 4 |
-
Tests verify:
|
| 5 |
-
1. LegacyCRM source → LLM tier (always)
|
| 6 |
-
2. Regex match → Regex tier (never reaches BERT)
|
| 7 |
-
3. High-confidence BERT → BERT tier
|
| 8 |
-
4. Unclassified BERT → LLM fallback tier
|
| 9 |
-
5. Result schema is complete (all keys present)
|
| 10 |
-
|
| 11 |
-
Run:
|
| 12 |
-
pytest tests/test_routing.py -v
|
| 13 |
-
"""
|
| 14 |
-
import sys, os
|
| 15 |
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
| 16 |
-
|
| 17 |
-
import pytest
|
| 18 |
-
from unittest.mock import patch, MagicMock
|
| 19 |
-
from classify import classify_log, classify_logs, pipeline_summary
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
# ── Fixtures ──────────────────────────────────────────────────────────────────
|
| 23 |
-
REGEX_HIT_LOG = ("ModernCRM", "User User123 logged in.")
|
| 24 |
-
REGEX_HIT_LOG2 = ("BillingSystem", "GET /api/v1/invoices HTTP/1.1 status: 200 len: 100 time: 0.1")
|
| 25 |
-
LEGACY_LOG = ("LegacyCRM", "Case escalation for ticket 9021 failed.")
|
| 26 |
-
NON_REGEX_LOG = ("ModernHR", "The inventory sync completed without matching standard patterns.")
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# ── Schema completeness ───────────────────────────────────────────────────────
|
| 30 |
-
class TestResultSchema:
|
| 31 |
-
def test_classify_log_has_required_keys(self):
|
| 32 |
-
with patch("classify.bert_batch", return_value=[("Error", 0.95)]):
|
| 33 |
-
result = classify_log(*NON_REGEX_LOG)
|
| 34 |
-
assert "label" in result
|
| 35 |
-
assert "tier" in result
|
| 36 |
-
assert "confidence" in result
|
| 37 |
-
assert "latency_ms" in result
|
| 38 |
-
|
| 39 |
-
def test_latency_ms_is_positive(self):
|
| 40 |
-
result = classify_log(*REGEX_HIT_LOG)
|
| 41 |
-
assert result["latency_ms"] > 0
|
| 42 |
-
|
| 43 |
-
def test_confidence_is_float_or_none(self):
|
| 44 |
-
result = classify_log(*REGEX_HIT_LOG)
|
| 45 |
-
assert result["confidence"] is None or isinstance(result["confidence"], float)
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
# ── Regex tier routing ─────────────────────────────────────────────────────────
|
| 49 |
-
class TestRegexRouting:
|
| 50 |
-
def test_regex_match_returns_regex_tier(self):
|
| 51 |
-
result = classify_log(*REGEX_HIT_LOG)
|
| 52 |
-
assert result["tier"] == "Regex"
|
| 53 |
-
|
| 54 |
-
def test_regex_match_has_full_confidence(self):
|
| 55 |
-
result = classify_log(*REGEX_HIT_LOG)
|
| 56 |
-
assert result["confidence"] == 1.0
|
| 57 |
-
|
| 58 |
-
def test_regex_match_http_log(self):
|
| 59 |
-
result = classify_log(*REGEX_HIT_LOG2)
|
| 60 |
-
assert result["tier"] == "Regex"
|
| 61 |
-
assert result["label"] == "HTTP Status"
|
| 62 |
-
|
| 63 |
-
def test_regex_match_skips_bert(self):
|
| 64 |
-
"""If regex matches, bert_batch should never be called."""
|
| 65 |
-
with patch("classify.bert_batch") as mock_bert:
|
| 66 |
-
classify_log(*REGEX_HIT_LOG)
|
| 67 |
-
mock_bert.assert_not_called()
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
# ── LegacyCRM routing ─────────────────────────────────────────────────────────
|
| 71 |
-
class TestLegacyCRMRouting:
|
| 72 |
-
def test_legacy_crm_goes_to_llm(self):
|
| 73 |
-
with patch("classify.classify_with_llm", return_value="Workflow Error") as mock_llm:
|
| 74 |
-
result = classify_log(*LEGACY_LOG)
|
| 75 |
-
assert result["tier"] == "LLM"
|
| 76 |
-
mock_llm.assert_called_once()
|
| 77 |
-
|
| 78 |
-
def test_legacy_crm_skips_regex(self):
|
| 79 |
-
"""LegacyCRM should skip regex entirely — go straight to LLM."""
|
| 80 |
-
with patch("classify.classify_with_regex") as mock_regex, \
|
| 81 |
-
patch("classify.classify_with_llm", return_value="Workflow Error"):
|
| 82 |
-
classify_log(*LEGACY_LOG)
|
| 83 |
-
mock_regex.assert_not_called()
|
| 84 |
-
|
| 85 |
-
def test_legacy_crm_skips_bert(self):
|
| 86 |
-
with patch("classify.bert_batch") as mock_bert, \
|
| 87 |
-
patch("classify.classify_with_llm", return_value="Workflow Error"):
|
| 88 |
-
classify_log(*LEGACY_LOG)
|
| 89 |
-
mock_bert.assert_not_called()
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
# ── BERT routing ──────────────────────────────────────────────────────────────
|
| 93 |
-
class TestBERTRouting:
|
| 94 |
-
def test_high_confidence_bert_stays_bert(self):
|
| 95 |
-
with patch("classify.bert_batch", return_value=[("Security Alert", 0.95)]):
|
| 96 |
-
result = classify_log(*NON_REGEX_LOG)
|
| 97 |
-
assert result["tier"] == "BERT"
|
| 98 |
-
assert result["label"] == "Security Alert"
|
| 99 |
-
assert result["confidence"] == pytest.approx(0.95)
|
| 100 |
-
|
| 101 |
-
def test_low_confidence_bert_falls_back_to_llm(self):
|
| 102 |
-
"""BERT returning 'Unclassified' should escalate to LLM."""
|
| 103 |
-
with patch("classify.bert_batch", return_value=[("Unclassified", 0.20)]), \
|
| 104 |
-
patch("classify.classify_with_llm", return_value="Workflow Error") as mock_llm:
|
| 105 |
-
result = classify_log(*NON_REGEX_LOG)
|
| 106 |
-
assert "LLM" in result["tier"]
|
| 107 |
-
mock_llm.assert_called_once()
|
| 108 |
-
|
| 109 |
-
def test_bert_batch_called_for_non_regex_log(self):
|
| 110 |
-
with patch("classify.bert_batch", return_value=[("Error", 0.88)]) as mock_bert:
|
| 111 |
-
classify_log(*NON_REGEX_LOG)
|
| 112 |
-
mock_bert.assert_called_once()
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
# ── Batch routing ──────────────────────────────────────────────────────────────
|
| 116 |
-
class TestBatchRouting:
|
| 117 |
-
def test_batch_returns_correct_length(self):
|
| 118 |
-
logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2, LEGACY_LOG]
|
| 119 |
-
with patch("classify.classify_with_llm", return_value="Workflow Error"):
|
| 120 |
-
results = classify_logs(logs)
|
| 121 |
-
assert len(results) == len(logs)
|
| 122 |
-
|
| 123 |
-
def test_batch_mixed_tiers(self):
|
| 124 |
-
logs = [
|
| 125 |
-
REGEX_HIT_LOG, # → Regex
|
| 126 |
-
("ModernCRM", "GET /api HTTP/1.1 status: 200"), # → Regex (HTTP)
|
| 127 |
-
LEGACY_LOG, # → LLM
|
| 128 |
-
]
|
| 129 |
-
with patch("classify.classify_with_llm", return_value="Workflow Error"):
|
| 130 |
-
results = classify_logs(logs)
|
| 131 |
-
|
| 132 |
-
assert results[0]["tier"] == "Regex"
|
| 133 |
-
assert results[1]["tier"] == "Regex"
|
| 134 |
-
assert results[2]["tier"] == "LLM"
|
| 135 |
-
|
| 136 |
-
def test_pipeline_summary_structure(self):
|
| 137 |
-
logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2]
|
| 138 |
-
results = classify_logs(logs)
|
| 139 |
-
summary = pipeline_summary(results)
|
| 140 |
-
|
| 141 |
-
assert "total" in summary
|
| 142 |
-
assert "tier_stats" in summary
|
| 143 |
-
assert "label_counts" in summary
|
| 144 |
-
assert summary["total"] == 2
|
| 145 |
-
|
| 146 |
-
def test_pipeline_summary_tier_pcts_sum_to_100(self):
|
| 147 |
-
logs = [REGEX_HIT_LOG, REGEX_HIT_LOG2, REGEX_HIT_LOG]
|
| 148 |
-
results = classify_logs(logs)
|
| 149 |
-
summary = pipeline_summary(results)
|
| 150 |
-
total_pct = sum(s["pct"] for s in summary["tier_stats"].values())
|
| 151 |
-
assert abs(total_pct - 100.0) < 1.0, f"Tier pcts don't sum to 100: {total_pct}"
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
# ── Edge cases ────────────────────────────────────────────────────────────────
|
| 155 |
-
class TestEdgeCases:
|
| 156 |
-
def test_empty_batch_returns_empty(self):
|
| 157 |
-
results = classify_logs([])
|
| 158 |
-
assert results == []
|
| 159 |
-
|
| 160 |
-
def test_single_log_batch(self):
|
| 161 |
-
with patch("classify.bert_batch", return_value=[("Error", 0.85)]):
|
| 162 |
-
results = classify_logs([NON_REGEX_LOG])
|
| 163 |
-
assert len(results) == 1
|
| 164 |
-
|
| 165 |
-
def test_all_regex_batch_never_calls_bert(self):
|
| 166 |
-
logs = [REGEX_HIT_LOG] * 10
|
| 167 |
-
with patch("classify.bert_batch") as mock_bert:
|
| 168 |
-
classify_logs(logs)
|
| 169 |
-
mock_bert.assert_not_called()
|
| 170 |
-
|
| 171 |
-
def test_llm_failure_returns_unclassified(self):
|
| 172 |
-
"""LLM crashing should return Unclassified, not raise."""
|
| 173 |
-
with patch("classify.classify_with_llm", side_effect=Exception("LLM down")):
|
| 174 |
-
try:
|
| 175 |
-
result = classify_log(*LEGACY_LOG)
|
| 176 |
-
# If it doesn't raise, Unclassified should be label
|
| 177 |
-
assert result["label"] == "Unclassified"
|
| 178 |
-
except Exception:
|
| 179 |
-
pytest.fail("classify_log raised an exception — should have returned Unclassified")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|