Spaces:

NOT-OMEGA
/

LogAI-Engine

Sleeping

App Files Files Community

NOT-OMEGA committed on Apr 14

Commit

238ab41

verified ·

1 Parent(s): 0a86214

Delete HF

Browse files

Files changed (17) hide show

HF/app_gradio.py +0 -187
HF/benchmark.py +0 -214
HF/error_analysis.py +0 -250
HF/models/log_classifier.joblib +0 -3
HF/onnx_model/config.json +0 -24
HF/onnx_model/special_tokens_map.json +0 -37
HF/onnx_model/tokenizer.json +0 -0
HF/onnx_model/tokenizer_config.json +0 -65
HF/onnx_model/vocab.txt +0 -0
HF/processor_llm.py +0 -192
HF/processor_regex.py +0 -220
HF/test/__init__.py +0 -0
HF/test/__pycache__/__init__.cpython-312.pyc +0 -0
HF/test/__pycache__/test_regex.cpython-312-pytest-9.0.3.pyc +0 -0
HF/test/test_llm.py +0 -197
HF/test/test_regex.py +0 -222
HF/test/test_routing.py +0 -179

HF/app_gradio.py DELETED Viewed

@@ -1,187 +0,0 @@
-"""
-Log Classification System — HuggingFace Spaces
-Gradio UI for the 3-tier hybrid log classification pipeline.
-"""
-from __future__ import annotations
-import io
-import time
-import pandas as pd
-import gradio as gr
-from classify import classify_log, classify_csv
-# ── Source options ──────────────────────────────────────────────────────────
-SOURCES = [
-    "ModernCRM",
-    "ModernHR",
-    "BillingSystem",
-    "AnalyticsEngine",
-    "ThirdPartyAPI",
-    "LegacyCRM",
-]
-TIER_COLORS = {
-    "Regex":        "🟢",
-    "BERT":         "🔵",
-    "LLM":          "🟡",
-    "LLM (fallback)": "🟠",
-}
-EXAMPLE_LOGS = [
-    ["ModernCRM",       "User User12345 logged in."],
-    ["ModernHR",        "Multiple login failures occurred on user 6454 account"],
-    ["BillingSystem",   "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
-    ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
-    ["LegacyCRM",       "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
-    ["LegacyCRM",       "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
-]
-# ── Single log tab ──────────────────────────────────────────────────────────
-def classify_single(source: str, log_message: str):
-    if not log_message.strip():
-        return "—", "—", "—", "—"
-    t0 = time.perf_counter()
-    result = classify_log(source, log_message)
-    latency_ms = (time.perf_counter() - t0) * 1000
-    label      = result["label"]
-    tier       = result["tier"]
-    confidence = f"{result['confidence']:.1%}" if result["confidence"] is not None else "N/A"
-    icon       = TIER_COLORS.get(tier, "⚪")
-    return (
-        label,
-        f"{icon} {tier}",
-        confidence,
-        f"{latency_ms:.1f} ms",
-    )
-# ── Batch CSV tab ───────────────────────────────────────────────────────────
-def classify_batch(file):
-    if file is None:
-        return None, "⚠️ Please upload a CSV file."
-    try:
-        output_path, df = classify_csv(file.name, "/tmp/classified_output.csv")
-    except ValueError as e:
-        return None, f"⚠️ {e}"
-    except Exception as e:
-        return None, f"❌ Error: {e}"
-    total = len(df)
-    tier_counts  = df["tier_used"].value_counts().to_dict()
-    label_counts = df["predicted_label"].value_counts().to_dict()
-    tier_lines  = "\n".join(f"  {TIER_COLORS.get(k,'⚪')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
-    label_lines = "\n".join(f"  • {k}: {v}" for k, v in label_counts.items())
-    stats = (
-        f"✅ Classified {total} logs\n\n"
-        f"📊 Tier breakdown:\n{tier_lines}\n\n"
-        f"🏷️ Label distribution:\n{label_lines}"
-    )
-    return output_path, stats
-# ── UI ──────────────────────────────────────────────────────────────────────
-with gr.Blocks(title="Log Classification System", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("""
-# 🔍 Log Classification System
-**3-tier hybrid pipeline** → 🟢 Regex · 🔵 BERT + LogReg · 🟡 LLM
-Built to mimic production enterprise log monitoring architecture.
-""")
-    with gr.Tabs():
-        # ── Tab 1: Single Log ────────────────────────────────────────────
-        with gr.Tab("Single Log"):
-            with gr.Row():
-                source_input = gr.Dropdown(
-                    choices=SOURCES,
-                    value="ModernCRM",
-                    label="Source System",
-                )
-                log_input = gr.Textbox(
-                    label="Log Message",
-                    placeholder="Paste a log message here...",
-                    lines=3,
-                )
-            classify_btn = gr.Button("Classify", variant="primary")
-            with gr.Row():
-                label_out      = gr.Textbox(label="🏷️ Predicted Label",     interactive=False)
-                tier_out       = gr.Textbox(label="⚙️ Tier Used",           interactive=False)
-                confidence_out = gr.Textbox(label="📈 Confidence",          interactive=False)
-                latency_out    = gr.Textbox(label="⏱️ Latency",             interactive=False)
-            classify_btn.click(
-                fn=classify_single,
-                inputs=[source_input, log_input],
-                outputs=[label_out, tier_out, confidence_out, latency_out],
-            )
-            gr.Examples(
-                examples=EXAMPLE_LOGS,
-                inputs=[source_input, log_input],
-                label="📋 Example Logs (click to try)",
-            )
-        # ── Tab 2: Batch CSV ─────────────────────────────────────────────
-        with gr.Tab("Batch CSV Upload"):
-            gr.Markdown("""
-Upload a CSV with columns: **`source`**, **`log_message`**
-Download the classified CSV with added columns: `predicted_label`, `tier_used`, `confidence`.
-""")
-            with gr.Row():
-                with gr.Column():
-                    csv_input  = gr.File(label="📂 Upload CSV", file_types=[".csv"])
-                    batch_btn  = gr.Button("Classify All", variant="primary")
-                with gr.Column():
-                    csv_output = gr.File(label="📥 Download Classified CSV")
-                    stats_out  = gr.Textbox(label="📊 Stats", lines=12, interactive=False)
-            batch_btn.click(
-                fn=classify_batch,
-                inputs=[csv_input],
-                outputs=[csv_output, stats_out],
-            )
-            gr.Markdown("""
-**Sample CSV format:**
-```
-source,log_message
-ModernCRM,User User123 logged in.
-LegacyCRM,Case escalation for ticket ID 7324 failed.
-BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
-```
-""")
-        # ── Tab 3: Architecture ──────────────────────────────────────────
-        with gr.Tab("Architecture"):
-            gr.Markdown("""
-## 🏗️ 3-Tier Hybrid Pipeline
-| Tier | Method | Coverage | Latency | When Used |
-|------|--------|----------|---------|-----------|
-| 🟢 Regex | Python `re` patterns | ~21% | < 1ms | Fixed patterns (login, backup, etc.) |
-| 🔵 BERT | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories with 150+ samples |
-| 🟡 LLM | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM logs, rare patterns |
-## 📊 Model Performance (from training)
-- **BERT + LogReg** trained on 2,410 synthetic enterprise logs
-- **Confidence threshold**: 0.5 (below → escalate to LLM)
-- **Source-aware routing**: `LegacyCRM` bypasses ML entirely (only 7 training samples)
-## 🔑 Environment Variables
-| Secret | Required For |
-|--------|-------------|
-| `HF_TOKEN` | LLM inference (LegacyCRM logs) |
-""")
-if __name__ == "__main__":
-    demo.launch()

HF/benchmark.py DELETED Viewed

@@ -1,214 +0,0 @@
-"""
-benchmark.py — Full Benchmark Harness
-Outputs a CSV with columns:
-  batch_size, mode, throughput_logs_sec, p50_ms, p95_ms, p99_ms, cpu_pct, ram_mb, tier_regex_pct, tier_bert_pct, tier_llm_pct
-Usage:
-  python benchmark.py --logs 5000 --output benchmark_results.csv
-What it measures:
-  - Batch size sweep: 1, 8, 16, 32, 64, 128
-  - Throughput (logs/sec)
-  - Latency: p50 / p95 / p99 (per-log)
-  - CPU and RAM during inference
-  - Tier distribution (Regex % / BERT % / LLM %)
-Google interview talking point:
-  "I designed a benchmark harness that sweeps batch sizes and measures
-   latency percentiles + resource utilization, so I can show the
-   throughput-latency tradeoff curve empirically."
-"""
-from __future__ import annotations
-import argparse
-import csv
-import os
-import random
-import sys
-import time
-import statistics
-from pathlib import Path
-import psutil
-# ── Synthetic log generator (no external deps needed) ────────────────────────
-SOURCES = ["ModernCRM", "ModernHR", "BillingSystem", "AnalyticsEngine", "ThirdPartyAPI"]
-_LOG_TEMPLATES = [
-    ("ModernCRM",       "User User{id} logged in."),
-    ("ModernCRM",       "IP {ip} blocked due to potential attack"),
-    ("ModernHR",        "Multiple login failures occurred on user {id} account"),
-    ("ModernHR",        "Admin access escalation detected for user {id}"),
-    ("BillingSystem",   "GET /api/v2/invoices HTTP/1.1 status: {code} len: {len} time: {t}"),
-    ("BillingSystem",   "POST /api/v1/payments HTTP/1.1 status: {code} len: {len} time: {t}"),
-    ("AnalyticsEngine", "System crashed due to disk I/O failure on node-{n}"),
-    ("AnalyticsEngine", "Backup completed successfully."),
-    ("ThirdPartyAPI",   "Service payments-api is unreachable after 3 retries"),
-    ("ThirdPartyAPI",   "CPU usage at {pct}% for the last 10 minutes on node-{n}"),
-    ("AnalyticsEngine", "CRITICAL: data corruption detected on shard-{n}"),
-    ("ModernCRM",       "Health check passed for service {svc}"),
-]
-def _rand_ip() -> str:
-    """Return a random dotted-quad address string for synthetic log lines.
-
-    The first octet is drawn from 10–192, the middle two from 0–255 and the
-    last from 1–254 (all bounds inclusive).
-    """
-    return f"{random.randint(10,192)}.{random.randint(0,255)}.{random.randint(0,255)}.{random.randint(1,254)}"
-def _fill(template: str) -> str:
-    """Return `template` with every known ``{placeholder}`` replaced by a random value.
-
-    All replacement values are generated on every call, whether or not the
-    corresponding placeholder occurs in `template`, so each call makes the same
-    sequence of random-module calls.
-    """
-    return (template
-        .replace("{id}",  str(random.randint(100, 99999)))
-        .replace("{ip}",  _rand_ip())
-        .replace("{code}", random.choice(["200", "201", "400", "404", "500", "503"]))
-        .replace("{len}",  str(random.randint(100, 9999)))
-        .replace("{t}",    f"{random.uniform(0.01, 2.5):.2f}")
-        .replace("{n}",    str(random.randint(1, 20)))
-        .replace("{pct}",  str(random.randint(60, 99)))
-        .replace("{svc}",  random.choice(["auth-api", "billing", "analytics", "events"]))
-    )
-def generate_logs(n: int) -> list[tuple[str, str]]:
-    random.seed(42)
-    return [
-        (src, _fill(tmpl))
-        for src, tmpl in random.choices(_LOG_TEMPLATES, k=n)
-    ]
-# ── Benchmark runner ─────────────────────────────────────────────────────────
-def run_benchmark(
-    logs: list[tuple[str, str]],
-    batch_sizes: list[int],
-    output_csv: str,
-    warmup_n: int = 50,
-) -> list[dict]:
-    from classify import classify_logs, pipeline_summary
-    proc = psutil.Process(os.getpid())
-    rows: list[dict] = []
-    # Warmup (model load, JIT, etc.)
-    print(f"🔥 Warming up with {warmup_n} logs…")
-    classify_logs(logs[:warmup_n])
-    for bs in batch_sizes:
-        # Slice logs into batches of size `bs`
-        batches = [logs[i:i + bs] for i in range(0, len(logs), bs)]
-        if not batches:
-            continue
-        per_log_latencies: list[float] = []
-        cpu_samples: list[float] = []
-        ram_samples: list[float] = []
-        all_results: list[dict] = []
-        print(f"\n📐 Batch size = {bs} ({len(batches)} batches × {bs} logs)…")
-        wall_start = time.perf_counter()
-        for batch in batches:
-            t0         = time.perf_counter()
-            results    = classify_logs(batch)
-            t1         = time.perf_counter()
-            batch_ms   = (t1 - t0) * 1000
-            per_log_ms = batch_ms / len(batch)
-            per_log_latencies.extend([per_log_ms] * len(batch))
-            all_results.extend(results)
-            # Resource snapshot
-            cpu_samples.append(proc.cpu_percent(interval=None))
-            ram_samples.append(proc.memory_info().rss / 1_048_576)  # MB
-        wall_elapsed = time.perf_counter() - wall_start
-        total_logs   = len(logs)
-        throughput   = round(total_logs / wall_elapsed, 1)
-        per_log_latencies.sort()
-        n = len(per_log_latencies)
-        summary = pipeline_summary(all_results)
-        tier_stats = summary["tier_stats"]
-        def tier_pct(name):
-            return tier_stats.get(name, {}).get("pct", 0.0)
-        row = {
-            "batch_size":       bs,
-            "total_logs":       total_logs,
-            "elapsed_sec":      round(wall_elapsed, 2),
-            "throughput_logs_sec": throughput,
-            "p50_ms":           round(statistics.median(per_log_latencies), 3),
-            "p95_ms":           round(per_log_latencies[min(int(n * 0.95), n - 1)], 3),
-            "p99_ms":           round(per_log_latencies[min(int(n * 0.99), n - 1)], 3),
-            "mean_ms":          round(statistics.mean(per_log_latencies), 3),
-            "cpu_mean_pct":     round(statistics.mean(cpu_samples), 1) if cpu_samples else 0,
-            "cpu_max_pct":      round(max(cpu_samples), 1) if cpu_samples else 0,
-            "ram_mean_mb":      round(statistics.mean(ram_samples), 1) if ram_samples else 0,
-            "ram_max_mb":       round(max(ram_samples), 1) if ram_samples else 0,
-            "tier_regex_pct":   tier_pct("Regex"),
-            "tier_bert_pct":    tier_pct("BERT"),
-            "tier_llm_pct":     tier_pct("LLM") + tier_pct("LLM (fallback)"),
-        }
-        rows.append(row)
-        print(f"  ✅ Throughput: {throughput} logs/sec | "
-              f"p50={row['p50_ms']}ms p95={row['p95_ms']}ms p99={row['p99_ms']}ms | "
-              f"CPU={row['cpu_mean_pct']}% RAM={row['ram_mean_mb']}MB")
-        print(f"  📊 Tiers: Regex={row['tier_regex_pct']}% "
-              f"BERT={row['tier_bert_pct']}% "
-              f"LLM={row['tier_llm_pct']}%")
-    # Write CSV
-    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
-    with open(output_csv, "w", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=rows[0].keys())
-        writer.writeheader()
-        writer.writerows(rows)
-    print(f"\n✅ Benchmark results saved → {output_csv}")
-    return rows
-# ── Scaling stress test ──────────────────────────────────────────────────────
-def stress_test(sizes: list[int] = [5_000, 20_000, 50_000, 100_000]) -> None:
-    """Quick throughput check at different total log counts."""
-    from classify import classify_logs
-    print("\n🔥 Stress Test — Scaling")
-    print(f"{'N logs':>10} {'Elapsed(s)':>12} {'Throughput':>12} {'p95_ms':>10}")
-    print("─" * 50)
-    for n in sizes:
-        logs = generate_logs(n)
-        t0   = time.perf_counter()
-        classify_logs(logs)
-        elapsed = time.perf_counter() - t0
-        tput = n / elapsed
-        # Rough p95 approximation: time / n * correction factor
-        p95_approx = (elapsed / n * 1000) * 1.5
-        print(f"{n:>10,} {elapsed:>12.2f}s {tput:>12.1f}/s {p95_approx:>10.1f}ms")
-# ── CLI ──────────────────────────────────────────────────────────────────────
-def main():
-    parser = argparse.ArgumentParser(description="Log pipeline benchmark harness")
-    parser.add_argument("--logs",    type=int, default=5_000,
-                        help="Number of logs to benchmark (default: 5000)")
-    parser.add_argument("--output",  default="benchmark_results.csv",
-                        help="Output CSV path")
-    parser.add_argument("--stress",  action="store_true",
-                        help="Run scaling stress test (5k, 20k, 50k, 100k)")
-    parser.add_argument("--batches", default="1,8,16,32,64,128",
-                        help="Comma-separated batch sizes to sweep")
-    args = parser.parse_args()
-    batch_sizes = [int(x) for x in args.batches.split(",")]
-    logs = generate_logs(args.logs)
-    print(f"📦 Generated {len(logs):,} synthetic logs")
-    run_benchmark(logs, batch_sizes, args.output)
-    if args.stress:
-        stress_test()
-if __name__ == "__main__":
-    main()

HF/error_analysis.py DELETED Viewed

@@ -1,250 +0,0 @@
-"""
-error_analysis.py — Deep Dive into Unclassified / Misclassified Logs
-This script addresses the 76 unclassified logs from the 20k run.
-It answers:
-  1. What do these logs look like? (print + group)
-  2. Why did the model fail? (pattern analysis)
-  3. What should we do? (actionable fix suggestions)
-Google interview talking point:
-  "I performed structured error analysis on my model's failure cases.
-   I grouped them by failure type — vocabulary mismatch, ambiguous intent,
-   formatting noise — and used that to drive targeted improvements."
-Usage:
-  python error_analysis.py --input output.csv   # post-classify CSV
-  python error_analysis.py --simulate           # demo with synthetic data
-"""
-from __future__ import annotations
-import argparse
-import re
-import sys
-from collections import Counter, defaultdict
-from typing import Optional
-import pandas as pd
-# ── Failure mode taxonomy ────────────────────────────────────────────────────
-class FailureMode:
-    """String constants naming the heuristic reasons a log was left unclassified."""
-    RARE_VOCAB      = "rare_vocabulary"       # domain-specific terms not in training
-    AMBIGUOUS       = "ambiguous_intent"      # log could match multiple categories
-    LEGACY_FORMAT   = "legacy_format"         # non-standard / old-school formatting
-    TRUNCATED       = "truncated_or_noisy"    # partial / malformed log line
-    NUMERIC_ONLY    = "mostly_numeric"        # ID/code-heavy, no semantic signal
-    MULTI_EVENT     = "multi_event"           # one line, multiple events
-    UNKNOWN         = "unknown"               # none of the heuristics matched
-def _detect_failure_mode(log: str) -> str:
-    """Heuristic: guess WHY this log was unclassified.
-
-    The checks run in a fixed order and the first match wins: length under 20
-    chars, digit ratio above 40%, multi-event separators, legacy keywords,
-    two or more ambiguity keywords, rare-vocabulary keywords. Keyword checks
-    are case-insensitive substring matches.
-
-    Args:
-        log: Raw log message text.
-
-    Returns:
-        One of the ``FailureMode`` string constants; ``FailureMode.UNKNOWN``
-        when no heuristic matches.
-    """
-    log_l = log.lower()
-    if len(log) < 20:
-        return FailureMode.TRUNCATED
-    # Check ratio of digits to total chars
-    digit_ratio = sum(c.isdigit() for c in log) / max(len(log), 1)
-    if digit_ratio > 0.40:
-        return FailureMode.NUMERIC_ONLY
-    # Looks like it has 2+ events joined
-    # (" AND " is matched case-sensitively against the original text)
-    if log.count(";") >= 2 or log.count(" AND ") >= 1 or log.count(" | ") >= 2:
-        return FailureMode.MULTI_EVENT
-    # Legacy / unusual format signals
-    legacy_signals = ["ticket", "escalation", "crm", "deprecated", "retire",
-                      "module will be", "workflow", "assigned agent"]
-    if any(s in log_l for s in legacy_signals):
-        return FailureMode.LEGACY_FORMAT
-    # Ambiguity signals — could be error OR security
-    ambiguous_signals = ["failed", "error", "unauthorized", "denied", "blocked"]
-    if sum(1 for s in ambiguous_signals if s in log_l) >= 2:
-        return FailureMode.AMBIGUOUS
-    # Rare vocabulary
-    rare_signals = ["sla", "oncall", "runbook", "pagerduty", "janitor", "gc ", "eviction"]
-    if any(s in log_l for s in rare_signals):
-        return FailureMode.RARE_VOCAB
-    return FailureMode.UNKNOWN
-def _suggest_fix(mode: str) -> str:
-    """Return the one-line remediation hint for a failure mode.
-
-    Args:
-        mode: A ``FailureMode`` string constant.
-
-    Returns:
-        The matching hint, or "Manual review required." for an unrecognised mode.
-    """
-    fixes = {
-        FailureMode.RARE_VOCAB:    "Add 5–10 training examples covering this vocabulary; or add regex rule.",
-        FailureMode.AMBIGUOUS:     "Use multi-label or add a dedicated 'Ambiguous' class; review confidence threshold.",
-        FailureMode.LEGACY_FORMAT: "Route all legacy-format logs to LLM tier; add few-shot examples for LLM prompt.",
-        FailureMode.TRUNCATED:     "Add input validation: reject/flag logs under 15 chars before classification.",
-        FailureMode.NUMERIC_ONLY:  "Add regex patterns for structured numeric formats (job IDs, error codes, etc.).",
-        FailureMode.MULTI_EVENT:   "Pre-process: split multi-event lines on ';' or ' | ' before classifying.",
-        FailureMode.UNKNOWN:       "Manually review and add to training data or LLM few-shot examples.",
-    }
-    return fixes.get(mode, "Manual review required.")
-# ── Core analysis ────────────────────────────────────────────────────────────
-def analyze_unclassified(df: pd.DataFrame, label_col: str = "predicted_label") -> None:
-    """Print a multi-section error report for the rows labelled "Unclassified".
-
-    Sections, in order: the raw logs (cut to 120 chars), grouping by heuristic
-    failure mode, top-20 token frequencies, length statistics, per-source
-    counts (only when a ``source`` column exists) and a fixed, prioritised
-    list of fixes.
-
-    Args:
-        df: Classified logs; must contain `label_col`. Log text is read from
-            ``log_message`` when that column exists, otherwise from the last
-            column.
-        label_col: Name of the column holding the predicted label.
-
-    Returns:
-        None. Everything is printed to stdout; the function returns early when
-        no row is labelled "Unclassified".
-    """
-    unclassified = df[df[label_col] == "Unclassified"].copy()
-    total_unclassified = len(unclassified)
-    if total_unclassified == 0:
-        print("✅ No unclassified logs found!")
-        return
-    print(f"\n{'='*70}")
-    print(f"🔍 ERROR ANALYSIS: {total_unclassified} Unclassified Logs")
-    print(f"{'='*70}\n")
-    # ── Step 1: Print all unclassified logs ─────────────────────────────────
-    log_col = "log_message" if "log_message" in df.columns else df.columns[-1]
-    print(f"{'#':>4}  {'Log Message'}")
-    print("─" * 80)
-    for i, (_, row) in enumerate(unclassified.iterrows(), 1):
-        log = str(row.get(log_col, ""))
-        print(f"{i:>4}. {log[:120]}")
-    # ── Step 2: Group by failure mode ───────────────────────────────────────
-    print(f"\n{'='*70}")
-    print("📂 GROUPING BY FAILURE MODE")
-    print("─" * 70)
-    groups: dict[str, list[str]] = defaultdict(list)
-    for _, row in unclassified.iterrows():
-        log  = str(row.get(log_col, ""))
-        mode = _detect_failure_mode(log)
-        groups[mode].append(log)
-    # Largest group first.
-    for mode, logs in sorted(groups.items(), key=lambda x: -len(x[1])):
-        pct = len(logs) / total_unclassified * 100
-        print(f"\n🔹 {mode} — {len(logs)} logs ({pct:.1f}%)")
-        print(f"   💡 Fix: {_suggest_fix(mode)}")
-        print(f"   Examples:")
-        for log in logs[:3]:
-            print(f"     • {log[:110]}")
-    # ── Step 3: Token frequency analysis ────────────────────────────────────
-    print(f"\n{'='*70}")
-    print("📊 COMMON TOKENS IN UNCLASSIFIED LOGS")
-    print("─" * 70)
-    STOPWORDS = {"the", "a", "an", "is", "in", "on", "for", "to", "of",
-                 "and", "or", "by", "at", "with", "has", "was", "be",
-                 "this", "that", "it", "not", "are", "from", "as"}
-    all_tokens: list[str] = []
-    for _, row in unclassified.iterrows():
-        log    = str(row.get(log_col, "")).lower()
-        # Tokens are runs of 3+ lowercase ASCII letters; digits and punctuation are dropped.
-        tokens = re.findall(r"[a-z]{3,}", log)
-        all_tokens.extend(t for t in tokens if t not in STOPWORDS)
-    counter = Counter(all_tokens)
-    print("Top 20 tokens in unclassified logs:")
-    for token, count in counter.most_common(20):
-        # Bar length is capped at 40 characters.
-        bar = "█" * min(count, 40)
-        print(f"  {token:<20} {count:>4}  {bar}")
-    # ── Step 4: Length distribution ─────────────────────────────────────────
-    lengths = unclassified[log_col].apply(lambda x: len(str(x)))
-    print(f"\n{'='*70}")
-    print("📏 LOG LENGTH DISTRIBUTION (Unclassified)")
-    print(f"  Mean:   {lengths.mean():.1f} chars")
-    print(f"  Median: {lengths.median():.1f} chars")
-    print(f"  Min:    {lengths.min()} chars")
-    print(f"  Max:    {lengths.max()} chars")
-    short = (lengths < 30).sum()
-    if short:
-        print(f"  ⚠️  {short} logs under 30 chars — likely truncated/noisy")
-    # ── Step 5: Source breakdown ─────────────────────────────────────────────
-    if "source" in df.columns:
-        print(f"\n{'='*70}")
-        print("🏷️  UNCLASSIFIED BY SOURCE")
-        src_counts = unclassified["source"].value_counts()
-        for src, cnt in src_counts.items():
-            bar = "█" * min(cnt, 40)
-            print(f"  {src:<22} {cnt:>4}  {bar}")
-    # ── Step 6: Actionable summary ───────────────────────────────────────────
-    print(f"\n{'='*70}")
-    print("✅ ACTIONABLE FIXES (Priority Order)")
-    print("─" * 70)
-    dominant_mode = max(groups.items(), key=lambda x: len(x[1]))[0] if groups else FailureMode.UNKNOWN
-    # NOTE(review): this list is static text, including the quoted threshold
-    # value — it is not derived from the data or from the pipeline config.
-    fixes = [
-        (1, "regex",    "Add patterns for top unclassified tokens to processor_regex.py"),
-        (2, "training", "Add 10–20 examples per failure mode to training data"),
-        (3, "llm",      "For LEGACY_FORMAT failures: add to LLM few-shot examples"),
-        (4, "preproc",  "Pre-process: split multi-event logs, reject truncated logs"),
-        (5, "threshold","Tune BERT confidence threshold (currently 0.30 — try 0.40)"),
-    ]
-    for priority, area, fix in fixes:
-        print(f"  {priority}. [{area.upper():^10}] {fix}")
-    print(f"\n📌 Dominant failure mode: '{dominant_mode}' ({len(groups.get(dominant_mode,[]))} logs)")
-    print(f"   Start here: {_suggest_fix(dominant_mode)}\n")
-# ── Simulate 76 unclassified logs for demo ────────────────────────────────────
-def _simulate_unclassified() -> pd.DataFrame:
-    """Generate synthetic 'unclassified' logs that mimic real failure patterns.
-
-    Returns:
-        A 76-row DataFrame with columns ``source``, ``log_message`` and
-        ``predicted_label`` (always "Unclassified"). The messages are the
-        templates below, repeated in order.
-    """
-    logs = [
-        # Legacy format / CRM
-        "Case escalation for ticket ID 9021 failed: agent inactive.",
-        "CRM module 'ReportGenerator' will be retired in v4.1.",
-        "Workflow for approval chain #4421 stalled at step 3.",
-        "SLA breach detected for case ID 7701 (P1, 4h breach).",
-        # Ambiguous
-        "Service auth-api failed and unauthorized access was logged.",
-        "Error: blocked request from 10.0.0.5 — reason unknown.",
-        # Truncated / noisy
-        "ERR",
-        "srv timeout",
-        "node-7",
-        # Numeric-heavy
-        "8821 9001 443 0 0 DROP IN=eth0 OUT= MAC=",
-        "16 0 0 1 2024-01-14 03:21:00.001",
-        # Multi-event
-        "Backup started; disk usage at 92%; health check failed | node-3",
-        # Rare vocab
-        "PagerDuty alert triggered for on-call rotation P1-incident.",
-        "GC eviction: 3.2GB heap compacted in 420ms.",
-        "Janitor job completed: 14,000 stale tokens purged.",
-        "Runbook auto-remediation triggered for alert ALT-9021.",
-    ]
-    # Repeat the 16 templates five times (80) and cut to exactly 76 rows, the
-    # length the other two columns below are built with.
-    padded = (logs * 5)[:76]
-    return pd.DataFrame({
-        "source":          ["ModernCRM"] * 30 + ["LegacyCRM"] * 20 + ["AnalyticsEngine"] * 26,
-        "log_message":     padded,
-        "predicted_label": ["Unclassified"] * 76,
-    })
-# ── CLI ──────────────────────────────────────────────────────────────────────
-def main():
-    """CLI entry point: analyse a classified CSV or the built-in synthetic sample.
-
-    ``--simulate`` takes precedence over ``--input`` when both are given. With
-    neither, the help text is printed and the process exits with status 1.
-    """
-    parser = argparse.ArgumentParser(description="Analyze unclassified/misclassified logs")
-    parser.add_argument("--input",    help="Path to classified CSV from classify_csv()")
-    parser.add_argument("--simulate", action="store_true",
-                        help="Run with synthetic unclassified logs (no CSV needed)")
-    parser.add_argument("--label-col", default="predicted_label",
-                        help="Column name that holds the predicted label")
-    args = parser.parse_args()
-    if args.simulate:
-        df = _simulate_unclassified()
-        print("🎭 Running with SIMULATED 76 unclassified logs…")
-    elif args.input:
-        df = pd.read_csv(args.input)
-    else:
-        parser.print_help()
-        sys.exit(1)
-    analyze_unclassified(df, label_col=args.label_col)
-if __name__ == "__main__":
-    main()

HF/models/log_classifier.joblib DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9bfe9c71b71412797de0d426be2255566dbf6cf87b3f2ae5d2cd1fd69a98d18d
-size 23997

HF/onnx_model/config.json DELETED Viewed

@@ -1,24 +0,0 @@
-{
-  "architectures": [
-    "BertModel"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 384,
-  "initializer_range": 0.02,
-  "intermediate_size": 1536,
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 6,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
-  "transformers_version": "4.57.6",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 30522
-}

HF/onnx_model/special_tokens_map.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}

HF/onnx_model/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

HF/onnx_model/tokenizer_config.json DELETED Viewed

@@ -1,65 +0,0 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "100": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "clean_up_tokenization_spaces": false,
-  "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
-  "do_lower_case": true,
-  "extra_special_tokens": {},
-  "mask_token": "[MASK]",
-  "max_length": 128,
-  "model_max_length": 512,
-  "never_split": null,
-  "pad_to_multiple_of": null,
-  "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
-  "sep_token": "[SEP]",
-  "stride": 0,
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
-  "unk_token": "[UNK]"
-}

HF/onnx_model/vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

HF/processor_llm.py DELETED Viewed

@@ -1,192 +0,0 @@
-"""
-processor_llm.py — Tier 3: LLM-based Classifier
-Used for:
-  - LegacyCRM logs (Workflow Error, Deprecation Warning)
-  - BERT fallback when confidence < threshold
-Production hardening in V3:
-  - Timeout (configurable, default 5s)
-  - Retry with exponential backoff (max 2 retries)
-  - Explicit failure modes: returns "Unclassified" on all error paths
-  - Caching for repeated log patterns (hash-based, in-memory)
-  - Token budget enforcement (max_tokens=15)
-"""
-from __future__ import annotations
-import os
-import re
-import time
-import hashlib
-import logging
-from functools import lru_cache
-from typing import Optional
-logger = logging.getLogger(__name__)
-# ── Config ─────────────────────────────────────────────────────────────────
-# Hugging Face API token, read from the environment once at import time.
-# When it is unset, classify_with_llm() skips inference and returns "Unclassified".
-HF_TOKEN   = os.getenv("HF_TOKEN")
-# Model identifier passed to the chat-completions call.
-LLM_MODEL  = "mistralai/Mistral-7B-Instruct-v0.3"
-# The only labels _normalize() can return besides "Unclassified".
-VALID_CATEGORIES = ["Workflow Error", "Deprecation Warning"]
-# Retry / timeout config
-MAX_RETRIES     = 2
-RETRY_DELAY_SEC = 1.0   # doubles on each retry (exponential backoff)
-REQUEST_TIMEOUT = 5     # seconds — fail fast, do not hang pipeline
-# In-memory cache to avoid redundant LLM calls for repeated logs
-# Maps the MD5 hex digest of the stripped log message to its label.
-_RESPONSE_CACHE: dict[str, str] = {}
-MAX_CACHE_SIZE = 1000  # evict oldest when full (simple FIFO)
-# Sent as the "system" message of every request.
-SYSTEM_PROMPT = (
-    "You are an enterprise log classifier. "
-    "Classify log messages into exactly one category. "
-    "Return ONLY the category name — no explanation, no punctuation."
-)
-# Worked examples that _build_messages() inlines into the user prompt,
-# in this order, before the log that is actually being classified.
-FEW_SHOT_EXAMPLES = [
-    {
-        "log":   "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
-        "label": "Workflow Error",
-    },
-    {
-        "log":   "The 'BulkEmailSender' feature is no longer supported. Use 'EmailCampaignManager' instead.",
-        "label": "Deprecation Warning",
-    },
-    {
-        "log":   "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
-        "label": "Workflow Error",
-    },
-]
-# ── Cache helpers ────────────────────────────────────────────────────────────
-def _cache_key(log_msg: str) -> str:
-    """Return the hex MD5 digest of the whitespace-stripped log message."""
-    trimmed_bytes = log_msg.strip().encode()
-    digest = hashlib.md5(trimmed_bytes)
-    return digest.hexdigest()
-def _cache_get(log_msg: str) -> Optional[str]:
-    """Return the cached label for a log message, or None when it is not cached."""
-    lookup_key = _cache_key(log_msg)
-    return _RESPONSE_CACHE.get(lookup_key)
-def _cache_set(log_msg: str, label: str) -> None:
-    """
-    Store the label for a log message in the in-memory response cache.
-    When the cache already holds MAX_CACHE_SIZE entries and the key is new,
-    the oldest inserted entry is dropped first (FIFO). Overwriting a key that
-    is already cached never evicts anything.
-    """
-    key = _cache_key(log_msg)
-    # Replacing an existing entry does not grow the cache, so evicting in that
-    # case would throw away an unrelated entry for no reason.
-    if key not in _RESPONSE_CACHE and len(_RESPONSE_CACHE) >= MAX_CACHE_SIZE:
-        # dicts iterate in insertion order, so the first key is the oldest
-        oldest = next(iter(_RESPONSE_CACHE))
-        del _RESPONSE_CACHE[oldest]
-    _RESPONSE_CACHE[key] = label
-def get_cache_stats() -> dict:
-    """Report how many labels are cached alongside the configured capacity."""
-    current_size = len(_RESPONSE_CACHE)
-    return {"size": current_size, "max_size": MAX_CACHE_SIZE}
-# ── Prompt builder ───────────────────────────────────────────────────────────
-def _build_messages(log_msg: str) -> list[dict]:
-    """
-    Build the chat messages for one classification request.
-    Returns a two-element list: the system prompt, then a user message that
-    lists the valid categories, shows every few-shot example, and ends with
-    the log to classify followed by an open "Category:" line.
-    """
-    quoted_labels = ", ".join(f'"{name}"' for name in VALID_CATEGORIES)
-    prompt_parts = [
-        f'Classify the following log into one of these categories: {quoted_labels}.\n',
-        'If none fits, return "Unclassified".\n\n',
-    ]
-    # One worked example per few-shot entry, in declaration order.
-    prompt_parts.extend(
-        f'Log: {shot["log"]}\nCategory: {shot["label"]}\n\n'
-        for shot in FEW_SHOT_EXAMPLES
-    )
-    # The trailing "Category:" is left open for the model to complete.
-    prompt_parts.append(f"Log: {log_msg}\nCategory:")
-    system_message = {"role": "system", "content": SYSTEM_PROMPT}
-    user_message = {"role": "user",   "content": "".join(prompt_parts)}
-    return [system_message, user_message]
-# ── Normalize raw LLM output ─────────────────────────────────────────────────
-def _normalize(raw: str) -> str:
-    """
-    Reduce raw model output to a valid category name.
-    Surrounding whitespace and quote characters are removed, then the first
-    entry of VALID_CATEGORIES found as a case-insensitive substring is
-    returned. Output that mentions no category yields "Unclassified".
-    """
-    cleaned = raw.strip().strip('"').strip("'").lower()
-    return next(
-        (category for category in VALID_CATEGORIES if category.lower() in cleaned),
-        "Unclassified",
-    )
-# ── Main classify function ────────────────────────────────────────────────────
-def classify_with_llm(log_msg: str) -> str:
-    """
-    Tier 3 LLM classifier.
-    Steps, in order:
-      - Return the cached label when this log message was classified before.
-      - Return "Unclassified" when HF_TOKEN is not set.
-      - Create the inference client (timeout = REQUEST_TIMEOUT seconds).
-      - Call the model up to MAX_RETRIES + 1 times, sleeping between failed
-        attempts with a delay that starts at RETRY_DELAY_SEC and doubles.
-    Returns one of VALID_CATEGORIES or "Unclassified". Every failure,
-    including a failure to import or construct the client, is logged and
-    turned into "Unclassified" instead of being raised to the caller.
-    Only labels obtained from a completed request are cached.
-    """
-    # ── Cache hit ────────────────────────────────────────────────────────────
-    cached = _cache_get(log_msg)
-    if cached is not None:
-        logger.debug(f"[LLM] Cache hit for: {log_msg[:60]}")
-        return cached
-    # ── Preconditions ────────────────────────────────────────────────────────
-    if not HF_TOKEN:
-        logger.warning("[LLM] HF_TOKEN not set — returning Unclassified")
-        return "Unclassified"
-    # Importing the package and building the client can fail as well; those
-    # failures must follow the same "Unclassified" path as request failures.
-    try:
-        from huggingface_hub import InferenceClient
-        client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
-    except Exception as e:
-        logger.error(f"[LLM] Could not create inference client: {e}")
-        return "Unclassified"
-    # ── Inference with retry ─────────────────────────────────────────────────
-    delay = RETRY_DELAY_SEC
-    # Attempts are numbered 1 .. MAX_RETRIES + 1 (one initial try plus retries).
-    for attempt in range(1, MAX_RETRIES + 2):
-        try:
-            response = client.chat.completions.create(
-                model=LLM_MODEL,
-                messages=_build_messages(log_msg),
-                max_tokens=15,
-                temperature=0.1,
-            )
-            raw   = response.choices[0].message.content
-            label = _normalize(raw)
-            _cache_set(log_msg, label)
-            logger.debug(f"[LLM] Attempt {attempt}: '{raw.strip()}' → '{label}'")
-            return label
-        except Exception as e:
-            if attempt <= MAX_RETRIES:
-                logger.warning(f"[LLM] Attempt {attempt} failed ({e}), retrying in {delay:.1f}s…")
-                time.sleep(delay)
-                delay *= 2  # exponential backoff
-            else:
-                logger.error(f"[LLM] All {MAX_RETRIES + 1} attempts failed. Last error: {e}")
-    return "Unclassified"
-# ── Batch classify (serial — LLM is already rate-limited) ────────────────────
-def classify_batch_llm(log_msgs: list[str]) -> list[str]:
-    """Classify each log with classify_with_llm, one after another, preserving input order."""
-    labels: list[str] = []
-    for message in log_msgs:
-        labels.append(classify_with_llm(message))
-    return labels
-# ── CLI test ─────────────────────────────────────────────────────────────────
-if __name__ == "__main__":
-    # Manual smoke test: classify a few sample logs, then time a repeated call.
-    logging.basicConfig(level=logging.INFO)
-    samples = [
-        "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
-        "The 'ReportGenerator' module will be retired in version 4.0. Migrate to 'AdvancedAnalyticsSuite'.",
-        "System reboot initiated by user 12345.",   # should be Unclassified
-    ]
-    for sample in samples:
-        label = classify_with_llm(sample)
-        print(f"{label:25s} | {sample[:80]}")
-    # Second call for the first sample: served from cache if the first call succeeded
-    print("\n── Cache hit test ──")
-    started = time.perf_counter()
-    classify_with_llm(samples[0])
-    elapsed_ms = (time.perf_counter() - started) * 1000
-    print(f"Cache hit latency: {elapsed_ms:.2f}ms")
-    print(f"Cache stats: {get_cache_stats()}")

HF/processor_regex.py DELETED Viewed

@@ -1,220 +0,0 @@
-"""
-processor_regex.py — Tier 1: Rule-based Classifier
-Target coverage: 40%+ (up from 15%)
-Latency: sub-millisecond per log
-New pattern groups added:
-  - HTTP request/response logs   (was completely missing!)
-  - Auth / credential events     (login failures, MFA, lockouts)
-  - System/infra events          (disk, CPU, memory, cron)
-  - Network / firewall events    (IP block, port scan)
-  - Structured error codes       (ERROR, CRITICAL prefix logs)
-"""
-from __future__ import annotations
-import re
-import time
-from typing import Optional
-# ---------------------------------------------------------------------------
-# Pattern registry: (compiled_pattern, label)
-# Order matters — more specific patterns FIRST to avoid mis-labeling.
-# ---------------------------------------------------------------------------
-# Each entry is (regex source, label). classify_with_regex() walks the compiled
-# list top to bottom and returns the label of the FIRST pattern that matches,
-# so an earlier entry always wins over a later one.
-# All patterns are compiled with re.IGNORECASE, so e.g. r"\bERROR\b" also
-# matches the lowercase word "error".
-_RAW_PATTERNS: list[tuple[str, str]] = [
-    # ── HTTP Status ─────────────────────────────────────────────────────────
-    # Covers: GET/POST/PUT/DELETE/PATCH + status code in request line
-    (r"\b(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s+\S+\s+HTTP/\d", "HTTP Status"),
-    # Nova / OpenStack style
-    # NOTE: any line matching this also matches the request-line pattern above,
-    # which is tried first; both give the same label.
-    (r"nova\.\S+\s+(GET|POST|PUT|DELETE)\s+\S+\s+HTTP/\d", "HTTP Status"),
-    # Status code only style: "returned HTTP 200" or "status: 404"
-    (r"\bstatus[:\s]+\d{3}\b", "HTTP Status"),
-    (r"\breturned\s+HTTP\s+\d{3}\b", "HTTP Status"),
-    (r"\bHTTP\s+status\s+code\s*[:-]?\s*\d{3}\b", "HTTP Status"),
-    # API response style
-    (r"\bAPI\s+(call|request)\s+\S+\s+completed\s+with\s+status\s+\d{3}", "HTTP Status"),
-    (r"\bEndpoint\s+\S+\s+responded\s+with\s+code\s+\d{3}", "HTTP Status"),
-    # ── Security Alert ──────────────────────────────────────────────────────
-    # Brute force / login failures
-    (r"(multiple\s+)?(bad\s+|failed?\s+)?login\s+(failure|attempt|failures)", "Security Alert"),
-    (r"brute[\s_-]force\s+(login|attack|attempt)", "Security Alert"),
-    # Unauthorized access
-    (r"unauthorized\s+(access|admin|privilege|attempt)", "Security Alert"),
-    (r"access\s+denied\s+(for|to)\s+(user|ip|host)", "Security Alert"),
-    # Privilege escalation
-    (r"(admin\s+)?access\s+escalation\s+detected", "Security Alert"),
-    (r"privilege\s+(elev|escalat)", "Security Alert"),
-    # IP blocking / suspicious traffic
-    (r"IP\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+blocked", "Security Alert"),
-    (r"(suspicious|anomalous)\s+(login|traffic|activity|request)", "Security Alert"),
-    (r"potential\s+(DDoS|attack|breach|intrusion)", "Security Alert"),
-    (r"security\s+breach\s+suspected", "Security Alert"),
-    (r"(API\s+security\s+breach|bypass\s+API\s+security)", "Security Alert"),
-    (r"port\s+scan\s+(detected|attempt)", "Security Alert"),
-    # ── User Action ─────────────────────────────────────────────────────────
-    (r"User\s+\w+\d*\s+logged\s+(in|out)", "User Action"),
-    (r"Account\s+(with\s+)?ID\s+\S+\s+created\s+by", "User Action"),
-    (r"User\s+\w+\d*\s+(updated\s+profile|changed\s+password|enabled\s+two|downloaded|exported)", "User Action"),
-    (r"(New\s+user|user\s+\w+\d*)\s+registered", "User Action"),
-    (r"Account\s+\S+\s+deleted\s+by\s+(administrator|admin)", "User Action"),
-    (r"User\s+\w+\d*\s+(tried|attempted)", "User Action"),
-    # ── System Notification ─────────────────────────────────────────────────
-    # Backup events
-    (r"Backup\s+(started|ended|completed\s+successfully|failed|aborted)", "System Notification"),
-    (r"System\s+updated\s+to\s+version", "System Notification"),
-    (r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user", "System Notification"),
-    (r"Disk\s+cleanup\s+completed\s+successfully", "System Notification"),
-    (r"System\s+reboot\s+initiated\s+by\s+user", "System Notification"),
-    (r"Scheduled\s+maintenance\s+(started|completed)", "System Notification"),
-    (r"Service\s+\w+\s+restarted\s+successfully", "System Notification"),
-    # NEW: cache, cron, health check, cert, log rotation
-    (r"Cache\s+cleared\s+successfully", "System Notification"),
-    (r"Log\s+rotation\s+completed", "System Notification"),
-    (r"Health\s+check\s+(passed|failed)\s+for\s+service", "System Notification"),
-    # NOTE(review): the certificate and cron patterns below require a trailing
-    # "successfully", even for "expired"/"revoked"/"failed" — confirm intended.
-    (r"Certificate\s+(renewed|expired|revoked)\s+successfully", "System Notification"),
-    (r"Cron\s+job\s+\S+\s+(executed|failed|completed)\s+successfully", "System Notification"),
-    (r"(Disk|Storage)\s+(usage|space)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
-    (r"CPU\s+usage\s+at\s+\d+%", "System Notification"),
-    (r"Memory\s+(usage|limit)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
-    # Deployment / config
-    (r"Deployment\s+(of|for)\s+\S+\s+(completed|failed|started)", "System Notification"),
-    (r"Configuration\s+(reloaded|updated|applied)\s+successfully", "System Notification"),
-    # ── Error ───────────────────────────────────────────────────────────────
-    (r"\bERROR\b.*\b(exception|failed|failure|crash|timeout|unavailable)\b", "Error"),
-    (r"System\s+crashed\s+due\s+to", "Error"),
-    (r"(connection|request|task|job)\s+(timed?\s*out|timeout)", "Error"),
-    (r"service\s+\S+\s+(is\s+down|unavailable|unreachable)", "Error"),
-    (r"database\s+connection\s+(failed|refused|lost|dropped)", "Error"),
-    (r"disk\s+(I/O\s+)?failure", "Error"),
-    (r"driver\s+error(s)?\s+(when|during|on)", "Error"),
-    (r"(replication|sync)\s+task\s+(did\s+not\s+complete|failed)", "Error"),
-    (r"null\s+pointer|segmentation\s+fault|stack\s+overflow", "Error"),
-    # ── Critical Error ──────────────────────────────────────────────────────
-    # These come after the Error group, so a line that matches both groups
-    # is labelled "Error".
-    (r"\bCRITICAL\b", "Critical Error"),
-    (r"(FATAL|PANIC)\b", "Critical Error"),
-    (r"(data\s+loss|data\s+corruption)\s+(detected|occurred)", "Critical Error"),
-    (r"(cluster|node|shard)\s+(failure|crashed|went\s+down)", "Critical Error"),
-    (r"(catastrophic|unrecoverable)\s+(failure|error)", "Critical Error"),
-    (r"kernel\s+panic", "Critical Error"),
-    (r"out[\s-]of[\s-](memory|disk)\s+(error|killed|OOM)", "Critical Error"),
-]
-# Compiled once at import, case-insensitively, so lookups never recompile
-REGEX_PATTERNS: list[tuple[re.Pattern, str]] = [
-    (re.compile(raw_pattern, flags=re.IGNORECASE), category)
-    for raw_pattern, category in _RAW_PATTERNS
-]
-def classify_with_regex(log_message: str) -> Optional[str]:
-    """
-    Tier 1: Rule-based classifier.
-    Scans REGEX_PATTERNS in order and returns the label of the first pattern
-    found anywhere in the message, or None when nothing matches.
-    """
-    first_hit = next(
-        (category for compiled, category in REGEX_PATTERNS if compiled.search(log_message)),
-        None,
-    )
-    return first_hit
-def get_regex_coverage(log_messages: list[str]) -> dict:
-    """
-    Summarise how many messages the regex tier can label.
-    Returns a dict with "total", "matched", "missed", "coverage_pct"
-    (matched share in percent, rounded to 2 decimals; 0.0 for empty input)
-    and "label_breakdown" (label -> number of messages given that label).
-    """
-    breakdown: dict[str, int] = {}
-    matched = 0
-    for message in log_messages:
-        category = classify_with_regex(message)
-        if not category:
-            continue
-        matched += 1
-        breakdown[category] = breakdown.get(category, 0) + 1
-    total = len(log_messages)
-    coverage = round(matched / total * 100, 2) if total else 0.0
-    return {
-        "total":        total,
-        "matched":      matched,
-        "missed":       total - matched,
-        "coverage_pct": coverage,
-        "label_breakdown": breakdown,
-    }
-def benchmark_regex(log_messages: list[str], runs: int = 3) -> dict:
-    """
-    Measure regex tier latency (p50 / p95 / p99 / mean) over multiple runs.
-    Every message is classified `runs` times and each call is timed on its
-    own. Returns a dict with "p50_ms", "p95_ms", "p99_ms" and "mean_ms",
-    all in milliseconds rounded to 4 decimals. When there is nothing to time
-    (no messages, or runs <= 0) every value is 0.0.
-    """
-    import statistics
-    per_log_ms: list[float] = []
-    for _ in range(runs):
-        for msg in log_messages:
-            t0 = time.perf_counter()
-            classify_with_regex(msg)
-            per_log_ms.append((time.perf_counter() - t0) * 1000)
-    if not per_log_ms:
-        # statistics.median()/mean() raise StatisticsError on empty data;
-        # report zeros instead, as get_regex_coverage() does for empty input.
-        return {"p50_ms": 0.0, "p95_ms": 0.0, "p99_ms": 0.0, "mean_ms": 0.0}
-    per_log_ms.sort()
-    sample_count = len(per_log_ms)
-    return {
-        "p50_ms":  round(statistics.median(per_log_ms), 4),
-        # int(n * q) is always < n for q < 1, so these indexes stay in range
-        "p95_ms":  round(per_log_ms[int(sample_count * 0.95)], 4),
-        "p99_ms":  round(per_log_ms[int(sample_count * 0.99)], 4),
-        "mean_ms": round(statistics.mean(per_log_ms), 4),
-    }
-# ── CLI self-test ────────────────────────────────────────────────────────────
-if __name__ == "__main__":
-    # (log line, expected label) pairs; None means no pattern should match.
-    cases: list[tuple[str, Optional[str]]] = [
-        # HTTP
-        ("GET /api/v2/resource HTTP/1.1 status: 200 len: 1583 time: 0.19", "HTTP Status"),
-        ("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05", "HTTP Status"),
-        ("nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404", "HTTP Status"),
-        # Security
-        ("Multiple login failures occurred on user 6454 account", "Security Alert"),
-        ("IP 192.168.133.114 blocked due to potential attack", "Security Alert"),
-        ("Brute force login attempt from 10.0.0.5 detected", "Security Alert"),
-        ("Admin access escalation detected for user 9429", "Security Alert"),
-        # User Action
-        ("User User12345 logged in.", "User Action"),
-        ("Account with ID 456 created by Admin.", "User Action"),
-        # System Notification
-        ("Backup completed successfully.", "System Notification"),
-        ("CPU usage at 98% for the last 10 minutes on node-7", "System Notification"),
-        ("Health check passed for service payments-api", "System Notification"),
-        # Error
-        ("System crashed due to disk I/O failure on node-3", "Error"),
-        ("Database connection failed after 3 retries", "Error"),
-        # Critical
-        ("CRITICAL: data corruption detected on shard-14", "Critical Error"),
-        ("kernel panic: not syncing: VFS: unable to mount root fs", "Critical Error"),
-        # Should be None (unmatched)
-        ("The 'BulkEmailSender' feature will be deprecated in v5.0.", None),
-        ("Case escalation for ticket 7324 failed.", None),
-    ]
-    # Print one table row per case and count how many labels were as expected
-    passed = 0
-    print(f"{'Expected':<22} {'Got':<22} {'✓/✗'} | Log")
-    print("─" * 100)
-    for line, wanted in cases:
-        actual = classify_with_regex(line)
-        if actual == wanted:
-            passed += 1
-            mark = "✓"
-        else:
-            mark = "✗"
-        print(f"{str(wanted):<22} {str(actual):<22} {mark}   | {line[:55]}")
-    print(f"\n{passed}/{len(cases)} correct")
-    # Coverage demo
-    lines_only = [line for line, _ in cases]
-    coverage = get_regex_coverage(lines_only)
-    print(f"\nCoverage: {coverage['coverage_pct']}%  ({coverage['matched']}/{coverage['total']} matched)")
-    print("Label breakdown:", coverage["label_breakdown"])
-    # Latency benchmark
-    latency = benchmark_regex(lines_only * 100)
-    print(f"\nLatency (p50/p95/p99): {latency['p50_ms']}ms / {latency['p95_ms']}ms / {latency['p99_ms']}ms")

HF/test/__init__.py DELETED Viewed

File without changes

HF/test/__pycache__/__init__.cpython-312.pyc DELETED Viewed

Binary file (139 Bytes)

HF/test/__pycache__/test_regex.cpython-312-pytest-9.0.3.pyc DELETED Viewed

Binary file (36.5 kB)

HF/test/test_llm.py DELETED Viewed

@@ -1,197 +0,0 @@
-"""
-tests/test_llm.py — Tests for Tier 3: LLM Classifier
-Tests verify:
-  1. Cache hit avoids API call
-  2. Retry logic on transient failure
-  3. Returns "Unclassified" on all error paths (never crashes pipeline)
-  4. Response normalization handles edge cases
-  5. No HF_TOKEN → returns Unclassified gracefully
-Run:
-  pytest tests/test_llm.py -v
-"""
-import sys, os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-import pytest
-from unittest.mock import patch, MagicMock, call
-import processor_llm as llm_module
-from processor_llm import (
-    classify_with_llm, get_cache_stats,
-    _cache_key, _cache_get, _cache_set, _normalize,
-    _RESPONSE_CACHE,
-)
-# ── Setup / teardown ──────────────────────────────────────────────────────────
-@pytest.fixture(autouse=True)
-def clear_cache():
-    """Empty the shared LLM response cache before and after every test in this module."""
-    # _RESPONSE_CACHE is the module-level dict imported from processor_llm,
-    # so clearing it in place also resets what classify_with_llm() sees.
-    _RESPONSE_CACHE.clear()
-    yield
-    # Teardown: do not leak entries written by the test into the next one.
-    _RESPONSE_CACHE.clear()
-# ── Normalize ─────────────────────────────────────────────────────────────────
-class TestNormalize:
-    """_normalize maps raw model output to a known category or "Unclassified"."""
-    WORKFLOW = "Workflow Error"
-    DEPRECATION = "Deprecation Warning"
-    UNKNOWN = "Unclassified"
-    def test_exact_match(self):
-        assert _normalize("Workflow Error") == self.WORKFLOW
-    def test_case_insensitive(self):
-        assert _normalize("workflow error") == self.WORKFLOW
-    def test_deprecation_warning(self):
-        assert _normalize("Deprecation Warning") == self.DEPRECATION
-    def test_random_text_returns_unclassified(self):
-        assert _normalize("I don't know") == self.UNKNOWN
-    def test_empty_string_returns_unclassified(self):
-        assert _normalize("") == self.UNKNOWN
-    def test_partial_match(self):
-        # The category may be embedded in a longer answer and must still be found
-        assert _normalize("Category: Workflow Error") == self.WORKFLOW
-    def test_strips_quotes(self):
-        assert _normalize('"Deprecation Warning"') == self.DEPRECATION
-# ── Cache ─────────────────────────────────────────────────────────────────────
-class TestCache:
-    """In-memory response cache: key derivation, get/set, stats, and API short-circuit."""
-    def test_cache_miss_returns_none(self):
-        assert _cache_get("totally new log message xyz") is None
-    def test_cache_set_and_get(self):
-        log = "test log message for caching"
-        _cache_set(log, "Workflow Error")
-        assert _cache_get(log) == "Workflow Error"
-    def test_cache_key_is_deterministic(self):
-        log = "same log every time"
-        assert _cache_key(log) == _cache_key(log)
-    def test_different_logs_different_keys(self):
-        k1 = _cache_key("log message A")
-        k2 = _cache_key("log message B")
-        assert k1 != k2
-    def test_cache_hit_avoids_api_call(self):
-        log = "Case escalation for ticket 7324 failed."
-        _cache_set(log, "Workflow Error")  # Pre-populate cache
-        # classify_with_llm() imports InferenceClient from huggingface_hub at
-        # call time; processor_llm itself has no such attribute, so patching
-        # "processor_llm.InferenceClient" would fail with AttributeError.
-        with patch("huggingface_hub.InferenceClient") as mock_client:
-            result = classify_with_llm(log)
-        mock_client.assert_not_called()
-        assert result == "Workflow Error"
-    def test_cache_stats_size(self):
-        _cache_set("log1", "Workflow Error")
-        _cache_set("log2", "Deprecation Warning")
-        stats = get_cache_stats()
-        assert stats["size"] == 2
-# ── No token ──────────────────────────────────────────────────────────────────
-class TestNoToken:
-    """Without an API token the classifier must fall back instead of calling out."""
-    def test_no_hf_token_returns_unclassified(self, monkeypatch):
-        # Simulate an environment where HF_TOKEN was never set
-        monkeypatch.setattr(llm_module, "HF_TOKEN", None)
-        label = classify_with_llm("Case escalation for ticket 1234.")
-        assert "Unclassified" == label
-# ── Retry logic ───────────────────────────────────────────────────────────────
-class TestRetry:
-    """Retry, exhaustion and caching behaviour of classify_with_llm with a fake client."""
-    # classify_with_llm() runs `from huggingface_hub import InferenceClient`
-    # at call time, so the class must be replaced on huggingface_hub;
-    # processor_llm has no module-level InferenceClient attribute to patch.
-    CLIENT_TARGET = "huggingface_hub.InferenceClient"
-    def _make_mock_client(self, responses):
-        """responses: list of (Exception | str) — raised or returned in order."""
-        call_count = [0]
-        def mock_create(**kwargs):
-            idx = call_count[0]
-            call_count[0] += 1
-            if isinstance(responses[idx], Exception):
-                raise responses[idx]
-            mock_resp = MagicMock()
-            mock_resp.choices[0].message.content = responses[idx]
-            return mock_resp
-        mock_client = MagicMock()
-        mock_client.chat.completions.create.side_effect = mock_create
-        return mock_client
-    def test_success_on_first_try(self, monkeypatch):
-        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
-        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)  # no sleep
-        client = self._make_mock_client(["Workflow Error"])
-        with patch(self.CLIENT_TARGET, return_value=client):
-            result = classify_with_llm("Case escalation for ticket 7324.")
-        assert result == "Workflow Error"
-        assert client.chat.completions.create.call_count == 1
-    def test_retry_on_transient_failure(self, monkeypatch):
-        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
-        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
-        monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
-        # Fail once, succeed on second attempt
-        client = self._make_mock_client([
-            ConnectionError("timeout"),
-            "Deprecation Warning",
-        ])
-        with patch(self.CLIENT_TARGET, return_value=client), \
-             patch("processor_llm.time.sleep"):  # skip actual sleep
-            result = classify_with_llm("Module will be retired in v4.")
-        assert result == "Deprecation Warning"
-        assert client.chat.completions.create.call_count == 2
-    def test_all_retries_exhausted_returns_unclassified(self, monkeypatch):
-        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
-        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
-        monkeypatch.setattr(llm_module, "MAX_RETRIES", 2)
-        client = self._make_mock_client([
-            ConnectionError("timeout"),
-            ConnectionError("timeout"),
-            ConnectionError("timeout"),
-        ])
-        with patch(self.CLIENT_TARGET, return_value=client), \
-             patch("processor_llm.time.sleep"):
-            result = classify_with_llm("Something that keeps failing.")
-        assert result == "Unclassified"
-        assert client.chat.completions.create.call_count == 3  # 1 initial + 2 retries
-    def test_successful_result_gets_cached(self, monkeypatch):
-        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
-        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
-        client = self._make_mock_client(["Workflow Error"])
-        log = "Case escalation for unique ticket 99999."
-        with patch(self.CLIENT_TARGET, return_value=client):
-            result = classify_with_llm(log)
-        assert result == "Workflow Error"
-        # Should now be in cache
-        assert _cache_get(log) == "Workflow Error"
-# ── Pipeline safety ───────────────────────────────────────────────────────────
-class TestPipelineSafety:
-    """classify_with_llm must degrade to "Unclassified" rather than raise."""
-    def test_classify_never_raises(self, monkeypatch):
-        """LLM failures must NEVER propagate as exceptions to the pipeline."""
-        monkeypatch.setattr(llm_module, "HF_TOKEN", "fake-token")
-        monkeypatch.setattr(llm_module, "RETRY_DELAY_SEC", 0)
-        # Patch the class where classify_with_llm() looks it up at call time
-        # (huggingface_hub); processor_llm has no InferenceClient attribute.
-        # The side effect makes constructing the client itself blow up.
-        with patch("huggingface_hub.InferenceClient", side_effect=RuntimeError("catastrophic")):
-            result = classify_with_llm("Any log message here.")
-        assert result == "Unclassified"  # Never raises, always returns string

HF/test/test_regex.py DELETED Viewed

@@ -1,222 +0,0 @@
-"""
-tests/test_regex.py — Unit tests for Tier 1: Regex Classifier
-Tests verify:
-  1. Every pattern category has positive matches
-  2. No false positives on known non-matching logs
-  3. Pattern order doesn't cause mis-labeling
-  4. Coverage improvement (should be > 35% on balanced test set)
-Run:
-  pytest tests/ -v
-  pytest tests/test_regex.py -v --tb=short
-"""
-import sys, os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-import pytest
-from processor_regex import classify_with_regex, get_regex_coverage
-# ── Positive cases: must match and return correct label ───────────────────────
-class TestHTTPStatus:
-    """Request-line and status-code logs must be labelled "HTTP Status"."""
-    LABEL = "HTTP Status"
-    def test_get_request(self):
-        assert classify_with_regex("GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1") == self.LABEL
-    def test_post_request(self):
-        assert classify_with_regex("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05") == self.LABEL
-    def test_delete_request(self):
-        assert classify_with_regex("DELETE /v1/users/123 HTTP/1.1 status: 204 len: 0 time: 0.02") == self.LABEL
-    def test_nova_style(self):
-        nova_line = "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19"
-        assert classify_with_regex(nova_line) == self.LABEL
-    def test_status_code_only(self):
-        assert classify_with_regex("API call /invoices returned HTTP 500 in 2.1s") == self.LABEL
-    def test_patch_request(self):
-        assert classify_with_regex("PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04") == self.LABEL
-class TestSecurityAlert:
-    """Login-failure, blocking, escalation and attack logs must be "Security Alert"."""
-    LABEL = "Security Alert"
-    def test_login_failures(self):
-        assert classify_with_regex("Multiple login failures occurred on user 6454 account") == self.LABEL
-    def test_ip_blocked(self):
-        assert classify_with_regex("IP 192.168.133.114 blocked due to potential attack") == self.LABEL
-    def test_brute_force(self):
-        assert classify_with_regex("Alert: brute force login attempt from 10.0.0.5 detected") == self.LABEL
-    def test_admin_escalation(self):
-        assert classify_with_regex("Admin access escalation detected for user 9429") == self.LABEL
-    def test_privilege_elevation(self):
-        assert classify_with_regex("Privilege elevation detected for user Admin99") == self.LABEL
-    def test_ddos(self):
-        assert classify_with_regex("Potential DDoS attack from 1.2.3.4 detected") == self.LABEL
-    def test_suspicious_activity(self):
-        assert classify_with_regex("Suspicious login activity detected from 203.0.113.1") == self.LABEL
-    def test_unauthorized_access(self):
-        assert classify_with_regex("Unauthorized access to data was attempted by User123") == self.LABEL
-class TestUserAction:
-    """Ordinary account activity must be labelled "User Action"."""
-    LABEL = "User Action"
-    def test_login(self):
-        assert classify_with_regex("User User12345 logged in.") == self.LABEL
-    def test_logout(self):
-        assert classify_with_regex("User User99 logged out.") == self.LABEL
-    def test_account_created(self):
-        assert classify_with_regex("Account with ID 456 created by Admin.") == self.LABEL
-    def test_password_changed(self):
-        assert classify_with_regex("User User42 changed password successfully.") == self.LABEL
-    def test_new_user_registered(self):
-        assert classify_with_regex("New user User9999 registered with email u@e.com.") == self.LABEL
-class TestSystemNotification:
-    """Routine operational events must be labelled "System Notification"."""
-    LABEL = "System Notification"
-    def test_backup_completed(self):
-        assert classify_with_regex("Backup completed successfully.") == self.LABEL
-    def test_backup_started(self):
-        assert classify_with_regex("Backup started at 2024-01-14 03:00:00.") == self.LABEL
-    def test_system_updated(self):
-        assert classify_with_regex("System updated to version 4.2.1.") == self.LABEL
-    def test_disk_cleanup(self):
-        assert classify_with_regex("Disk cleanup completed successfully.") == self.LABEL
-    def test_service_restarted(self):
-        assert classify_with_regex("Service payments restarted successfully.") == self.LABEL
-    def test_cpu_usage(self):
-        assert classify_with_regex("CPU usage at 98% for the last 10 minutes on node-7") == self.LABEL
-    def test_health_check_passed(self):
-        assert classify_with_regex("Health check passed for service auth-api") == self.LABEL
-    def test_cron_executed(self):
-        assert classify_with_regex("Cron job cleanup-tokens executed successfully.") == self.LABEL
-    def test_certificate_renewed(self):
-        assert classify_with_regex("Certificate renewed successfully for domain api.example.com") == self.LABEL
-class TestError:
-    """Crashes, lost connections, outages and timeouts must be labelled "Error"."""
-    LABEL = "Error"
-    def test_system_crashed(self):
-        assert classify_with_regex("System crashed due to disk I/O failure on node-3") == self.LABEL
-    def test_db_connection_failed(self):
-        assert classify_with_regex("Database connection failed after 3 retries") == self.LABEL
-    def test_service_down(self):
-        assert classify_with_regex("Service payments-api is down") == self.LABEL
-    def test_request_timeout(self):
-        assert classify_with_regex("Connection timed out after 30s on shard-7") == self.LABEL
-class TestCriticalError:
-    """CRITICAL/FATAL prefixes, data loss and OOM must be labelled "Critical Error"."""
-    LABEL = "Critical Error"
-    def test_critical_prefix(self):
-        assert classify_with_regex("CRITICAL: data corruption detected on shard-14") == self.LABEL
-    def test_fatal(self):
-        assert classify_with_regex("FATAL: kernel panic — system halted") == self.LABEL
-    def test_data_loss(self):
-        assert classify_with_regex("data loss detected during write to replica-3") == self.LABEL
-    def test_oom(self):
-        assert classify_with_regex("out-of-memory error: process killed (OOM)") == self.LABEL
-# ── Negative cases: must return None (don't mis-classify) ────────────────────
-class TestNegativeCases:
-    """Logs outside the regex tier's scope must come back as None."""
-    # Workflow/deprecation style messages plus plain chatter
-    UNMATCHED_LOGS = [
-        "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
-        "The 'ReportGenerator' module will be retired in version 4.0.",
-        "The 'BulkEmailSender' feature will be deprecated in v5.0.",
-        "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
-        "Hey bro chill ya!",
-    ]
-    @pytest.mark.parametrize("log", UNMATCHED_LOGS)
-    def test_no_false_positives(self, log):
-        label = classify_with_regex(log)
-        assert label is None, f"Expected None but got '{label}' for: {log[:80]}"
-# ── Coverage test ─────────────────────────────────────────────────────────────
-class TestCoverage:
-    """Coverage floor for the regex tier on a hand-balanced sample of log lines."""
-    BALANCED_SAMPLE = [
-        # HTTP (6)
-        "GET /api/v2/resource HTTP/1.1 status: 200 len: 100 time: 0.1",
-        "POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05",
-        "nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 200",
-        "DELETE /v1/items/99 HTTP/1.1 status: 204 len: 0 time: 0.01",
-        "PATCH /v1/config HTTP/1.1 status: 200 len: 55 time: 0.04",
-        "API call /invoices returned HTTP 500 in 2.1s",
-        # Security (6)
-        "Multiple login failures occurred on user 6454 account",
-        "IP 10.0.0.5 blocked due to potential attack",
-        "Brute force login attempt from 192.168.1.1 detected",
-        "Admin access escalation detected for user 9429",
-        "Suspicious login activity detected from 1.2.3.4",
-        "Potential DDoS attack from 203.0.113.1 detected",
-        # User Action (5)
-        "User User12345 logged in.",
-        "User User99 logged out.",
-        "Account with ID 456 created by Admin.",
-        "User User42 changed password successfully.",
-        "New user User9999 registered with email u@e.com.",
-        # System Notification (5)
-        "Backup completed successfully.",
-        "System updated to version 4.2.1.",
-        "Disk cleanup completed successfully.",
-        "CPU usage at 98% for the last 10 minutes on node-7",
-        "Cron job cleanup-tokens executed successfully.",
-        # Error (4)
-        "System crashed due to disk I/O failure on node-3",
-        "Database connection failed after 3 retries",
-        "Service auth-api is down",
-        "Connection timed out after 30s",
-        # Critical (3)
-        "CRITICAL: data corruption detected on shard-14",
-        "FATAL: kernel panic — system halted",
-        "data loss detected during write to replica-3",
-        # LegacyCRM / unmatched (5) → should NOT match
-        "Case escalation for ticket ID 7324 failed.",
-        "The 'BulkEmailSender' feature will be deprecated in v5.0.",
-        "Invoice generation aborted for order ID 8910.",
-        "Workflow stalled at approval step 3 for case 9021.",
-        "SLA breach detected for case ID 7701 (P1 4h breach).",
-    ]
-    def test_coverage_above_35_percent(self):
-        # The sample holds 29 lines meant to match and 5 LegacyCRM-style lines
-        # meant not to (29/34 ≈ 85%); the assertion only enforces the 35% floor.
-        pct = get_regex_coverage(self.BALANCED_SAMPLE)["coverage_pct"]
-        failure_hint = (
-            f"Regex coverage {pct}% is below 35% minimum. "
-            "Check pattern additions in processor_regex.py"
-        )
-        assert pct >= 35.0, failure_hint
-    def test_no_false_positive_on_legacy_logs(self):
-        legacy_logs = (
-            "Case escalation for ticket ID 7324 failed.",
-            "The 'BulkEmailSender' feature will be deprecated in v5.0.",
-            "Invoice generation aborted for order ID 8910.",
-        )
-        for legacy_line in legacy_logs:
-            label = classify_with_regex(legacy_line)
-            assert label is None, f"False positive: '{label}' on legacy log: {legacy_line}"

HF/test/test_routing.py DELETED Viewed

@@ -1,179 +0,0 @@
-"""
-test/test_routing.py — Pipeline Routing Tests
-Tests verify:
-  1. LegacyCRM source → LLM tier (always)
-  2. Regex match → Regex tier (never reaches BERT)
-  3. High-confidence BERT → BERT tier
-  4. Unclassified BERT → LLM fallback tier
-  5. Result schema is complete (all keys present)
-Run:
-  pytest test/test_routing.py -v
-"""
-import sys, os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-import pytest
-from unittest.mock import patch, MagicMock
-from classify import classify_log, classify_logs, pipeline_summary
-# ── Fixtures ──────────────────────────────────────────────────────────────────
-# Each fixture is a (source name, log text) pair; tests unpack it into classify_log.
-# REGEX_HIT_LOG / REGEX_HIT_LOG2: the routing tests assert these resolve in the "Regex" tier.
-REGEX_HIT_LOG    = ("ModernCRM",   "User User123 logged in.")
-REGEX_HIT_LOG2   = ("BillingSystem", "GET /api/v1/invoices HTTP/1.1 status: 200 len: 100 time: 0.1")
-# LEGACY_LOG: LegacyCRM source; the routing tests assert it is sent to the "LLM" tier.
-LEGACY_LOG       = ("LegacyCRM",   "Case escalation for ticket 9021 failed.")
-# NON_REGEX_LOG: used with a patched bert_batch to exercise the BERT path.
-# Presumably matches no regex pattern — confirm against processor_regex.py.
-NON_REGEX_LOG    = ("ModernHR",    "The inventory sync completed without matching standard patterns.")
-# ── Schema completeness ───────────────────────────────────────────────────────
-class TestResultSchema:
-    # Shape checks on the dict returned by classify_log.
-    def test_classify_log_has_required_keys(self):
-        # bert_batch is patched to return one fixed high-confidence prediction.
-        stub_prediction = [("Error", 0.95)]
-        with patch("classify.bert_batch", return_value=stub_prediction):
-            outcome = classify_log(*NON_REGEX_LOG)
-        for required_key in ("label", "tier", "confidence", "latency_ms"):
-            assert required_key in outcome
-    def test_latency_ms_is_positive(self):
-        latency = classify_log(*REGEX_HIT_LOG)["latency_ms"]
-        assert latency > 0
-    def test_confidence_is_float_or_none(self):
-        confidence = classify_log(*REGEX_HIT_LOG)["confidence"]
-        assert confidence is None or isinstance(confidence, float)
-# ── Regex tier routing ─────────────────────────────────────────────────────────
-class TestRegexRouting:
-    # A log that hits a regex pattern must resolve in the Regex tier.
-    def test_regex_match_returns_regex_tier(self):
-        tier = classify_log(*REGEX_HIT_LOG)["tier"]
-        assert tier == "Regex"
-    def test_regex_match_has_full_confidence(self):
-        confidence = classify_log(*REGEX_HIT_LOG)["confidence"]
-        assert confidence == 1.0
-    def test_regex_match_http_log(self):
-        source, message = REGEX_HIT_LOG2
-        outcome = classify_log(source, message)
-        assert outcome["tier"] == "Regex"
-        assert outcome["label"] == "HTTP Status"
-    def test_regex_match_skips_bert(self):
-        """If regex matches, bert_batch should never be called."""
-        with patch("classify.bert_batch") as bert_stub:
-            classify_log(*REGEX_HIT_LOG)
-        # The mock keeps its call record after the patch is undone.
-        bert_stub.assert_not_called()
-# ── LegacyCRM routing ─────────────────────────────────────────────────────────
-class TestLegacyCRMRouting:
-    # LegacyCRM logs must go to the LLM tier without touching regex or BERT.
-    def test_legacy_crm_goes_to_llm(self):
-        with patch("classify.classify_with_llm", return_value="Workflow Error") as llm_stub:
-            outcome = classify_log(*LEGACY_LOG)
-        llm_stub.assert_called_once()
-        assert outcome["tier"] == "LLM"
-    def test_legacy_crm_skips_regex(self):
-        """LegacyCRM should skip regex entirely — go straight to LLM."""
-        with patch("classify.classify_with_regex") as regex_stub:
-            with patch("classify.classify_with_llm", return_value="Workflow Error"):
-                classify_log(*LEGACY_LOG)
-        regex_stub.assert_not_called()
-    def test_legacy_crm_skips_bert(self):
-        with patch("classify.bert_batch") as bert_stub:
-            with patch("classify.classify_with_llm", return_value="Workflow Error"):
-                classify_log(*LEGACY_LOG)
-        bert_stub.assert_not_called()
-# ── BERT routing ──────────────────────────────────────────────────────────────
-class TestBERTRouting:
-    # Routing for logs that reach bert_batch; bert_batch is patched in every test.
-    def test_high_confidence_bert_stays_bert(self):
-        stub_prediction = [("Security Alert", 0.95)]
-        with patch("classify.bert_batch", return_value=stub_prediction):
-            outcome = classify_log(*NON_REGEX_LOG)
-        assert outcome["tier"] == "BERT"
-        assert outcome["label"] == "Security Alert"
-        assert outcome["confidence"] == pytest.approx(0.95)
-    def test_low_confidence_bert_falls_back_to_llm(self):
-        """BERT returning 'Unclassified' should escalate to LLM."""
-        stub_prediction = [("Unclassified", 0.20)]
-        with patch("classify.bert_batch", return_value=stub_prediction):
-            with patch("classify.classify_with_llm", return_value="Workflow Error") as llm_stub:
-                outcome = classify_log(*NON_REGEX_LOG)
-        # Substring check: the tier name only has to contain "LLM".
-        assert "LLM" in outcome["tier"]
-        llm_stub.assert_called_once()
-    def test_bert_batch_called_for_non_regex_log(self):
-        stub_prediction = [("Error", 0.88)]
-        with patch("classify.bert_batch", return_value=stub_prediction) as bert_stub:
-            classify_log(*NON_REGEX_LOG)
-        bert_stub.assert_called_once()
-# ── Batch routing ──────────────────────────────────────────────────────────────
-class TestBatchRouting:
-    # Batch entry point (classify_logs) and the pipeline_summary report.
-    def test_batch_returns_correct_length(self):
-        batch = [REGEX_HIT_LOG, REGEX_HIT_LOG2, LEGACY_LOG]
-        with patch("classify.classify_with_llm", return_value="Workflow Error"):
-            outcomes = classify_logs(batch)
-        assert len(outcomes) == len(batch)
-    def test_batch_mixed_tiers(self):
-        # Each input is paired with the tier it is expected to land in.
-        cases = [
-            (REGEX_HIT_LOG, "Regex"),
-            (("ModernCRM", "GET /api HTTP/1.1 status: 200"), "Regex"),  # HTTP pattern
-            (LEGACY_LOG, "LLM"),
-        ]
-        batch = [entry for entry, _ in cases]
-        with patch("classify.classify_with_llm", return_value="Workflow Error"):
-            outcomes = classify_logs(batch)
-        for position, (_, expected_tier) in enumerate(cases):
-            assert outcomes[position]["tier"] == expected_tier
-    def test_pipeline_summary_structure(self):
-        outcomes = classify_logs([REGEX_HIT_LOG, REGEX_HIT_LOG2])
-        summary = pipeline_summary(outcomes)
-        for section in ("total", "tier_stats", "label_counts"):
-            assert section in summary
-        assert summary["total"] == 2
-    def test_pipeline_summary_tier_pcts_sum_to_100(self):
-        outcomes = classify_logs([REGEX_HIT_LOG, REGEX_HIT_LOG2, REGEX_HIT_LOG])
-        tier_stats = pipeline_summary(outcomes)["tier_stats"]
-        total_pct = sum(stats["pct"] for stats in tier_stats.values())
-        # The 1.0 tolerance allows for rounding in the per-tier percentages.
-        assert abs(total_pct - 100.0) < 1.0, f"Tier pcts don't sum to 100: {total_pct}"
-# ── Edge cases ────────────────────────────────────────────────────────────────
-class TestEdgeCases:
-    # Boundary inputs and failure handling for classify_log / classify_logs.
-    def test_empty_batch_returns_empty(self):
-        results = classify_logs([])
-        assert results == []
-    def test_single_log_batch(self):
-        with patch("classify.bert_batch", return_value=[("Error", 0.85)]):
-            results = classify_logs([NON_REGEX_LOG])
-        assert len(results) == 1
-    def test_all_regex_batch_never_calls_bert(self):
-        logs = [REGEX_HIT_LOG] * 10
-        with patch("classify.bert_batch") as mock_bert:
-            classify_logs(logs)
-            mock_bert.assert_not_called()
-    def test_llm_failure_returns_unclassified(self):
-        """LLM crashing should return Unclassified, not raise."""
-        with patch("classify.classify_with_llm", side_effect=Exception("LLM down")):
-            # Only the call under test sits in the try block. A failing label
-            # assertion must surface as itself, not as "classify_log raised".
-            try:
-                result = classify_log(*LEGACY_LOG)
-            except Exception as exc:
-                pytest.fail(
-                    f"classify_log raised {exc!r} — should have returned Unclassified"
-                )
-        assert result["label"] == "Unclassified"