Spaces:

NOT-OMEGA
/

LogAI-Engine

Sleeping

App Files Files Community

NOT-OMEGA committed on Apr 14

Commit

4561114

verified ·

1 Parent(s): 0c1b7a7

Upload 10 files

Browse files

Files changed (10) hide show

HF/Dockerfile +32 -0
HF/api.py +314 -0
HF/app_gradio.py +187 -0
HF/benchmark.py +214 -0
HF/classify.py +198 -0
HF/error_analysis.py +250 -0
HF/processor_bert.py +216 -0
HF/processor_llm.py +192 -0
HF/processor_regex.py +220 -0
HF/requirements.txt +30 -0

HF/Dockerfile ADDED Viewed

	@@ -0,0 +1,32 @@

FROM python:3.11-slim

# Build arg: default listening port (override with --build-arg PORT=...)
ARG PORT=8000
# ARG values are not present in the running container, and HEALTHCHECK/CMD
# are not subject to build-time variable substitution. Persist the port as an
# environment variable so the runtime shells below can expand ${PORT}.
ENV PORT=${PORT}

# System deps (curl is used by the health check)
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps first (layer cache)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy source
COPY . .

# Non-root user
RUN adduser --disabled-password --gecos "" appuser \
    && chown -R appuser:appuser /app
USER appuser

# Health check (shell form: ${PORT} is expanded from the container environment)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:${PORT}/health || exit 1

EXPOSE ${PORT}

# Production: single worker (CPU-bound inference — scale via replicas, not threads).
# Launched through `sh -c` so the port follows ${PORT}; `exec` keeps uvicorn as
# PID 1 so it receives stop signals directly.
CMD ["sh", "-c", "exec uvicorn api:app --host 0.0.0.0 --port ${PORT} --workers 1 --log-level info"]

HF/api.py ADDED Viewed

	@@ -0,0 +1,314 @@

+"""
+api.py — Async FastAPI Inference Service
+Endpoints:
+  POST /classify          — Single log
+  POST /classify/batch    — Batch of logs (up to 512)
+  GET  /health            — Liveness check
+  GET  /ready             — Readiness check (model loaded?)
+  GET  /metrics           — Request counts, throughput, latency stats
+Features:
+  - Async request handling (non-blocking)
+  - Worker pool via asyncio semaphore (bounded concurrency)
+  - Structured JSON logs with request_id
+  - Rate limiting (configurable)
+  - Request ID tracing
+  - Batch queue aggregation for small requests
+Run:
+  uvicorn api:app --host 0.0.0.0 --port 8000 --workers 1
+Example:
+  curl -X POST http://localhost:8000/classify \
+       -H "Content-Type: application/json" \
+       -d '{"source": "ModernCRM", "log_message": "User User123 logged in."}'
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import os
+import time
+import uuid
+import statistics
+from collections import deque
+from contextlib import asynccontextmanager
+from typing import Optional
+from fastapi import FastAPI, HTTPException, Request, status
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field, field_validator
+# ── Logging setup ─────────────────────────────────────────────────────────────
+logging.basicConfig(
+    level=logging.INFO,
+    format='{"time":"%(asctime)s","level":"%(levelname)s","logger":"%(name)s","msg":"%(message)s"}'
+)
+logger = logging.getLogger("log-classifier-api")
+# ── Config ─────────────────────────────────────────────────────────────────────
+MAX_BATCH_SIZE      = int(os.getenv("MAX_BATCH_SIZE", "512"))
+MAX_CONCURRENT      = int(os.getenv("MAX_CONCURRENT", "4"))   # concurrency cap
+RATE_LIMIT_PER_MIN  = int(os.getenv("RATE_LIMIT_PER_MIN", "1000"))
+LOG_MAX_CHARS       = 2048   # truncate huge logs before classify
+# ── Global state ───────────────────────────────────────────────────────────────
+_semaphore: asyncio.Semaphore = None   # type: ignore
+_model_ready: bool = False
+# Metrics ring buffer (last 1000 requests)
+_latencies_ms: deque = deque(maxlen=1000)
+_request_count = 0
+_error_count   = 0
+_start_time    = time.time()
+# Rate limiter (simple sliding window per process)
+_rate_window: deque = deque(maxlen=RATE_LIMIT_PER_MIN)
+# ── Lifespan: load models on startup ──────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the concurrency semaphore and load models before serving.

    The model load runs in the default executor so the event loop is not
    blocked. A failed load is logged but does not abort startup: the service
    still starts and `_model_ready` stays False, so /ready answers 503.
    """
    global _semaphore, _model_ready
    logger.info("Starting up — loading models…")
    _semaphore = asyncio.Semaphore(MAX_CONCURRENT)
    # We are inside a coroutine, so a loop is guaranteed to be running.
    loop = asyncio.get_running_loop()
    try:
        await loop.run_in_executor(None, _load_models_blocking)
    except Exception as e:
        logger.error(f"❌ Model load failed: {e}")
        # Service starts but /ready will return 503
    else:
        _model_ready = True
        logger.info("✅ Models loaded — API ready")
    yield
    logger.info("Shutting down")
def _load_models_blocking():
    """Import the BERT processor module, then log that it is loaded.

    Blocking call, intended to be run in an executor.
    NOTE(review): presumably importing `processor_bert` is what loads the
    model weights — confirm against that module.
    """
    # Only the import side effect matters; the imported name is discarded.
    from processor_bert import classify_batch as _unused  # noqa: F401

    logger.info("BERT model loaded")
+# ── App factory ────────────────────────────────────────────────────────────────
+app = FastAPI(
+    title="Log Classification API",
+    description="3-tier hybrid pipeline: Regex → BERT → LLM",
+    version="3.0.0",
+    lifespan=lifespan,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ── Request / Response schemas ─────────────────────────────────────────────────
class LogRequest(BaseModel):
    """Request body for classifying a single log line."""

    # Name of the system that emitted the log.
    # `examples=[...]` is the pydantic v2 form; the bare `example=` keyword is
    # a deprecated extra kwarg there.
    source:      str = Field(..., examples=["ModernCRM"])
    # Raw log text; must contain at least one character.
    log_message: str = Field(..., examples=["User User123 logged in."], min_length=1)

    @field_validator("log_message")
    @classmethod
    def truncate_long_logs(cls, v: str) -> str:
        """Cap the message at LOG_MAX_CHARS characters before classification."""
        return v[:LOG_MAX_CHARS]
+class LogResponse(BaseModel):
+    request_id:  str
+    label:       str
+    tier:        str
+    confidence:  Optional[float]
+    latency_ms:  float
+    cached:      bool = False
+class BatchRequest(BaseModel):
+    logs: list[LogRequest] = Field(..., max_length=MAX_BATCH_SIZE)
+class BatchResponse(BaseModel):
+    request_id:   str
+    total:        int
+    elapsed_ms:   float
+    throughput:   float
+    results:      list[LogResponse]
+class HealthResponse(BaseModel):
+    status:    str
+    uptime_s:  float
+class MetricsResponse(BaseModel):
+    total_requests:  int
+    total_errors:    int
+    uptime_s:        float
+    requests_per_min: float
+    latency_p50_ms:  Optional[float]
+    latency_p95_ms:  Optional[float]
+    latency_p99_ms:  Optional[float]
+# ── Rate limiter ───────────────────────────────────────────────────────────────
def _check_rate_limit() -> None:
    """Enforce a per-process sliding-window limit of RATE_LIMIT_PER_MIN requests.

    `_rate_window` holds the timestamps of accepted requests. Because that
    deque is bounded to RATE_LIMIT_PER_MIN entries, its length can never
    exceed the limit, so the check must happen *before* recording the new
    request. Rejected requests are not recorded.

    Raises:
        HTTPException: 429 when the window already holds RATE_LIMIT_PER_MIN
            requests from the last 60 seconds.
    """
    now = time.time()
    # Evict timestamps that have aged out of the 60-second window (oldest first).
    while _rate_window and now - _rate_window[0] >= 60:
        _rate_window.popleft()
    if len(_rate_window) >= RATE_LIMIT_PER_MIN:
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail=f"Rate limit exceeded: {RATE_LIMIT_PER_MIN} req/min",
        )
    _rate_window.append(now)
+# ── Middleware: request logging ────────────────────────────────────────────────
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Attach a request ID to each request and log one line per response.

    The ID comes from the incoming X-Request-ID header, or else from the first
    8 characters of a fresh UUID4. It is stored on `request.state.request_id`
    and echoed back in the response's X-Request-ID header.
    """
    fallback_id = str(uuid.uuid4())[:8]
    rid = request.headers.get("X-Request-ID", fallback_id)
    request.state.request_id = rid

    started = time.perf_counter()
    response = await call_next(request)
    elapsed = 1000 * (time.perf_counter() - started)

    summary = (
        f"method={request.method} path={request.url.path} "
        f"status={response.status_code} latency={elapsed:.1f}ms rid={rid}"
    )
    logger.info(summary)
    response.headers["X-Request-ID"] = rid
    return response
+# ── Health & readiness ─────────────────────────────────────────────────────────
@app.get("/health", response_model=HealthResponse, tags=["ops"])
async def health():
    """Liveness probe: always reports "ok" plus seconds since module import."""
    uptime_s = round(time.time() - _start_time, 1)
    return {"status": "ok", "uptime_s": uptime_s}
@app.get("/ready", tags=["ops"])
async def ready():
    """Readiness probe: 503 until the startup model load has succeeded."""
    if _model_ready:
        return {"status": "ready"}
    raise HTTPException(status_code=503, detail="Models not yet loaded")
+# ── Metrics ────────────────────────────────────────────────────────────────────
@app.get("/metrics", response_model=MetricsResponse, tags=["ops"])
async def metrics():
    """Report request/error counters, uptime, and latency percentiles.

    Percentiles are taken over the samples currently held in `_latencies_ms`
    and are None when no sample has been recorded yet.
    """
    uptime_s = time.time() - _start_time
    ordered = sorted(_latencies_ms)  # an empty buffer sorts to []
    count = len(ordered)

    def percentile(fraction):
        if not count:
            return None
        # Clamp so that fraction * count == count cannot index past the end.
        position = min(int(count * fraction), count - 1)
        return round(ordered[position], 2)

    # Uptime is floored at one minute so early readings are not inflated.
    minutes_up = max(uptime_s / 60, 1)
    return {
        "total_requests":  _request_count,
        "total_errors":    _error_count,
        "uptime_s":        round(uptime_s, 1),
        "requests_per_min": round(_request_count / minutes_up, 1),
        "latency_p50_ms":  percentile(0.50),
        "latency_p95_ms":  percentile(0.95),
        "latency_p99_ms":  percentile(0.99),
    }
+# ── Classify single ────────────────────────────────────────────────────────────
@app.post("/classify", response_model=LogResponse, tags=["inference"])
async def classify_single(req: LogRequest, request: Request):
    """Classify one log line.

    The blocking classifier runs in the default executor, with concurrency
    bounded by `_semaphore`. The reported latency covers only the classifier
    call, not time spent waiting for the semaphore.

    Raises:
        HTTPException: 429 from the rate limiter; 500 if classification fails.
    """
    global _request_count, _error_count
    _check_rate_limit()
    _request_count += 1
    rid = getattr(request.state, "request_id", str(uuid.uuid4())[:8])
    # Inside a coroutine a loop is always running; no need for get_event_loop().
    loop = asyncio.get_running_loop()
    async with _semaphore:
        t0 = time.perf_counter()
        try:
            result = await loop.run_in_executor(
                None, _classify_blocking, req.source, req.log_message
            )
        except Exception as e:
            _error_count += 1
            logger.error(f"rid={rid} classify error: {e}")
            # Chain the cause so the original traceback is preserved.
            raise HTTPException(status_code=500, detail=str(e)) from e
    latency = (time.perf_counter() - t0) * 1000
    _latencies_ms.append(latency)
    return LogResponse(
        request_id = rid,
        label      = result["label"],
        tier       = result["tier"],
        confidence = result.get("confidence"),
        latency_ms = round(latency, 2),
    )
def _classify_blocking(source: str, log_message: str) -> dict:
    """Synchronous wrapper around `classify.classify_log`, for use in an executor.

    The import is deferred to call time rather than done at module import.
    """
    from classify import classify_log as run_single

    return run_single(source, log_message)
+# ── Classify batch ─────────────────────────────────────────────────────────────
@app.post("/classify/batch", response_model=BatchResponse, tags=["inference"])
async def classify_batch_endpoint(req: BatchRequest, request: Request):
    """Classify a batch of logs in one blocking call run in the executor.

    Each result reports the batch's average per-log latency (elapsed / count).
    An empty batch returns an empty response immediately instead of dividing
    by zero when computing that average.

    Raises:
        HTTPException: 429 from the rate limiter; 500 if classification fails.
    """
    global _request_count, _error_count
    _check_rate_limit()
    _request_count += 1
    rid = getattr(request.state, "request_id", str(uuid.uuid4())[:8])
    log_pairs = [(r.source, r.log_message) for r in req.logs]
    n_logs = len(log_pairs)
    if n_logs == 0:
        # Nothing to classify: per-log latency and throughput are undefined.
        return BatchResponse(
            request_id = rid,
            total      = 0,
            elapsed_ms = 0.0,
            throughput = 0.0,
            results    = [],
        )
    loop = asyncio.get_running_loop()
    async with _semaphore:
        t0 = time.perf_counter()
        try:
            results = await loop.run_in_executor(
                None, _classify_batch_blocking, log_pairs
            )
        except Exception as e:
            _error_count += 1
            logger.error(f"rid={rid} batch error: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e
    elapsed_ms = (time.perf_counter() - t0) * 1000
    # Guard against a zero reading from a coarse clock.
    throughput = round(n_logs / (elapsed_ms / 1000), 1) if elapsed_ms > 0 else 0.0
    per_log_ms = elapsed_ms / n_logs
    _latencies_ms.extend([per_log_ms] * n_logs)
    return BatchResponse(
        request_id = rid,
        total      = n_logs,
        elapsed_ms = round(elapsed_ms, 2),
        throughput = throughput,
        results    = [
            LogResponse(
                request_id = rid,
                label      = r["label"],
                tier       = r["tier"],
                confidence = r.get("confidence"),
                latency_ms = round(per_log_ms, 2),
            )
            for r in results
        ],
    )
def _classify_batch_blocking(log_pairs: list[tuple[str, str]]) -> list[dict]:
    """Synchronous wrapper around `classify.classify_logs`, for use in an executor.

    `log_pairs` is a list of (source, log_message) tuples.
    """
    from classify import classify_logs as run_batch

    return run_batch(log_pairs)
+# ── Dev runner ──────────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("api:app", host="0.0.0.0", port=8000, reload=False, workers=1)

HF/app_gradio.py ADDED Viewed

	@@ -0,0 +1,187 @@

+"""
+Log Classification System — HuggingFace Spaces
+Gradio UI for the 3-tier hybrid log classification pipeline.
+"""
+from __future__ import annotations
+import io
+import time
+import pandas as pd
+import gradio as gr
+from classify import classify_log, classify_csv
+# ── Source options ──────────────────────────────────────────────────────────
+SOURCES = [
+    "ModernCRM",
+    "ModernHR",
+    "BillingSystem",
+    "AnalyticsEngine",
+    "ThirdPartyAPI",
+    "LegacyCRM",
+]
+TIER_COLORS = {
+    "Regex":        "🟢",
+    "BERT":         "🔵",
+    "LLM":          "🟡",
+    "LLM (fallback)": "🟠",
+}
+EXAMPLE_LOGS = [
+    ["ModernCRM",       "User User12345 logged in."],
+    ["ModernHR",        "Multiple login failures occurred on user 6454 account"],
+    ["BillingSystem",   "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
+    ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
+    ["LegacyCRM",       "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
+    ["LegacyCRM",       "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
+]
+# ── Single log tab ──────────────────────────────────────────────────────────
def classify_single(source: str, log_message: str):
    """Classify one log for the "Single Log" tab.

    Returns a 4-tuple of display strings: (label, tier with icon, confidence,
    latency). A blank or whitespace-only message yields four dash placeholders
    without calling the classifier.
    """
    if log_message.strip() == "":
        return ("—",) * 4

    started = time.perf_counter()
    outcome = classify_log(source, log_message)
    elapsed_ms = (time.perf_counter() - started) * 1000

    tier_name = outcome["tier"]
    raw_confidence = outcome["confidence"]
    if raw_confidence is None:
        confidence_text = "N/A"
    else:
        confidence_text = f"{raw_confidence:.1%}"
    badge = TIER_COLORS.get(tier_name, "⚪")

    return (
        outcome["label"],
        f"{badge} {tier_name}",
        confidence_text,
        f"{elapsed_ms:.1f} ms",
    )
+# ── Batch CSV tab ───────────────────────────────────────────────────────────
def classify_batch(file):
    """Classify an uploaded CSV for the "Batch CSV Upload" tab.

    Args:
        file: The uploaded file, either an object exposing the path as
            `.name` or a plain path string; None when nothing was uploaded.

    Returns:
        (output_path, stats_text). `output_path` is None on failure, and
        `stats_text` then carries the warning or error message.

    Each call writes to its own temporary file. A fixed output path would let
    concurrent users overwrite each other's results.
    """
    import contextlib
    import os
    import tempfile

    if file is None:
        return None, "⚠️ Please upload a CSV file."

    in_path = getattr(file, "name", file)
    fd, out_path = tempfile.mkstemp(prefix="classified_", suffix=".csv")
    os.close(fd)  # classify_csv reopens the path itself
    try:
        output_path, df = classify_csv(in_path, out_path)
    except Exception as e:
        # Best-effort cleanup of the unused placeholder file.
        with contextlib.suppress(OSError):
            os.unlink(out_path)
        if isinstance(e, ValueError):
            return None, f"⚠️ {e}"
        return None, f"❌ Error: {e}"

    total = len(df)
    tier_counts  = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()
    tier_lines  = "\n".join(f"  {TIER_COLORS.get(k,'⚪')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
    label_lines = "\n".join(f"  • {k}: {v}" for k, v in label_counts.items())
    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}"
    )
    return output_path, stats
+# ── UI ──────────────────────────────────────────────────────────────────────
+with gr.Blocks(title="Log Classification System", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+# 🔍 Log Classification System
+**3-tier hybrid pipeline** → 🟢 Regex · 🔵 BERT + LogReg · 🟡 LLM
+Built to mimic production enterprise log monitoring architecture.
+""")
+    with gr.Tabs():
+        # ── Tab 1: Single Log ────────────────────────────────────────────
+        with gr.Tab("Single Log"):
+            with gr.Row():
+                source_input = gr.Dropdown(
+                    choices=SOURCES,
+                    value="ModernCRM",
+                    label="Source System",
+                )
+                log_input = gr.Textbox(
+                    label="Log Message",
+                    placeholder="Paste a log message here...",
+                    lines=3,
+                )
+            classify_btn = gr.Button("Classify", variant="primary")
+            with gr.Row():
+                label_out      = gr.Textbox(label="🏷️ Predicted Label",     interactive=False)
+                tier_out       = gr.Textbox(label="⚙️ Tier Used",           interactive=False)
+                confidence_out = gr.Textbox(label="📈 Confidence",          interactive=False)
+                latency_out    = gr.Textbox(label="⏱️ Latency",             interactive=False)
+            classify_btn.click(
+                fn=classify_single,
+                inputs=[source_input, log_input],
+                outputs=[label_out, tier_out, confidence_out, latency_out],
+            )
+            gr.Examples(
+                examples=EXAMPLE_LOGS,
+                inputs=[source_input, log_input],
+                label="📋 Example Logs (click to try)",
+            )
+        # ── Tab 2: Batch CSV ─────────────────────────────────────────────
+        with gr.Tab("Batch CSV Upload"):
+            gr.Markdown("""
+Upload a CSV with columns: **`source`**, **`log_message`**
+Download the classified CSV with added columns: `predicted_label`, `tier_used`, `confidence`.
+""")
+            with gr.Row():
+                with gr.Column():
+                    csv_input  = gr.File(label="📂 Upload CSV", file_types=[".csv"])
+                    batch_btn  = gr.Button("Classify All", variant="primary")
+                with gr.Column():
+                    csv_output = gr.File(label="📥 Download Classified CSV")
+                    stats_out  = gr.Textbox(label="📊 Stats", lines=12, interactive=False)
+            batch_btn.click(
+                fn=classify_batch,
+                inputs=[csv_input],
+                outputs=[csv_output, stats_out],
+            )
+            gr.Markdown("""
+**Sample CSV format:**
+```
+source,log_message
+ModernCRM,User User123 logged in.
+LegacyCRM,Case escalation for ticket ID 7324 failed.
+BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
+```
+""")
+        # ── Tab 3: Architecture ──────────────────────────────────────────
+        with gr.Tab("Architecture"):
+            gr.Markdown("""
+## 🏗️ 3-Tier Hybrid Pipeline
+| Tier | Method | Coverage | Latency | When Used |
+|------|--------|----------|---------|-----------|
+| 🟢 Regex | Python `re` patterns | ~21% | < 1ms | Fixed patterns (login, backup, etc.) |
+| 🔵 BERT | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories with 150+ samples |
+| 🟡 LLM | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM logs, rare patterns |
+## 📊 Model Performance (from training)
+- **BERT + LogReg** trained on 2,410 synthetic enterprise logs
+- **Confidence threshold**: 0.5 (below → escalate to LLM)
+- **Source-aware routing**: `LegacyCRM` bypasses ML entirely (only 7 training samples)
+## 🔑 Environment Variables
+| Secret | Required For |
+|--------|-------------|
+| `HF_TOKEN` | LLM inference (LegacyCRM logs) |
+""")
+if __name__ == "__main__":
+    demo.launch()

HF/benchmark.py ADDED Viewed

	@@ -0,0 +1,214 @@

+"""
+benchmark.py — Full Benchmark Harness
+Outputs a CSV with columns:
+  batch_size, total_logs, elapsed_sec, throughput_logs_sec, p50_ms, p95_ms, p99_ms, mean_ms, cpu_mean_pct, cpu_max_pct, ram_mean_mb, ram_max_mb, tier_regex_pct, tier_bert_pct, tier_llm_pct
+Usage:
+  python benchmark.py --logs 5000 --output benchmark_results.csv
+What it measures:
+  - Batch size sweep: 1, 8, 16, 32, 64, 128
+  - Throughput (logs/sec)
+  - Latency: p50 / p95 / p99 (per-log)
+  - CPU and RAM during inference
+  - Tier distribution (Regex % / BERT % / LLM %)
+Google interview talking point:
+  "I designed a benchmark harness that sweeps batch sizes and measures
+   latency percentiles + resource utilization, so I can show the
+   throughput-latency tradeoff curve empirically."
+"""
+from __future__ import annotations
+import argparse
+import csv
+import os
+import random
+import sys
+import time
+import statistics
+from pathlib import Path
+import psutil
+# ── Synthetic log generator (no external deps needed) ────────────────────────
+SOURCES = ["ModernCRM", "ModernHR", "BillingSystem", "AnalyticsEngine", "ThirdPartyAPI"]
+_LOG_TEMPLATES = [
+    ("ModernCRM",       "User User{id} logged in."),
+    ("ModernCRM",       "IP {ip} blocked due to potential attack"),
+    ("ModernHR",        "Multiple login failures occurred on user {id} account"),
+    ("ModernHR",        "Admin access escalation detected for user {id}"),
+    ("BillingSystem",   "GET /api/v2/invoices HTTP/1.1 status: {code} len: {len} time: {t}"),
+    ("BillingSystem",   "POST /api/v1/payments HTTP/1.1 status: {code} len: {len} time: {t}"),
+    ("AnalyticsEngine", "System crashed due to disk I/O failure on node-{n}"),
+    ("AnalyticsEngine", "Backup completed successfully."),
+    ("ThirdPartyAPI",   "Service payments-api is unreachable after 3 retries"),
+    ("ThirdPartyAPI",   "CPU usage at {pct}% for the last 10 minutes on node-{n}"),
+    ("AnalyticsEngine", "CRITICAL: data corruption detected on shard-{n}"),
+    ("ModernCRM",       "Health check passed for service {svc}"),
+]
+def _rand_ip():
+    return f"{random.randint(10,192)}.{random.randint(0,255)}.{random.randint(0,255)}.{random.randint(1,254)}"
def _fill(template: str) -> str:
    """Replace the `{...}` placeholders in a log template with random values.

    Every value is drawn whether or not its placeholder occurs in the
    template, and always in the same order, so the RNG advances identically
    for every template.
    """
    substitutions = (
        ("{id}",   str(random.randint(100, 99999))),
        ("{ip}",   _rand_ip()),
        ("{code}", random.choice(["200", "201", "400", "404", "500", "503"])),
        ("{len}",  str(random.randint(100, 9999))),
        ("{t}",    f"{random.uniform(0.01, 2.5):.2f}"),
        ("{n}",    str(random.randint(1, 20))),
        ("{pct}",  str(random.randint(60, 99))),
        ("{svc}",  random.choice(["auth-api", "billing", "analytics", "events"])),
    )
    filled = template
    for placeholder, value in substitutions:
        filled = filled.replace(placeholder, value)
    return filled
def generate_logs(n: int) -> list[tuple[str, str]]:
    """Generate `n` synthetic (source, log_message) pairs.

    Reseeds the module-level RNG with 42 on every call, so the output for a
    given `n` is the same each time.
    """
    random.seed(42)
    picks = random.choices(_LOG_TEMPLATES, k=n)
    generated = []
    for source, template in picks:
        generated.append((source, _fill(template)))
    return generated
+# ── Benchmark runner ─────────────────────────────────────────────────────────
def run_benchmark(
    logs: list[tuple[str, str]],
    batch_sizes: list[int],
    output_csv: str,
    warmup_n: int = 50,
) -> list[dict]:
    """Sweep batch sizes over `logs` and write one CSV row per batch size.

    Args:
        logs: (source, log_message) pairs to classify.
        batch_sizes: Batch sizes to sweep; each must be a positive integer.
        output_csv: Destination CSV path; parent directories are created.
        warmup_n: Number of leading logs classified once before timing starts.

    Returns:
        The rows written to the CSV. Each row holds throughput, per-log
        latency percentiles, CPU/RAM samples, and tier percentages. Empty if
        nothing was measured, in which case no CSV is written.

    Raises:
        ValueError: If any batch size is not positive.
    """
    from classify import classify_logs, pipeline_summary

    # Validate up front so a bad size cannot waste an earlier, slow sweep.
    invalid = [bs for bs in batch_sizes if bs <= 0]
    if invalid:
        raise ValueError(f"Batch sizes must be positive integers, got: {invalid}")

    proc = psutil.Process(os.getpid())
    rows: list[dict] = []
    total_logs = len(logs)

    # Warmup (model load, JIT, etc.)
    print(f"🔥 Warming up with {warmup_n} logs…")
    classify_logs(logs[:warmup_n])

    for bs in batch_sizes:
        # Slice logs into batches of size `bs`
        batches = [logs[i:i + bs] for i in range(0, total_logs, bs)]
        if not batches:
            continue
        per_log_latencies: list[float] = []
        cpu_samples: list[float] = []
        ram_samples: list[float] = []
        all_results: list[dict] = []
        print(f"\n📐 Batch size = {bs} ({len(batches)} batches × {bs} logs)…")

        # cpu_percent(interval=None) measures since the previous call. Prime
        # it here so the first sample covers the first batch and is not a
        # meaningless 0.0.
        proc.cpu_percent(interval=None)

        wall_start = time.perf_counter()
        for batch in batches:
            t0 = time.perf_counter()
            results = classify_logs(batch)
            batch_ms = (time.perf_counter() - t0) * 1000
            # Every log in a batch is charged the batch's average latency.
            per_log_latencies.extend([batch_ms / len(batch)] * len(batch))
            all_results.extend(results)
            # Resource snapshot
            cpu_samples.append(proc.cpu_percent(interval=None))
            ram_samples.append(proc.memory_info().rss / 1_048_576)  # MB
        wall_elapsed = time.perf_counter() - wall_start

        throughput = round(total_logs / wall_elapsed, 1)
        per_log_latencies.sort()
        n = len(per_log_latencies)
        tier_stats = pipeline_summary(all_results)["tier_stats"]

        def tier_pct(name, _stats=tier_stats):
            return _stats.get(name, {}).get("pct", 0.0)

        row = {
            "batch_size":       bs,
            "total_logs":       total_logs,
            "elapsed_sec":      round(wall_elapsed, 2),
            "throughput_logs_sec": throughput,
            "p50_ms":           round(statistics.median(per_log_latencies), 3),
            "p95_ms":           round(per_log_latencies[min(int(n * 0.95), n - 1)], 3),
            "p99_ms":           round(per_log_latencies[min(int(n * 0.99), n - 1)], 3),
            "mean_ms":          round(statistics.mean(per_log_latencies), 3),
            "cpu_mean_pct":     round(statistics.mean(cpu_samples), 1) if cpu_samples else 0,
            "cpu_max_pct":      round(max(cpu_samples), 1) if cpu_samples else 0,
            "ram_mean_mb":      round(statistics.mean(ram_samples), 1) if ram_samples else 0,
            "ram_max_mb":       round(max(ram_samples), 1) if ram_samples else 0,
            "tier_regex_pct":   tier_pct("Regex"),
            "tier_bert_pct":    tier_pct("BERT"),
            "tier_llm_pct":     tier_pct("LLM") + tier_pct("LLM (fallback)"),
        }
        rows.append(row)
        print(f"  ✅ Throughput: {throughput} logs/sec | "
              f"p50={row['p50_ms']}ms p95={row['p95_ms']}ms p99={row['p99_ms']}ms | "
              f"CPU={row['cpu_mean_pct']}% RAM={row['ram_mean_mb']}MB")
        print(f"  📊 Tiers: Regex={row['tier_regex_pct']}% "
              f"BERT={row['tier_bert_pct']}% "
              f"LLM={row['tier_llm_pct']}%")

    if not rows:
        # No logs or no batch sizes: there is no header row to derive columns from.
        print("\n⚠️ Nothing was benchmarked — no CSV written")
        return rows

    # Write CSV
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)
    print(f"\n✅ Benchmark results saved → {output_csv}")
    return rows
+# ── Scaling stress test ──────────────────────────────────────────────────────
def stress_test(sizes: list[int] | None = None) -> None:
    """Quick throughput check at different total log counts.

    Args:
        sizes: Log counts to test. Defaults to 5k, 20k, 50k and 100k. The
            default is built per call instead of being a shared mutable list.
            Non-positive sizes are skipped.
    """
    from classify import classify_logs

    if sizes is None:
        sizes = [5_000, 20_000, 50_000, 100_000]

    print("\n🔥 Stress Test — Scaling")
    print(f"{'N logs':>10} {'Elapsed(s)':>12} {'Throughput':>12} {'p95_ms':>10}")
    print("─" * 50)
    for n in sizes:
        if n <= 0:
            # Per-log time is undefined for an empty run (division by zero).
            continue
        logs = generate_logs(n)
        t0   = time.perf_counter()
        classify_logs(logs)
        elapsed = time.perf_counter() - t0
        tput = n / elapsed
        # Rough p95 approximation: time / n * correction factor
        p95_approx = (elapsed / n * 1000) * 1.5
        print(f"{n:>10,} {elapsed:>12.2f}s {tput:>12.1f}/s {p95_approx:>10.1f}ms")
+# ── CLI ──────────────────────────────────────────────────────────────────────
def main():
    """CLI entry point: parse options, generate logs, run the batch-size sweep.

    With --stress, the scaling stress test runs after the sweep.
    """
    cli = argparse.ArgumentParser(description="Log pipeline benchmark harness")
    cli.add_argument(
        "--logs", type=int, default=5_000,
        help="Number of logs to benchmark (default: 5000)",
    )
    cli.add_argument(
        "--output", default="benchmark_results.csv",
        help="Output CSV path",
    )
    cli.add_argument(
        "--stress", action="store_true",
        help="Run scaling stress test (5k, 20k, 50k, 100k)",
    )
    cli.add_argument(
        "--batches", default="1,8,16,32,64,128",
        help="Comma-separated batch sizes to sweep",
    )
    opts = cli.parse_args()

    sweep = list(map(int, opts.batches.split(",")))
    corpus = generate_logs(opts.logs)
    print(f"📦 Generated {len(corpus):,} synthetic logs")
    run_benchmark(corpus, sweep, opts.output)
    if opts.stress:
        stress_test()
+if __name__ == "__main__":
+    main()

HF/classify.py ADDED Viewed

	@@ -0,0 +1,198 @@

+"""
+classify.py — 3-Tier Hybrid Pipeline (V3 — Latency-Tracked)
+Architecture:
+  LegacyCRM → LLM directly
+  Others    → Regex → BERT (batch) → LLM fallback
+Changes in V3:
+  - Tier-wise latency tracking (regex_ms, bert_ms, llm_ms)
+  - Pipeline summary with p50/p95 per tier
+  - Defensive: LLM timeout + retry baked in via processor_llm
+  - classify_logs returns richer result dict
+"""
+from __future__ import annotations
+import time
+import statistics
+import pandas as pd
+from processor_regex import classify_with_regex
+from processor_bert  import classify_batch as bert_batch
+from processor_llm   import classify_with_llm
+LEGACY_SOURCE = "LegacyCRM"
+# ── Result type ─────────────────────────────────────────────────────────────
+def _make_result(label: str, tier: str, confidence, latency_ms: float) -> dict:
+    return {
+        "label":      label,
+        "tier":       tier,
+        "confidence": confidence,
+        "latency_ms": round(latency_ms, 3),
+    }
+# ── Single log (backward-compatible) ────────────────────────────────────────
def classify_log(source: str, log_msg: str) -> dict:
    """Classify a single log by running it through the batch pipeline.

    Returns the dict that `classify_logs` produces for that one entry
    (label, tier, confidence, latency_ms).
    """
    single_batch = [(source, log_msg)]
    return classify_logs(single_batch)[0]
+# ── Batch pipeline (main entry point) ───────────────────────────────────────
def classify_logs(logs: list[tuple[str, str]]) -> list[dict]:
    """Batch classify with 3-tier routing and per-result latency.

    Args:
        logs: (source, log_message) pairs.

    Returns:
        One dict per input, in input order:
        { label, tier, confidence, latency_ms }

    Tier routing:
      LegacyCRM source → LLM directly
      Regex match      → done, confidence 1.0
      Remainder        → one BERT batch call → LLM if BERT says "Unclassified"

    Latency notes: Regex and LLM results carry the time of their own call.
    BERT results carry the batch time divided by the BERT batch size.
    """
    results = [None] * len(logs)

    # ── Step 1: Route to groups ─────────────────────────────────────────────
    llm_indices  = []
    bert_indices = []
    for i, (source, log_msg) in enumerate(logs):
        if source == LEGACY_SOURCE:
            llm_indices.append(i)
            continue
        t0    = time.perf_counter()
        label = classify_with_regex(log_msg)
        t1    = time.perf_counter()
        if label:
            results[i] = _make_result(label, "Regex", 1.0, (t1 - t0) * 1000)
        else:
            bert_indices.append(i)

    # ── Step 2: BERT batch ──────────────────────────────────────────────────
    if bert_indices:
        bert_msgs = [logs[i][1] for i in bert_indices]
        t_bert_start = time.perf_counter()
        bert_results = bert_batch(bert_msgs)
        t_bert_end   = time.perf_counter()
        bert_ms_per_log = (t_bert_end - t_bert_start) * 1000 / len(bert_msgs)
        for idx, (label, conf) in zip(bert_indices, bert_results):
            if label != "Unclassified":
                results[idx] = _make_result(label, "BERT", conf, bert_ms_per_log)
            else:
                # Escalate to the LLM tier.
                llm_indices.append(idx)

    # ── Step 3: LLM (LegacyCRM + BERT fallback) ────────────────────────────
    for i in llm_indices:
        source, log_msg = logs[i]
        t0    = time.perf_counter()
        label = classify_with_llm(log_msg)
        t1    = time.perf_counter()
        tier  = "LLM" if source == LEGACY_SOURCE else "LLM (fallback)"
        results[i] = _make_result(label, tier, None, (t1 - t0) * 1000)
    return results
+# ── Pipeline summary ─────────────────────────────────────────────────────────
def pipeline_summary(results: list[dict]) -> dict:
    """Aggregate `classify_logs` output into per-tier and per-label statistics.

    Returns a dict with:
      total        — number of results
      tier_stats   — per tier: count, pct of total, p50/p95/p99/mean latency (ms)
      label_counts — occurrences of each label
    """
    latencies_by_tier: dict[str, list[float]] = {}
    label_counts: dict[str, int] = {}
    for entry in results:
        tier_name = entry["tier"]
        if tier_name not in latencies_by_tier:
            latencies_by_tier[tier_name] = []
        latencies_by_tier[tier_name].append(entry["latency_ms"])
        label = entry["label"]
        if label in label_counts:
            label_counts[label] += 1
        else:
            label_counts[label] = 1

    total = len(results)

    def describe(samples: list[float]) -> dict:
        ordered = sorted(samples)
        size = len(ordered)

        def at(fraction: float) -> float:
            # Clamp the index so it never runs past the last sample.
            return ordered[min(int(size * fraction), size - 1)]

        return {
            "count":    size,
            "pct":      round(size / total * 100, 1),
            "p50_ms":   round(statistics.median(ordered), 2),
            "p95_ms":   round(at(0.95), 2),
            "p99_ms":   round(at(0.99), 2),
            "mean_ms":  round(statistics.mean(ordered), 2),
        }

    tier_stats = {name: describe(vals) for name, vals in latencies_by_tier.items()}
    return {
        "total":        total,
        "tier_stats":   tier_stats,
        "label_counts": label_counts,
    }
+# ── CSV batch classify ───────────────────────────────────────────────────────
def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str, pd.DataFrame]:
    """Classify every row of a CSV file and write the annotated result.

    Required input columns: 'source', 'log_message'.
    Added output columns: 'predicted_label', 'tier_used', 'latency_ms',
    'confidence' (formatted as a percentage, or "N/A" when absent).

    Empty cells load as NaN floats and numeric-looking cells as numbers. Both
    are coerced to strings before classification so the pipeline always
    receives text. The original column values are written back unchanged.

    Returns:
        (output_path, annotated DataFrame)

    Raises:
        ValueError: If a required column is missing.
    """
    df = pd.read_csv(input_path)
    required = {"source", "log_message"}
    if not required.issubset(df.columns):
        raise ValueError(f"CSV mein ye columns chahiye: {required}. Mila: {set(df.columns)}")

    sources  = df["source"].fillna("").astype(str)
    messages = df["log_message"].fillna("").astype(str)
    log_pairs = list(zip(sources, messages))
    results   = classify_logs(log_pairs)

    df["predicted_label"] = [r["label"]      for r in results]
    df["tier_used"]        = [r["tier"]       for r in results]
    df["latency_ms"]       = [r["latency_ms"] for r in results]
    df["confidence"]       = [
        f"{r['confidence']:.1%}" if r["confidence"] is not None else "N/A"
        for r in results
    ]
    df.to_csv(output_path, index=False)
    return output_path, df
# Alias: `classify` is bound to the same function object as `classify_logs`.
classify = classify_logs
# ── Self-test ────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke test: push a few (source, log) pairs through the pipeline, print
    # one row per log, then print the aggregated tier and label summary.
    sample = [
        ("ModernCRM",       "IP 192.168.133.114 blocked due to potential attack"),
        ("BillingSystem",   "User User12345 logged in."),
        ("AnalyticsEngine", "File data_6957.csv uploaded successfully by user User265."),
        ("ModernHR",        "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"),
        ("ModernHR",        "Admin access escalation detected for user 9429"),
        ("LegacyCRM",       "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active."),
        ("LegacyCRM",       "The 'ReportGenerator' module will be retired in version 4.0."),
    ]
    print(f'{"Source":<20} {"Tier":<18} {"Conf":>6} {"Lat(ms)":>8}  {"Label":<25} Log')
    print("─" * 115)
    results = classify_logs(sample)
    for (source, log), r in zip(sample, results):
        # Compare against None explicitly: a truthiness test would print a
        # genuine 0.0 confidence as "N/A" (classify_csv also uses `is not None`).
        conf = f"{r['confidence']:.0%}" if r["confidence"] is not None else "  N/A"
        print(f'{source:<20} {r["tier"]:<18} {conf:>6} {r["latency_ms"]:>8.1f}  {r["label"]:<25} {log[:40]}')
    summary = pipeline_summary(results)
    print("\n📊 Pipeline Summary:")
    for tier, stats in summary["tier_stats"].items():
        print(f"  {tier}: {stats['count']} logs ({stats['pct']}%) | "
              f"p50={stats['p50_ms']}ms p95={stats['p95_ms']}ms p99={stats['p99_ms']}ms")
    print("\n🏷️  Label distribution:")
    # Most frequent label first.
    for label, count in sorted(summary["label_counts"].items(), key=lambda x: -x[1]):
        print(f"  • {label}: {count}")

HF/error_analysis.py ADDED Viewed

	@@ -0,0 +1,250 @@

+"""
+error_analysis.py — Deep Dive into Unclassified / Misclassified Logs
+This script addresses the 76 unclassified logs from the 20k run.
+It answers:
+  1. What do these logs look like? (print + group)
+  2. Why did the model fail? (pattern analysis)
+  3. What should we do? (actionable fix suggestions)
+Google interview talking point:
+  "I performed structured error analysis on my model's failure cases.
+   I grouped them by failure type — vocabulary mismatch, ambiguous intent,
+   formatting noise — and used that to drive targeted improvements."
+Usage:
+  python error_analysis.py --input output.csv   # post-classify CSV
+  python error_analysis.py --simulate           # demo with synthetic data
+"""
+from __future__ import annotations
+import argparse
+import re
+import sys
+from collections import Counter, defaultdict
+from typing import Optional
+import pandas as pd
+# ── Failure mode taxonomy ────────────────────────────────────────────────────
class FailureMode:
    """String constants naming the heuristic reasons a log stayed unclassified."""

    # Domain-specific terms not in training
    RARE_VOCAB = "rare_vocabulary"
    # Log could match multiple categories
    AMBIGUOUS = "ambiguous_intent"
    # Non-standard / old-school formatting
    LEGACY_FORMAT = "legacy_format"
    # Partial / malformed log line
    TRUNCATED = "truncated_or_noisy"
    # ID/code-heavy, no semantic signal
    NUMERIC_ONLY = "mostly_numeric"
    # One line, multiple events
    MULTI_EVENT = "multi_event"
    # None of the heuristics matched
    UNKNOWN = "unknown"
def _detect_failure_mode(log: str) -> str:
    """
    Guess, via ordered heuristics, why a log line was left unclassified.

    The checks run in a fixed order and the first hit wins:
      1. shorter than 20 characters          -> TRUNCATED
      2. more than 40% digit characters      -> NUMERIC_ONLY
      3. several events joined on one line   -> MULTI_EVENT
      4. legacy / CRM keywords               -> LEGACY_FORMAT
      5. two or more ambiguous keywords      -> AMBIGUOUS
      6. rare operational vocabulary         -> RARE_VOCAB

    Returns:
        A FailureMode constant; FailureMode.UNKNOWN when nothing matches.
    """
    if len(log) < 20:
        return FailureMode.TRUNCATED

    # Share of digit characters in the whole line.
    digit_total = sum(1 for ch in log if ch.isdigit())
    if digit_total / max(len(log), 1) > 0.40:
        return FailureMode.NUMERIC_ONLY

    # Separators that suggest two or more events were joined (case-sensitive " AND ").
    joined_events = log.count(";") >= 2 or " AND " in log or log.count(" | ") >= 2
    if joined_events:
        return FailureMode.MULTI_EVENT

    lowered = log.lower()

    # Legacy / unusual format signals.
    legacy_signals = ("ticket", "escalation", "crm", "deprecated", "retire",
                      "module will be", "workflow", "assigned agent")
    for signal in legacy_signals:
        if signal in lowered:
            return FailureMode.LEGACY_FORMAT

    # Ambiguity signals — the line could be an error OR a security event.
    ambiguous_signals = ("failed", "error", "unauthorized", "denied", "blocked")
    ambiguous_hits = [signal for signal in ambiguous_signals if signal in lowered]
    if len(ambiguous_hits) >= 2:
        return FailureMode.AMBIGUOUS

    # Rare vocabulary.
    rare_signals = ("sla", "oncall", "runbook", "pagerduty", "janitor", "gc ", "eviction")
    for signal in rare_signals:
        if signal in lowered:
            return FailureMode.RARE_VOCAB

    return FailureMode.UNKNOWN
def _suggest_fix(mode: str) -> str:
    """
    Return the remediation hint for a failure mode.

    Args:
        mode: One of the FailureMode constants.

    Returns:
        The suggestion text for that mode, or "Manual review required." when
        the mode is not one of the known constants.
    """
    suggestions = {
        FailureMode.RARE_VOCAB:    "Add 5–10 training examples covering this vocabulary; or add regex rule.",
        FailureMode.AMBIGUOUS:     "Use multi-label or add a dedicated 'Ambiguous' class; review confidence threshold.",
        FailureMode.LEGACY_FORMAT: "Route all legacy-format logs to LLM tier; add few-shot examples for LLM prompt.",
        FailureMode.TRUNCATED:     "Add input validation: reject/flag logs under 15 chars before classification.",
        FailureMode.NUMERIC_ONLY:  "Add regex patterns for structured numeric formats (job IDs, error codes, etc.).",
        FailureMode.MULTI_EVENT:   "Pre-process: split multi-event lines on ';' or ' | ' before classifying.",
        FailureMode.UNKNOWN:       "Manually review and add to training data or LLM few-shot examples.",
    }
    try:
        return suggestions[mode]
    except KeyError:
        return "Manual review required."
+# ── Core analysis ────────────────────────────────────────────────────────────
def analyze_unclassified(df: pd.DataFrame, label_col: str = "predicted_label") -> None:
    """
    Print a structured error-analysis report for logs labelled "Unclassified".

    The report has six parts: the raw unclassified logs, a grouping by
    heuristic failure mode (with a fix suggestion and up to three examples
    each), the 20 most common tokens, the length distribution, a per-source
    breakdown (only when a 'source' column exists) and a fixed list of
    prioritised fixes ending with the dominant failure mode.

    Args:
        df: Classified DataFrame. Log text is read from 'log_message' when
            present, otherwise from the last column.
        label_col: Name of the column holding the predicted label.

    Raises:
        KeyError: If ``label_col`` is not a column of ``df``.
    """
    if label_col not in df.columns:
        raise KeyError(f"Label column {label_col!r} not found in DataFrame")

    unclassified = df[df[label_col] == "Unclassified"].copy()
    total_unclassified = len(unclassified)
    if total_unclassified == 0:
        print("✅ No unclassified logs found!")
        return

    print(f"\n{'='*70}")
    print(f"🔍 ERROR ANALYSIS: {total_unclassified} Unclassified Logs")
    print(f"{'='*70}\n")

    # Materialise the messages once as strings; every step below reuses them
    # instead of walking the frame row by row again.
    log_col = "log_message" if "log_message" in df.columns else df.columns[-1]
    messages = [str(value) for value in unclassified[log_col]]

    # ── Step 1: Print all unclassified logs ─────────────────────────────────
    print(f"{'#':>4}  {'Log Message'}")
    print("─" * 80)
    for i, log in enumerate(messages, 1):
        print(f"{i:>4}. {log[:120]}")

    # ── Step 2: Group by failure mode ───────────────────────────────────────
    print(f"\n{'='*70}")
    print("📂 GROUPING BY FAILURE MODE")
    print("─" * 70)
    groups: dict[str, list[str]] = defaultdict(list)
    for log in messages:
        groups[_detect_failure_mode(log)].append(log)
    # Largest group first; the stable sort keeps first-seen order for ties.
    for mode, logs in sorted(groups.items(), key=lambda x: -len(x[1])):
        pct = len(logs) / total_unclassified * 100
        print(f"\n🔹 {mode} — {len(logs)} logs ({pct:.1f}%)")
        print(f"   💡 Fix: {_suggest_fix(mode)}")
        print("   Examples:")
        for log in logs[:3]:
            print(f"     • {log[:110]}")

    # ── Step 3: Token frequency analysis ────────────────────────────────────
    print(f"\n{'='*70}")
    print("📊 COMMON TOKENS IN UNCLASSIFIED LOGS")
    print("─" * 70)
    STOPWORDS = {"the", "a", "an", "is", "in", "on", "for", "to", "of",
                 "and", "or", "by", "at", "with", "has", "was", "be",
                 "this", "that", "it", "not", "are", "from", "as"}
    counter: Counter = Counter()
    for log in messages:
        # Tokens are runs of 3+ ASCII letters in the lower-cased message.
        tokens = re.findall(r"[a-z]{3,}", log.lower())
        counter.update(t for t in tokens if t not in STOPWORDS)
    print("Top 20 tokens in unclassified logs:")
    for token, count in counter.most_common(20):
        bar = "█" * min(count, 40)  # cap the bar at 40 cells
        print(f"  {token:<20} {count:>4}  {bar}")

    # ── Step 4: Length distribution ─────────────────────────────────────────
    lengths = pd.Series([len(log) for log in messages])
    print(f"\n{'='*70}")
    print("📏 LOG LENGTH DISTRIBUTION (Unclassified)")
    print(f"  Mean:   {lengths.mean():.1f} chars")
    print(f"  Median: {lengths.median():.1f} chars")
    print(f"  Min:    {lengths.min()} chars")
    print(f"  Max:    {lengths.max()} chars")
    short = (lengths < 30).sum()
    if short:
        print(f"  ⚠️  {short} logs under 30 chars — likely truncated/noisy")

    # ── Step 5: Source breakdown ─────────────────────────────────────────────
    if "source" in df.columns:
        print(f"\n{'='*70}")
        print("🏷️  UNCLASSIFIED BY SOURCE")
        src_counts = unclassified["source"].value_counts()
        for src, cnt in src_counts.items():
            bar = "█" * min(cnt, 40)
            print(f"  {src:<22} {cnt:>4}  {bar}")

    # ── Step 6: Actionable summary ───────────────────────────────────────────
    print(f"\n{'='*70}")
    print("✅ ACTIONABLE FIXES (Priority Order)")
    print("─" * 70)
    # `groups` cannot be empty here: at least one unclassified log was grouped.
    dominant_mode = max(groups, key=lambda m: len(groups[m]))
    fixes = [
        (1, "regex",    "Add patterns for top unclassified tokens to processor_regex.py"),
        (2, "training", "Add 10–20 examples per failure mode to training data"),
        (3, "llm",      "For LEGACY_FORMAT failures: add to LLM few-shot examples"),
        (4, "preproc",  "Pre-process: split multi-event logs, reject truncated logs"),
        (5, "threshold","Tune BERT confidence threshold (currently 0.30 — try 0.40)"),
    ]
    for priority, area, fix in fixes:
        print(f"  {priority}. [{area.upper():^10}] {fix}")
    print(f"\n📌 Dominant failure mode: '{dominant_mode}' ({len(groups[dominant_mode])} logs)")
    print(f"   Start here: {_suggest_fix(dominant_mode)}\n")
+# ── Simulate 76 unclassified logs for demo ────────────────────────────────────
+def _simulate_unclassified() -> pd.DataFrame:
+    """Generate synthetic 'unclassified' logs that mimic real failure patterns."""
+    logs = [
+        # Legacy format / CRM
+        "Case escalation for ticket ID 9021 failed: agent inactive.",
+        "CRM module 'ReportGenerator' will be retired in v4.1.",
+        "Workflow for approval chain #4421 stalled at step 3.",
+        "SLA breach detected for case ID 7701 (P1, 4h breach).",
+        # Ambiguous
+        "Service auth-api failed and unauthorized access was logged.",
+        "Error: blocked request from 10.0.0.5 — reason unknown.",
+        # Truncated / noisy
+        "ERR",
+        "srv timeout",
+        "node-7",
+        # Numeric-heavy
+        "8821 9001 443 0 0 DROP IN=eth0 OUT= MAC=",
+        "16 0 0 1 2024-01-14 03:21:00.001",
+        # Multi-event
+        "Backup started; disk usage at 92%; health check failed | node-3",
+        # Rare vocab
+        "PagerDuty alert triggered for on-call rotation P1-incident.",
+        "GC eviction: 3.2GB heap compacted in 420ms.",
+        "Janitor job completed: 14,000 stale tokens purged.",
+        "Runbook auto-remediation triggered for alert ALT-9021.",
+    ]
+    # Pad to ~76
+    padded = (logs * 5)[:76]
+    return pd.DataFrame({
+        "source":          ["ModernCRM"] * 30 + ["LegacyCRM"] * 20 + ["AnalyticsEngine"] * 26,
+        "log_message":     padded,
+        "predicted_label": ["Unclassified"] * 76,
+    })
+# ── CLI ──────────────────────────────────────────────────────────────────────
def main():
    """
    Command-line entry point.

    With --simulate the analysis runs on synthetic logs; otherwise --input
    names the classified CSV to load. When neither is given, the help text is
    printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Analyze unclassified/misclassified logs")
    parser.add_argument("--input",    help="Path to classified CSV from classify_csv()")
    parser.add_argument("--simulate", action="store_true",
                        help="Run with synthetic unclassified logs (no CSV needed)")
    parser.add_argument("--label-col", default="predicted_label",
                        help="Column name that holds the predicted label")
    args = parser.parse_args()

    # Guard clause: nothing to analyse without a data source.
    if not args.simulate and not args.input:
        parser.print_help()
        sys.exit(1)

    # --simulate takes precedence over --input when both are supplied.
    if args.simulate:
        df = _simulate_unclassified()
        print("🎭 Running with SIMULATED 76 unclassified logs…")
    else:
        df = pd.read_csv(args.input)

    analyze_unclassified(df, label_col=args.label_col)


if __name__ == "__main__":
    main()

HF/processor_bert.py ADDED Viewed

	@@ -0,0 +1,216 @@

"""
processor_bert.py — ONNX Runtime powered BERT classifier
Speed: 82 logs/s → 2000+ logs/s
How it works:
1. ONNX Runtime: 3-5x faster than plain PyTorch
2. Batch processing: 64 logs are processed together
3. Pre-allocated buffers: no wasted memory
"""
+from __future__ import annotations
+import os
+import numpy as np
+import joblib
# ── Backend selection state (filled in lazily by _load_models) ──────────────
# True once the ONNX session and tokenizer have loaded; otherwise the
# SentenceTransformer (PyTorch) path is used.
_USE_ONNX = False
# SentenceTransformer instance for the PyTorch fallback path.
_embedding_model = None
# Classifier loaded from MODEL_PATH; `None` means "models not loaded yet".
_classifier       = None
# onnxruntime InferenceSession and its matching tokenizer (ONNX path only).
_ort_session      = None
_ort_tokenizer    = None
# Model artefacts live in a `models/` directory next to this file.
MODEL_PATH    = os.path.join(os.path.dirname(__file__), 'models', 'log_classifier.joblib')
ONNX_DIR      = os.path.join(os.path.dirname(__file__), 'models', 'onnx')
# Predictions whose top probability is below this are reported as 'Unclassified'.
CONFIDENCE_THRESHOLD = 0.30
# Number of logs embedded and classified per chunk.
DEFAULT_BATCH = 64
def _load_models():
    """
    Lazily load the classifier and one embedding backend on first use.

    The classifier is read from MODEL_PATH. If ``model.onnx`` exists in
    ONNX_DIR, an ONNX Runtime session and tokenizer are loaded; when that file
    is absent or loading fails, a SentenceTransformer model is used instead.

    Everything is loaded into locals first and the module globals are only
    assigned once all of it succeeded. A failure part-way through therefore
    leaves ``_classifier`` as None, so the next call retries instead of
    returning early with no embedding backend.

    Raises:
        FileNotFoundError: If the classifier file at MODEL_PATH is missing.
    """
    global _USE_ONNX, _embedding_model, _classifier, _ort_session, _ort_tokenizer
    if _classifier is not None:
        return  # Already loaded

    # ── Load the classifier ────────────────────────────────
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(
            f'Model nahi mila: {MODEL_PATH}\n'
            'Pehle Colab notebook run karo aur model download karo.'
        )
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code — only point MODEL_PATH at trusted model artefacts.
    classifier = joblib.load(MODEL_PATH)

    use_onnx = False
    ort_session = None
    ort_tokenizer = None
    embedding_model = None

    # ── Try ONNX first (fast), fall back to PyTorch ────────
    onnx_model_file = os.path.join(ONNX_DIR, 'model.onnx')
    if os.path.exists(onnx_model_file):
        try:
            import onnxruntime as ort
            from transformers import AutoTokenizer
            # CPU-optimised session options
            sess_opts = ort.SessionOptions()
            sess_opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
            # os.cpu_count() may return None; 0 lets ONNX Runtime pick its default.
            sess_opts.intra_op_num_threads = os.cpu_count() or 0
            sess_opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
            ort_session = ort.InferenceSession(
                onnx_model_file,
                sess_options=sess_opts,
                providers=['CPUExecutionProvider']
            )
            ort_tokenizer = AutoTokenizer.from_pretrained(ONNX_DIR)
            use_onnx = True
            print('[BERT] ✅ ONNX Runtime loaded — FAST MODE')
        except Exception as e:
            # Deliberate best-effort: any ONNX problem falls back to PyTorch.
            print(f'[BERT] ONNX load failed ({e}), fallback to PyTorch')
            ort_session = None
            ort_tokenizer = None

    if not use_onnx:
        from sentence_transformers import SentenceTransformer
        embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        print('[BERT] ⚠️  PyTorch mode (install ONNX for 3-5x speedup)')

    # Publish the state only now; `_classifier` goes last because it doubles
    # as the "already loaded" flag checked at the top.
    _USE_ONNX = use_onnx
    _ort_session = ort_session
    _ort_tokenizer = ort_tokenizer
    _embedding_model = embedding_model
    _classifier = classifier
def _embed_onnx(texts: list[str]) -> np.ndarray:
    """
    Embed texts with the ONNX Runtime session.

    Steps: tokenize (padding, truncation to 128 tokens, NumPy output), run
    the session, mean-pool the first model output weighted by the attention
    mask, then L2-normalise each row.

    Args:
        texts: Log messages to embed.

    Returns:
        One L2-normalised embedding row per input text.
    """
    # No torch import here: the tokenizer returns NumPy arrays, so the ONNX
    # path needs neither the import cost nor a PyTorch installation.
    inputs = _ort_tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors='np'
    )
    attention_mask = inputs['attention_mask']

    ort_inputs = {
        'input_ids':      inputs['input_ids'].astype(np.int64),
        'attention_mask': attention_mask.astype(np.int64),
    }
    # Only feed token_type_ids when the exported graph declares that input;
    # use zeros if the tokenizer did not produce them.
    expected_inputs = {node.name for node in _ort_session.get_inputs()}
    if 'token_type_ids' in expected_inputs:
        ort_inputs['token_type_ids'] = inputs.get(
            'token_type_ids', np.zeros_like(inputs['input_ids'])
        ).astype(np.int64)

    outputs = _ort_session.run(None, ort_inputs)
    hidden = outputs[0]  # assumed (batch, seq_len, hidden) — TODO confirm against the exported model

    # Mean pooling (attention-mask weighted)
    mask = attention_mask[:, :, None].astype(np.float32)
    summed = (hidden * mask).sum(axis=1)
    # Guard the divisor so a row with an all-zero mask cannot produce NaN/inf.
    counts = np.maximum(mask.sum(axis=1), 1e-9)
    embeddings = summed / counts

    # L2 normalise (epsilon avoids division by zero for a zero vector)
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    return embeddings / (norms + 1e-8)
def _embed_pytorch(texts: list[str]) -> np.ndarray:
    """
    Embed texts with the SentenceTransformer fallback model.

    Encodes in chunks of DEFAULT_BATCH, requests NumPy output with normalised
    embeddings, and keeps the progress bar off.
    """
    encode_options = {
        "batch_size": DEFAULT_BATCH,
        "convert_to_numpy": True,
        "normalize_embeddings": True,
        "show_progress_bar": False,
    }
    vectors = _embedding_model.encode(texts, **encode_options)
    return vectors
+# ── PUBLIC API ──────────────────────────────────────────────
def classify_with_bert(log_message: str) -> tuple[str, float]:
    """
    Classify a single log message.

    Delegates to classify_batch with a one-element list.

    Returns:
        (label, confidence) for the message.
    """
    _load_models()
    label, confidence = classify_batch([log_message])[0]
    return label, confidence
def classify_batch(log_messages: list[str]) -> list[tuple[str, float]]:
    """
    Classify many log messages at once.

    Messages are handled in chunks of DEFAULT_BATCH: each chunk is embedded
    (ONNX or PyTorch backend) and passed to the classifier. A prediction whose
    top class probability is below CONFIDENCE_THRESHOLD is reported as
    'Unclassified', still with that probability as its confidence.

    Returns:
        One (label, confidence) tuple per input, in input order; an empty
        list for empty input.

    Example:
        results = classify_batch(['log1', 'log2', 'log3'])
        for label, conf in results:
            print(f'{label}: {conf:.1%}')
    """
    _load_models()
    if not log_messages:
        return []

    embed = _embed_onnx if _USE_ONNX else _embed_pytorch
    classified: list[tuple[str, float]] = []

    for start in range(0, len(log_messages), DEFAULT_BATCH):
        chunk = log_messages[start:start + DEFAULT_BATCH]
        vectors = embed(chunk)

        probabilities = _classifier.predict_proba(vectors)
        top_probs = probabilities.max(axis=1)
        predicted = _classifier.predict(vectors)

        classified.extend(
            ('Unclassified' if prob < CONFIDENCE_THRESHOLD else str(name), float(prob))
            for name, prob in zip(predicted, top_probs)
        )

    return classified
def get_classes() -> list[str]:
    """Return the loaded classifier's class labels as a list (loads models if needed)."""
    _load_models()
    class_labels = _classifier.classes_
    return list(class_labels)
def is_onnx_mode() -> bool:
    """Report whether the ONNX backend is in use (loads models if needed)."""
    _load_models()
    onnx_active = _USE_ONNX
    return onnx_active
+# ── TEST ────────────────────────────────────────────────────
if __name__ == '__main__':
    # Manual smoke test: classify a few sample logs one by one, report the
    # active backend, then time one large batch.
    import time

    test_logs = [
        'GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19',
        'System crashed due to driver errors when restarting the server',
        'Multiple login failures occurred on user 6454 account',
        'Admin access escalation detected for user 9429',
        'CPU usage at 98% for the last 10 minutes on node-7',
        'Backup completed successfully.',
        'User User123 logged in.',
        'Data replication task for shard 14 did not complete',
        'Hey bro chill ya!',     # should be Unclassified
    ]

    print('Single log test:')
    for message in test_logs:
        predicted, confidence = classify_with_bert(message)
        print(f'  [{confidence:.0%}] {predicted:25s} | {message[:60]}')

    backend_name = "ONNX 🚀" if is_onnx_mode() else "PyTorch"
    print(f'\nMode: {backend_name}')

    # Speed test: 100 copies of the sample set in one call.
    big_batch = test_logs * 100
    started = time.perf_counter()
    classify_batch(big_batch)
    elapsed = time.perf_counter() - started
    throughput = len(big_batch) / elapsed
    per_log_ms = elapsed * 1000 / len(big_batch)
    print(f'\nSpeed: {throughput:.0f} logs/s  ({per_log_ms:.1f}ms/log)')

HF/processor_llm.py ADDED Viewed

	@@ -0,0 +1,192 @@

+"""
+processor_llm.py — Tier 3: LLM-based Classifier
+Used for:
+  - LegacyCRM logs (Workflow Error, Deprecation Warning)
+  - BERT fallback when confidence < threshold
+Production hardening in V3:
+  - Timeout (configurable, default 5s)
+  - Retry with exponential backoff (max 2 retries)
+  - Explicit failure modes: returns "Unclassified" on all error paths
+  - Caching for repeated log patterns (hash-based, in-memory)
+  - Token budget enforcement (max_tokens=15)
+"""
+from __future__ import annotations
+import os
+import re
+import time
+import hashlib
+import logging
+from functools import lru_cache
+from typing import Optional
+logger = logging.getLogger(__name__)
# ── Config ─────────────────────────────────────────────────────────────────
# Hugging Face API token read from the environment; when it is unset,
# classify_with_llm returns "Unclassified" without calling the API.
HF_TOKEN   = os.getenv("HF_TOKEN")
# Model identifier passed to the chat-completion request.
LLM_MODEL  = "mistralai/Mistral-7B-Instruct-v0.3"
# The only labels this tier may return; any other reply maps to "Unclassified".
VALID_CATEGORIES = ["Workflow Error", "Deprecation Warning"]
# Retry / timeout config
MAX_RETRIES     = 2
RETRY_DELAY_SEC = 1.0   # doubles on each retry (exponential backoff)
REQUEST_TIMEOUT = 5     # seconds — fail fast, do not hang pipeline
# In-memory cache to avoid redundant LLM calls for repeated logs.
# Keys are MD5 hex digests of the stripped log message; values are labels.
_RESPONSE_CACHE: dict[str, str] = {}
MAX_CACHE_SIZE = 1000  # evict oldest when full (simple FIFO)
# System message sent with every request.
SYSTEM_PROMPT = (
    "You are an enterprise log classifier. "
    "Classify log messages into exactly one category. "
    "Return ONLY the category name — no explanation, no punctuation."
)
# Worked examples embedded in the user prompt ahead of the log to classify.
FEW_SHOT_EXAMPLES = [
    {
        "log":   "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
        "label": "Workflow Error",
    },
    {
        "log":   "The 'BulkEmailSender' feature is no longer supported. Use 'EmailCampaignManager' instead.",
        "label": "Deprecation Warning",
    },
    {
        "log":   "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
        "label": "Workflow Error",
    },
]
+# ── Cache helpers ────────────────────────────────────────────────────────────
+def _cache_key(log_msg: str) -> str:
+    return hashlib.md5(log_msg.strip().encode()).hexdigest()
def _cache_get(log_msg: str) -> Optional[str]:
    """Return the cached label for this message, or None if it is not cached."""
    key = _cache_key(log_msg)
    return _RESPONSE_CACHE.get(key)
def _cache_set(log_msg: str, label: str) -> None:
    """
    Store ``label`` for ``log_msg`` in the in-memory cache.

    When a new key would push the cache past MAX_CACHE_SIZE, the oldest entry
    (first inserted) is evicted first. Overwriting a key that is already
    cached does not grow the cache, so nothing is evicted in that case.
    """
    key = _cache_key(log_msg)
    if key not in _RESPONSE_CACHE and len(_RESPONSE_CACHE) >= MAX_CACHE_SIZE:
        # dicts iterate in insertion order, so the first key is the oldest.
        oldest = next(iter(_RESPONSE_CACHE))
        del _RESPONSE_CACHE[oldest]
    _RESPONSE_CACHE[key] = label
def get_cache_stats() -> dict:
    """Return the cache's current entry count ("size") and capacity ("max_size")."""
    current_size = len(_RESPONSE_CACHE)
    return {"size": current_size, "max_size": MAX_CACHE_SIZE}
+# ── Prompt builder ───────────────────────────────────────────────────────────
def _build_messages(log_msg: str) -> list[dict]:
    """
    Build the chat messages for one classification request.

    The user message lists the valid categories, allows "Unclassified" as the
    escape answer, shows every few-shot example as a Log/Category pair and
    ends with the target log followed by an open "Category:" line.

    Returns:
        [system message, user message], each a dict with "role" and "content".
    """
    categories_str = ", ".join([f'"{c}"' for c in VALID_CATEGORIES])

    segments = [
        f'Classify the following log into one of these categories: {categories_str}.\n',
        'If none fits, return "Unclassified".\n\n',
    ]
    segments.extend(
        f'Log: {example["log"]}\nCategory: {example["label"]}\n\n'
        for example in FEW_SHOT_EXAMPLES
    )
    segments.append(f"Log: {log_msg}\nCategory:")

    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user",   "content": "".join(segments)}
    return [system_turn, user_turn]
+# ── Normalize raw LLM output ─────────────────────────────────────────────────
def _normalize(raw: str) -> str:
    """
    Map raw LLM output to a valid category or 'Unclassified'.

    Surrounding whitespace and quotes are stripped, then the first entry of
    VALID_CATEGORIES found in the text (case-insensitive substring match)
    is returned.
    """
    cleaned = raw.strip().strip('"').strip("'").lower()
    candidates = (cat for cat in VALID_CATEGORIES if cat.lower() in cleaned)
    return next(candidates, "Unclassified")
+# ── Main classify function ────────────────────────────────────────────────────
def classify_with_llm(log_msg: str) -> str:
    """
    Tier 3 LLM classifier.

    Behaviour:
      - In-memory cache: a cached label is returned without an API call.
      - Missing HF_TOKEN: returns "Unclassified" immediately.
      - Timeout: the client is created with REQUEST_TIMEOUT seconds.
      - Retry: one initial attempt plus MAX_RETRIES retries, sleeping
        RETRY_DELAY_SEC and doubling the delay after each failure.
      - Any request failure on the final attempt yields "Unclassified";
        failed requests are not cached.
      - An empty reply (message content is None) is treated as an empty
        string and so maps to "Unclassified" instead of raising and retrying.

    Args:
        log_msg: The log message to classify.

    Returns:
        One of VALID_CATEGORIES, or "Unclassified".
    """
    # ── Cache hit ────────────────────────────────────────────────────────────
    cached = _cache_get(log_msg)
    if cached is not None:
        logger.debug("[LLM] Cache hit for: %s", log_msg[:60])
        return cached

    # ── Inference with retry ─────────────────────────────────────────────────
    if not HF_TOKEN:
        logger.warning("[LLM] HF_TOKEN not set — returning Unclassified")
        return "Unclassified"

    # Imported lazily so the module can be imported without huggingface_hub
    # when the LLM tier is never reached.
    from huggingface_hub import InferenceClient
    client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)

    messages = _build_messages(log_msg)  # identical for every attempt
    total_attempts = MAX_RETRIES + 1     # initial try + retries
    delay = RETRY_DELAY_SEC

    for attempt in range(1, total_attempts + 1):
        try:
            response = client.chat.completions.create(
                model=LLM_MODEL,
                messages=messages,
                max_tokens=15,
                temperature=0.1,
            )
            # `content` can be None for an empty completion.
            raw = response.choices[0].message.content or ""
            label = _normalize(raw)
            _cache_set(log_msg, label)
            logger.debug("[LLM] Attempt %d: '%s' → '%s'", attempt, raw.strip(), label)
            return label
        except Exception as e:
            # Deliberate best-effort: every failure is retried, then swallowed.
            if attempt < total_attempts:
                logger.warning("[LLM] Attempt %d failed (%s), retrying in %.1fs…", attempt, e, delay)
                time.sleep(delay)
                delay *= 2  # exponential backoff
            else:
                logger.error("[LLM] All %d attempts failed. Last error: %s", total_attempts, e)
    return "Unclassified"
+# ── Batch classify (serial — LLM is already rate-limited) ────────────────────
def classify_batch_llm(log_msgs: list[str]) -> list[str]:
    """
    Classify several logs through the LLM tier, one after another.

    Calls are made sequentially to respect rate limits.

    Returns:
        One label per input message, in input order.
    """
    labels: list[str] = []
    for message in log_msgs:
        labels.append(classify_with_llm(message))
    return labels
+# ── CLI test ─────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Manual check: classify three sample logs, then time a repeat call for
    # the first one to show the cache-hit latency.
    logging.basicConfig(level=logging.INFO)

    test_logs = [
        "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
        "The 'ReportGenerator' module will be retired in version 4.0. Migrate to 'AdvancedAnalyticsSuite'.",
        "System reboot initiated by user 12345.",   # should be Unclassified
    ]
    for message in test_logs:
        predicted = classify_with_llm(message)
        print(f"{predicted:25s} | {message[:80]}")

    # Cache hit test
    print("\n── Cache hit test ──")
    started = time.perf_counter()
    classify_with_llm(test_logs[0])
    finished = time.perf_counter()
    elapsed_ms = (finished - started) * 1000
    print(f"Cache hit latency: {elapsed_ms:.2f}ms")
    print(f"Cache stats: {get_cache_stats()}")

HF/processor_regex.py ADDED Viewed

	@@ -0,0 +1,220 @@

+"""
+processor_regex.py — Tier 1: Rule-based Classifier
+Target coverage: 40%+ (up from 15%)
+Latency: sub-millisecond per log
+New pattern groups added:
+  - HTTP request/response logs   (was completely missing!)
+  - Auth / credential events     (login failures, MFA, lockouts)
+  - System/infra events          (disk, CPU, memory, cron)
+  - Network / firewall events    (IP block, port scan)
+  - Structured error codes       (ERROR, CRITICAL prefix logs)
+"""
+from __future__ import annotations
+import re
+import time
+from typing import Optional
+# ---------------------------------------------------------------------------
+# Pattern registry: (compiled_pattern, label)
+# Order matters — more specific patterns FIRST to avoid mis-labeling.
+# ---------------------------------------------------------------------------
+_RAW_PATTERNS: list[tuple[str, str]] = [
+    # ── HTTP Status ─────────────────────────────────────────────────────────
+    # Covers: GET/POST/PUT/DELETE/PATCH + status code in request line
+    (r"\b(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s+\S+\s+HTTP/\d", "HTTP Status"),
+    # Nova / OpenStack style
+    (r"nova\.\S+\s+(GET|POST|PUT|DELETE)\s+\S+\s+HTTP/\d", "HTTP Status"),
+    # Status code only style: "returned HTTP 200" or "status: 404"
+    (r"\bstatus[:\s]+\d{3}\b", "HTTP Status"),
+    (r"\breturned\s+HTTP\s+\d{3}\b", "HTTP Status"),
+    (r"\bHTTP\s+status\s+code\s*[:-]?\s*\d{3}\b", "HTTP Status"),
+    # API response style
+    (r"\bAPI\s+(call|request)\s+\S+\s+completed\s+with\s+status\s+\d{3}", "HTTP Status"),
+    (r"\bEndpoint\s+\S+\s+responded\s+with\s+code\s+\d{3}", "HTTP Status"),
+    # ── Security Alert ──────────────────────────────────────────────────────
+    # Brute force / login failures
+    (r"(multiple\s+)?(bad\s+|failed?\s+)?login\s+(failure|attempt|failures)", "Security Alert"),
+    (r"brute[\s_-]force\s+(login|attack|attempt)", "Security Alert"),
+    # Unauthorized access
+    (r"unauthorized\s+(access|admin|privilege|attempt)", "Security Alert"),
+    (r"access\s+denied\s+(for|to)\s+(user|ip|host)", "Security Alert"),
+    # Privilege escalation
+    (r"(admin\s+)?access\s+escalation\s+detected", "Security Alert"),
+    (r"privilege\s+(elev|escalat)", "Security Alert"),
+    # IP blocking / suspicious traffic
+    (r"IP\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+blocked", "Security Alert"),
+    (r"(suspicious|anomalous)\s+(login|traffic|activity|request)", "Security Alert"),
+    (r"potential\s+(DDoS|attack|breach|intrusion)", "Security Alert"),
+    (r"security\s+breach\s+suspected", "Security Alert"),
+    (r"(API\s+security\s+breach|bypass\s+API\s+security)", "Security Alert"),
+    (r"port\s+scan\s+(detected|attempt)", "Security Alert"),
+    # ── User Action ─────────────────────────────────────────────────────────
+    (r"User\s+\w+\d*\s+logged\s+(in|out)", "User Action"),
+    (r"Account\s+(with\s+)?ID\s+\S+\s+created\s+by", "User Action"),
+    (r"User\s+\w+\d*\s+(updated\s+profile|changed\s+password|enabled\s+two|downloaded|exported)", "User Action"),
+    (r"(New\s+user|user\s+\w+\d*)\s+registered", "User Action"),
+    (r"Account\s+\S+\s+deleted\s+by\s+(administrator|admin)", "User Action"),
+    (r"User\s+\w+\d*\s+(tried|attempted)", "User Action"),
+    # ── System Notification ─────────────────────────────────────────────────
+    # Backup events
+    (r"Backup\s+(started|ended|completed\s+successfully|failed|aborted)", "System Notification"),
+    (r"System\s+updated\s+to\s+version", "System Notification"),
+    (r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user", "System Notification"),
+    (r"Disk\s+cleanup\s+completed\s+successfully", "System Notification"),
+    (r"System\s+reboot\s+initiated\s+by\s+user", "System Notification"),
+    (r"Scheduled\s+maintenance\s+(started|completed)", "System Notification"),
+    (r"Service\s+\w+\s+restarted\s+successfully", "System Notification"),
+    # NEW: cache, cron, health check, cert, log rotation
+    (r"Cache\s+cleared\s+successfully", "System Notification"),
+    (r"Log\s+rotation\s+completed", "System Notification"),
+    (r"Health\s+check\s+(passed|failed)\s+for\s+service", "System Notification"),
+    (r"Certificate\s+(renewed|expired|revoked)\s+successfully", "System Notification"),
+    (r"Cron\s+job\s+\S+\s+(executed|failed|completed)\s+successfully", "System Notification"),
+    (r"(Disk|Storage)\s+(usage|space)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
+    (r"CPU\s+usage\s+at\s+\d+%", "System Notification"),
+    (r"Memory\s+(usage|limit)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
+    # Deployment / config
+    (r"Deployment\s+(of|for)\s+\S+\s+(completed|failed|started)", "System Notification"),
+    (r"Configuration\s+(reloaded|updated|applied)\s+successfully", "System Notification"),
+    # ── Error ───────────────────────────────────────────────────────────────
+    (r"\bERROR\b.*\b(exception|failed|failure|crash|timeout|unavailable)\b", "Error"),
+    (r"System\s+crashed\s+due\s+to", "Error"),
+    (r"(connection|request|task|job)\s+(timed?\s*out|timeout)", "Error"),
+    (r"service\s+\S+\s+(is\s+down|unavailable|unreachable)", "Error"),
+    (r"database\s+connection\s+(failed|refused|lost|dropped)", "Error"),
+    (r"disk\s+(I/O\s+)?failure", "Error"),
+    (r"driver\s+error(s)?\s+(when|during|on)", "Error"),
+    (r"(replication|sync)\s+task\s+(did\s+not\s+complete|failed)", "Error"),
+    (r"null\s+pointer|segmentation\s+fault|stack\s+overflow", "Error"),
+    # ── Critical Error ──────────────────────────────────────────────────────
+    (r"\bCRITICAL\b", "Critical Error"),
+    (r"(FATAL|PANIC)\b", "Critical Error"),
+    (r"(data\s+loss|data\s+corruption)\s+(detected|occurred)", "Critical Error"),
+    (r"(cluster|node|shard)\s+(failure|crashed|went\s+down)", "Critical Error"),
+    (r"(catastrophic|unrecoverable)\s+(failure|error)", "Critical Error"),
+    (r"kernel\s+panic", "Critical Error"),
+    (r"out[\s-]of[\s-](memory|disk)\s+(error|killed|OOM)", "Critical Error"),
+]
+# Pre-compile all patterns at import time (not per-call)
+REGEX_PATTERNS: list[tuple[re.Pattern, str]] = [
+    (re.compile(pat, re.IGNORECASE), label)
+    for pat, label in _RAW_PATTERNS
+]
+def classify_with_regex(log_message: str) -> Optional[str]:
+    """
+    Tier 1: Rule-based classifier.
+    Returns category label, or None if no pattern matches.
+    Latency: sub-millisecond (patterns pre-compiled at import).
+    """
+    for pattern, label in REGEX_PATTERNS:
+        if pattern.search(log_message):
+            return label
+    return None
+def get_regex_coverage(log_messages: list[str]) -> dict:
+    """Measure regex tier coverage and per-label breakdown."""
+    label_counts: dict[str, int] = {}
+    missed = 0
+    for msg in log_messages:
+        label = classify_with_regex(msg)
+        if label:
+            label_counts[label] = label_counts.get(label, 0) + 1
+        else:
+            missed += 1
+    total   = len(log_messages)
+    matched = total - missed
+    return {
+        "total":        total,
+        "matched":      matched,
+        "missed":       missed,
+        "coverage_pct": round(matched / total * 100, 2) if total else 0.0,
+        "label_breakdown": label_counts,
+    }
+def benchmark_regex(log_messages: list[str], runs: int = 3) -> dict:
+    """Measure regex tier latency (p50 / p95 / p99) over multiple runs."""
+    import statistics
+    per_log_ms: list[float] = []
+    for _ in range(runs):
+        for msg in log_messages:
+            t0 = time.perf_counter()
+            classify_with_regex(msg)
+            per_log_ms.append((time.perf_counter() - t0) * 1000)
+    per_log_ms.sort()
+    return {
+        "p50_ms":  round(statistics.median(per_log_ms), 4),
+        "p95_ms":  round(per_log_ms[int(len(per_log_ms) * 0.95)], 4),
+        "p99_ms":  round(per_log_ms[int(len(per_log_ms) * 0.99)], 4),
+        "mean_ms": round(statistics.mean(per_log_ms), 4),
+    }
+# ── CLI self-test ────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    test_cases: list[tuple[str, str]] = [
+        # HTTP
+        ("GET /api/v2/resource HTTP/1.1 status: 200 len: 1583 time: 0.19", "HTTP Status"),
+        ("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05", "HTTP Status"),
+        ("nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404", "HTTP Status"),
+        # Security
+        ("Multiple login failures occurred on user 6454 account", "Security Alert"),
+        ("IP 192.168.133.114 blocked due to potential attack", "Security Alert"),
+        ("Brute force login attempt from 10.0.0.5 detected", "Security Alert"),
+        ("Admin access escalation detected for user 9429", "Security Alert"),
+        # User Action
+        ("User User12345 logged in.", "User Action"),
+        ("Account with ID 456 created by Admin.", "User Action"),
+        # System Notification
+        ("Backup completed successfully.", "System Notification"),
+        ("CPU usage at 98% for the last 10 minutes on node-7", "System Notification"),
+        ("Health check passed for service payments-api", "System Notification"),
+        # Error
+        ("System crashed due to disk I/O failure on node-3", "Error"),
+        ("Database connection failed after 3 retries", "Error"),
+        # Critical
+        ("CRITICAL: data corruption detected on shard-14", "Critical Error"),
+        ("kernel panic: not syncing: VFS: unable to mount root fs", "Critical Error"),
+        # Should be None (unmatched)
+        ("The 'BulkEmailSender' feature will be deprecated in v5.0.", None),
+        ("Case escalation for ticket 7324 failed.", None),
+    ]
+    correct = 0
+    print(f"{'Expected':<22} {'Got':<22} {'✓/✗'} | Log")
+    print("─" * 100)
+    for log, expected in test_cases:
+        got = classify_with_regex(log)
+        ok  = got == expected
+        correct += ok
+        icon = "✓" if ok else "✗"
+        print(f"{str(expected):<22} {str(got):<22} {icon}   | {log[:55]}")
+    print(f"\n{correct}/{len(test_cases)} correct")
+    # Coverage demo
+    all_logs = [log for log, _ in test_cases]
+    cov = get_regex_coverage(all_logs)
+    print(f"\nCoverage: {cov['coverage_pct']}%  ({cov['matched']}/{cov['total']} matched)")
+    print("Label breakdown:", cov["label_breakdown"])
+    # Latency benchmark
+    lat = benchmark_regex(all_logs * 100)
+    print(f"\nLatency (p50/p95/p99): {lat['p50_ms']}ms / {lat['p95_ms']}ms / {lat['p99_ms']}ms")

HF/requirements.txt ADDED Viewed

	@@ -0,0 +1,30 @@

+# Core
+gradio>=4.0.0
+pandas>=2.0.0
+numpy>=1.24.0
+joblib>=1.3.0
+scikit-learn>=1.3.0
+# Embedding + BERT
+sentence-transformers>=2.7.0
+transformers>=4.38.0
+# ONNX (optional, 3-5x speedup)
+onnxruntime>=1.17.0
+optimum[onnxruntime]>=1.16.0
+# LLM
+huggingface-hub>=0.21.0
+# FastAPI (production API)
+fastapi>=0.110.0
+uvicorn[standard]>=0.29.0
+pydantic>=2.0.0
+# Observability
+psutil>=5.9.0
+# Testing
+pytest>=8.0.0
+pytest-asyncio>=0.23.0
+httpx>=0.27.0   # for FastAPI test client