Spaces:

NOT-OMEGA
/

LogAI-Engine

Sleeping

App Files Files Community

NOT-OMEGA committed on Apr 14

Commit

595fb58

verified ·

1 Parent(s): b310e7b

Delete root

Browse files

Files changed (4) hide show

root/Dockerfile +0 -32
root/api.py +0 -314
root/classify.py +0 -198
root/processor_bert.py +0 -216

root/Dockerfile DELETED Viewed

@@ -1,32 +0,0 @@
-FROM python:3.11-slim
-# Build args
-ARG PORT=8000
-# Persist the build arg as a runtime environment variable: ARG values are not
-# visible to the HEALTHCHECK/CMD shells at container run time, so ${PORT}
-# would otherwise expand to an empty string there.
-ENV PORT=${PORT}
-# System deps (curl is used by the HEALTHCHECK below)
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl \
-    && rm -rf /var/lib/apt/lists/*
-WORKDIR /app
-# Install Python deps first (layer cache)
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-# Copy source
-COPY . .
-# Non-root user
-RUN adduser --disabled-password --gecos "" appuser \
-    && chown -R appuser:appuser /app
-USER appuser
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
-    CMD curl -f http://localhost:${PORT}/health || exit 1
-EXPOSE ${PORT}
-# Production: single worker (CPU-bound inference — scale via replicas, not threads)
-# Run through `sh -c` so ${PORT} is honoured instead of a hard-coded 8000;
-# `exec` replaces the shell so uvicorn receives container signals directly.
-CMD ["sh", "-c", "exec uvicorn api:app --host 0.0.0.0 --port ${PORT} --workers 1 --log-level info"]

root/api.py DELETED Viewed

@@ -1,314 +0,0 @@
-"""
-api.py — Async FastAPI Inference Service
-Endpoints:
-  POST /classify          — Single log
-  POST /classify/batch    — Batch of logs (up to 512)
-  GET  /health            — Liveness check
-  GET  /ready             — Readiness check (model loaded?)
-  GET  /metrics           — Request counts, throughput, latency stats
-Features:
-  - Async request handling (non-blocking)
-  - Worker pool via asyncio semaphore (bounded concurrency)
-  - Structured JSON logs with request_id
-  - Rate limiting (configurable)
-  - Request ID tracing
-  - Batch queue aggregation for small requests
-Run:
-  uvicorn api:app --host 0.0.0.0 --port 8000 --workers 1
-Example:
-  curl -X POST http://localhost:8000/classify \
-       -H "Content-Type: application/json" \
-       -d '{"source": "ModernCRM", "log_message": "User User123 logged in."}'
-"""
-from __future__ import annotations
-import asyncio
-import logging
-import os
-import time
-import uuid
-import statistics
-from collections import deque
-from contextlib import asynccontextmanager
-from typing import Optional
-from fastapi import FastAPI, HTTPException, Request, status
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
-from pydantic import BaseModel, Field, field_validator
-# ── Logging setup ─────────────────────────────────────────────────────────────
-# Root logger emits one JSON-shaped line per record.
-# NOTE(review): the message is interpolated as-is, so a message containing a
-# double quote produces a line that is not valid JSON.
-logging.basicConfig(
-    level=logging.INFO,
-    format='{"time":"%(asctime)s","level":"%(levelname)s","logger":"%(name)s","msg":"%(message)s"}'
-)
-logger = logging.getLogger("log-classifier-api")
-# ── Config ─────────────────────────────────────────────────────────────────────
-# Each value below is read from the environment once, at import time.
-# Upper bound on the number of logs accepted by one /classify/batch request
-MAX_BATCH_SIZE      = int(os.getenv("MAX_BATCH_SIZE", "512"))
-MAX_CONCURRENT      = int(os.getenv("MAX_CONCURRENT", "4"))   # concurrency cap
-# Requests allowed per 60-second window (per process)
-RATE_LIMIT_PER_MIN  = int(os.getenv("RATE_LIMIT_PER_MIN", "1000"))
-LOG_MAX_CHARS       = 2048   # truncate huge logs before classify
-# ── Global state ───────────────────────────────────────────────────────────────
-# Created in the lifespan hook; None until startup has run
-_semaphore: asyncio.Semaphore = None   # type: ignore
-# Set to True by the lifespan hook once model loading succeeds
-_model_ready: bool = False
-# Metrics ring buffer: the last 1000 latency samples in milliseconds
-# (a batch request contributes one sample per log)
-_latencies_ms: deque = deque(maxlen=1000)
-_request_count = 0
-_error_count   = 0
-# Module import time; used as the origin for uptime calculations
-_start_time    = time.time()
-# Rate limiter (simple sliding window per process): request timestamps,
-# capped at RATE_LIMIT_PER_MIN entries — the oldest is dropped when full
-_rate_window: deque = deque(maxlen=RATE_LIMIT_PER_MIN)
-# ── Lifespan: load models on startup ──────────────────────────────────────────
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """
-    Application lifespan hook: prepare shared state before serving requests.
-    Creates the concurrency semaphore, then loads the models in the default
-    executor so the event loop is not blocked. A load failure is logged with
-    its traceback but does not abort startup: _model_ready stays False, so
-    /ready keeps returning 503.
-    """
-    global _semaphore, _model_ready
-    logger.info("Starting up — loading models…")
-    _semaphore = asyncio.Semaphore(MAX_CONCURRENT)
-    # get_running_loop() is the supported way to obtain the loop inside a coroutine
-    loop = asyncio.get_running_loop()
-    try:
-        await loop.run_in_executor(None, _load_models_blocking)
-    except Exception as e:
-        # Top-level boundary: keep the service up, but keep the traceback in the log
-        logger.error(f"❌ Model load failed: {e}", exc_info=True)
-    else:
-        _model_ready = True
-        logger.info("✅ Models loaded — API ready")
-    yield
-    logger.info("Shutting down")
-def _load_models_blocking():
-    """
-    Load BERT + classifier (blocks — run in executor).
-    Importing processor_bert does not load anything by itself: its models are
-    loaded lazily on the first classify_batch() call. Calling it with an empty
-    batch triggers that load and returns [] without running inference, so
-    readiness reflects the real model state and load errors surface here.
-    """
-    from processor_bert import classify_batch
-    classify_batch([])
-    logger.info("BERT model loaded")
-# ── App factory ────────────────────────────────────────────────────────────────
-# FastAPI application object; model loading is delegated to the lifespan hook
-app = FastAPI(
-    title="Log Classification API",
-    description="3-tier hybrid pipeline: Regex → BERT → LLM",
-    version="3.0.0",
-    lifespan=lifespan,
-)
-# CORS is fully open: any origin, method and header is accepted.
-# NOTE(review): confirm this is intended outside of demos/development.
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# ── Request / Response schemas ─────────────────────────────────────────────────
-# Request body for classifying a single log line
-class LogRequest(BaseModel):
-    # Name of the system that emitted the log (required)
-    source:      str = Field(..., example="ModernCRM")
-    # Raw log text (required, at least one character)
-    log_message: str = Field(..., example="User User123 logged in.", min_length=1)
-    @field_validator("log_message")
-    @classmethod
-    def truncate_long_logs(cls, v: str) -> str:
-        # Over-long messages are cut to LOG_MAX_CHARS characters rather than rejected
-        return v[:LOG_MAX_CHARS]
-# Classification result for one log line
-class LogResponse(BaseModel):
-    # Trace id taken from the request state (falls back to a fresh short uuid)
-    request_id:  str
-    # Predicted label
-    label:       str
-    # Name of the pipeline tier that produced the label
-    tier:        str
-    # Confidence reported by the pipeline; None when the result carries none
-    confidence:  Optional[float]
-    # Latency in milliseconds, rounded to 2 decimals by the endpoints
-    latency_ms:  float
-    # Defaults to False; the endpoints in this file never set it
-    cached:      bool = False
-# Request body for /classify/batch
-class BatchRequest(BaseModel):
-    # Logs to classify; at most MAX_BATCH_SIZE entries (no minimum is enforced)
-    logs: list[LogRequest] = Field(..., max_length=MAX_BATCH_SIZE)
-# Response body for /classify/batch
-class BatchResponse(BaseModel):
-    # Trace id shared by every result in the batch
-    request_id:   str
-    # Number of logs in the request
-    total:        int
-    # Time spent classifying the whole batch, in milliseconds
-    elapsed_ms:   float
-    # Logs classified per second for this batch
-    throughput:   float
-    # One entry per classified log
-    results:      list[LogResponse]
-# Response body for /health
-class HealthResponse(BaseModel):
-    # Liveness status string
-    status:    str
-    # Seconds elapsed since the module was imported
-    uptime_s:  float
-# Response body for /metrics
-class MetricsResponse(BaseModel):
-    # Requests accepted by the classify endpoints since start
-    total_requests:  int
-    # Classification failures since start
-    total_errors:    int
-    # Seconds elapsed since the module was imported
-    uptime_s:        float
-    # Average request rate over the uptime
-    requests_per_min: float
-    # Latency percentiles over the metrics ring buffer; None while it is empty
-    latency_p50_ms:  Optional[float]
-    latency_p95_ms:  Optional[float]
-    latency_p99_ms:  Optional[float]
-# ── Rate limiter ───────────────────────────────────────────────────────────────
-def _check_rate_limit() -> None:
-    """
-    Enforce a per-process sliding window of RATE_LIMIT_PER_MIN requests per
-    60 seconds.
-    Raises:
-        HTTPException: 429 when the window already holds RATE_LIMIT_PER_MIN
-        accepted requests.
-    """
-    now = time.time()
-    # Drop timestamps that have aged out of the 60-second window (oldest first)
-    while _rate_window and now - _rate_window[0] >= 60:
-        _rate_window.popleft()
-    # Check BEFORE recording the request: the deque is capped at
-    # RATE_LIMIT_PER_MIN entries, so an append-then-count check can never
-    # observe more than the limit and would never reject anything.
-    if len(_rate_window) >= RATE_LIMIT_PER_MIN:
-        raise HTTPException(
-            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
-            detail=f"Rate limit exceeded: {RATE_LIMIT_PER_MIN} req/min",
-        )
-    _rate_window.append(now)
-# ── Middleware: request logging ────────────────────────────────────────────────
-@app.middleware("http")
-async def log_requests(request: Request, call_next):
-    """
-    HTTP middleware: tag each request with an id and log one summary line.
-    The id comes from the X-Request-ID header when present, otherwise from a
-    freshly generated 8-character uuid prefix. It is stored on request.state
-    and echoed back in the response's X-Request-ID header.
-    """
-    generated_id = str(uuid.uuid4())[:8]
-    request_id = request.headers.get("X-Request-ID", generated_id)
-    request.state.request_id = request_id
-    started = time.perf_counter()
-    response = await call_next(request)
-    elapsed_ms = 1000 * (time.perf_counter() - started)
-    summary = (
-        f"method={request.method} path={request.url.path} "
-        f"status={response.status_code} latency={elapsed_ms:.1f}ms rid={request_id}"
-    )
-    logger.info(summary)
-    response.headers["X-Request-ID"] = request_id
-    return response
-# ── Health & readiness ─────────────────────────────────────────────────────────
-@app.get("/health", response_model=HealthResponse, tags=["ops"])
-async def health():
-    # Liveness probe: always "ok", plus seconds since the module was imported
-    uptime_s = round(time.time() - _start_time, 1)
-    return {"status": "ok", "uptime_s": uptime_s}
-@app.get("/ready", tags=["ops"])
-async def ready():
-    # Readiness probe: 503 until the startup hook has flagged the models as loaded
-    if _model_ready:
-        return {"status": "ready"}
-    raise HTTPException(status_code=503, detail="Models not yet loaded")
-# ── Metrics ────────────────────────────────────────────────────────────────────
-@app.get("/metrics", response_model=MetricsResponse, tags=["ops"])
-async def metrics():
-    # Snapshot of request counters and latency percentiles from the ring buffer
-    elapsed_s = time.time() - _start_time
-    ordered   = sorted(_latencies_ms)   # an empty buffer sorts to []
-    count     = len(ordered)
-    def percentile(fraction):
-        # Index-based percentile, clamped to the last sample; None without samples
-        if not count:
-            return None
-        rank = min(int(count * fraction), count - 1)
-        return round(ordered[rank], 2)
-    # The denominator is floored at one minute
-    minutes = max(elapsed_s / 60, 1)
-    return {
-        "total_requests":  _request_count,
-        "total_errors":    _error_count,
-        "uptime_s":        round(elapsed_s, 1),
-        "requests_per_min": round(_request_count / minutes, 1),
-        "latency_p50_ms":  percentile(0.50),
-        "latency_p95_ms":  percentile(0.95),
-        "latency_p99_ms":  percentile(0.99),
-    }
-# ── Classify single ────────────────────────────────────────────────────────────
-@app.post("/classify", response_model=LogResponse, tags=["inference"])
-async def classify_single(req: LogRequest, request: Request):
-    # Classify one log line: apply the process-wide rate limit, then run the
-    # blocking pipeline in the default executor under the concurrency semaphore.
-    # Raises HTTPException 429 (rate limit) or 500 (classification failure).
-    global _request_count, _error_count
-    _check_rate_limit()
-    _request_count += 1
-    rid = getattr(request.state, "request_id", str(uuid.uuid4())[:8])
-    async with _semaphore:
-        # get_running_loop() is the supported way to obtain the loop inside a coroutine
-        loop = asyncio.get_running_loop()
-        # Timer starts after the semaphore is acquired: queue wait is not counted
-        t0   = time.perf_counter()
-        try:
-            result = await loop.run_in_executor(
-                None, _classify_blocking, req.source, req.log_message
-            )
-        except Exception as e:
-            _error_count += 1
-            # exc_info keeps the traceback in the log; `from e` keeps the cause chained
-            logger.error(f"rid={rid} classify error: {e}", exc_info=True)
-            raise HTTPException(status_code=500, detail=str(e)) from e
-    latency = (time.perf_counter() - t0) * 1000
-    _latencies_ms.append(latency)
-    return LogResponse(
-        request_id = rid,
-        label      = result["label"],
-        tier       = result["tier"],
-        confidence = result.get("confidence"),
-        latency_ms = round(latency, 2),
-    )
-def _classify_blocking(source: str, log_message: str) -> dict:
-    """Synchronous single-log entry point, intended to run in an executor thread."""
-    # Imported on first use rather than at module import time
-    import classify as pipeline
-    return pipeline.classify_log(source, log_message)
-# ── Classify batch ─────────────────────────────────────────────────────────────
-@app.post("/classify/batch", response_model=BatchResponse, tags=["inference"])
-async def classify_batch_endpoint(req: BatchRequest, request: Request):
-    # Classify a batch of logs in one executor call under the concurrency semaphore.
-    # Per-log latency is the batch time divided evenly across the logs.
-    # Raises HTTPException 429 (rate limit) or 500 (classification failure).
-    global _request_count, _error_count
-    _check_rate_limit()
-    _request_count += 1
-    rid = getattr(request.state, "request_id", str(uuid.uuid4())[:8])
-    log_pairs = [(r.source, r.log_message) for r in req.logs]
-    n_logs    = len(log_pairs)
-    if not n_logs:
-        # The schema allows an empty list; there is nothing to classify and
-        # the per-log averages below would divide by zero.
-        return BatchResponse(
-            request_id = rid,
-            total      = 0,
-            elapsed_ms = 0.0,
-            throughput = 0.0,
-            results    = [],
-        )
-    async with _semaphore:
-        # get_running_loop() is the supported way to obtain the loop inside a coroutine
-        loop = asyncio.get_running_loop()
-        t0   = time.perf_counter()
-        try:
-            results = await loop.run_in_executor(
-                None, _classify_batch_blocking, log_pairs
-            )
-        except Exception as e:
-            _error_count += 1
-            logger.error(f"rid={rid} batch error: {e}", exc_info=True)
-            raise HTTPException(status_code=500, detail=str(e)) from e
-    elapsed_ms = (time.perf_counter() - t0) * 1000
-    # Guard against a zero reading from a coarse clock
-    throughput = round(n_logs / (elapsed_ms / 1000), 1) if elapsed_ms > 0 else 0.0
-    per_log_ms = elapsed_ms / n_logs
-    # One latency sample per log, so batches weigh into /metrics by size
-    _latencies_ms.extend([per_log_ms] * n_logs)
-    return BatchResponse(
-        request_id = rid,
-        total      = n_logs,
-        elapsed_ms = round(elapsed_ms, 2),
-        throughput = throughput,
-        results    = [
-            LogResponse(
-                request_id = rid,
-                label      = r["label"],
-                tier       = r["tier"],
-                confidence = r.get("confidence"),
-                latency_ms = round(per_log_ms, 2),
-            )
-            for r in results
-        ],
-    )
-def _classify_batch_blocking(log_pairs: list[tuple[str, str]]) -> list[dict]:
-    """Synchronous batch entry point, intended to run in an executor thread."""
-    # Imported on first use rather than at module import time
-    import classify as pipeline
-    return pipeline.classify_logs(log_pairs)
-# ── Dev runner ──────────────────────────────────────────────────────────────────
-if __name__ == "__main__":
-    # Direct-execution entry point: serve the app with a single uvicorn worker
-    import uvicorn
-    uvicorn.run(
-        "api:app",
-        host="0.0.0.0",
-        port=8000,
-        reload=False,
-        workers=1,
-    )

root/classify.py DELETED Viewed

@@ -1,198 +0,0 @@
-"""
-classify.py — 3-Tier Hybrid Pipeline (V3 — Latency-Tracked)
-Architecture:
-  LegacyCRM → LLM directly
-  Others    → Regex → BERT (batch) → LLM fallback
-Changes in V3:
-  - Tier-wise latency tracking (regex_ms, bert_ms, llm_ms)
-  - Pipeline summary with p50/p95 per tier
-  - Defensive: LLM timeout + retry baked in via processor_llm
-  - classify_logs returns richer result dict
-"""
-from __future__ import annotations
-import time
-import statistics
-import pandas as pd
-from processor_regex import classify_with_regex
-from processor_bert  import classify_batch as bert_batch
-from processor_llm   import classify_with_llm
-LEGACY_SOURCE = "LegacyCRM"
-# ── Result type ─────────────────────────────────────────────────────────────
-def _make_result(label: str, tier: str, confidence, latency_ms: float) -> dict:
-    """Build one pipeline result record; latency_ms is rounded to 3 decimals."""
-    rounded_ms = round(latency_ms, 3)
-    return dict(label=label, tier=tier, confidence=confidence, latency_ms=rounded_ms)
-# ── Single log (backward-compatible) ────────────────────────────────────────
-def classify_log(source: str, log_msg: str) -> dict:
-    """Classify a single log. Returns a dict with label, tier, confidence, latency_ms."""
-    # Delegate to the batch pipeline with a one-element batch
-    single_batch = [(source, log_msg)]
-    return classify_logs(single_batch)[0]
-# ── Batch pipeline (main entry point) ───────────────────────────────────────
-def classify_logs(logs: list[tuple[str, str]]) -> list[dict]:
-    """
-    Batch classify with 3-tier routing + per-result latency.
-    Args:
-      logs: (source, log_message) pairs.
-    Returns list of dicts, in input order:
-      { label, tier, confidence, latency_ms }
-    Tier routing:
-      LegacyCRM source → LLM directly
-      Regex match      → done
-      Remainder        → BERT batch → LLM if BERT returns "Unclassified"
-    Latency: Regex and LLM are timed per call; BERT latency is the batch time
-    divided evenly across the batch.
-    """
-    # Filled by index because the tiers complete out of input order
-    results = [None] * len(logs)
-    # ── Step 1: Route to groups ─────────────────────────────────────────────
-    llm_indices   = []
-    bert_indices  = []
-    for i, (source, log_msg) in enumerate(logs):
-        if source == LEGACY_SOURCE:
-            llm_indices.append(i)
-            continue
-        t0    = time.perf_counter()
-        label = classify_with_regex(log_msg)
-        t1    = time.perf_counter()
-        if label:
-            results[i] = _make_result(label, "Regex", 1.0, (t1 - t0) * 1000)
-        else:
-            bert_indices.append(i)
-    # ── Step 2: BERT batch ──────────────────────────────────────────────────
-    if bert_indices:
-        bert_msgs = [logs[i][1] for i in bert_indices]
-        t_bert_start = time.perf_counter()
-        bert_results = bert_batch(bert_msgs)
-        t_bert_end   = time.perf_counter()
-        bert_ms_per_log = (t_bert_end - t_bert_start) * 1000 / len(bert_msgs)
-        for idx, (label, conf) in zip(bert_indices, bert_results):
-            if label != "Unclassified":
-                results[idx] = _make_result(label, "BERT", conf, bert_ms_per_log)
-            else:
-                # Low-confidence BERT result: hand over to the LLM tier
-                llm_indices.append(idx)
-    # ── Step 3: LLM (LegacyCRM + BERT fallback) ────────────────────────────
-    for i in llm_indices:
-        source, log_msg = logs[i]
-        t0    = time.perf_counter()
-        label = classify_with_llm(log_msg)
-        t1    = time.perf_counter()
-        tier  = "LLM" if source == LEGACY_SOURCE else "LLM (fallback)"
-        results[i] = _make_result(label, tier, None, (t1 - t0) * 1000)
-    return results
-# ── Pipeline summary ─────────────────────────────────────────────────────────
-def pipeline_summary(results: list[dict]) -> dict:
-    """
-    Summarise classify_logs output per tier and per label.
-    Returns a dict with "total", "tier_stats" (count, share in percent and
-    p50/p95/p99/mean latency in ms for each tier) and "label_counts".
-    """
-    latencies_by_tier: dict[str, list[float]] = {}
-    label_counts: dict[str, int] = {}
-    for record in results:
-        latencies_by_tier.setdefault(record["tier"], []).append(record["latency_ms"])
-        name = record["label"]
-        label_counts[name] = label_counts.get(name, 0) + 1
-    total = len(results)
-    def _ranked(ordered, fraction):
-        # Index-based percentile, clamped to the last element
-        last = len(ordered) - 1
-        return ordered[min(int(len(ordered) * fraction), last)]
-    tier_stats = {}
-    for tier_name, values in latencies_by_tier.items():
-        ordered = sorted(values)
-        size = len(ordered)
-        tier_stats[tier_name] = {
-            "count":    size,
-            "pct":      round(size / total * 100, 1),
-            "p50_ms":   round(statistics.median(ordered), 2),
-            "p95_ms":   round(_ranked(ordered, 0.95), 2),
-            "p99_ms":   round(_ranked(ordered, 0.99), 2),
-            "mean_ms":  round(statistics.mean(ordered), 2),
-        }
-    return {
-        "total":        total,
-        "tier_stats":   tier_stats,
-        "label_counts": label_counts,
-    }
-# ── CSV batch classify ───────────────────────────────────────────────────────
-def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str, pd.DataFrame]:
-    """
-    Classify every row of a CSV file and write an annotated copy.
-    Required columns: 'source', 'log_message'
-    Output: adds 'predicted_label', 'tier_used', 'latency_ms', 'confidence'
-    Returns (output_path, annotated DataFrame).
-    Raises ValueError when a required column is missing.
-    """
-    frame = pd.read_csv(input_path)
-    needed = {"source", "log_message"}
-    if not needed <= set(frame.columns):
-        raise ValueError(f"CSV mein ye columns chahiye: {needed}. Mila: {set(frame.columns)}")
-    outcomes = classify_logs(list(zip(frame["source"], frame["log_message"])))
-    def _format_confidence(value):
-        # Percent with one decimal, or "N/A" when the tier reports no confidence
-        return "N/A" if value is None else f"{value:.1%}"
-    frame["predicted_label"] = [o["label"] for o in outcomes]
-    frame["tier_used"] = [o["tier"] for o in outcomes]
-    frame["latency_ms"] = [o["latency_ms"] for o in outcomes]
-    frame["confidence"] = [_format_confidence(o["confidence"]) for o in outcomes]
-    frame.to_csv(output_path, index=False)
-    return output_path, frame
-# Aliases
-# `classify` is a second name bound to the same function object as classify_logs
-classify = classify_logs
-# ── Self-test ────────────────────────────────────────────────────────────────
-if __name__ == "__main__":
-    # Self-test: run a small mixed-source sample through the pipeline and print
-    # a per-log table followed by the tier and label summaries.
-    sample = [
-        ("ModernCRM",       "IP 192.168.133.114 blocked due to potential attack"),
-        ("BillingSystem",   "User User12345 logged in."),
-        ("AnalyticsEngine", "File data_6957.csv uploaded successfully by user User265."),
-        ("ModernHR",        "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"),
-        ("ModernHR",        "Admin access escalation detected for user 9429"),
-        ("LegacyCRM",       "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active."),
-        ("LegacyCRM",       "The 'ReportGenerator' module will be retired in version 4.0."),
-    ]
-    print(f'{"Source":<20} {"Tier":<18} {"Conf":>6} {"Lat(ms)":>8}  {"Label":<25} Log')
-    print("─" * 115)
-    results = classify_logs(sample)
-    for (source, log), r in zip(sample, results):
-        # `is not None`, not truthiness: a real confidence of 0.0 must print as 0%, not N/A
-        conf = f"{r['confidence']:.0%}" if r["confidence"] is not None else "  N/A"
-        print(f'{source:<20} {r["tier"]:<18} {conf:>6} {r["latency_ms"]:>8.1f}  {r["label"]:<25} {log[:40]}')
-    summary = pipeline_summary(results)
-    print("\n📊 Pipeline Summary:")
-    for tier, stats in summary["tier_stats"].items():
-        print(f"  {tier}: {stats['count']} logs ({stats['pct']}%) | "
-              f"p50={stats['p50_ms']}ms p95={stats['p95_ms']}ms p99={stats['p99_ms']}ms")
-    print("\n🏷️  Label distribution:")
-    for label, count in sorted(summary["label_counts"].items(), key=lambda x: -x[1]):
-        print(f"  • {label}: {count}")

root/processor_bert.py DELETED Viewed

@@ -1,216 +0,0 @@
-"""
-processor_bert.py — ONNX Runtime powered BERT classifier
-Speed: 82 logs/s → 2000+ logs/s
-How it works:
-1. ONNX Runtime: 3-5x faster than plain PyTorch
-2. Batch processing: 64 logs are processed together
-3. Pre-allocated buffers: no wasted memory
-"""
-from __future__ import annotations
-import os
-import numpy as np
-import joblib
-# ── Backend selection state (which embedding method is in use) ──────────────
-# All of these are populated lazily by _load_models()
-_USE_ONNX = False
-# SentenceTransformer instance; only set in PyTorch mode
-_embedding_model = None
-# Classifier loaded from MODEL_PATH
-_classifier       = None
-# ONNX Runtime session and its tokenizer; only set in ONNX mode
-_ort_session      = None
-_ort_tokenizer    = None
-# Model artifacts are resolved relative to this file's directory
-MODEL_PATH    = os.path.join(os.path.dirname(__file__), 'models', 'log_classifier.joblib')
-ONNX_DIR      = os.path.join(os.path.dirname(__file__), 'models', 'onnx')
-# Predictions whose top probability is below this are reported as 'Unclassified'
-CONFIDENCE_THRESHOLD = 0.30
-# Number of logs embedded and classified per chunk
-DEFAULT_BATCH = 64
-def _load_models():
-    """
-    Lazily load the classifier and an embedding backend; later calls are no-ops.
-    ONNX Runtime is used when models/onnx/model.onnx exists and loads cleanly;
-    otherwise the SentenceTransformer (PyTorch) backend is loaded.
-    Raises:
-        FileNotFoundError: the joblib classifier file is missing.
-    """
-    global _USE_ONNX, _embedding_model, _classifier, _ort_session, _ort_tokenizer
-    if _classifier is not None:
-        return  # Already loaded
-    # ── Load the classifier ────────────────────────────────
-    if not os.path.exists(MODEL_PATH):
-        raise FileNotFoundError(
-            f'Model nahi mila: {MODEL_PATH}\n'
-            'Pehle Colab notebook run karo aur model download karo.'
-        )
-    # NOTE: joblib.load unpickles the file — only load model files from a trusted source.
-    # Kept in a local until the embedding backend is ready (see the end of this function).
-    classifier = joblib.load(MODEL_PATH)
-    # ── Try ONNX (fast), fall back to PyTorch ──────────────
-    onnx_model_file = os.path.join(ONNX_DIR, 'model.onnx')
-    if os.path.exists(onnx_model_file):
-        try:
-            import onnxruntime as ort
-            from transformers import AutoTokenizer
-            # CPU optimized session options
-            sess_opts = ort.SessionOptions()
-            sess_opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-            # os.cpu_count() may return None; 0 lets ONNX Runtime pick the thread count
-            sess_opts.intra_op_num_threads = os.cpu_count() or 0
-            sess_opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
-            session = ort.InferenceSession(
-                onnx_model_file,
-                sess_options=sess_opts,
-                providers=['CPUExecutionProvider']
-            )
-            tokenizer = AutoTokenizer.from_pretrained(ONNX_DIR)
-            # Publish session and tokenizer together, only after both loaded
-            _ort_session, _ort_tokenizer = session, tokenizer
-            _USE_ONNX = True
-            print('[BERT] ✅ ONNX Runtime loaded — FAST MODE')
-        except Exception as e:
-            # Best-effort: any ONNX problem falls back to the PyTorch backend
-            print(f'[BERT] ONNX load failed ({e}), fallback to PyTorch')
-            _USE_ONNX = False
-    if not _USE_ONNX:
-        from sentence_transformers import SentenceTransformer
-        _embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-        print('[BERT] ⚠️  PyTorch mode (install ONNX for 3-5x speedup)')
-    # Publish the classifier LAST: it doubles as the "already loaded" flag, so
-    # a failure while loading the embedding backend must leave it unset —
-    # otherwise the next call would return early with no backend available.
-    _classifier = classifier
-def _embed_onnx(texts: list[str]) -> np.ndarray:
-    """
-    Generate embeddings with the ONNX Runtime session.
-    Tokenizes the texts (padding, truncation to 128 tokens), runs the session,
-    mean-pools the first output over non-padding tokens and L2-normalizes
-    each row. Returns one embedding row per input text.
-    """
-    # No torch import here: this path is NumPy-only, so ONNX mode works without PyTorch
-    inputs = _ort_tokenizer(
-        texts,
-        padding=True,
-        truncation=True,
-        max_length=128,
-        return_tensors='np'  # NumPy directly (faster than PyTorch tensors)
-    )
-    # ONNX session run
-    ort_inputs = {
-        'input_ids':      inputs['input_ids'].astype(np.int64),
-        'attention_mask': inputs['attention_mask'].astype(np.int64),
-    }
-    # Feed token_type_ids only if the exported graph declares that input
-    if 'token_type_ids' in [i.name for i in _ort_session.get_inputs()]:
-        ort_inputs['token_type_ids'] = inputs.get(
-            'token_type_ids', np.zeros_like(inputs['input_ids'])
-        ).astype(np.int64)
-    outputs = _ort_session.run(None, ort_inputs)
-    hidden  = outputs[0]  # (batch, seq_len, hidden)
-    # Mean pooling (attention mask weighted)
-    mask    = inputs['attention_mask'][:, :, None].astype(np.float32)
-    summed  = (hidden * mask).sum(axis=1)
-    # Floor the divisor so a row with an all-zero mask cannot produce 0/0
-    counts  = np.maximum(mask.sum(axis=1), 1e-9)
-    embeddings = summed / counts
-    # L2 normalize
-    norms  = np.linalg.norm(embeddings, axis=1, keepdims=True)
-    return  embeddings / (norms + 1e-8)
-def _embed_pytorch(texts: list[str]) -> np.ndarray:
-    """PyTorch fallback: encode texts with the loaded SentenceTransformer model."""
-    encode_options = dict(
-        batch_size=DEFAULT_BATCH,
-        convert_to_numpy=True,
-        normalize_embeddings=True,
-        show_progress_bar=False,
-    )
-    return _embedding_model.encode(texts, **encode_options)
-# ── PUBLIC API ──────────────────────────────────────────────
-def classify_with_bert(log_message: str) -> tuple[str, float]:
-    """
-    Classify a single log message.
-    Returns: (label, confidence)
-    """
-    _load_models()
-    # Reuse the batch path with a one-element batch
-    single_batch = [log_message]
-    return classify_batch(single_batch)[0]
-def classify_batch(log_messages: list[str]) -> list[tuple[str, float]]:
-    """
-    Classify several logs in one call.
-    Returns: list of (label, confidence) tuples, in input order. A prediction
-    whose top probability is below CONFIDENCE_THRESHOLD is reported with the
-    label 'Unclassified'.
-    Example:
-        results = classify_batch(['log1', 'log2', 'log3'])
-        for label, conf in results:
-            print(f'{label}: {conf:.1%}')
-    """
-    _load_models()
-    if not log_messages:
-        return []
-    results = []
-    # Work through the input in chunks of DEFAULT_BATCH
-    for start in range(0, len(log_messages), DEFAULT_BATCH):
-        chunk = log_messages[start:start + DEFAULT_BATCH]
-        # Pick the embedding backend selected at load time
-        embed = _embed_onnx if _USE_ONNX else _embed_pytorch
-        vectors = embed(chunk)
-        probabilities = _classifier.predict_proba(vectors)
-        top_probs = probabilities.max(axis=1)
-        predicted = _classifier.predict(vectors)
-        results.extend(
-            ('Unclassified', float(top)) if top < CONFIDENCE_THRESHOLD else (str(name), float(top))
-            for name, top in zip(predicted, top_probs)
-        )
-    return results
-def get_classes() -> list[str]:
-    """Return the classifier's classes as a list (loads the models if needed)."""
-    _load_models()
-    known_classes = _classifier.classes_
-    return list(known_classes)
-def is_onnx_mode() -> bool:
-    """Report whether the ONNX backend is in use (loads the models if needed)."""
-    _load_models()
-    onnx_active = _USE_ONNX
-    return onnx_active
-# ── TEST ────────────────────────────────────────────────────
-if __name__ == '__main__':
-    # Manual smoke test: classify a few sample logs one at a time, report the
-    # active backend, then time one large batch.
-    import time
-    test_logs = [
-        'GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19',
-        'System crashed due to driver errors when restarting the server',
-        'Multiple login failures occurred on user 6454 account',
-        'Admin access escalation detected for user 9429',
-        'CPU usage at 98% for the last 10 minutes on node-7',
-        'Backup completed successfully.',
-        'User User123 logged in.',
-        'Data replication task for shard 14 did not complete',
-        'Hey bro chill ya!',     # should be Unclassified
-    ]
-    print('Single log test:')
-    for log in test_logs:
-        label, conf = classify_with_bert(log)
-        # Only the first 60 characters of each log are printed
-        print(f'  [{conf:.0%}] {label:25s} | {log[:60]}')
-    print(f'\nMode: {"ONNX 🚀" if is_onnx_mode() else "PyTorch"}')
-    # Speed test: the sample list repeated 100 times, classified in one call
-    # (models are already loaded by the single-log loop above)
-    big_batch = test_logs * 100
-    t0 = time.perf_counter()
-    classify_batch(big_batch)
-    elapsed = time.perf_counter() - t0
-    print(f'\nSpeed: {len(big_batch)/elapsed:.0f} logs/s  ({elapsed*1000/len(big_batch):.1f}ms/log)')