Spaces:

NOT-OMEGA
/

LogAI-Engine

Sleeping

File size: 8,641 Bytes

2222383
 
72b0893
9ca9aea
 
 
 
 
2222383
 
 
 
3812273
2222383
4ad7bb3
2222383
 
22aa505
 
72b0893
22aa505
2222383
 
 
 
 
 
8401d25
9ca9aea
 
 
 
 
8401d25
2222383
 
 
 
 
 
72b0893
2222383
 
3812273
2222383
72b0893
 
2222383
 
 
 
 
72b0893
 
 
 
 
 
318fd33
 
 
 
72b0893
318fd33
72b0893
318fd33
 
 
 
72b0893
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2222383
 
 
72b0893
4ad7bb3
2222383
22aa505
318fd33
 
22aa505
72b0893
9ca9aea
318fd33
4ad7bb3
 
 
8401d25
318fd33
9ca9aea
 
 
 
318fd33
4ad7bb3
318fd33
2222383
9ca9aea
4ad7bb3
9ca9aea
 
 
72b0893
4ad7bb3
9ca9aea
2222383
9ca9aea
3812273
 
9ca9aea
3812273
72b0893
9ca9aea
72b0893
9ca9aea
 
72b0893
9ca9aea
 
8401d25
 
 
9ca9aea
 
 
 
8401d25
9ca9aea
 
 
 
 
 
 
 
 
 
8401d25
 
9ca9aea
 
 
8401d25
 
 
 
9ca9aea
 
 
 
 
8401d25
9ca9aea
 
4ad7bb3
8401d25
 
 
ea255c9
9ca9aea
72b0893
4ad7bb3
72b0893
4ad7bb3
72b0893
 
9ca9aea
72b0893
facc0f2
 
318fd33
facc0f2
4ad7bb3
facc0f2
 
318fd33
 
 
2222383
72b0893
2222383
 
318fd33
2222383
72b0893
9ca9aea
72b0893
9ca9aea
2222383
9ca9aea
72b0893
318fd33
9ca9aea
4ad7bb3
318fd33
 
 
72b0893
2222383
 
9ca9aea
72b0893
2222383
9ca9aea
72b0893
9ca9aea
318fd33
4ad7bb3
 
 
 
318fd33
 
9ca9aea

"""
Log Classification System — HuggingFace Spaces
Ultra-Modern 3D UI | Optimized for Gradio 6.0 & HF Free Tier

Bug fixes vs previous version:
  - BERT latency display: no longer shows cumulative sum (was showing 2,962,635 ms).
    Now shows real per-log wall-clock latency from classify.py fix.
  - Added bert_wall_ms tracking in stats display so batch total is visible clearly.
"""
from __future__ import annotations
import io
import time
import uuid
import pandas as pd
import numpy as np
import gradio as gr
from classify import classify_log, classify_csv
from processor_bert import preload_models

# ── Preload models (Start loading BERT into RAM immediately) ──
# Runs at import time, before any UI component is created. classify_single()
# below reads processor_bert._model_ready and answers "Loading..." until that
# flag is truthy.
# NOTE(review): whether preload_models() blocks or loads in the background is
# not visible from this file — confirm in processor_bert.
preload_models()

# Source-system names offered in the "SOURCE" dropdown of the real-time tab;
# the selected value is passed to classify_log() as its first argument.
SOURCES = [
    "ModernCRM", "ModernHR", "BillingSystem",
    "AnalyticsEngine", "ThirdPartyAPI", "LegacyCRM",
]

def get_tier_icon(tier_name: str) -> str:
    """Return the emoji badge that represents a classification tier.

    The tier name is scanned for known markers using case-sensitive
    substring matching. Markers are tried in a fixed priority order and the
    first one found decides the icon; a name containing none of them gets
    the neutral white circle.
    """
    # Order matters: a name that contains several markers resolves to the
    # earliest entry in this table.
    marker_icons = (
        ("Regex", "🟢"),
        ("BERT", "🔵"),
        ("Cache Hit", "⚡"),
        ("fallback", "🟠"),
        ("LLM", "🟡"),
    )
    return next(
        (icon for marker, icon in marker_icons if marker in tier_name),
        "⚪",
    )

# Clickable samples for the real-time tab. Each row is [source, log message],
# matching the [src_in, msg_in] inputs that gr.Examples is wired to below.
EXAMPLE_LOGS = [
    ["ModernCRM",       "User User12345 logged in."],
    ["ModernHR",        "Multiple login failures occurred on user 6454 account"],
    ["BillingSystem",   "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
    ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
    ["LegacyCRM",       "The 'BulkEmailSender' feature will be deprecated in v5.0."],
]

# ── Custom CSS ────────────────────────
# Stylesheet string passed to launch(css=...) at the bottom of this file.
# It sets the dark page background and the 'Exo 2' font, styles
# `.gradio-group` panels and `button.primary`, and renders the textarea
# tagged `output-stats` (the batch analytics box) in green monospace.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Rajdhani:wght@600;700&family=Share+Tech+Mono&family=Exo+2:wght@400;600&display=swap');

:root {
    --bg-primary: #050810;
    --accent-cyan: #00d4ff;
    --text-primary: #e2e8f0;
}

body, .gradio-container { 
    background: var(--bg-primary) !important; 
    font-family: 'Exo 2', sans-serif !important; 
}

.gradio-group { 
    background: #0d1425 !important; 
    border: 1px solid rgba(0, 212, 255, 0.1) !important; 
    border-radius: 20px !important; 
    box-shadow: 0 10px 30px rgba(0,0,0,0.5) !important;
}

button.primary {
    background: linear-gradient(135deg, #0066ff, #00d4ff) !important;
    border: none !important;
    color: white !important;
    font-weight: 700 !important;
    letter-spacing: 1.5px !important;
    box-shadow: 0 4px 15px rgba(0, 102, 255, 0.4) !important;
    transition: all 0.2s ease !important;
}

button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(0, 212, 255, 0.5) !important;
}

.output-stats textarea {
    font-family: 'Share Tech Mono', monospace !important;
    background: #050810 !important;
    color: #00ff88 !important;
}
"""

# ── Functions ────────────────────────────────────────────────

def classify_single(source: str, log_message: str):
    """Classify one log line for the real-time analyzer tab.

    Args:
        source: Name of the system that produced the log; forwarded
            unchanged to ``classify_log``.
        log_message: Raw log text. ``None``, empty, or whitespace-only input
            short-circuits to placeholder dashes without calling the
            classifier.

    Returns:
        A 4-tuple of display values ``(label, tier, confidence, latency)``:

        * placeholders (``"—"``) when there is nothing to classify;
        * a "loading" status while ``processor_bert._model_ready`` is falsy;
        * the predicted label, the tier prefixed with its icon, the
          confidence as a percentage (``"N/A"`` when falsy), and the
          wall-clock latency in milliseconds on success;
        * an ``"Error: ..."`` label with ``"Fail"`` as the tier if
          classification raises.
    """
    # Guard before anything else: a cleared textbox or an API caller may
    # hand over None, and calling .strip() on it would raise outside the
    # try block below.
    if not log_message or not log_message.strip():
        return "—", "—", "—", "—"

    # Imported on every call so the flag's current value is read each time
    # rather than a value captured once at module import.
    from processor_bert import _model_ready
    if not _model_ready:
        return "⏳ Loading...", "Warming up", "—", "—"

    t0 = time.perf_counter()
    try:
        result = classify_log(source, log_message)
        latency = (time.perf_counter() - t0) * 1000
        icon = get_tier_icon(result["tier"])
        confidence = result["confidence"]
        return (
            result["label"],
            f"{icon} {result['tier']}",
            # A falsy confidence (None or 0) is shown as "N/A".
            f"{confidence:.1%}" if confidence else "N/A",
            f"{latency:.4f} ms",
        )
    except Exception as e:
        # UI boundary: surface the failure in the output boxes instead of
        # letting the request error out.
        return f"Error: {e!s}", "Fail", "—", "—"


def _format_tier_line(tier, latencies, count, total):
    """Build the one-line latency summary shown for a single tier."""
    icon = get_tier_icon(tier)
    pct = count / total
    has_data = not latencies.empty

    def percentile(q):
        # An all-NaN / empty latency column reports 0 instead of raising.
        return np.percentile(latencies, q) if has_data else 0

    p50 = percentile(50)

    if "BERT" in tier:
        # Per the module docstring, latency_ms is expected to hold per-log
        # wall-clock time for BERT rows, so median x count approximates the
        # wall time of the whole batch.
        batch_ms = p50 * count
        return (
            f"  {icon} {tier}: p50={p50:.2f} ms/log | "
            f"Batch total ~{batch_ms/1000:.1f} s (Over {count} logs)"
        )

    if "Regex" in tier:
        return (
            f"  {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | {count} logs ({pct:.0%})"
        )

    p95 = percentile(95)
    p99 = percentile(99)
    return (
        f"  {icon} {tier}: {count} logs ({pct:.0%}) | "
        f"p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms"
    )


def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
    """Classify every log in an uploaded CSV and summarise the run.

    Args:
        file: The upload from the batch tab. Either an object exposing the
            path as ``.name`` or a plain path string is accepted; ``None``
            means nothing was uploaded.
        progress: Gradio progress tracker, updated at start, before metrics
            and on success.

    Returns:
        ``(output_path, stats)`` on success, where ``output_path`` is the
        classified CSV returned by ``classify_csv`` and ``stats`` is a
        multi-line text report (total time, per-tier latency, label
        distribution). On missing input or any failure, ``(None, message)``.
    """
    if file is None:
        return None, "⚠️ Please upload a CSV file."

    progress(0, desc="🚀 Initializing Engine...")
    t0 = time.perf_counter()

    try:
        # The upload may arrive as an object with a .name path or as a bare
        # path string; fall back to the value itself when .name is absent.
        input_path = getattr(file, "name", file)

        # Generate a unique output path per request to prevent data bleeding
        # between concurrent users.
        unique_id = uuid.uuid4().hex
        safe_output_path = f"/tmp/classified_output_{unique_id}.csv"

        output_path, df = classify_csv(input_path, safe_output_path)
        total_time_sec = time.perf_counter() - t0

        progress(0.9, desc="📊 Calculating Metrics...")

        total        = len(df)
        label_counts = df["predicted_label"].value_counts().to_dict()
        tier_counts  = df["tier_used"].value_counts().to_dict()

        # One summary line per tier, in value_counts() order (most used first).
        tier_lines = [
            _format_tier_line(
                tier,
                df.loc[df["tier_used"] == tier, "latency_ms"].dropna(),
                count,
                total,
            )
            for tier, count in tier_counts.items()
        ]

        tier_lines_str = "\n".join(tier_lines)
        label_lines    = "\n".join(f"  • {k}: {v}" for k, v in label_counts.items())

        stats = (
            f"✅ Classified {total} logs in {total_time_sec:.2f} s\n\n"
            f"📊 Performance by Tier:\n{tier_lines_str}\n\n"
            f"🏷️ Label distribution:\n{label_lines}"
        )

        progress(1.0, desc="✅ Success")
        return output_path, stats

    except Exception as e:
        # UI boundary: report the failure in the analytics box rather than
        # raising into Gradio.
        return None, f"❌ System Error: {e!s}"


# ── Theme & Layout ──────────────────────────────────────────
# Gradio theme built on gr.themes.Base with blue / cyan / slate hues and the
# "Exo 2" Google font; handed to launch(theme=...) at the bottom of the file.
THEME = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Exo 2")],
)

# UI definition. Components are created inside nested context managers, so the
# order of the statements below is the order they appear on the page.
with gr.Blocks(title="Log AI Engine") as demo:
    gr.HTML("<div style='text-align: center; padding: 20px;'><h1>🔍 LOG CLASSIFICATION SYSTEM</h1></div>")

    with gr.Tabs():
        # Tab 1: classify a single pasted log line.
        with gr.Tab("⚡ REAL-TIME ANALYZER"):
            with gr.Row():
                with gr.Column(scale=1):
                    src_in = gr.Dropdown(choices=SOURCES, value="ModernCRM", label="SOURCE")
                with gr.Column(scale=3):
                    msg_in = gr.Textbox(label="LOG MESSAGE", placeholder="Paste raw log string...", lines=3)

            run_btn = gr.Button("▶ CLASSIFY LOG", variant="primary")

            # The four outputs line up with the 4-tuple returned by classify_single.
            with gr.Row():
                lbl_out  = gr.Textbox(label="PREDICTED LABEL")
                tier_out = gr.Textbox(label="TIER USED")
                conf_out = gr.Textbox(label="CONFIDENCE")
                lat_out  = gr.Textbox(label="LATENCY")

            run_btn.click(classify_single, [src_in, msg_in], [lbl_out, tier_out, conf_out, lat_out])
            # No fn is given here, so presumably selecting an example only
            # fills the two inputs — confirm against the Gradio version in use.
            gr.Examples(examples=EXAMPLE_LOGS, inputs=[src_in, msg_in])

        # Tab 2: upload a CSV, download the classified result and read the stats.
        with gr.Tab("📦 BATCH PROCESSING"):
            with gr.Row():
                with gr.Column():
                    csv_in    = gr.File(label="UPLOAD CSV", file_types=[".csv"])
                    batch_btn = gr.Button("▶ START BATCH PROCESS", variant="primary")
                with gr.Column():
                    csv_out   = gr.File(label="DOWNLOAD CLASSIFIED DATA")
                    # elem_classes ties this box to the `.output-stats` rule in CUSTOM_CSS.
                    stats_out = gr.Textbox(label="PIPELINE ANALYTICS", lines=16, elem_classes="output-stats")

            # classify_batch returns (output file path, stats text) in this order.
            batch_btn.click(classify_batch, inputs=[csv_in], outputs=[csv_out, stats_out])

# Start the app as soon as this module is executed: enable the request queue
# with a default concurrency limit of 2 per event, then serve on all
# interfaces at port 7860.
# NOTE(review): theme/css are passed to launch() here; some Gradio releases
# take them on gr.Blocks(...) instead — confirm against the pinned version.
demo.queue(default_concurrency_limit=2).launch(
    server_name="0.0.0.0",
    server_port=7860,
    theme=THEME,
    css=CUSTOM_CSS
)