File size: 8,641 Bytes
2222383
 
72b0893
9ca9aea
 
 
 
 
2222383
 
 
 
3812273
2222383
4ad7bb3
2222383
 
22aa505
 
72b0893
22aa505
2222383
 
 
 
 
 
8401d25
9ca9aea
 
 
 
 
8401d25
2222383
 
 
 
 
 
72b0893
2222383
 
3812273
2222383
72b0893
 
2222383
 
 
 
 
72b0893
 
 
 
 
 
318fd33
 
 
 
72b0893
318fd33
72b0893
318fd33
 
 
 
72b0893
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2222383
 
 
72b0893
4ad7bb3
2222383
22aa505
318fd33
 
22aa505
72b0893
9ca9aea
318fd33
4ad7bb3
 
 
8401d25
318fd33
9ca9aea
 
 
 
318fd33
4ad7bb3
318fd33
2222383
9ca9aea
4ad7bb3
9ca9aea
 
 
72b0893
4ad7bb3
9ca9aea
2222383
9ca9aea
3812273
 
9ca9aea
3812273
72b0893
9ca9aea
72b0893
9ca9aea
 
72b0893
9ca9aea
 
8401d25
 
 
9ca9aea
 
 
 
8401d25
9ca9aea
 
 
 
 
 
 
 
 
 
8401d25
 
9ca9aea
 
 
8401d25
 
 
 
9ca9aea
 
 
 
 
8401d25
9ca9aea
 
4ad7bb3
8401d25
 
 
ea255c9
9ca9aea
72b0893
4ad7bb3
72b0893
4ad7bb3
72b0893
 
9ca9aea
72b0893
facc0f2
 
318fd33
facc0f2
4ad7bb3
facc0f2
 
318fd33
 
 
2222383
72b0893
2222383
 
318fd33
2222383
72b0893
9ca9aea
72b0893
9ca9aea
2222383
9ca9aea
72b0893
318fd33
9ca9aea
4ad7bb3
318fd33
 
 
72b0893
2222383
 
9ca9aea
72b0893
2222383
9ca9aea
72b0893
9ca9aea
318fd33
4ad7bb3
 
 
 
318fd33
 
9ca9aea
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
"""
Log Classification System — HuggingFace Spaces
Ultra-Modern 3D UI | Optimized for Gradio 6.0 & HF Free Tier

Bug fixes vs previous version:
  - BERT latency display: no longer shows cumulative sum (was showing 2,962,635 ms).
    Now shows real per-log wall-clock latency from classify.py fix.
  - Added bert_wall_ms tracking in stats display so batch total is visible clearly.
"""
from __future__ import annotations
import io
import time
import uuid
import pandas as pd
import numpy as np
import gradio as gr
from classify import classify_log, classify_csv
from processor_bert import preload_models

# ── Preload models ──
# Runs at import time, before the UI below is built. Per the original note
# this starts loading BERT into RAM immediately; the implementation lives in
# processor_bert and is not visible in this file.
preload_models()

# Source-system names offered as choices in the "SOURCE" dropdown.
SOURCES = [
    "ModernCRM",
    "ModernHR",
    "BillingSystem",
    "AnalyticsEngine",
    "ThirdPartyAPI",
    "LegacyCRM",
]

# (marker substring, icon) pairs, checked in order; the first match wins.
# Icons are spelled as Unicode escapes so this file stays ASCII-safe and the
# emoji cannot be corrupted by a wrong-encoding round trip.
_TIER_ICONS = (
    ("Regex", "\U0001F7E2"),      # green circle
    ("BERT", "\U0001F535"),       # blue circle
    ("Cache Hit", "\u26A1"),      # high voltage
    ("fallback", "\U0001F7E0"),   # orange circle
    ("LLM", "\U0001F7E1"),        # yellow circle
)
_DEFAULT_TIER_ICON = "\u26AA"     # white circle


def get_tier_icon(tier_name: str) -> str:
    """Return the emoji shown next to a classification tier name.

    Matching is by case-sensitive substring, in the order of ``_TIER_ICONS``,
    so a name containing several markers gets the icon of the earliest one.

    Args:
        tier_name: Tier description, e.g. the ``tier`` value of a result.

    Returns:
        The matching icon, or a white circle when no marker matches or
        ``tier_name`` is not a string.
    """
    # A non-string tier (None, NaN, ...) cannot contain a marker; fall back
    # to the default icon instead of raising TypeError.
    if not isinstance(tier_name, str):
        return _DEFAULT_TIER_ICON
    for marker, icon in _TIER_ICONS:
        if marker in tier_name:
            return icon
    return _DEFAULT_TIER_ICON

# Sample [source, log message] rows offered as clickable examples in the UI.
EXAMPLE_LOGS = [
    [example_source, example_message]
    for example_source, example_message in (
        ("ModernCRM", "User User12345 logged in."),
        ("ModernHR", "Multiple login failures occurred on user 6454 account"),
        (
            "BillingSystem",
            "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19",
        ),
        ("AnalyticsEngine", "System crashed due to disk I/O failure on node-3"),
        (
            "LegacyCRM",
            "The 'BulkEmailSender' feature will be deprecated in v5.0.",
        ),
    )
]

# ── Custom CSS ────────────────────────
# Stylesheet handed to Gradio via the `css=` argument of `demo.launch(...)`.
# It defines the dark colour variables, styles grouped containers and primary
# buttons, and gives elements carrying the `output-stats` class (the batch
# analytics textbox) a monospace green-on-dark look.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Rajdhani:wght@600;700&family=Share+Tech+Mono&family=Exo+2:wght@400;600&display=swap');

:root {
    --bg-primary: #050810;
    --accent-cyan: #00d4ff;
    --text-primary: #e2e8f0;
}

body, .gradio-container { 
    background: var(--bg-primary) !important; 
    font-family: 'Exo 2', sans-serif !important; 
}

.gradio-group { 
    background: #0d1425 !important; 
    border: 1px solid rgba(0, 212, 255, 0.1) !important; 
    border-radius: 20px !important; 
    box-shadow: 0 10px 30px rgba(0,0,0,0.5) !important;
}

button.primary {
    background: linear-gradient(135deg, #0066ff, #00d4ff) !important;
    border: none !important;
    color: white !important;
    font-weight: 700 !important;
    letter-spacing: 1.5px !important;
    box-shadow: 0 4px 15px rgba(0, 102, 255, 0.4) !important;
    transition: all 0.2s ease !important;
}

button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(0, 212, 255, 0.5) !important;
}

.output-stats textarea {
    font-family: 'Share Tech Mono', monospace !important;
    background: #050810 !important;
    color: #00ff88 !important;
}
"""

# ── Functions ────────────────────────────────────────────────

def classify_single(source: str, log_message: str) -> tuple[str, str, str, str]:
    """Classify one log line and format the result for the four output boxes.

    Args:
        source: Name of the system that produced the log.
        log_message: Raw log text; ``None``, empty or whitespace-only input
            is treated as "nothing to classify".

    Returns:
        A ``(label, tier, confidence, latency)`` tuple of display strings.
        Blank input yields four em-dash placeholders, a not-yet-ready model
        yields a "Loading..." message, and any exception raised while
        classifying is reported in the first element instead of propagating.
    """
    placeholder = "\u2014"  # em dash, written as an escape to stay ASCII-safe

    # Guard first: blank input needs neither the model nor the import below.
    if not log_message or not log_message.strip():
        return placeholder, placeholder, placeholder, placeholder

    # Function-scope import: every call re-reads the module's current
    # `_model_ready` value rather than a copy taken at import time.
    from processor_bert import _model_ready
    if not _model_ready:
        # "\u23F3" is the hourglass emoji.
        return "\u23F3 Loading...", "Warming up", placeholder, placeholder

    t0 = time.perf_counter()
    try:
        result = classify_log(source, log_message)
        latency = (time.perf_counter() - t0) * 1000  # seconds -> milliseconds
        icon = get_tier_icon(result["tier"])
        confidence = result["confidence"]
        return (
            result["label"],
            f"{icon} {result['tier']}",
            # A falsy confidence (None or 0) is displayed as "N/A".
            f"{confidence:.1%}" if confidence else "N/A",
            f"{latency:.4f} ms",
        )
    except Exception as e:
        # UI boundary: surface the failure in the output boxes instead of
        # letting the event handler crash.
        return f"Error: {str(e)}", "Fail", placeholder, placeholder


def _format_tier_line(tier, count, total, lats):
    """Return the one-line latency summary shown for a single tier.

    Args:
        tier: Value taken from the ``tier_used`` column.
        count: Number of rows classified by this tier.
        total: Number of rows in the whole batch.
        lats: Non-null ``latency_ms`` values of this tier (pandas Series).

    Returns:
        The formatted summary line; percentiles are 0 when ``lats`` is empty.
    """
    icon = get_tier_icon(tier)
    pct = count / total
    p50 = np.percentile(lats, 50) if not lats.empty else 0

    if "BERT" in tier:
        # NOTE(review): relies on classify_csv storing a per-log wall-clock
        # latency for BERT rows (per the module docstring); under that
        # assumption p50 * count approximates the batch wall time.
        batch_ms = p50 * count
        return (
            f"  {icon} {tier}: p50={p50:.2f} ms/log | "
            f"Batch total ~{batch_ms/1000:.1f} s (Over {count} logs)"
        )

    if "Regex" in tier:
        return (
            f"  {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | "
            f"{count} logs ({pct:.0%})"
        )

    p95 = np.percentile(lats, 95) if not lats.empty else 0
    p99 = np.percentile(lats, 99) if not lats.empty else 0
    return (
        f"  {icon} {tier}: {count} logs ({pct:.0%}) | "
        f"p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms"
    )


def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
    """Classify every row of an uploaded CSV and summarise the run.

    Args:
        file: Upload delivered by the ``gr.File`` input. Either an object
            with a ``name`` attribute holding the path, or the path itself.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        ``(output_path, stats)``: the path of the classified CSV and a
        multi-line text report. On missing input or any failure the path is
        ``None`` and ``stats`` carries the message to display.
    """
    import os
    import tempfile

    if file is None:
        # Warning sign + variation selector.
        return None, "\u26A0\uFE0F Please upload a CSV file."

    progress(0, desc="\U0001F680 Initializing Engine...")  # rocket
    t0 = time.perf_counter()

    try:
        # Accept both upload shapes: an object exposing `.name`, or a plain
        # filepath string (which has no `.name`).
        input_path = getattr(file, "name", file)

        # Unique output file per request so concurrent users never share or
        # overwrite each other's results.
        unique_id = uuid.uuid4().hex
        safe_output_path = os.path.join(
            tempfile.gettempdir(), f"classified_output_{unique_id}.csv"
        )

        output_path, df = classify_csv(input_path, safe_output_path)
        total_time_sec = time.perf_counter() - t0

        progress(0.9, desc="\U0001F4CA Calculating Metrics...")  # bar chart

        total = len(df)
        label_counts = df["predicted_label"].value_counts().to_dict()
        tier_counts = df["tier_used"].value_counts().to_dict()

        tier_lines = [
            _format_tier_line(
                tier,
                count,
                total,
                df.loc[df["tier_used"] == tier, "latency_ms"].dropna(),
            )
            for tier, count in tier_counts.items()
        ]

        tier_lines_str = "\n".join(tier_lines)
        # "\u2022" is a bullet.
        label_lines = "\n".join(
            f"  \u2022 {label}: {n}" for label, n in label_counts.items()
        )

        stats = (
            f"\u2705 Classified {total} logs in {total_time_sec:.2f} s\n\n"
            f"\U0001F4CA Performance by Tier:\n{tier_lines_str}\n\n"
            f"\U0001F3F7\uFE0F Label distribution:\n{label_lines}"
        )

        progress(1.0, desc="\u2705 Success")  # check mark
        return output_path, stats

    except Exception as e:
        # UI boundary: report the failure in the analytics box ("\u274C" is
        # the cross mark) instead of crashing the event handler.
        return None, f"\u274C System Error: {str(e)}"


# ── Theme & Layout ──────────────────────────────────────────
# Base Gradio theme: blue primary, cyan secondary and slate neutral hues,
# rendered in the "Exo 2" Google font.
THEME = gr.themes.Base(
    font=[gr.themes.GoogleFont("Exo 2")],
    neutral_hue="slate",
    secondary_hue="cyan",
    primary_hue="blue",
)

# UI definition. Emoji in labels are written as Unicode escapes so they stay
# intact regardless of how this file is stored or transferred:
#   \U0001F50D magnifying glass, \u26A1 high voltage,
#   \u25B6 play triangle, \U0001F4E6 package.
with gr.Blocks(title="Log AI Engine") as demo:
    gr.HTML(
        "<div style='text-align: center; padding: 20px;'>"
        "<h1>\U0001F50D LOG CLASSIFICATION SYSTEM</h1></div>"
    )

    with gr.Tabs():
        # Tab 1: classify a single pasted log line.
        with gr.Tab("\u26A1 REAL-TIME ANALYZER"):
            with gr.Row():
                with gr.Column(scale=1):
                    src_in = gr.Dropdown(choices=SOURCES, value="ModernCRM", label="SOURCE")
                with gr.Column(scale=3):
                    msg_in = gr.Textbox(label="LOG MESSAGE", placeholder="Paste raw log string...", lines=3)

            run_btn = gr.Button("\u25B6 CLASSIFY LOG", variant="primary")

            with gr.Row():
                lbl_out = gr.Textbox(label="PREDICTED LABEL")
                tier_out = gr.Textbox(label="TIER USED")
                conf_out = gr.Textbox(label="CONFIDENCE")
                lat_out = gr.Textbox(label="LATENCY")

            # The four outputs match the 4-tuple returned by classify_single.
            run_btn.click(classify_single, [src_in, msg_in], [lbl_out, tier_out, conf_out, lat_out])
            gr.Examples(examples=EXAMPLE_LOGS, inputs=[src_in, msg_in])

        # Tab 2: classify an uploaded CSV and show pipeline statistics.
        with gr.Tab("\U0001F4E6 BATCH PROCESSING"):
            with gr.Row():
                with gr.Column():
                    csv_in = gr.File(label="UPLOAD CSV", file_types=[".csv"])
                    batch_btn = gr.Button("\u25B6 START BATCH PROCESS", variant="primary")
                with gr.Column():
                    csv_out = gr.File(label="DOWNLOAD CLASSIFIED DATA")
                    # `output-stats` is the class targeted by CUSTOM_CSS.
                    stats_out = gr.Textbox(label="PIPELINE ANALYTICS", lines=16, elem_classes="output-stats")

            # classify_batch returns (output file path, stats text).
            batch_btn.click(classify_batch, inputs=[csv_in], outputs=[csv_out, stats_out])

# Turn on request queuing with a default concurrency limit of 2, then start
# the server on all network interfaces at port 7860 with the custom theme and
# stylesheet. `queue()` returns the same Blocks object, so configuring it and
# launching in two statements is equivalent to chaining the calls.
demo.queue(default_concurrency_limit=2)
demo.launch(
    css=CUSTOM_CSS,
    theme=THEME,
    server_port=7860,
    server_name="0.0.0.0",
)