Spaces:
Sleeping
Sleeping
"""
Log Classification System — HuggingFace Spaces
Ultra-Modern 3D UI | Optimized for Gradio 6.0 & HF Free Tier

Bug fixes vs previous version:
- BERT latency display: no longer shows cumulative sum (was showing 2,962,635 ms).
  Now shows real per-log wall-clock latency from classify.py fix.
- Added bert_wall_ms tracking in stats display so batch total is visible clearly.
"""
| from __future__ import annotations | |
| import io | |
| import time | |
| import uuid | |
| import pandas as pd | |
| import numpy as np | |
| import gradio as gr | |
| from classify import classify_log, classify_csv | |
| from processor_bert import preload_models | |
# ── Preload models (start loading BERT into RAM immediately) ──
# Runs at import time, before any UI component is built.
# NOTE(review): classify_single() checks processor_bert._model_ready before
# classifying, which suggests loading may finish later — confirm whether this
# call blocks or only starts the load.
preload_models()
# Source-system names offered as choices in the "SOURCE" dropdown.
SOURCES = [
    "ModernCRM",
    "ModernHR",
    "BillingSystem",
    "AnalyticsEngine",
    "ThirdPartyAPI",
    "LegacyCRM",
]
def get_tier_icon(tier_name: str) -> str:
    """Return the display icon for a classification tier name.

    The tier name is matched by case-sensitive substring against a fixed,
    ordered list of keywords; the first keyword found decides the icon.
    A name containing none of the keywords gets the neutral fallback icon.
    """
    # Order matters: when several keywords occur in one name (for example
    # "BERT" and "Cache Hit"), the earlier entry takes precedence.
    keyword_icons = (
        ("Regex", "π’"),
        ("BERT", "π΅"),
        ("Cache Hit", "β‘"),
        ("fallback", "π "),
        ("LLM", "π‘"),
    )
    for keyword, icon in keyword_icons:
        if keyword in tier_name:
            return icon
    return "βͺ"
# Sample [source, log message] rows shown as clickable examples under the
# real-time analyzer; each row fills the source dropdown and message box.
EXAMPLE_LOGS = [
    [
        "ModernCRM",
        "User User12345 logged in.",
    ],
    [
        "ModernHR",
        "Multiple login failures occurred on user 6454 account",
    ],
    [
        "BillingSystem",
        "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19",
    ],
    [
        "AnalyticsEngine",
        "System crashed due to disk I/O failure on node-3",
    ],
    [
        "LegacyCRM",
        "The 'BulkEmailSender' feature will be deprecated in v5.0.",
    ],
]
# ── Custom CSS ──────────────────────────────────────────────
# Stylesheet passed to the app at launch. It pulls three Google web fonts,
# sets a dark page background, styles `.gradio-group` panels, gives
# `button.primary` a blue-to-cyan gradient with a hover lift, and renders the
# `.output-stats` textarea (the batch analytics box) in green monospace.
# NOTE(review): --accent-cyan and --text-primary are declared but not
# referenced by any rule in this string.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Rajdhani:wght@600;700&family=Share+Tech+Mono&family=Exo+2:wght@400;600&display=swap');
:root {
--bg-primary: #050810;
--accent-cyan: #00d4ff;
--text-primary: #e2e8f0;
}
body, .gradio-container {
background: var(--bg-primary) !important;
font-family: 'Exo 2', sans-serif !important;
}
.gradio-group {
background: #0d1425 !important;
border: 1px solid rgba(0, 212, 255, 0.1) !important;
border-radius: 20px !important;
box-shadow: 0 10px 30px rgba(0,0,0,0.5) !important;
}
button.primary {
background: linear-gradient(135deg, #0066ff, #00d4ff) !important;
border: none !important;
color: white !important;
font-weight: 700 !important;
letter-spacing: 1.5px !important;
box-shadow: 0 4px 15px rgba(0, 102, 255, 0.4) !important;
transition: all 0.2s ease !important;
}
button.primary:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(0, 212, 255, 0.5) !important;
}
.output-stats textarea {
font-family: 'Share Tech Mono', monospace !important;
background: #050810 !important;
color: #00ff88 !important;
}
"""
# ── Functions ───────────────────────────────────────────────
def classify_single(source: str, log_message: str):
    """Classify one log message and format the result for the UI.

    Args:
        source: Name of the system that produced the log; passed through to
            ``classify_log`` unchanged.
        log_message: Raw log text. ``None``, an empty string, or
            whitespace-only text is treated as "nothing to classify".

    Returns:
        A 4-tuple of display strings ``(label, tier, confidence, latency)``.
        Placeholder values are returned when there is no input, when the
        model is not ready yet, or when classification raises.
    """
    # Validate input first. A missing value (None) used to reach `.strip()`
    # and raise AttributeError outside the try block below.
    if not log_message or not log_message.strip():
        return "β", "β", "β", "β"

    # Imported per call so the flag's current value is read on every request
    # rather than a snapshot taken when this module was loaded.
    from processor_bert import _model_ready

    if not _model_ready:
        return "β³ Loading...", "Warming up", "β", "β"

    t0 = time.perf_counter()
    try:
        result = classify_log(source, log_message)
        # Wall-clock time of the classify_log call, in milliseconds.
        latency = (time.perf_counter() - t0) * 1000
        icon = get_tier_icon(result["tier"])
        return (
            result["label"],
            f"{icon} {result['tier']}",
            # Any falsy confidence (None, but also 0) is displayed as "N/A".
            f"{result['confidence']:.1%}" if result["confidence"] else "N/A",
            f"{latency:.4f} ms",
        )
    except Exception as e:
        # UI boundary: report the failure in the output boxes instead of
        # letting the request error out.
        return f"Error: {str(e)}", "Fail", "β", "β"
def _latency_percentiles(lats, quantiles):
    """Return the requested percentiles of *lats*, or zeros when it is empty."""
    if lats.empty:
        return [0] * len(quantiles)
    return list(np.percentile(lats, quantiles))


def _format_tier_line(tier, lats, count, total):
    """Build the one-line latency summary for a single tier."""
    icon = get_tier_icon(tier)
    pct = count / total
    if "BERT" in tier:
        (p50,) = _latency_percentiles(lats, [50])
        # Each stored value is a per-log share of its batch's wall time, so
        # the sum over the tier is the real batch wall time. (Median x count
        # is only equal to that when every value is identical and no latency
        # is missing.)
        batch_ms = lats.sum()
        return (
            f" {icon} {tier}: p50={p50:.2f} ms/log | "
            f"Batch total ~{batch_ms/1000:.1f} s (Over {count} logs)"
        )
    if "Regex" in tier:
        (p50,) = _latency_percentiles(lats, [50])
        return (
            f" {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | {count} logs ({pct:.0%})"
        )
    p50, p95, p99 = _latency_percentiles(lats, [50, 95, 99])
    return (
        f" {icon} {tier}: {count} logs ({pct:.0%}) | "
        f"p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms"
    )


def _build_batch_stats(df, total_time_sec):
    """Render the analytics text (per-tier latency and label counts) for *df*."""
    total = len(df)
    label_counts = df["predicted_label"].value_counts().to_dict()
    tier_counts = df["tier_used"].value_counts().to_dict()

    # Tiers are listed in value_counts() order, i.e. most frequent first.
    tier_lines = [
        _format_tier_line(
            tier,
            df.loc[df["tier_used"] == tier, "latency_ms"].dropna(),
            count,
            total,
        )
        for tier, count in tier_counts.items()
    ]
    tier_lines_str = "\n".join(tier_lines)
    label_lines = "\n".join(f" β’ {k}: {v}" for k, v in label_counts.items())
    return (
        f"β Classified {total} logs in {total_time_sec:.2f} s\n\n"
        f"π Performance by Tier:\n{tier_lines_str}\n\n"
        f"π·οΈ Label distribution:\n{label_lines}"
    )


def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
    """Classify every row of an uploaded CSV and summarise the run.

    Args:
        file: The uploaded file from the ``gr.File`` input, or ``None`` when
            nothing was uploaded. Either an object exposing the path as
            ``.name`` or a plain path string is accepted.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        ``(output_path, stats)`` on success, where ``output_path`` is the
        classified CSV returned by ``classify_csv`` and ``stats`` is the
        analytics text. ``(None, message)`` when no file was given or when
        processing raised.
    """
    if file is None:
        return None, "β οΈ Please upload a CSV file."

    progress(0, desc="π Initializing Engine...")
    t0 = time.perf_counter()
    try:
        # Tolerate both an object carrying `.name` and a bare path string.
        input_path = getattr(file, "name", file)

        # Unique output path per request so concurrent users never share or
        # overwrite each other's result file.
        unique_id = uuid.uuid4().hex
        safe_output_path = f"/tmp/classified_output_{unique_id}.csv"

        output_path, df = classify_csv(input_path, safe_output_path)
        total_time_sec = time.perf_counter() - t0

        progress(0.9, desc="π Calculating Metrics...")
        stats = _build_batch_stats(df, total_time_sec)
        progress(1.0, desc="β Success")
        return output_path, stats
    except Exception as e:
        # UI boundary: surface the failure in the analytics box.
        return None, f"β System Error: {str(e)}"
# ── Theme & Layout ──────────────────────────────────────────
# Base Gradio theme: blue/cyan/slate hues with the "Exo 2" Google font.
# Handed to launch() at the bottom of the file together with CUSTOM_CSS.
THEME = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Exo 2")],
)
# UI definition: a title banner and two tabs.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had lost its indentation — confirm against the deployed layout.
with gr.Blocks(title="Log AI Engine") as demo:
    gr.HTML("<div style='text-align: center; padding: 20px;'><h1>π LOG CLASSIFICATION SYSTEM</h1></div>")
    with gr.Tabs():
        # Tab 1: classify a single pasted log line.
        with gr.Tab("β‘ REAL-TIME ANALYZER"):
            with gr.Row():
                with gr.Column(scale=1):
                    src_in = gr.Dropdown(choices=SOURCES, value="ModernCRM", label="SOURCE")
                with gr.Column(scale=3):
                    msg_in = gr.Textbox(label="LOG MESSAGE", placeholder="Paste raw log string...", lines=3)
            run_btn = gr.Button("βΆ CLASSIFY LOG", variant="primary")
            # The four output boxes receive classify_single's 4-tuple in order.
            with gr.Row():
                lbl_out = gr.Textbox(label="PREDICTED LABEL")
                tier_out = gr.Textbox(label="TIER USED")
                conf_out = gr.Textbox(label="CONFIDENCE")
                lat_out = gr.Textbox(label="LATENCY")
            run_btn.click(classify_single, [src_in, msg_in], [lbl_out, tier_out, conf_out, lat_out])
            # Examples only populate the inputs; no function is attached here.
            gr.Examples(examples=EXAMPLE_LOGS, inputs=[src_in, msg_in])
        # Tab 2: upload a CSV, download the classified CSV, read the stats.
        with gr.Tab("π¦ BATCH PROCESSING"):
            with gr.Row():
                with gr.Column():
                    csv_in = gr.File(label="UPLOAD CSV", file_types=[".csv"])
                    batch_btn = gr.Button("βΆ START BATCH PROCESS", variant="primary")
                with gr.Column():
                    csv_out = gr.File(label="DOWNLOAD CLASSIFIED DATA")
                    # "output-stats" is the class targeted by CUSTOM_CSS.
                    stats_out = gr.Textbox(label="PIPELINE ANALYTICS", lines=16, elem_classes="output-stats")
            batch_btn.click(classify_batch, inputs=[csv_in], outputs=[csv_out, stats_out])
# Enable the request queue with a default concurrency limit of 2, then start
# the server on all interfaces at port 7860 with the theme and stylesheet.
# NOTE(review): passing theme/css to launch() rather than to gr.Blocks()
# presumably follows the Gradio 6 API named in the module docstring — confirm
# against the installed Gradio version.
demo.queue(default_concurrency_limit=2).launch(
    server_name="0.0.0.0",
    server_port=7860,
    theme=THEME,
    css=CUSTOM_CSS
)