Spaces:
Running
Running
Upload app_gradio.py
Browse files- app_gradio.py +187 -0
app_gradio.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Log Classification System — HuggingFace Spaces
|
| 3 |
+
Gradio UI for the 3-tier hybrid log classification pipeline.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations

import io
import tempfile
import time
from pathlib import Path

import gradio as gr
import pandas as pd

from classify import classify_log, classify_csv
|
| 11 |
+
|
| 12 |
+
# ── Source options ──────────────────────────────────────────────────────────
# Source systems offered in the "Source System" dropdown.
SOURCES = [
    "ModernCRM",
    "ModernHR",
    "BillingSystem",
    "AnalyticsEngine",
    "ThirdPartyAPI",
    "LegacyCRM",
]

# Icon displayed next to each tier name; unknown tiers fall back to a white
# circle at the lookup sites.
TIER_COLORS = {
    "Regex": "🟢",
    "BERT": "🔵",
    "LLM": "🟡",
    "LLM (fallback)": "🟠",
}

# [source, log_message] pairs fed to gr.Examples for the single-log tab.
EXAMPLE_LOGS = [
    ["ModernCRM", "User User12345 logged in."],
    ["ModernHR", "Multiple login failures occurred on user 6454 account"],
    ["BillingSystem", "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
    ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
    ["LegacyCRM", "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
    ["LegacyCRM", "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# ── Single log tab ──────────────────────────────────────────────────────────
|
| 40 |
+
def classify_single(source: str, log_message: str):
    """Classify one log message and format the result for display.

    Args:
        source: Name of the system that produced the log.
        log_message: Raw log text. ``None``, empty, or whitespace-only input
            is treated as "nothing to classify".

    Returns:
        A 4-tuple of display values ``(label, tier, confidence, latency)``.
        All four are an em dash placeholder when there is no input.
    """
    # Check for a missing value before calling .strip(), so None cannot raise.
    if not log_message or not log_message.strip():
        return "—", "—", "—", "—"

    # Time only the classifier call, not the formatting below.
    started = time.perf_counter()
    result = classify_log(source, log_message)
    latency_ms = (time.perf_counter() - started) * 1000

    label = result["label"]
    tier = result["tier"]
    confidence = result["confidence"]
    # A result may carry no confidence (None); show "N/A" in that case.
    confidence_text = "N/A" if confidence is None else f"{confidence:.1%}"
    icon = TIER_COLORS.get(tier, "⚪")

    return (
        label,
        f"{icon} {tier}",
        confidence_text,
        f"{latency_ms:.1f} ms",
    )
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# ── Batch CSV tab ───────────────────────────────────────────────────────────
|
| 62 |
+
def classify_batch(file):
    """Classify every row of an uploaded CSV and summarise the outcome.

    Args:
        file: The upload from the CSV file input: either an object exposing
            its path as ``.name`` or a plain path string. ``None`` when
            nothing was uploaded.

    Returns:
        ``(output_path, stats)`` on success, where ``stats`` is a
        human-readable summary of tier and label counts; ``(None, message)``
        when there is no upload or classification fails.
    """
    if file is None:
        return None, "⚠️ Please upload a CSV file."

    # Accept both upload shapes: a wrapper with .name or a bare path string.
    input_path = getattr(file, "name", file)

    try:
        # A fresh directory per request stops concurrent requests from
        # overwriting each other's results, while the downloaded file keeps
        # the name "classified_output.csv".
        output_target = Path(tempfile.mkdtemp(prefix="log_classify_")) / "classified_output.csv"
        output_path, df = classify_csv(input_path, str(output_target))
    except ValueError as e:
        return None, f"⚠️ {e}"
    except Exception as e:
        # UI boundary: show the failure in the stats box instead of raising.
        return None, f"❌ Error: {e}"

    total = len(df)
    tier_counts = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()

    tier_lines = "\n".join(
        f" {TIER_COLORS.get(tier, '⚪')} {tier}: {count} ({count / total:.0%})"
        for tier, count in tier_counts.items()
    )
    label_lines = "\n".join(
        f" • {label}: {count}" for label, count in label_counts.items()
    )

    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}"
    )

    return output_path, stats
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# ── UI ──────────────────────────────────────────────────────────────────────
# Three-tab Gradio app: single-log classification, batch CSV upload, and a
# static architecture overview.
# NOTE(review): component nesting and a few decorative icons (main heading,
# Confidence, Example Logs, Upload CSV, Stats, Model Performance, Environment
# Variables) were reconstructed from a mis-decoded copy of this file —
# confirm against the deployed app.
with gr.Blocks(title="Log Classification System", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🔍 Log Classification System
    **3-tier hybrid pipeline** — 🟢 Regex · 🔵 BERT + LogReg · 🟡 LLM
    Built to mimic production enterprise log monitoring architecture.
    """)

    with gr.Tabs():

        # ── Tab 1: Single Log ────────────────────────────────────────────
        with gr.Tab("Single Log"):
            with gr.Row():
                source_input = gr.Dropdown(
                    choices=SOURCES,
                    value="ModernCRM",
                    label="Source System",
                )
                log_input = gr.Textbox(
                    label="Log Message",
                    placeholder="Paste a log message here...",
                    lines=3,
                )

            classify_btn = gr.Button("Classify", variant="primary")

            with gr.Row():
                label_out = gr.Textbox(label="🏷️ Predicted Label", interactive=False)
                tier_out = gr.Textbox(label="⚙️ Tier Used", interactive=False)
                confidence_out = gr.Textbox(label="📊 Confidence", interactive=False)
                latency_out = gr.Textbox(label="⏱️ Latency", interactive=False)

            # The output order must match the tuple returned by classify_single.
            classify_btn.click(
                fn=classify_single,
                inputs=[source_input, log_input],
                outputs=[label_out, tier_out, confidence_out, latency_out],
            )

            gr.Examples(
                examples=EXAMPLE_LOGS,
                inputs=[source_input, log_input],
                label="📋 Example Logs (click to try)",
            )

        # ── Tab 2: Batch CSV ─────────────────────────────────────────────
        with gr.Tab("Batch CSV Upload"):
            gr.Markdown("""
            Upload a CSV with columns: **`source`**, **`log_message`**
            Download the classified CSV with added columns: `predicted_label`, `tier_used`, `confidence`.
            """)
            with gr.Row():
                with gr.Column():
                    csv_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
                    batch_btn = gr.Button("Classify All", variant="primary")
                with gr.Column():
                    csv_output = gr.File(label="📥 Download Classified CSV")
                    stats_out = gr.Textbox(label="📊 Stats", lines=12, interactive=False)

            # The output order must match the pair returned by classify_batch.
            batch_btn.click(
                fn=classify_batch,
                inputs=[csv_input],
                outputs=[csv_output, stats_out],
            )

            gr.Markdown("""
            **Sample CSV format:**
            ```
            source,log_message
            ModernCRM,User User123 logged in.
            LegacyCRM,Case escalation for ticket ID 7324 failed.
            BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
            ```
            """)

        # ── Tab 3: Architecture ──────────────────────────────────────────
        with gr.Tab("Architecture"):
            gr.Markdown("""
            ## 🏗️ 3-Tier Hybrid Pipeline

            | Tier | Method | Coverage | Latency | When Used |
            |------|--------|----------|---------|-----------|
            | 🟢 Regex | Python `re` patterns | ~21% | < 1ms | Fixed patterns (login, backup, etc.) |
            | 🔵 BERT | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories with 150+ samples |
            | 🟡 LLM | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM logs, rare patterns |

            ## 📊 Model Performance (from training)
            - **BERT + LogReg** trained on 2,410 synthetic enterprise logs
            - **Confidence threshold**: 0.5 (below → escalate to LLM)
            - **Source-aware routing**: `LegacyCRM` bypasses ML entirely (only 7 training samples)

            ## 🔑 Environment Variables
            | Secret | Required For |
            |--------|-------------|
            | `HF_TOKEN` | LLM inference (LegacyCRM logs) |
            """)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|