NOT-OMEGA committed on
Commit
50e1a1b
·
verified ·
1 Parent(s): 7020c87

Upload app_gradio.py

Browse files
Files changed (1) hide show
  1. app_gradio.py +187 -0
app_gradio.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Log Classification System — HuggingFace Spaces
3
+ Gradio UI for the 3-tier hybrid log classification pipeline.
4
+ """
5
+ from __future__ import annotations
6
+ import io
7
+ import time
8
+ import pandas as pd
9
+ import gradio as gr
10
+ from classify import classify_log, classify_csv
11
+
12
+ # ── Source options ──────────────────────────────────────────────────────────
13
# Source systems offered in the "Source System" dropdown.
SOURCES = [
    "ModernCRM",
    "ModernHR",
    "BillingSystem",
    "AnalyticsEngine",
    "ThirdPartyAPI",
    "LegacyCRM",
]

# Icon displayed next to each pipeline tier name. Callers look tiers up with
# ``.get`` and supply their own default icon for names not listed here.
TIER_COLORS = {
    "Regex": "🟢",
    "BERT": "🔵",
    "LLM": "🟡",
    "LLM (fallback)": "🟠",
}

# ``[source, log_message]`` rows used to pre-fill the single-log inputs.
EXAMPLE_LOGS = [
    ["ModernCRM", "User User12345 logged in."],
    ["ModernHR", "Multiple login failures occurred on user 6454 account"],
    ["BillingSystem", "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
    ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
    ["LegacyCRM", "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
    ["LegacyCRM", "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
]
37
+
38
+
39
+ # ── Single log tab ──────────────────────────────────────────────────────────
40
def classify_single(source: str, log_message: str):
    """Classify one log message and format the result for display.

    Args:
        source: Name of the system that produced the log.
        log_message: Raw log text. ``None``, empty, or whitespace-only input
            is treated as "nothing to classify".

    Returns:
        A 4-tuple ``(label, tier, confidence, latency)``. ``tier`` is prefixed
        with its icon from ``TIER_COLORS``, ``confidence`` is a percentage
        string or ``"N/A"`` when the classifier reports ``None``, and
        ``latency`` is the wall-clock time of the ``classify_log`` call in
        milliseconds. When there is nothing to classify, all four fields are
        an em dash.
    """
    # Check for None before calling .strip(): a textbox value can arrive as
    # None, and the original guard raised AttributeError in that case.
    if log_message is None or not log_message.strip():
        return "—", "—", "—", "—"

    started = time.perf_counter()
    result = classify_log(source, log_message)
    latency_ms = (time.perf_counter() - started) * 1000

    tier = result["tier"]
    raw_confidence = result["confidence"]
    confidence = f"{raw_confidence:.1%}" if raw_confidence is not None else "N/A"
    # Tiers without a configured icon are shown with a neutral white circle.
    icon = TIER_COLORS.get(tier, "⚪")

    return (
        result["label"],
        f"{icon} {tier}",
        confidence,
        f"{latency_ms:.1f} ms",
    )
59
+
60
+
61
+ # ── Batch CSV tab ───────────────────────────────────────────────────────────
62
def classify_batch(file):
    """Classify every row of an uploaded CSV and summarise the outcome.

    Args:
        file: The upload passed in by the file input. Either an object that
            exposes the on-disk path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.

    Returns:
        ``(output_path, stats)`` on success, where ``output_path`` is the
        classified CSV written by ``classify_csv`` and ``stats`` is a
        multi-line text summary built from the ``tier_used`` and
        ``predicted_label`` columns of the returned frame. On failure the
        result is ``(None, message)`` so the UI can show the problem instead
        of crashing.
    """
    # Local imports keep this block self-contained; both are standard library.
    import os
    import tempfile

    if file is None:
        return None, "⚠️ Please upload a CSV file."

    # Accept both upload shapes: an object carrying ``.name`` or a bare path.
    input_path = getattr(file, "name", file)

    try:
        # A fresh directory per request keeps the download file name stable
        # while preventing concurrent sessions from overwriting each other's
        # results (the previous fixed /tmp path was shared by every user).
        out_dir = tempfile.mkdtemp(prefix="log_classifier_")
        output_path, df = classify_csv(
            input_path, os.path.join(out_dir, "classified_output.csv")
        )
    except ValueError as e:
        # ValueError is reported as a warning rather than a hard error.
        return None, f"⚠️ {e}"
    except Exception as e:
        # UI boundary: surface any other failure as text in the stats box.
        return None, f"❌ Error: {e}"

    total = len(df)
    tier_counts = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()

    # An empty frame yields empty count dicts, so ``count / total`` is never
    # evaluated with ``total == 0``.
    tier_lines = "\n".join(
        f" {TIER_COLORS.get(tier, '⚪')} {tier}: {count} ({count / total:.0%})"
        for tier, count in tier_counts.items()
    )
    label_lines = "\n".join(
        f" • {label}: {count}" for label, count in label_counts.items()
    )

    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}"
    )

    return output_path, stats
87
+
88
+
89
+ # ── UI ──────────────────────────────────────────────────────────────────────
90
# Build the three-tab interface. The Markdown bodies below are indented with
# the surrounding code for readability.
# NOTE(review): this relies on gr.Markdown stripping common leading
# indentation before rendering — confirm against the pinned Gradio version.
with gr.Blocks(title="Log Classification System", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🔍 Log Classification System
    **3-tier hybrid pipeline** → 🟢 Regex · 🔵 BERT + LogReg · 🟡 LLM
    Built to mimic production enterprise log monitoring architecture.
    """)

    with gr.Tabs():

        # ── Tab 1: Single Log ────────────────────────────────────────────
        with gr.Tab("Single Log"):
            with gr.Row():
                source_input = gr.Dropdown(
                    choices=SOURCES,
                    value="ModernCRM",
                    label="Source System",
                )
                log_input = gr.Textbox(
                    label="Log Message",
                    placeholder="Paste a log message here...",
                    lines=3,
                )

            classify_btn = gr.Button("Classify", variant="primary")

            # Read-only result fields, in the same order as the tuple
            # returned by classify_single.
            with gr.Row():
                label_out = gr.Textbox(label="🏷️ Predicted Label", interactive=False)
                tier_out = gr.Textbox(label="⚙️ Tier Used", interactive=False)
                confidence_out = gr.Textbox(label="📈 Confidence", interactive=False)
                latency_out = gr.Textbox(label="⏱️ Latency", interactive=False)

            classify_btn.click(
                fn=classify_single,
                inputs=[source_input, log_input],
                outputs=[label_out, tier_out, confidence_out, latency_out],
            )

            # No ``fn`` is given, so the examples are wired to the two input
            # components only.
            gr.Examples(
                examples=EXAMPLE_LOGS,
                inputs=[source_input, log_input],
                label="📋 Example Logs (click to try)",
            )

        # ── Tab 2: Batch CSV ─────────────────────────────────────────────
        with gr.Tab("Batch CSV Upload"):
            gr.Markdown("""
            Upload a CSV with columns: **`source`**, **`log_message`**
            Download the classified CSV with added columns: `predicted_label`, `tier_used`, `confidence`.
            """)
            with gr.Row():
                with gr.Column():
                    csv_input = gr.File(label="📂 Upload CSV", file_types=[".csv"])
                    batch_btn = gr.Button("Classify All", variant="primary")
                with gr.Column():
                    csv_output = gr.File(label="📥 Download Classified CSV")
                    stats_out = gr.Textbox(label="📊 Stats", lines=12, interactive=False)

            batch_btn.click(
                fn=classify_batch,
                inputs=[csv_input],
                outputs=[csv_output, stats_out],
            )

            gr.Markdown("""
            **Sample CSV format:**
            ```
            source,log_message
            ModernCRM,User User123 logged in.
            LegacyCRM,Case escalation for ticket ID 7324 failed.
            BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
            ```
            """)

        # ── Tab 3: Architecture ──────────────────────────────────────────
        with gr.Tab("Architecture"):
            gr.Markdown("""
            ## 🏗️ 3-Tier Hybrid Pipeline

            | Tier | Method | Coverage | Latency | When Used |
            |------|--------|----------|---------|-----------|
            | 🟢 Regex | Python `re` patterns | ~21% | < 1ms | Fixed patterns (login, backup, etc.) |
            | 🔵 BERT | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories with 150+ samples |
            | 🟡 LLM | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM logs, rare patterns |

            ## 📊 Model Performance (from training)
            - **BERT + LogReg** trained on 2,410 synthetic enterprise logs
            - **Confidence threshold**: 0.5 (below → escalate to LLM)
            - **Source-aware routing**: `LegacyCRM` bypasses ML entirely (only 7 training samples)

            ## 🔑 Environment Variables
            | Secret | Required For |
            |--------|-------------|
            | `HF_TOKEN` | LLM inference (LegacyCRM logs) |
            """)

# Start the server only when run as a script; it listens on all interfaces
# at port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)