NOT-OMEGA committed on
Commit
4e312a0
·
verified ·
1 Parent(s): 595fb58

Delete app_gradio.py

Browse files
Files changed (1) hide show
  1. app_gradio.py +0 -187
app_gradio.py DELETED
@@ -1,187 +0,0 @@
1
- """
2
- Log Classification System — HuggingFace Spaces
3
- Gradio UI for the 3-tier hybrid log classification pipeline.
4
- """
5
- from __future__ import annotations
6
- import io
7
- import time
8
- import pandas as pd
9
- import gradio as gr
10
- from classify import classify_log, classify_csv
11
-
12
- # ── Source options ──────────────────────────────────────────────────────────
13
- SOURCES = [
14
- "ModernCRM",
15
- "ModernHR",
16
- "BillingSystem",
17
- "AnalyticsEngine",
18
- "ThirdPartyAPI",
19
- "LegacyCRM",
20
- ]
21
-
22
- TIER_COLORS = {
23
- "Regex": "🟢",
24
- "BERT": "🔵",
25
- "LLM": "🟡",
26
- "LLM (fallback)": "🟠",
27
- }
28
-
29
- EXAMPLE_LOGS = [
30
- ["ModernCRM", "User User12345 logged in."],
31
- ["ModernHR", "Multiple login failures occurred on user 6454 account"],
32
- ["BillingSystem", "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
33
- ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
34
- ["LegacyCRM", "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
35
- ["LegacyCRM", "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
36
- ]
37
-
38
-
39
- # ── Single log tab ──────────────────────────────────────────────────────────
40
- def classify_single(source: str, log_message: str):
41
- if not log_message.strip():
42
- return "—", "—", "—", "—"
43
-
44
- t0 = time.perf_counter()
45
- result = classify_log(source, log_message)
46
- latency_ms = (time.perf_counter() - t0) * 1000
47
-
48
- label = result["label"]
49
- tier = result["tier"]
50
- confidence = f"{result['confidence']:.1%}" if result["confidence"] is not None else "N/A"
51
- icon = TIER_COLORS.get(tier, "⚪")
52
-
53
- return (
54
- label,
55
- f"{icon} {tier}",
56
- confidence,
57
- f"{latency_ms:.1f} ms",
58
- )
59
-
60
-
61
- # ── Batch CSV tab ───────────────────────────────────────────────────────────
62
- def classify_batch(file):
63
- if file is None:
64
- return None, "⚠️ Please upload a CSV file."
65
-
66
- try:
67
- output_path, df = classify_csv(file.name, "/tmp/classified_output.csv")
68
- except ValueError as e:
69
- return None, f"⚠️ {e}"
70
- except Exception as e:
71
- return None, f"❌ Error: {e}"
72
-
73
- total = len(df)
74
- tier_counts = df["tier_used"].value_counts().to_dict()
75
- label_counts = df["predicted_label"].value_counts().to_dict()
76
-
77
- tier_lines = "\n".join(f" {TIER_COLORS.get(k,'⚪')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
78
- label_lines = "\n".join(f" • {k}: {v}" for k, v in label_counts.items())
79
-
80
- stats = (
81
- f"✅ Classified {total} logs\n\n"
82
- f"📊 Tier breakdown:\n{tier_lines}\n\n"
83
- f"🏷️ Label distribution:\n{label_lines}"
84
- )
85
-
86
- return output_path, stats
87
-
88
-
89
- # ── UI ──────────────────────────────────────────────────────────────────────
90
- with gr.Blocks(title="Log Classification System", theme=gr.themes.Soft()) as demo:
91
-
92
- gr.Markdown("""
93
- # 🔍 Log Classification System
94
- **3-tier hybrid pipeline** → 🟢 Regex · 🔵 BERT + LogReg · 🟡 LLM
95
- Built to mimic production enterprise log monitoring architecture.
96
- """)
97
-
98
- with gr.Tabs():
99
-
100
- # ── Tab 1: Single Log ────────────────────────────────────────────
101
- with gr.Tab("Single Log"):
102
- with gr.Row():
103
- source_input = gr.Dropdown(
104
- choices=SOURCES,
105
- value="ModernCRM",
106
- label="Source System",
107
- )
108
- log_input = gr.Textbox(
109
- label="Log Message",
110
- placeholder="Paste a log message here...",
111
- lines=3,
112
- )
113
-
114
- classify_btn = gr.Button("Classify", variant="primary")
115
-
116
- with gr.Row():
117
- label_out = gr.Textbox(label="🏷️ Predicted Label", interactive=False)
118
- tier_out = gr.Textbox(label="⚙️ Tier Used", interactive=False)
119
- confidence_out = gr.Textbox(label="📈 Confidence", interactive=False)
120
- latency_out = gr.Textbox(label="⏱️ Latency", interactive=False)
121
-
122
- classify_btn.click(
123
- fn=classify_single,
124
- inputs=[source_input, log_input],
125
- outputs=[label_out, tier_out, confidence_out, latency_out],
126
- )
127
-
128
- gr.Examples(
129
- examples=EXAMPLE_LOGS,
130
- inputs=[source_input, log_input],
131
- label="📋 Example Logs (click to try)",
132
- )
133
-
134
- # ── Tab 2: Batch CSV ─────────────────────────────────────────────
135
- with gr.Tab("Batch CSV Upload"):
136
- gr.Markdown("""
137
- Upload a CSV with columns: **`source`**, **`log_message`**
138
- Download the classified CSV with added columns: `predicted_label`, `tier_used`, `confidence`.
139
- """)
140
- with gr.Row():
141
- with gr.Column():
142
- csv_input = gr.File(label="📂 Upload CSV", file_types=[".csv"])
143
- batch_btn = gr.Button("Classify All", variant="primary")
144
- with gr.Column():
145
- csv_output = gr.File(label="📥 Download Classified CSV")
146
- stats_out = gr.Textbox(label="📊 Stats", lines=12, interactive=False)
147
-
148
- batch_btn.click(
149
- fn=classify_batch,
150
- inputs=[csv_input],
151
- outputs=[csv_output, stats_out],
152
- )
153
-
154
- gr.Markdown("""
155
- **Sample CSV format:**
156
- ```
157
- source,log_message
158
- ModernCRM,User User123 logged in.
159
- LegacyCRM,Case escalation for ticket ID 7324 failed.
160
- BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
161
- ```
162
- """)
163
-
164
- # ── Tab 3: Architecture ──────────────────────────────────────────
165
- with gr.Tab("Architecture"):
166
- gr.Markdown("""
167
- ## 🏗️ 3-Tier Hybrid Pipeline
168
-
169
- | Tier | Method | Coverage | Latency | When Used |
170
- |------|--------|----------|---------|-----------|
171
- | 🟢 Regex | Python `re` patterns | ~21% | < 1ms | Fixed patterns (login, backup, etc.) |
172
- | 🔵 BERT | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories with 150+ samples |
173
- | 🟡 LLM | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM logs, rare patterns |
174
-
175
- ## 📊 Model Performance (from training)
176
- - **BERT + LogReg** trained on 2,410 synthetic enterprise logs
177
- - **Confidence threshold**: 0.5 (below → escalate to LLM)
178
- - **Source-aware routing**: `LegacyCRM` bypasses ML entirely (only 7 training samples)
179
-
180
- ## 🔑 Environment Variables
181
- | Secret | Required For |
182
- |--------|-------------|
183
- | `HF_TOKEN` | LLM inference (LegacyCRM logs) |
184
- """)
185
-
186
- if __name__ == "__main__":
187
- demo.launch(server_name="0.0.0.0", server_port=7860)