NOT-OMEGA committed on
Commit
9399c84
·
verified ·
1 Parent(s): b5d6b20

Delete hf_space

Browse files
hf_space/Dockerfile DELETED
@@ -1,20 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- RUN apt-get update && apt-get install -y --no-install-recommends \
4
- curl \
5
- && rm -rf /var/lib/apt/lists/*
6
-
7
- WORKDIR /app
8
-
9
- COPY requirements.txt .
10
- RUN pip install --no-cache-dir -r requirements.txt
11
-
12
- COPY . .
13
-
14
- RUN useradd -m -u 1000 appuser \
15
- && chown -R appuser:appuser /app
16
- USER appuser
17
-
18
- EXPOSE 7860
19
-
20
- CMD ["python", "app_gradio.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/app_gradio.py DELETED
@@ -1,542 +0,0 @@
1
- """
2
- Log Classification System — HuggingFace Spaces
3
- Ultra-modern 3D UI with custom CSS
4
- """
5
- from __future__ import annotations
6
- import io
7
- import time
8
- import pandas as pd
9
- import gradio as gr
10
- from classify import classify_log, classify_csv
11
-
12
- SOURCES = [
13
- "ModernCRM", "ModernHR", "BillingSystem",
14
- "AnalyticsEngine", "ThirdPartyAPI", "LegacyCRM",
15
- ]
16
-
17
- TIER_COLORS = {
18
- "Regex": "🟢",
19
- "BERT": "🔵",
20
- "LLM": "🟡",
21
- "LLM (fallback)": "🟠",
22
- }
23
-
24
- EXAMPLE_LOGS = [
25
- ["ModernCRM", "User User12345 logged in."],
26
- ["ModernHR", "Multiple login failures occurred on user 6454 account"],
27
- ["BillingSystem", "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"],
28
- ["AnalyticsEngine", "System crashed due to disk I/O failure on node-3"],
29
- ["LegacyCRM", "Case escalation for ticket ID 7324 failed — support agent is no longer active."],
30
- ["LegacyCRM", "The 'BulkEmailSender' feature will be deprecated in v5.0. Use 'EmailCampaignManager'."],
31
- ]
32
-
33
- # ── Custom CSS — 3D Modern Dark Theme ──────────────────────────────────────
34
- CUSTOM_CSS = """
35
- @import url('https://fonts.googleapis.com/css2?family=Rajdhani:wght@400;500;600;700&family=Share+Tech+Mono&family=Exo+2:wght@300;400;600;700&display=swap');
36
-
37
- :root {
38
- --bg-primary: #050810;
39
- --bg-secondary: #0a0f1e;
40
- --bg-card: #0d1425;
41
- --bg-card-hover: #111a30;
42
- --accent-cyan: #00d4ff;
43
- --accent-blue: #0066ff;
44
- --accent-purple: #7c3aed;
45
- --accent-green: #00ff88;
46
- --accent-orange: #ff6b00;
47
- --text-primary: #e2e8f0;
48
- --text-secondary: #94a3b8;
49
- --text-muted: #475569;
50
- --border-glow: rgba(0, 212, 255, 0.3);
51
- --shadow-3d: 0 20px 60px rgba(0, 0, 0, 0.8), 0 0 40px rgba(0, 102, 255, 0.15);
52
- --glow-cyan: 0 0 20px rgba(0, 212, 255, 0.4), 0 0 40px rgba(0, 212, 255, 0.2);
53
- --glow-blue: 0 0 20px rgba(0, 102, 255, 0.4);
54
- }
55
-
56
- /* ── Base ── */
57
- body, .gradio-container {
58
- background: var(--bg-primary) !important;
59
- font-family: 'Exo 2', sans-serif !important;
60
- color: var(--text-primary) !important;
61
- }
62
-
63
- .gradio-container {
64
- background:
65
- radial-gradient(ellipse at 20% 20%, rgba(0, 102, 255, 0.08) 0%, transparent 50%),
66
- radial-gradient(ellipse at 80% 80%, rgba(124, 58, 237, 0.08) 0%, transparent 50%),
67
- radial-gradient(ellipse at 50% 50%, rgba(0, 212, 255, 0.03) 0%, transparent 70%),
68
- var(--bg-primary) !important;
69
- min-height: 100vh;
70
- }
71
-
72
- /* ── Header ── */
73
- .main-header {
74
- text-align: center;
75
- padding: 48px 24px 32px;
76
- position: relative;
77
- }
78
-
79
- .main-header::before {
80
- content: '';
81
- position: absolute;
82
- top: 0; left: 50%;
83
- transform: translateX(-50%);
84
- width: 600px; height: 2px;
85
- background: linear-gradient(90deg, transparent, var(--accent-cyan), var(--accent-blue), transparent);
86
- box-shadow: var(--glow-cyan);
87
- }
88
-
89
- /* ── Tab Navigation ── */
90
- .tab-nav {
91
- background: rgba(13, 20, 37, 0.8) !important;
92
- border: 1px solid rgba(0, 212, 255, 0.15) !important;
93
- border-radius: 16px !important;
94
- padding: 6px !important;
95
- backdrop-filter: blur(20px) !important;
96
- box-shadow: var(--shadow-3d) !important;
97
- }
98
-
99
- .tab-nav button {
100
- font-family: 'Rajdhani', sans-serif !important;
101
- font-weight: 600 !important;
102
- font-size: 14px !important;
103
- letter-spacing: 1.5px !important;
104
- text-transform: uppercase !important;
105
- color: var(--text-secondary) !important;
106
- background: transparent !important;
107
- border: none !important;
108
- border-radius: 10px !important;
109
- padding: 12px 24px !important;
110
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
111
- }
112
-
113
- .tab-nav button.selected {
114
- color: var(--accent-cyan) !important;
115
- background: linear-gradient(135deg, rgba(0, 212, 255, 0.1), rgba(0, 102, 255, 0.1)) !important;
116
- box-shadow: 0 0 20px rgba(0, 212, 255, 0.2), inset 0 1px 0 rgba(0, 212, 255, 0.3) !important;
117
- border: 1px solid rgba(0, 212, 255, 0.3) !important;
118
- }
119
-
120
- /* ── Cards / Blocks ── */
121
- .gradio-group, .gr-group {
122
- background: var(--bg-card) !important;
123
- border: 1px solid rgba(0, 212, 255, 0.1) !important;
124
- border-radius: 20px !important;
125
- box-shadow: var(--shadow-3d), inset 0 1px 0 rgba(255,255,255,0.03) !important;
126
- transition: all 0.4s ease !important;
127
- transform: perspective(1000px) rotateX(0deg);
128
- position: relative;
129
- overflow: hidden;
130
- }
131
-
132
- .gradio-group::before {
133
- content: '';
134
- position: absolute;
135
- top: 0; left: 0; right: 0;
136
- height: 1px;
137
- background: linear-gradient(90deg, transparent, rgba(0, 212, 255, 0.5), transparent);
138
- }
139
-
140
- .gradio-group:hover {
141
- border-color: rgba(0, 212, 255, 0.25) !important;
142
- box-shadow: var(--shadow-3d), var(--glow-cyan) !important;
143
- transform: perspective(1000px) translateY(-4px) !important;
144
- }
145
-
146
- /* ── Labels ── */
147
- label span, .gr-label {
148
- font-family: 'Rajdhani', sans-serif !important;
149
- font-weight: 600 !important;
150
- letter-spacing: 1.5px !important;
151
- text-transform: uppercase !important;
152
- font-size: 11px !important;
153
- color: var(--accent-cyan) !important;
154
- opacity: 0.85;
155
- }
156
-
157
- /* ── Inputs ── */
158
- input, textarea, select, .gr-input {
159
- background: rgba(5, 8, 16, 0.8) !important;
160
- border: 1px solid rgba(0, 212, 255, 0.15) !important;
161
- border-radius: 12px !important;
162
- color: var(--text-primary) !important;
163
- font-family: 'Share Tech Mono', monospace !important;
164
- font-size: 13px !important;
165
- transition: all 0.3s ease !important;
166
- padding: 12px 16px !important;
167
- }
168
-
169
- input:focus, textarea:focus {
170
- border-color: var(--accent-cyan) !important;
171
- box-shadow: 0 0 0 3px rgba(0, 212, 255, 0.1), var(--glow-cyan) !important;
172
- outline: none !important;
173
- background: rgba(0, 212, 255, 0.03) !important;
174
- }
175
-
176
- /* ── Dropdown ── */
177
- .gr-dropdown select, .gradio-dropdown {
178
- background: rgba(5, 8, 16, 0.9) !important;
179
- border: 1px solid rgba(0, 212, 255, 0.2) !important;
180
- border-radius: 12px !important;
181
- color: var(--accent-cyan) !important;
182
- font-family: 'Rajdhani', sans-serif !important;
183
- font-weight: 600 !important;
184
- }
185
-
186
- /* ── Primary Button ── */
187
- button.primary, .gr-button-primary, button[variant="primary"] {
188
- font-family: 'Rajdhani', sans-serif !important;
189
- font-weight: 700 !important;
190
- font-size: 15px !important;
191
- letter-spacing: 2px !important;
192
- text-transform: uppercase !important;
193
- background: linear-gradient(135deg, #0066ff 0%, #00d4ff 50%, #0066ff 100%) !important;
194
- background-size: 200% 200% !important;
195
- border: none !important;
196
- border-radius: 12px !important;
197
- padding: 14px 32px !important;
198
- color: #fff !important;
199
- box-shadow:
200
- 0 8px 32px rgba(0, 102, 255, 0.4),
201
- 0 2px 8px rgba(0, 0, 0, 0.5),
202
- inset 0 1px 0 rgba(255,255,255,0.2) !important;
203
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
204
- animation: gradientShift 3s ease infinite !important;
205
- position: relative !important;
206
- overflow: hidden !important;
207
- }
208
-
209
- button.primary::before {
210
- content: '';
211
- position: absolute;
212
- top: -50%; left: -60%;
213
- width: 40%; height: 200%;
214
- background: rgba(255,255,255,0.1);
215
- transform: skewX(-20deg);
216
- transition: left 0.6s ease;
217
- }
218
-
219
- button.primary:hover::before {
220
- left: 120%;
221
- }
222
-
223
- button.primary:hover {
224
- transform: translateY(-3px) scale(1.02) !important;
225
- box-shadow:
226
- 0 16px 48px rgba(0, 102, 255, 0.5),
227
- 0 0 30px rgba(0, 212, 255, 0.3),
228
- inset 0 1px 0 rgba(255,255,255,0.3) !important;
229
- }
230
-
231
- button.primary:active {
232
- transform: translateY(0px) scale(0.98) !important;
233
- }
234
-
235
- @keyframes gradientShift {
236
- 0%, 100% { background-position: 0% 50%; }
237
- 50% { background-position: 100% 50%; }
238
- }
239
-
240
- /* ── Output Textboxes — 3D Result Cards ── */
241
- .output-card input, .output-card textarea {
242
- background: linear-gradient(135deg, rgba(0, 212, 255, 0.05), rgba(0, 102, 255, 0.05)) !important;
243
- border: 1px solid rgba(0, 212, 255, 0.2) !important;
244
- border-radius: 14px !important;
245
- font-family: 'Share Tech Mono', monospace !important;
246
- font-size: 16px !important;
247
- font-weight: bold !important;
248
- color: var(--accent-cyan) !important;
249
- text-align: center !important;
250
- box-shadow: inset 0 2px 8px rgba(0,0,0,0.3), 0 0 20px rgba(0, 212, 255, 0.1) !important;
251
- }
252
-
253
- /* ── Table / DataFrame ── */
254
- table {
255
- border-collapse: separate !important;
256
- border-spacing: 0 4px !important;
257
- font-family: 'Share Tech Mono', monospace !important;
258
- font-size: 12px !important;
259
- }
260
-
261
- th {
262
- background: rgba(0, 102, 255, 0.2) !important;
263
- color: var(--accent-cyan) !important;
264
- font-family: 'Rajdhani', sans-serif !important;
265
- letter-spacing: 1.5px !important;
266
- text-transform: uppercase !important;
267
- font-size: 11px !important;
268
- padding: 10px 16px !important;
269
- border: none !important;
270
- }
271
-
272
- td {
273
- background: rgba(13, 20, 37, 0.6) !important;
274
- color: var(--text-secondary) !important;
275
- padding: 8px 16px !important;
276
- border: none !important;
277
- border-top: 1px solid rgba(0, 212, 255, 0.05) !important;
278
- transition: background 0.2s ease !important;
279
- }
280
-
281
- tr:hover td {
282
- background: rgba(0, 212, 255, 0.05) !important;
283
- color: var(--text-primary) !important;
284
- }
285
-
286
- /* ── Markdown ── */
287
- .prose, .markdown {
288
- color: var(--text-secondary) !important;
289
- font-family: 'Exo 2', sans-serif !important;
290
- }
291
-
292
- .prose h1, .markdown h1 {
293
- font-family: 'Rajdhani', sans-serif !important;
294
- font-size: 3rem !important;
295
- font-weight: 700 !important;
296
- letter-spacing: 3px !important;
297
- text-transform: uppercase !important;
298
- background: linear-gradient(135deg, #ffffff 0%, var(--accent-cyan) 40%, var(--accent-blue) 100%) !important;
299
- -webkit-background-clip: text !important;
300
- -webkit-text-fill-color: transparent !important;
301
- background-clip: text !important;
302
- filter: drop-shadow(0 0 30px rgba(0, 212, 255, 0.3)) !important;
303
- margin-bottom: 8px !important;
304
- }
305
-
306
- .prose h2, .markdown h2 {
307
- font-family: 'Rajdhani', sans-serif !important;
308
- font-size: 1.4rem !important;
309
- font-weight: 600 !important;
310
- letter-spacing: 2px !important;
311
- color: var(--accent-cyan) !important;
312
- text-transform: uppercase !important;
313
- border-bottom: 1px solid rgba(0, 212, 255, 0.2) !important;
314
- padding-bottom: 8px !important;
315
- }
316
-
317
- .prose p, .markdown p {
318
- color: var(--text-secondary) !important;
319
- line-height: 1.7 !important;
320
- font-size: 14px !important;
321
- }
322
-
323
- .prose strong, .markdown strong {
324
- color: var(--accent-cyan) !important;
325
- }
326
-
327
- /* ── Code blocks ── */
328
- code, pre {
329
- font-family: 'Share Tech Mono', monospace !important;
330
- background: rgba(0, 212, 255, 0.05) !important;
331
- border: 1px solid rgba(0, 212, 255, 0.15) !important;
332
- border-radius: 8px !important;
333
- color: var(--accent-cyan) !important;
334
- font-size: 12px !important;
335
- }
336
-
337
- /* ── Examples Table ── */
338
- .examples {
339
- background: var(--bg-card) !important;
340
- border: 1px solid rgba(0, 212, 255, 0.1) !important;
341
- border-radius: 14px !important;
342
- overflow: hidden !important;
343
- }
344
-
345
- .examples table th {
346
- background: rgba(0, 102, 255, 0.15) !important;
347
- }
348
-
349
- /* ── File Upload ── */
350
- .gr-file {
351
- background: rgba(5, 8, 16, 0.8) !important;
352
- border: 2px dashed rgba(0, 212, 255, 0.25) !important;
353
- border-radius: 16px !important;
354
- transition: all 0.3s ease !important;
355
- }
356
-
357
- .gr-file:hover {
358
- border-color: var(--accent-cyan) !important;
359
- background: rgba(0, 212, 255, 0.03) !important;
360
- box-shadow: var(--glow-cyan) !important;
361
- }
362
-
363
- /* ── Scrollbar ── */
364
- ::-webkit-scrollbar { width: 6px; height: 6px; }
365
- ::-webkit-scrollbar-track { background: var(--bg-secondary); }
366
- ::-webkit-scrollbar-thumb {
367
- background: linear-gradient(var(--accent-blue), var(--accent-cyan));
368
- border-radius: 3px;
369
- }
370
-
371
- /* ── Pulsing accent line ── */
372
- @keyframes pulse-glow {
373
- 0%, 100% { opacity: 0.4; box-shadow: 0 0 10px rgba(0,212,255,0.3); }
374
- 50% { opacity: 1; box-shadow: 0 0 30px rgba(0,212,255,0.8); }
375
- }
376
-
377
- /* ── Tier badge colors ── */
378
- .tier-regex { color: #00ff88 !important; }
379
- .tier-bert { color: #00d4ff !important; }
380
- .tier-llm { color: #ffd700 !important; }
381
- """
382
-
383
- # ── Functions ───────────────────────────────────────────────────────────────
384
def classify_single(source: str, log_message: str) -> tuple[str, str, str, str]:
    """Classify one log message and format the result for the four UI textboxes.

    Args:
        source: Name of the system that produced the log.
        log_message: Raw log text. ``None``, empty, or whitespace-only input
            is treated as "nothing to classify".

    Returns:
        ``(label, tier, confidence, latency)`` as display strings. When there
        is nothing to classify, every field is the placeholder ``"—"``.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise AttributeError before any placeholder could be returned.
    if not log_message or not log_message.strip():
        return "—", "—", "—", "—"

    # Wall-clock time of the whole classify_log call as seen by the UI.
    t0 = time.perf_counter()
    result = classify_log(source, log_message)
    latency_ms = (time.perf_counter() - t0) * 1000

    label = result["label"]
    tier = result["tier"]
    # A confidence of None is rendered as "N/A" rather than as a percentage.
    confidence = f"{result['confidence']:.1%}" if result["confidence"] is not None else "N/A"
    icon = TIER_COLORS.get(tier, "⚪")  # unknown tiers fall back to a neutral icon
    return label, f"{icon} {tier}", confidence, f"{latency_ms:.1f} ms"
395
-
396
-
397
def classify_batch(file):
    """Classify every row of an uploaded CSV and summarise the outcome.

    Args:
        file: The upload handed over by the UI. Either an object exposing the
            on-disk path as ``.name`` or a plain path string
            (NOTE(review): which one arrives depends on the Gradio version /
            ``gr.File`` configuration — confirm). ``None`` means no upload.

    Returns:
        ``(output_path, stats_text)``. On any failure ``output_path`` is
        ``None`` and ``stats_text`` carries the user-facing error message.
    """
    import contextlib
    import os
    import tempfile

    if file is None:
        return None, "⚠️ Please upload a CSV file."

    input_path = file if isinstance(file, (str, os.PathLike)) else file.name

    # Each request gets its own output file. A single fixed path in /tmp
    # would be shared by concurrent requests, letting one overwrite (or
    # download) another's results.
    fd, tmp_path = tempfile.mkstemp(prefix="classified_output_", suffix=".csv")
    os.close(fd)

    try:
        output_path, df = classify_csv(input_path, tmp_path)
    except Exception as e:
        # Don't leave an empty temp file behind when classification fails.
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
        # ValueError signals a malformed CSV (user error); anything else is
        # reported as an internal error.
        if isinstance(e, ValueError):
            return None, f"⚠️ {e}"
        return None, f"❌ Error: {e}"

    total = len(df)
    tier_counts = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()
    # With zero rows both dicts are empty, so v/total is never evaluated.
    tier_lines = "\n".join(f" {TIER_COLORS.get(k,'⚪')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
    label_lines = "\n".join(f" • {k}: {v}" for k, v in label_counts.items())
    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}"
    )
    return output_path, stats
417
-
418
-
419
- # ── UI ───────────────────────────────────────────────────────────────────────
420
- with gr.Blocks(
421
- title="LOG CLASSIFICATION SYSTEM",
422
- theme=gr.themes.Base(
423
- primary_hue="blue",
424
- secondary_hue="cyan",
425
- neutral_hue="slate",
426
- font=[gr.themes.GoogleFont("Exo 2"), "sans-serif"],
427
- font_mono=[gr.themes.GoogleFont("Share Tech Mono"), "monospace"],
428
- ).set(
429
- body_background_fill="#050810",
430
- body_text_color="#e2e8f0",
431
- block_background_fill="#0d1425",
432
- block_border_color="rgba(0,212,255,0.15)",
433
- block_label_text_color="#00d4ff",
434
- input_background_fill="#050810",
435
- input_border_color="rgba(0,212,255,0.2)",
436
- button_primary_background_fill="linear-gradient(135deg, #0066ff, #00d4ff)",
437
- button_primary_text_color="#ffffff",
438
- border_color_accent="#00d4ff",
439
- color_accent_soft="rgba(0,212,255,0.1)",
440
- ),
441
- css=CUSTOM_CSS
442
- ) as demo:
443
-
444
- gr.Markdown("""
445
- # 🔍 LOG CLASSIFICATION SYSTEM
446
- **3-tier hybrid pipeline** — 🟢 Regex · 🔵 BERT + ML · 🟡 LLM
447
- *Enterprise-grade log monitoring at production scale*
448
- """)
449
-
450
- with gr.Tabs():
451
-
452
- # ── Tab 1: Single Log ─────────────────────────────────────────────
453
- with gr.Tab("⚡ SINGLE LOG"):
454
- with gr.Row():
455
- with gr.Column(scale=1):
456
- source_input = gr.Dropdown(
457
- choices=SOURCES,
458
- value="ModernCRM",
459
- label="SOURCE SYSTEM",
460
- )
461
- with gr.Column(scale=3):
462
- log_input = gr.Textbox(
463
- label="LOG MESSAGE",
464
- placeholder="Paste a log message here...",
465
- lines=3,
466
- )
467
-
468
- classify_btn = gr.Button("▶ CLASSIFY LOG", variant="primary", size="lg")
469
-
470
- with gr.Row():
471
- label_out = gr.Textbox(label="🏷️ PREDICTED LABEL", interactive=False)
472
- tier_out = gr.Textbox(label="⚙️ TIER USED", interactive=False)
473
- confidence_out = gr.Textbox(label="📈 CONFIDENCE", interactive=False)
474
- latency_out = gr.Textbox(label="⏱️ LATENCY", interactive=False)
475
-
476
- classify_btn.click(
477
- fn=classify_single,
478
- inputs=[source_input, log_input],
479
- outputs=[label_out, tier_out, confidence_out, latency_out],
480
- )
481
-
482
- gr.Examples(
483
- examples=EXAMPLE_LOGS,
484
- inputs=[source_input, log_input],
485
- label="📋 EXAMPLE LOGS — click to try",
486
- )
487
-
488
- # ── Tab 2: Batch CSV ──────────────────────────────────────────────
489
- with gr.Tab("📦 BATCH CSV"):
490
- gr.Markdown("""
491
- ### Bulk Classification
492
- Upload a CSV with columns: **`source`**, **`log_message`**
493
- Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
494
- """)
495
- with gr.Row():
496
- with gr.Column():
497
- csv_input = gr.File(label="📂 UPLOAD CSV", file_types=[".csv"])
498
- batch_btn = gr.Button("▶ CLASSIFY ALL", variant="primary")
499
- with gr.Column():
500
- csv_output = gr.File(label="📥 DOWNLOAD RESULTS")
501
- stats_out = gr.Textbox(label="📊 STATISTICS", lines=12, interactive=False)
502
-
503
- batch_btn.click(
504
- fn=classify_batch,
505
- inputs=[csv_input],
506
- outputs=[csv_output, stats_out],
507
- )
508
-
509
- gr.Markdown("""
510
- **Sample CSV format:**
511
- ```
512
- source,log_message
513
- ModernCRM,User User123 logged in.
514
- LegacyCRM,Case escalation for ticket ID 7324 failed.
515
- BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
516
- ```
517
- """)
518
-
519
- # ── Tab 3: Architecture ───────────────────────────────────────────
520
- with gr.Tab("🏗️ ARCHITECTURE"):
521
- gr.Markdown("""
522
- ## 3-Tier Hybrid Pipeline
523
-
524
- | Tier | Method | Coverage | Latency | Trigger |
525
- |------|--------|----------|---------|---------|
526
- | 🟢 **Regex** | Python `re` patterns | ~21% | < 1ms | Fixed patterns |
527
- | 🔵 **BERT** | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories |
528
- | 🟡 **LLM** | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM + rare patterns |
529
-
530
- ## Model Performance
531
- - **Training data**: 2,410 synthetic enterprise logs
532
- - **Confidence threshold**: 0.5 (below → escalate to LLM)
533
- - **Source-aware routing**: `LegacyCRM` → LLM directly
534
-
535
- ## Environment Variables
536
- | Secret | Purpose |
537
- |--------|---------|
538
- | `HF_TOKEN` | LLM inference for LegacyCRM logs |
539
- """)
540
-
541
- if __name__ == "__main__":
542
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/classify.py DELETED
@@ -1,198 +0,0 @@
1
- """
2
- classify.py — 3-Tier Hybrid Pipeline (V3 — Latency-Tracked)
3
-
4
- Architecture:
5
- LegacyCRM → LLM directly
6
- Others → Regex → BERT (batch) → LLM fallback
7
-
8
- Changes in V3:
9
- - Tier-wise latency tracking (regex_ms, bert_ms, llm_ms)
10
- - Pipeline summary with p50/p95 per tier
11
- - Defensive: LLM timeout + retry baked in via processor_llm
12
- - classify_logs returns richer result dict
13
- """
14
- from __future__ import annotations
15
- import time
16
- import statistics
17
- import pandas as pd
18
- from processor_regex import classify_with_regex
19
- from processor_bert import classify_batch as bert_batch
20
- from processor_llm import classify_with_llm
21
-
22
- LEGACY_SOURCE = "LegacyCRM"
23
-
24
-
25
- # ── Result type ─────────────────────────────────────────────────────────────
26
- def _make_result(label: str, tier: str, confidence, latency_ms: float) -> dict:
27
- return {
28
- "label": label,
29
- "tier": tier,
30
- "confidence": confidence,
31
- "latency_ms": round(latency_ms, 3),
32
- }
33
-
34
-
35
- # ── Single log (backward-compatible) ────────────────────────────────────────
36
def classify_log(source: str, log_msg: str) -> dict:
    """Classify a single log by running it through the batch pipeline.

    Returns the one result dict produced by ``classify_logs`` for the
    ``(source, log_msg)`` pair: label, tier, confidence, latency_ms.
    """
    single_batch = [(source, log_msg)]
    return classify_logs(single_batch)[0]
40
-
41
-
42
- # ── Batch pipeline (main entry point) ───────────────────────────────────────
43
def classify_logs(logs: list[tuple[str, str]]) -> list[dict]:
    """
    Batch classify with 3-tier routing and per-result latency.

    Tier routing:
        LegacyCRM source → LLM directly
        Regex match      → done
        Remainder        → BERT batch → LLM when BERT yields "Unclassified"

    Args:
        logs: ``(source, log_message)`` pairs.

    Returns:
        One dict per input, in input order, each
        ``{label, tier, confidence, latency_ms}``. Regex hits carry a
        confidence of 1.0, BERT hits the score returned by the BERT batch
        call, and LLM results ``None``.
    """
    results: list = [None] * len(logs)
    llm_indices: list[int] = []
    bert_indices: list[int] = []

    # ── Step 1: route each log; regex matches are resolved on the spot ──────
    for i, (source, log_msg) in enumerate(logs):
        if source == LEGACY_SOURCE:
            llm_indices.append(i)
            continue
        t0 = time.perf_counter()
        label = classify_with_regex(log_msg)
        t1 = time.perf_counter()
        if label:
            results[i] = _make_result(label, "Regex", 1.0, (t1 - t0) * 1000)
        else:
            bert_indices.append(i)

    # ── Step 2: BERT over everything the regex tier did not match ───────────
    if bert_indices:
        bert_msgs = [logs[i][1] for i in bert_indices]

        t_bert_start = time.perf_counter()
        bert_results = bert_batch(bert_msgs)
        t_bert_end = time.perf_counter()

        # The batch is timed as a whole, so every log in it is reported with
        # the same amortised per-log latency.
        bert_ms_per_log = (t_bert_end - t_bert_start) * 1000 / len(bert_msgs)

        for idx, (label, conf) in zip(bert_indices, bert_results):
            if label != "Unclassified":
                results[idx] = _make_result(label, "BERT", conf, bert_ms_per_log)
            else:
                llm_indices.append(idx)

    # ── Step 3: LLM — LegacyCRM logs first, then BERT fallbacks ─────────────
    for i in llm_indices:
        source, log_msg = logs[i]
        t0 = time.perf_counter()
        label = classify_with_llm(log_msg)
        t1 = time.perf_counter()
        # Separate direct LegacyCRM routing from logs that fell through BERT.
        tier = "LLM" if source == LEGACY_SOURCE else "LLM (fallback)"
        results[i] = _make_result(label, tier, None, (t1 - t0) * 1000)

    return results
103
-
104
-
105
- # ── Pipeline summary ─────────────────────────────────────────────────────────
106
def pipeline_summary(results: list[dict]) -> dict:
    """
    Roll classify_logs output up into per-tier latency stats and label counts.

    Returns a dict with:
        total        — number of results
        tier_stats   — per tier: count, pct, p50_ms, p95_ms, p99_ms, mean_ms
        label_counts — occurrences of each label
    """
    latencies_by_tier: dict[str, list[float]] = {}
    label_counts: dict[str, int] = {}

    for entry in results:
        tier_name = entry["tier"]
        if tier_name not in latencies_by_tier:
            latencies_by_tier[tier_name] = []
        latencies_by_tier[tier_name].append(entry["latency_ms"])
        label_counts[entry["label"]] = label_counts.get(entry["label"], 0) + 1

    total = len(results)

    def _rank(ordered, fraction):
        # Pick the element at position int(len * fraction), clamped to the last one.
        return ordered[min(int(len(ordered) * fraction), len(ordered) - 1)]

    tier_stats = {}
    for tier_name, samples in latencies_by_tier.items():
        ordered = sorted(samples)
        count = len(ordered)
        tier_stats[tier_name] = dict(
            count=count,
            pct=round(count / total * 100, 1),
            p50_ms=round(statistics.median(ordered), 2),
            p95_ms=round(_rank(ordered, 0.95), 2),
            p99_ms=round(_rank(ordered, 0.99), 2),
            mean_ms=round(statistics.mean(ordered), 2),
        )

    return dict(total=total, tier_stats=tier_stats, label_counts=label_counts)
138
-
139
-
140
- # ── CSV batch classify ───────────────────────────────────────────────────────
141
def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str, pd.DataFrame]:
    """
    Classify every row of a CSV file and write the annotated result.

    Args:
        input_path: CSV to read. Must contain 'source' and 'log_message' columns.
        output_path: Where the annotated CSV is written.

    Returns:
        ``(output_path, df)`` where ``df`` is the input frame with the added
        columns 'predicted_label', 'tier_used', 'latency_ms' and 'confidence'.

    Raises:
        ValueError: if a required column is missing.
    """
    df = pd.read_csv(input_path)
    required = {"source", "log_message"}
    if not required.issubset(df.columns):
        raise ValueError(f"CSV mein ye columns chahiye: {required}. Mila: {set(df.columns)}")

    # Empty cells come back from read_csv as float NaN and numeric-looking
    # cells as numbers; classify_logs is declared to take (str, str) pairs.
    # Coerce for classification only — the original columns are written out
    # unchanged.
    sources = df["source"].fillna("").astype(str)
    messages = df["log_message"].fillna("").astype(str)
    results = classify_logs(list(zip(sources, messages)))

    df["predicted_label"] = [r["label"] for r in results]
    df["tier_used"] = [r["tier"] for r in results]
    df["latency_ms"] = [r["latency_ms"] for r in results]
    # Confidence is stored as a display string; None becomes "N/A".
    df["confidence"] = [
        f"{r['confidence']:.1%}" if r["confidence"] is not None else "N/A"
        for r in results
    ]

    df.to_csv(output_path, index=False)
    return output_path, df
165
-
166
-
167
- # Aliases
168
- classify = classify_logs
169
-
170
-
171
- # ── Self-test ────────────────────────────────────────────────────────────────
172
if __name__ == "__main__":
    # Smoke test: push a handful of sample logs through the pipeline and
    # print the per-log results followed by the aggregate summary.
    sample = [
        ("ModernCRM", "IP 192.168.133.114 blocked due to potential attack"),
        ("BillingSystem", "User User12345 logged in."),
        ("AnalyticsEngine", "File data_6957.csv uploaded successfully by user User265."),
        ("ModernHR", "GET /v2/servers/detail HTTP/1.1 status: 200 len: 1583 time: 0.19"),
        ("ModernHR", "Admin access escalation detected for user 9429"),
        ("LegacyCRM", "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active."),
        ("LegacyCRM", "The 'ReportGenerator' module will be retired in version 4.0."),
    ]

    print(f'{"Source":<20} {"Tier":<18} {"Conf":>6} {"Lat(ms)":>8} {"Label":<25} Log')
    print("─" * 115)
    results = classify_logs(sample)
    for (source, log), r in zip(sample, results):
        # Compare against None explicitly: a plain truthiness test would
        # print a legitimate 0.0 confidence as N/A.
        conf = f"{r['confidence']:.0%}" if r["confidence"] is not None else " N/A"
        print(f'{source:<20} {r["tier"]:<18} {conf:>6} {r["latency_ms"]:>8.1f} {r["label"]:<25} {log[:40]}')

    summary = pipeline_summary(results)
    print("\n📊 Pipeline Summary:")
    for tier, stats in summary["tier_stats"].items():
        print(f" {tier}: {stats['count']} logs ({stats['pct']}%) | "
              f"p50={stats['p50_ms']}ms p95={stats['p95_ms']}ms p99={stats['p99_ms']}ms")

    print("\n🏷️ Label distribution:")
    # Most frequent label first.
    for label, count in sorted(summary["label_counts"].items(), key=lambda x: -x[1]):
        print(f" • {label}: {count}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/models/log_classifier.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bfe9c71b71412797de0d426be2255566dbf6cf87b3f2ae5d2cd1fd69a98d18d
3
- size 23997
 
 
 
 
hf_space/onnx_model/config.json DELETED
@@ -1,24 +0,0 @@
1
- {
2
- "architectures": [
3
- "BertModel"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "classifier_dropout": null,
7
- "gradient_checkpointing": false,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 384,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 1536,
13
- "layer_norm_eps": 1e-12,
14
- "max_position_embeddings": 512,
15
- "model_type": "bert",
16
- "num_attention_heads": 12,
17
- "num_hidden_layers": 6,
18
- "pad_token_id": 0,
19
- "position_embedding_type": "absolute",
20
- "transformers_version": "4.57.6",
21
- "type_vocab_size": 2,
22
- "use_cache": true,
23
- "vocab_size": 30522
24
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/onnx_model/special_tokens_map.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "cls_token": {
3
- "content": "[CLS]",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "mask_token": {
10
- "content": "[MASK]",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "[PAD]",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "sep_token": {
24
- "content": "[SEP]",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "unk_token": {
31
- "content": "[UNK]",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- }
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/onnx_model/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
hf_space/onnx_model/tokenizer_config.json DELETED
@@ -1,65 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "100": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "101": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "102": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "103": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "clean_up_tokenization_spaces": false,
45
- "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
- "do_lower_case": true,
48
- "extra_special_tokens": {},
49
- "mask_token": "[MASK]",
50
- "max_length": 128,
51
- "model_max_length": 512,
52
- "never_split": null,
53
- "pad_to_multiple_of": null,
54
- "pad_token": "[PAD]",
55
- "pad_token_type_id": 0,
56
- "padding_side": "right",
57
- "sep_token": "[SEP]",
58
- "stride": 0,
59
- "strip_accents": null,
60
- "tokenize_chinese_chars": true,
61
- "tokenizer_class": "BertTokenizer",
62
- "truncation_side": "right",
63
- "truncation_strategy": "longest_first",
64
- "unk_token": "[UNK]"
65
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/onnx_model/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
hf_space/processor_bert.py DELETED
@@ -1,216 +0,0 @@
1
"""
processor_bert.py — ONNX Runtime powered BERT classifier
Speed: 82 logs/s → 2000+ logs/s

How it works:
1. ONNX Runtime: 3-5x faster than plain PyTorch
2. Batch processing: 64 logs are processed together
3. Pre-allocated buffers: no wasted memory
"""
10
- from __future__ import annotations
11
- import os
12
- import numpy as np
13
- import joblib
14
-
15
# ── Lazily-populated module state (assigned by _load_models) ────────────
_USE_ONNX = False        # True once the ONNX Runtime backend has loaded
_embedding_model = None  # SentenceTransformer used by the PyTorch fallback
_classifier = None       # classifier deserialized from MODEL_PATH
_ort_session = None      # ONNX Runtime inference session
_ort_tokenizer = None    # tokenizer loaded from ONNX_DIR

# Model artefacts are resolved relative to this file.
# NOTE(review): the tokenizer files in this commit live under `onnx_model/`,
# while ONNX_DIR points at `models/onnx` — confirm the path is intended.
_MODELS_DIR = os.path.join(os.path.dirname(__file__), 'models')
MODEL_PATH = os.path.join(_MODELS_DIR, 'log_classifier.joblib')
ONNX_DIR = os.path.join(_MODELS_DIR, 'onnx')
CONFIDENCE_THRESHOLD = 0.30  # below this, a prediction is reported as 'Unclassified'
DEFAULT_BATCH = 64           # number of logs embedded and classified per batch
26
-
27
-
28
def _load_models():
    """Load the classifier and one embedding backend on first use.

    The ONNX Runtime backend is preferred when ``model.onnx`` exists under
    ``ONNX_DIR``; any failure while loading it falls back to the
    SentenceTransformer (PyTorch) backend.

    Everything is loaded into locals first and published to the module
    globals only at the very end. ``_classifier`` is the "already loaded"
    marker, so it must not be set while a backend is still missing:
    otherwise a failed backend load would leave the module half-initialised
    and every later call would return early with no usable embedder.

    Raises:
        FileNotFoundError: if the classifier file at ``MODEL_PATH`` is missing.
    """
    global _USE_ONNX, _embedding_model, _classifier, _ort_session, _ort_tokenizer

    if _classifier is not None:
        return  # Already fully loaded (the classifier is published last)

    # ── Load the classifier ────────────────────────────────
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(
            f'Model nahi mila: {MODEL_PATH}\n'
            'Pehle Colab notebook run karo aur model download karo.'
        )
    classifier = joblib.load(MODEL_PATH)

    # ── Try ONNX (fast), fall back to PyTorch ──────────────
    session = None
    tokenizer = None
    embedder = None
    onnx_model_file = os.path.join(ONNX_DIR, 'model.onnx')

    if os.path.exists(onnx_model_file):
        try:
            import onnxruntime as ort
            from transformers import AutoTokenizer

            # CPU-optimised session options
            sess_opts = ort.SessionOptions()
            sess_opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
            # os.cpu_count() may return None; 0 lets ONNX Runtime pick a default.
            sess_opts.intra_op_num_threads = os.cpu_count() or 0
            sess_opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL

            session = ort.InferenceSession(
                onnx_model_file,
                sess_options=sess_opts,
                providers=['CPUExecutionProvider']
            )
            tokenizer = AutoTokenizer.from_pretrained(ONNX_DIR)
            print('[BERT] ✅ ONNX Runtime loaded — FAST MODE')

        except Exception as e:
            # Best-effort: any ONNX problem degrades to the PyTorch path.
            print(f'[BERT] ONNX load failed ({e}), fallback to PyTorch')
            session = None
            tokenizer = None

    use_onnx = session is not None and tokenizer is not None

    if not use_onnx:
        from sentence_transformers import SentenceTransformer
        embedder = SentenceTransformer('all-MiniLM-L6-v2')
        print('[BERT] ⚠️ PyTorch mode (install ONNX for 3-5x speedup)')

    # ── Publish atomically: only reached when a backend is usable ──
    _ort_session = session
    _ort_tokenizer = tokenizer
    _embedding_model = embedder
    _USE_ONNX = use_onnx
    _classifier = classifier
74
-
75
-
76
def _embed_onnx(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the ONNX Runtime session.

    Tokenizes the batch, runs the ONNX model, mean-pools the first model
    output over the sequence axis weighted by the attention mask, and
    L2-normalizes each row.

    Requires ``_load_models()`` to have populated ``_ort_tokenizer`` and
    ``_ort_session``. No PyTorch import is needed here: the tokenizer is
    asked for NumPy arrays directly.

    Args:
        texts: log messages to embed.

    Returns:
        One L2-normalized embedding row per input text.
    """
    inputs = _ort_tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors='np'  # NumPy arrays directly, no torch tensors
    )

    input_ids = inputs['input_ids'].astype(np.int64)
    attention_mask = inputs['attention_mask'].astype(np.int64)

    ort_inputs = {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
    }
    # Some exported graphs also expect token_type_ids; supply zeros if the
    # tokenizer did not produce them.
    if any(spec.name == 'token_type_ids' for spec in _ort_session.get_inputs()):
        ort_inputs['token_type_ids'] = inputs.get(
            'token_type_ids', np.zeros_like(input_ids)
        ).astype(np.int64)

    outputs = _ort_session.run(None, ort_inputs)
    hidden = outputs[0]  # expected (batch, seq_len, hidden)

    # Mean pooling (attention-mask weighted)
    mask = attention_mask[:, :, None].astype(np.float32)
    summed = (hidden * mask).sum(axis=1)
    # Guard against a row with an all-zero mask (would divide by zero).
    counts = np.maximum(mask.sum(axis=1), 1e-9)
    embeddings = summed / counts

    # L2 normalize
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    return embeddings / (norms + 1e-8)
110
-
111
-
112
def _embed_pytorch(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the SentenceTransformer fallback model.

    Delegates to ``_embedding_model.encode`` and requests NumPy output with
    normalized embeddings and no progress bar.
    """
    encode_options = {
        'batch_size': DEFAULT_BATCH,
        'convert_to_numpy': True,
        'normalize_embeddings': True,
        'show_progress_bar': False,
    }
    return _embedding_model.encode(texts, **encode_options)
121
-
122
-
123
- # ── PUBLIC API ──────────────────────────────────────────────
124
-
125
def classify_with_bert(log_message: str) -> tuple[str, float]:
    """Classify a single log message.

    Wraps the message in a one-element batch and delegates to
    ``classify_batch``.

    Returns:
        ``(label, confidence)`` for the message.
    """
    _load_models()
    single_batch = [log_message]
    return classify_batch(single_batch)[0]
133
-
134
-
135
def classify_batch(log_messages: list[str]) -> list[tuple[str, float]]:
    """Classify many log messages at once.

    Messages are processed in chunks of ``DEFAULT_BATCH``. Each chunk is
    embedded with the active backend and scored with the classifier's
    ``predict_proba``. The label is taken from the highest-probability
    column, so the label always matches the reported confidence and the
    classifier runs once per chunk instead of twice.

    Args:
        log_messages: messages to classify; an empty list returns ``[]``
            without loading any model.

    Returns:
        One ``(label, confidence)`` tuple per input, in input order. The
        label is ``'Unclassified'`` when the confidence is below
        ``CONFIDENCE_THRESHOLD``.

    Example:
        results = classify_batch(['log1', 'log2', 'log3'])
        for label, conf in results:
            print(f'{label}: {conf:.1%}')
    """
    if not log_messages:
        return []  # nothing to do: skip the expensive model load

    _load_models()

    embed = _embed_onnx if _USE_ONNX else _embed_pytorch
    # predict_proba columns follow the order of classes_.
    classes = np.asarray(_classifier.classes_)
    results: list[tuple[str, float]] = []

    # Process in batches
    for start in range(0, len(log_messages), DEFAULT_BATCH):
        batch = log_messages[start:start + DEFAULT_BATCH]

        probs = np.asarray(_classifier.predict_proba(embed(batch)))
        best = probs.argmax(axis=1)
        confidences = probs[np.arange(len(best)), best]

        for label, conf in zip(classes[best], confidences):
            if conf < CONFIDENCE_THRESHOLD:
                results.append(('Unclassified', float(conf)))
            else:
                results.append((str(label), float(conf)))

    return results
174
-
175
-
176
def get_classes() -> list[str]:
    """Return the classifier's class labels as plain Python strings.

    Labels are converted with ``str`` so the result matches the declared
    ``list[str]`` return type even when ``classes_`` holds NumPy scalars,
    consistent with how ``classify_batch`` converts its labels.
    """
    _load_models()
    return [str(label) for label in _classifier.classes_]
180
-
181
-
182
def is_onnx_mode() -> bool:
    """Report whether the ONNX Runtime backend is active.

    Triggers model loading first, so the answer reflects the backend that
    was actually selected.
    """
    _load_models()
    return _USE_ONNX
186
-
187
-
188
- # ── TEST ────────────────────────────────────────────────────
189
if __name__ == '__main__':
    import time

    # Small smoke-test corpus; the last entry is expected to be Unclassified.
    sample_logs = [
        'GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19',
        'System crashed due to driver errors when restarting the server',
        'Multiple login failures occurred on user 6454 account',
        'Admin access escalation detected for user 9429',
        'CPU usage at 98% for the last 10 minutes on node-7',
        'Backup completed successfully.',
        'User User123 logged in.',
        'Data replication task for shard 14 did not complete',
        'Hey bro chill ya!',  # should be Unclassified
    ]

    print('Single log test:')
    for entry in sample_logs:
        predicted, confidence = classify_with_bert(entry)
        print(f' [{confidence:.0%}] {predicted:25s} | {entry[:60]}')

    print(f'\nMode: {"ONNX 🚀" if is_onnx_mode() else "PyTorch"}')

    # Throughput check on a repeated corpus
    stress_batch = sample_logs * 100
    started = time.perf_counter()
    classify_batch(stress_batch)
    elapsed = time.perf_counter() - started
    n_logs = len(stress_batch)
    print(f'\nSpeed: {n_logs/elapsed:.0f} logs/s ({elapsed*1000/n_logs:.1f}ms/log)')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/processor_llm.py DELETED
@@ -1,192 +0,0 @@
1
- """
2
- processor_llm.py — Tier 3: LLM-based Classifier
3
-
4
- Used for:
5
- - LegacyCRM logs (Workflow Error, Deprecation Warning)
6
- - BERT fallback when confidence < threshold
7
-
8
- Production hardening in V3:
9
- - Timeout (configurable, default 5s)
10
- - Retry with exponential backoff (max 2 retries)
11
- - Explicit failure modes: returns "Unclassified" on all error paths
12
- - Caching for repeated log patterns (hash-based, in-memory)
13
- - Token budget enforcement (max_tokens=15)
14
- """
15
- from __future__ import annotations
16
- import os
17
- import re
18
- import time
19
- import hashlib
20
- import logging
21
- from functools import lru_cache
22
- from typing import Optional
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
# ── Config ─────────────────────────────────────────────────────────────────
HF_TOKEN = os.getenv("HF_TOKEN")  # read once at import time
LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

# The only labels this tier may emit (besides "Unclassified").
VALID_CATEGORIES = ["Workflow Error", "Deprecation Warning"]

# Retry / timeout config
MAX_RETRIES = 2        # retries after the initial attempt
RETRY_DELAY_SEC = 1.0  # first backoff delay; doubles on each retry
REQUEST_TIMEOUT = 5    # seconds — fail fast, do not hang the pipeline

# In-memory cache to avoid redundant LLM calls for repeated logs
_RESPONSE_CACHE: dict[str, str] = {}
MAX_CACHE_SIZE = 1000  # oldest entry is evicted when full (simple FIFO)

SYSTEM_PROMPT = " ".join((
    "You are an enterprise log classifier.",
    "Classify log messages into exactly one category.",
    "Return ONLY the category name — no explanation, no punctuation.",
))

# Few-shot examples shown to the model before the log to classify.
FEW_SHOT_EXAMPLES = [
    {"log": example_log, "label": example_label}
    for example_log, example_label in (
        (
            "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
            "Workflow Error",
        ),
        (
            "The 'BulkEmailSender' feature is no longer supported. Use 'EmailCampaignManager' instead.",
            "Deprecation Warning",
        ),
        (
            "Invoice generation aborted for order ID 8910 due to invalid tax calculation module.",
            "Workflow Error",
        ),
    )
]
61
-
62
-
63
- # ── Cache helpers ────────────────────────────────────────────────────────────
64
def _cache_key(log_msg: str) -> str:
    """Return a stable cache key for ``log_msg``.

    The key is the MD5 hex digest of the whitespace-stripped message. MD5
    serves only as a fast fingerprint here, so it is flagged
    ``usedforsecurity=False``; this keeps the call usable on builds that
    restrict MD5 for security purposes. The digest value is unchanged.
    """
    payload = log_msg.strip().encode("utf-8")
    return hashlib.md5(payload, usedforsecurity=False).hexdigest()
66
-
67
-
68
def _cache_get(log_msg: str) -> Optional[str]:
    """Return the cached label for ``log_msg``, or ``None`` on a miss."""
    key = _cache_key(log_msg)
    return _RESPONSE_CACHE.get(key)
70
-
71
-
72
def _cache_set(log_msg: str, label: str) -> None:
    """Store ``label`` for ``log_msg``, evicting the oldest entry if full.

    Eviction is FIFO (dicts preserve insertion order) and only happens when
    a *new* key is inserted into a full cache. Overwriting an existing key
    does not grow the cache, so it must not evict an unrelated entry.
    """
    key = _cache_key(log_msg)
    if key not in _RESPONSE_CACHE and len(_RESPONSE_CACHE) >= MAX_CACHE_SIZE:
        # Evict the oldest (first inserted) key
        oldest = next(iter(_RESPONSE_CACHE))
        del _RESPONSE_CACHE[oldest]
    _RESPONSE_CACHE[key] = label
79
-
80
-
81
def get_cache_stats() -> dict:
    """Return the current entry count and the configured capacity of the cache."""
    current_size = len(_RESPONSE_CACHE)
    return dict(size=current_size, max_size=MAX_CACHE_SIZE)
83
-
84
-
85
- # ── Prompt builder ───────────────────────────────────────────────────────────
86
def _build_messages(log_msg: str) -> list[dict]:
    """Build the chat messages (system + user) for one classification request.

    The user message lists the valid categories, then every few-shot
    example as a ``Log:``/``Category:`` pair, and ends with the log to
    classify followed by an open ``Category:`` line.
    """
    quoted_categories = ", ".join(f'"{name}"' for name in VALID_CATEGORIES)

    parts = [
        f'Classify the following log into one of these categories: {quoted_categories}.\n'
        'If none fits, return "Unclassified".\n\n'
    ]
    parts.extend(
        f'Log: {shot["log"]}\nCategory: {shot["label"]}\n\n'
        for shot in FEW_SHOT_EXAMPLES
    )
    parts.append(f"Log: {log_msg}\nCategory:")

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": "".join(parts)}
    return [system_message, user_message]
100
-
101
-
102
- # ── Normalize raw LLM output ─────────────────────────────────────────────────
103
def _normalize(raw: str) -> str:
    """Map raw LLM output to a valid category or 'Unclassified'.

    Surrounding whitespace and quotes are stripped, then the text is
    matched case-insensitively against ``VALID_CATEGORIES`` by substring;
    the first category (in list order) found in the text wins. Empty or
    missing output (``None``) maps to ``"Unclassified"`` instead of raising.
    """
    if not raw:
        return "Unclassified"

    # Lowercase once rather than on every loop iteration.
    cleaned = raw.strip().strip('"').strip("'").lower()
    for cat in VALID_CATEGORIES:
        if cat.lower() in cleaned:
            return cat
    return "Unclassified"
110
-
111
-
112
- # ── Main classify function ────────────────────────────────────────────────────
113
def classify_with_llm(log_msg: str) -> str:
    """Classify ``log_msg`` with the Tier 3 LLM.

    Flow:
      1. Return the cached label if this message was classified before.
      2. Return "Unclassified" immediately when ``HF_TOKEN`` is not set.
      3. Otherwise call the chat-completion API, making up to
         ``MAX_RETRIES + 1`` attempts with a delay that doubles after each
         failure. A completion with no content counts as a failed attempt.
      4. Normalize the raw answer, cache it, and return it.

    Only the API call and response extraction sit inside the ``try``, so a
    failure while normalizing or caching is not mistaken for a transient
    API error and retried.

    Returns:
        A label from ``VALID_CATEGORIES``, or "Unclassified" when nothing
        matches or every attempt fails.
    """
    # ── Cache hit ────────────────────────────────────────────────────────────
    cached = _cache_get(log_msg)
    if cached is not None:
        logger.debug("[LLM] Cache hit for: %s", log_msg[:60])
        return cached

    # ── Inference with retry ─────────────────────────────────────────────────
    if not HF_TOKEN:
        logger.warning("[LLM] HF_TOKEN not set — returning Unclassified")
        return "Unclassified"

    from huggingface_hub import InferenceClient

    client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
    messages = _build_messages(log_msg)  # identical for every attempt
    delay = RETRY_DELAY_SEC
    total_attempts = MAX_RETRIES + 1  # initial attempt + retries

    for attempt in range(1, total_attempts + 1):
        try:
            response = client.chat.completions.create(
                model=LLM_MODEL,
                messages=messages,
                max_tokens=15,
                temperature=0.1,
            )
            raw = response.choices[0].message.content
            if raw is None:
                raise ValueError("LLM returned an empty completion")
        except Exception as e:
            # Boundary around a remote call: log the failure, then retry or give up.
            if attempt < total_attempts:
                logger.warning(
                    "[LLM] Attempt %d failed (%s), retrying in %.1fs…", attempt, e, delay
                )
                time.sleep(delay)
                delay *= 2  # exponential backoff
            else:
                logger.error(
                    "[LLM] All %d attempts failed. Last error: %s", total_attempts, e
                )
            continue

        label = _normalize(raw)
        _cache_set(log_msg, label)
        logger.debug("[LLM] Attempt %d: '%s' → '%s'", attempt, raw.strip(), label)
        return label

    return "Unclassified"
165
-
166
-
167
- # ── Batch classify (serial — LLM is already rate-limited) ────────────────────
168
def classify_batch_llm(log_msgs: list[str]) -> list[str]:
    """Run every message through ``classify_with_llm``, one at a time and in order."""
    return list(map(classify_with_llm, log_msgs))
171
-
172
-
173
- # ── CLI test ─────────────────────────────────────────────────────────────────
174
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # One example per category, plus one that should come back Unclassified.
    sample_logs = [
        "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
        "The 'ReportGenerator' module will be retired in version 4.0. Migrate to 'AdvancedAnalyticsSuite'.",
        "System reboot initiated by user 12345.",  # should be Unclassified
    ]
    for entry in sample_logs:
        verdict = classify_with_llm(entry)
        print(f"{verdict:25s} | {entry[:80]}")

    # Re-classify the first log to time the cache path
    print("\n── Cache hit test ──")
    started = time.perf_counter()
    classify_with_llm(sample_logs[0])
    finished = time.perf_counter()
    print(f"Cache hit latency: {(finished-started)*1000:.2f}ms")
    print(f"Cache stats: {get_cache_stats()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/processor_regex.py DELETED
@@ -1,220 +0,0 @@
1
- """
2
- processor_regex.py — Tier 1: Rule-based Classifier
3
-
4
- Target coverage: 40%+ (up from 15%)
5
- Latency: sub-millisecond per log
6
-
7
- New pattern groups added:
8
- - HTTP request/response logs (was completely missing!)
9
- - Auth / credential events (login failures, MFA, lockouts)
10
- - System/infra events (disk, CPU, memory, cron)
11
- - Network / firewall events (IP block, port scan)
12
- - Structured error codes (ERROR, CRITICAL prefix logs)
13
- """
14
- from __future__ import annotations
15
- import re
16
- import time
17
- from typing import Optional
18
-
19
# ---------------------------------------------------------------------------
# Pattern registry: (pattern_source, label)
# Order matters — the first matching pattern wins, so more specific patterns
# come FIRST. The Critical Error group is therefore listed before the generic
# Error group: otherwise a line such as "FATAL: database connection failed"
# would be labelled "Error" by the broader Error patterns.
# ---------------------------------------------------------------------------
_RAW_PATTERNS: list[tuple[str, str]] = [

    # ── HTTP Status ─────────────────────────────────────────────────────────
    # Covers: GET/POST/PUT/DELETE/PATCH + status code in request line
    (r"\b(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s+\S+\s+HTTP/\d", "HTTP Status"),
    # Nova / OpenStack style
    (r"nova\.\S+\s+(GET|POST|PUT|DELETE)\s+\S+\s+HTTP/\d", "HTTP Status"),
    # Status code only style: "returned HTTP 200" or "status: 404"
    (r"\bstatus[:\s]+\d{3}\b", "HTTP Status"),
    (r"\breturned\s+HTTP\s+\d{3}\b", "HTTP Status"),
    (r"\bHTTP\s+status\s+code\s*[:-]?\s*\d{3}\b", "HTTP Status"),
    # API response style
    (r"\bAPI\s+(call|request)\s+\S+\s+completed\s+with\s+status\s+\d{3}", "HTTP Status"),
    (r"\bEndpoint\s+\S+\s+responded\s+with\s+code\s+\d{3}", "HTTP Status"),

    # ── Security Alert ──────────────────────────────────────────────────────
    # Brute force / login failures
    (r"(multiple\s+)?(bad\s+|failed?\s+)?login\s+(failure|attempt|failures)", "Security Alert"),
    (r"brute[\s_-]force\s+(login|attack|attempt)", "Security Alert"),
    # Unauthorized access
    (r"unauthorized\s+(access|admin|privilege|attempt)", "Security Alert"),
    (r"access\s+denied\s+(for|to)\s+(user|ip|host)", "Security Alert"),
    # Privilege escalation
    (r"(admin\s+)?access\s+escalation\s+detected", "Security Alert"),
    (r"privilege\s+(elev|escalat)", "Security Alert"),
    # IP blocking / suspicious traffic
    (r"IP\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+blocked", "Security Alert"),
    (r"(suspicious|anomalous)\s+(login|traffic|activity|request)", "Security Alert"),
    (r"potential\s+(DDoS|attack|breach|intrusion)", "Security Alert"),
    (r"security\s+breach\s+suspected", "Security Alert"),
    (r"(API\s+security\s+breach|bypass\s+API\s+security)", "Security Alert"),
    (r"port\s+scan\s+(detected|attempt)", "Security Alert"),

    # ── User Action ─────────────────────────────────────────────────────────
    (r"User\s+\w+\d*\s+logged\s+(in|out)", "User Action"),
    (r"Account\s+(with\s+)?ID\s+\S+\s+created\s+by", "User Action"),
    (r"User\s+\w+\d*\s+(updated\s+profile|changed\s+password|enabled\s+two|downloaded|exported)", "User Action"),
    (r"(New\s+user|user\s+\w+\d*)\s+registered", "User Action"),
    (r"Account\s+\S+\s+deleted\s+by\s+(administrator|admin)", "User Action"),
    (r"User\s+\w+\d*\s+(tried|attempted)", "User Action"),

    # ── System Notification ─────────────────────────────────────────────────
    # Backup events
    (r"Backup\s+(started|ended|completed\s+successfully|failed|aborted)", "System Notification"),
    (r"System\s+updated\s+to\s+version", "System Notification"),
    (r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user", "System Notification"),
    (r"Disk\s+cleanup\s+completed\s+successfully", "System Notification"),
    (r"System\s+reboot\s+initiated\s+by\s+user", "System Notification"),
    (r"Scheduled\s+maintenance\s+(started|completed)", "System Notification"),
    (r"Service\s+\w+\s+restarted\s+successfully", "System Notification"),
    # Cache, cron, health check, cert, log rotation
    (r"Cache\s+cleared\s+successfully", "System Notification"),
    (r"Log\s+rotation\s+completed", "System Notification"),
    (r"Health\s+check\s+(passed|failed)\s+for\s+service", "System Notification"),
    (r"Certificate\s+(renewed|expired|revoked)\s+successfully", "System Notification"),
    (r"Cron\s+job\s+\S+\s+(executed|failed|completed)\s+successfully", "System Notification"),
    (r"(Disk|Storage)\s+(usage|space)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
    (r"CPU\s+usage\s+at\s+\d+%", "System Notification"),
    (r"Memory\s+(usage|limit)\s+(at|reached|exceeded)\s+\d+%", "System Notification"),
    # Deployment / config
    (r"Deployment\s+(of|for)\s+\S+\s+(completed|failed|started)", "System Notification"),
    (r"Configuration\s+(reloaded|updated|applied)\s+successfully", "System Notification"),

    # ── Critical Error (checked before the generic Error group) ─────────────
    (r"\bCRITICAL\b", "Critical Error"),
    # Leading \b so words that merely end in "fatal"/"panic" do not match.
    (r"\b(FATAL|PANIC)\b", "Critical Error"),
    (r"(data\s+loss|data\s+corruption)\s+(detected|occurred)", "Critical Error"),
    (r"(cluster|node|shard)\s+(failure|crashed|went\s+down)", "Critical Error"),
    (r"(catastrophic|unrecoverable)\s+(failure|error)", "Critical Error"),
    (r"kernel\s+panic", "Critical Error"),
    (r"out[\s-]of[\s-](memory|disk)\s+(error|killed|OOM)", "Critical Error"),

    # ── Error ───────────────────────────────────────────────────────────────
    (r"\bERROR\b.*\b(exception|failed|failure|crash|timeout|unavailable)\b", "Error"),
    (r"System\s+crashed\s+due\s+to", "Error"),
    (r"(connection|request|task|job)\s+(timed?\s*out|timeout)", "Error"),
    (r"service\s+\S+\s+(is\s+down|unavailable|unreachable)", "Error"),
    (r"database\s+connection\s+(failed|refused|lost|dropped)", "Error"),
    (r"disk\s+(I/O\s+)?failure", "Error"),
    (r"driver\s+error(s)?\s+(when|during|on)", "Error"),
    (r"(replication|sync)\s+task\s+(did\s+not\s+complete|failed)", "Error"),
    (r"null\s+pointer|segmentation\s+fault|stack\s+overflow", "Error"),
]
106
-
107
# Compile every pattern once at import time (case-insensitive) so that
# classification never pays the compilation cost per call.
REGEX_PATTERNS: list[tuple[re.Pattern, str]] = [
    (re.compile(source, flags=re.IGNORECASE), category)
    for source, category in _RAW_PATTERNS
]
112
-
113
-
114
def classify_with_regex(log_message: str) -> Optional[str]:
    """
    Tier 1: rule-based classifier.

    Scans ``REGEX_PATTERNS`` in order and returns the label of the first
    pattern found anywhere in ``log_message``, or None if nothing matches.
    """
    matching_labels = (
        label
        for pattern, label in REGEX_PATTERNS
        if pattern.search(log_message)
    )
    return next(matching_labels, None)
124
-
125
-
126
def get_regex_coverage(log_messages: list[str]) -> dict:
    """Measure how many messages the regex tier labels, with a per-label count.

    Returns a dict with ``total``, ``matched``, ``missed``, ``coverage_pct``
    (rounded to two decimals; 0.0 for empty input) and ``label_breakdown``.
    """
    breakdown: dict[str, int] = {}
    unmatched = 0

    for message in log_messages:
        category = classify_with_regex(message)
        if not category:
            unmatched += 1
            continue
        breakdown[category] = breakdown.get(category, 0) + 1

    total = len(log_messages)
    hits = total - unmatched
    coverage = round(hits / total * 100, 2) if total else 0.0

    return {
        "total": total,
        "matched": hits,
        "missed": unmatched,
        "coverage_pct": coverage,
        "label_breakdown": breakdown,
    }
148
-
149
-
150
def benchmark_regex(log_messages: list[str], runs: int = 3) -> dict:
    """Measure regex tier latency (p50 / p95 / p99 / mean) over multiple runs.

    Every message is timed individually on each run. Percentiles are read
    from the sorted samples at index ``int(len * q)``.

    Args:
        log_messages: messages to classify.
        runs: number of passes over ``log_messages``.

    Returns:
        Dict with ``p50_ms``, ``p95_ms``, ``p99_ms`` and ``mean_ms``, each
        rounded to four decimals. When there is nothing to time (empty
        input or ``runs <= 0``) every value is 0.0 rather than raising.
    """
    import statistics

    per_log_ms: list[float] = []
    for _ in range(runs):
        for msg in log_messages:
            t0 = time.perf_counter()
            classify_with_regex(msg)
            per_log_ms.append((time.perf_counter() - t0) * 1000)

    if not per_log_ms:
        # statistics.median/mean raise on empty data; report zeros instead.
        return {"p50_ms": 0.0, "p95_ms": 0.0, "p99_ms": 0.0, "mean_ms": 0.0}

    per_log_ms.sort()
    sample_count = len(per_log_ms)
    return {
        "p50_ms": round(statistics.median(per_log_ms), 4),
        "p95_ms": round(per_log_ms[int(sample_count * 0.95)], 4),
        "p99_ms": round(per_log_ms[int(sample_count * 0.99)], 4),
        "mean_ms": round(statistics.mean(per_log_ms), 4),
    }
168
-
169
-
170
- # ── CLI self-test ────────────────────────────────────────────────────────────
171
if __name__ == "__main__":
    # (log line, expected label) pairs; None means "no regex should match".
    cases: list[tuple[str, str]] = [
        # HTTP
        ("GET /api/v2/resource HTTP/1.1 status: 200 len: 1583 time: 0.19", "HTTP Status"),
        ("POST /v1/users HTTP/1.1 status: 201 len: 42 time: 0.05", "HTTP Status"),
        ("nova.osapi_compute.wsgi.server GET /v2/servers/detail HTTP/1.1 status: 404", "HTTP Status"),
        # Security
        ("Multiple login failures occurred on user 6454 account", "Security Alert"),
        ("IP 192.168.133.114 blocked due to potential attack", "Security Alert"),
        ("Brute force login attempt from 10.0.0.5 detected", "Security Alert"),
        ("Admin access escalation detected for user 9429", "Security Alert"),
        # User Action
        ("User User12345 logged in.", "User Action"),
        ("Account with ID 456 created by Admin.", "User Action"),
        # System Notification
        ("Backup completed successfully.", "System Notification"),
        ("CPU usage at 98% for the last 10 minutes on node-7", "System Notification"),
        ("Health check passed for service payments-api", "System Notification"),
        # Error
        ("System crashed due to disk I/O failure on node-3", "Error"),
        ("Database connection failed after 3 retries", "Error"),
        # Critical
        ("CRITICAL: data corruption detected on shard-14", "Critical Error"),
        ("kernel panic: not syncing: VFS: unable to mount root fs", "Critical Error"),
        # Should be None (unmatched)
        ("The 'BulkEmailSender' feature will be deprecated in v5.0.", None),
        ("Case escalation for ticket 7324 failed.", None),
    ]

    passed = 0
    print(f"{'Expected':<22} {'Got':<22} {'✓/✗'} | Log")
    print("─" * 100)
    for line, want in cases:
        actual = classify_with_regex(line)
        matches = actual == want
        passed += matches
        mark = "✓" if matches else "✗"
        print(f"{str(want):<22} {str(actual):<22} {mark} | {line[:55]}")

    print(f"\n{passed}/{len(cases)} correct")

    # Coverage demo
    corpus = [line for line, _ in cases]
    coverage = get_regex_coverage(corpus)
    print(f"\nCoverage: {coverage['coverage_pct']}% ({coverage['matched']}/{coverage['total']} matched)")
    print("Label breakdown:", coverage["label_breakdown"])

    # Latency benchmark
    latency = benchmark_regex(corpus * 100)
    print(f"\nLatency (p50/p95/p99): {latency['p50_ms']}ms / {latency['p95_ms']}ms / {latency['p99_ms']}ms")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_space/requirements.txt DELETED
@@ -1,25 +0,0 @@
1
- # Core
2
- gradio>=4.44.0
3
- pandas>=2.0.0
4
- numpy>=1.24.0
5
- joblib>=1.3.0
6
- scikit-learn>=1.3.0
7
-
8
- # Embedding + BERT
9
- sentence-transformers>=2.7.0
10
- transformers>=4.38.0
11
-
12
- # ONNX (optional, 3-5x speedup)
13
- onnxruntime>=1.17.0
14
- optimum[onnxruntime]>=1.16.0
15
-
16
- # LLM
17
- huggingface-hub>=0.21.0
18
-
19
- # FastAPI (production API)
20
- fastapi>=0.110.0
21
- uvicorn[standard]>=0.29.0
22
- pydantic>=2.0.0
23
-
24
- # Observability
25
- psutil>=5.9.0