NOT-OMEGA commited on
Commit
9f35272
·
verified ·
1 Parent(s): 8e569c7

Update processor_bert.py

Browse files
Files changed (1) hide show
  1. processor_bert.py +1 -214
processor_bert.py CHANGED
@@ -1,18 +1,3 @@
1
- """
2
- processor_bert_fast.py — ONNX Runtime powered BERT classifier
3
- Speed: 82 logs/s → 3200+ logs/s
4
-
5
- How it works:
6
- 1. ONNX Runtime: 3-5x faster than standard PyTorch
7
- 2. Batch processing: 64 logs processed concurrently
8
- 3. Pre-allocated buffers: Zero memory waste
9
- """
10
- from __future__ import annotations
11
- import os
12
- import threading
13
- import numpy as np
14
- import joblib
15
-
16
  # ── Configuration & State ──────────────────────────────────────────────
17
  _USE_ONNX = False
18
  _embedding_model = None
@@ -21,202 +6,4 @@ _ort_session = None
21
  _ort_tokenizer = None
22
  _model_ready = False
23
  _load_lock = threading.Lock()
24
-
25
- MODEL_PATH = os.path.join(os.path.dirname(__file__), 'models', 'log_classifier.joblib')
26
- ONNX_DIR = os.path.join(os.path.dirname(__file__), 'models', 'onnx')
27
- CONFIDENCE_THRESHOLD = 0.30
28
- DEFAULT_BATCH = 512
29
-
30
-
31
- def preload_models():
32
- """Lazily load models — thread-safe, strict single initialization."""
33
- global _USE_ONNX, _embedding_model, _classifier, _ort_session, _ort_tokenizer, _model_ready
34
-
35
- # 🚨 GOOGLE-LEVEL FIX: Everything critical must be INSIDE the lock
36
- with _load_lock:
37
- if _classifier is not None:
38
- return # Already loaded
39
-
40
- print("Initializing BERT pipeline...")
41
-
42
- # ── Load Classifier ────────────────────────────────────────────
43
- if not os.path.exists(MODEL_PATH):
44
- raise FileNotFoundError(
45
- f'Model not found: {MODEL_PATH}\n'
46
- 'Please run the training notebook and download the model first.'
47
- )
48
- _classifier = joblib.load(MODEL_PATH)
49
-
50
- # ── Try ONNX (Fast Mode), Fallback to PyTorch ──────────────────
51
- onnx_model_file = os.path.join(ONNX_DIR, 'model.onnx')
52
-
53
- if os.path.exists(onnx_model_file):
54
- try:
55
- import onnxruntime as ort
56
- from transformers import AutoTokenizer
57
-
58
- # CPU optimized session options
59
- sess_opts = ort.SessionOptions()
60
- sess_opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
61
- sess_opts.intra_op_num_threads = os.cpu_count() or 1
62
- sess_opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
63
-
64
- _ort_session = ort.InferenceSession(
65
- onnx_model_file,
66
- sess_options=sess_opts,
67
- providers=['CPUExecutionProvider']
68
- )
69
- _ort_tokenizer = AutoTokenizer.from_pretrained(ONNX_DIR)
70
- _USE_ONNX = True
71
- print('[BERT] ✅ ONNX Runtime loaded — FAST MODE')
72
-
73
- except Exception as e:
74
- print(f'[BERT] ONNX load failed ({e}), fallback to PyTorch')
75
- _USE_ONNX = False
76
-
77
- if not _USE_ONNX:
78
- from sentence_transformers import SentenceTransformer
79
- _embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
80
- print('[BERT] ⚠️ PyTorch mode active (install ONNX for 3-5x speedup)')
81
-
82
- _model_ready = True
83
- print('[BERT] ✅ Models ready!')
84
-
85
- # Map legacy function name to new one for backward compatibility
86
- _load_models = preload_models
87
-
88
-
89
- def _embed_onnx(texts: list[str]) -> np.ndarray:
90
- """Generate embeddings using ONNX Runtime — FAST."""
91
- inputs = _ort_tokenizer(
92
- texts,
93
- padding=True,
94
- truncation=True,
95
- max_length=128,
96
- return_tensors='np' # NumPy directly (faster than PyTorch tensors)
97
- )
98
-
99
- # ONNX session run
100
- ort_inputs = {
101
- 'input_ids': inputs['input_ids'].astype(np.int64),
102
- 'attention_mask': inputs['attention_mask'].astype(np.int64),
103
- }
104
- if 'token_type_ids' in [i.name for i in _ort_session.get_inputs()]:
105
- ort_inputs['token_type_ids'] = inputs.get(
106
- 'token_type_ids', np.zeros_like(inputs['input_ids'])
107
- ).astype(np.int64)
108
-
109
- outputs = _ort_session.run(None, ort_inputs)
110
- hidden = outputs[0] # (batch, seq_len, hidden)
111
-
112
- # Mean pooling (attention mask weighted)
113
- mask = inputs['attention_mask'][:, :, None].astype(np.float32)
114
- summed = (hidden * mask).sum(axis=1)
115
- counts = mask.sum(axis=1)
116
- embeddings = summed / counts
117
-
118
- # L2 normalize
119
- norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
120
- return embeddings / (norms + 1e-8)
121
-
122
-
123
- def _embed_pytorch(texts: list[str]) -> np.ndarray:
124
- """PyTorch fallback for embeddings."""
125
- return _embedding_model.encode(
126
- texts,
127
- batch_size=DEFAULT_BATCH,
128
- convert_to_numpy=True,
129
- normalize_embeddings=True,
130
- show_progress_bar=False
131
- )
132
-
133
-
134
- # ── PUBLIC API ──────────────────────────────────────────────
135
-
136
- def classify_with_bert(log_message: str) -> tuple[str, float]:
137
- """
138
- Classify a single log.
139
- Returns: (label, confidence)
140
- """
141
- preload_models()
142
- results = classify_batch([log_message])
143
- return results[0]
144
-
145
-
146
- def classify_batch(log_messages: list[str]) -> list[tuple[str, float]]:
147
- """
148
- Classify multiple logs concurrently.
149
- Returns: list of (label, confidence) tuples
150
- """
151
- preload_models()
152
-
153
- if not log_messages:
154
- return []
155
-
156
- results = []
157
-
158
- # Process in batches
159
- for i in range(0, len(log_messages), DEFAULT_BATCH):
160
- batch = log_messages[i:i + DEFAULT_BATCH]
161
-
162
- # Generate embeddings
163
- if _USE_ONNX:
164
- embeddings = _embed_onnx(batch)
165
- else:
166
- embeddings = _embed_pytorch(batch)
167
-
168
- # Classify
169
- probs = _classifier.predict_proba(embeddings)
170
- max_probs = probs.max(axis=1)
171
- labels = _classifier.predict(embeddings)
172
-
173
- for label, conf in zip(labels, max_probs):
174
- if conf < CONFIDENCE_THRESHOLD:
175
- results.append(('Unclassified', float(conf)))
176
- else:
177
- results.append((str(label), float(conf)))
178
-
179
- return results
180
-
181
-
182
- def get_classes() -> list[str]:
183
- """Return the list of classes from the classifier."""
184
- preload_models()
185
- return list(_classifier.classes_)
186
-
187
-
188
- def is_onnx_mode() -> bool:
189
- """Check if ONNX execution provider is active."""
190
- preload_models()
191
- return _USE_ONNX
192
-
193
-
194
- # ── TEST ────────────────────────────────────────────────────
195
- if __name__ == '__main__':
196
- import time
197
-
198
- test_logs = [
199
- 'GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19',
200
- 'System crashed due to driver errors when restarting the server',
201
- 'Multiple login failures occurred on user 6454 account',
202
- 'Admin access escalation detected for user 9429',
203
- 'CPU usage at 98% for the last 10 minutes on node-7',
204
- 'Backup completed successfully.',
205
- 'User User123 logged in.',
206
- 'Data replication task for shard 14 did not complete',
207
- 'Hey bro chill ya!', # should be Unclassified
208
- ]
209
-
210
- print('Single log test:')
211
- for log in test_logs:
212
- label, conf = classify_with_bert(log)
213
- print(f' [{conf:.0%}] {label:25s} | {log[:60]}')
214
-
215
- print(f'\nMode: {"ONNX 🚀" if is_onnx_mode() else "PyTorch"}')
216
-
217
- # Speed test
218
- big_batch = test_logs * 100
219
- t0 = time.perf_counter()
220
- classify_batch(big_batch)
221
- elapsed = time.perf_counter() - t0
222
- print(f'\nSpeed: {len(big_batch)/elapsed:.0f} logs/s ({elapsed*1000/len(big_batch):.1f}ms/log)')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ── Configuration & State ──────────────────────────────────────────────
2
  _USE_ONNX = False
3
  _embedding_model = None
 
6
  _ort_tokenizer = None
7
  _model_ready = False
8
  _load_lock = threading.Lock()
9
+ _pytorch_lock = threading.Lock() # FIX: Added lock for thread-safe fallback inference