File size: 7,968 Bytes
2222383
 
 
 
 
 
 
 
 
 
32a28a7
2222383
f17175f
2222383
 
 
 
 
 
 
 
 
f17175f
 
2222383
 
 
 
 
 
 
f17175f
c96bf7e
 
 
 
 
a075d7d
c96bf7e
 
 
 
 
 
 
 
a075d7d
2222383
f17175f
 
2222383
f17175f
 
 
2222383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f17175f
 
 
2222383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
"""
processor_bert_fast.py β€” ONNX Runtime powered BERT classifier
Speed: 82 logs/s β†’ 2000+ logs/s

Kaise kaam karta hai:
1. ONNX Runtime: Normal PyTorch se 3-5x faster
2. Batch processing: 64 logs ek saath process
3. Pre-allocated buffers: Memory waste nahi
"""
from __future__ import annotations
from transformers import pipeline
import os
import threading
import numpy as np
import joblib

# ── Check karo kaunsa method use karna hai ──────────────────
_USE_ONNX = False
_embedding_model = None
_classifier       = None
_ort_session      = None
_ort_tokenizer    = None
_model_ready      = False
_load_lock        = threading.Lock()

MODEL_PATH    = os.path.join(os.path.dirname(__file__), 'models', 'log_classifier.joblib')
ONNX_DIR      = os.path.join(os.path.dirname(__file__), 'models', 'onnx')
CONFIDENCE_THRESHOLD = 0.30
DEFAULT_BATCH = 64


def preload_models():
    """Lazily load models β€” thread-safe, strict single initialization."""
    global _classifier, _model_ready
    
    with _load_lock:
        if _classifier is None:
            print("Initializing BERT pipeline...") # Yahan change kiya
            
            _classifier = pipeline(
                task="text-classification",
                model=MODEL_NAME,
                device=-1, # CPU
                top_k=1
            )
            _model_ready = True
            print("BERT pipeline ready.") # Yahan change kiya
def _load_models():
    """Lazily load models β€” thread-safe, sirf ek baar load hoga."""
    global _USE_ONNX, _embedding_model, _classifier, _ort_session, _ort_tokenizer, _model_ready

    with _load_lock:
        if _classifier is not None:
            return  # Already loaded

    # ── Classifier load karo ───────────────────────────────
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(
            f'Model nahi mila: {MODEL_PATH}\n'
            'Pehle Colab notebook run karo aur model download karo.'
        )
    _classifier = joblib.load(MODEL_PATH)

    # ── ONNX try karo (fast), fallback to PyTorch ──────────
    onnx_model_file = os.path.join(ONNX_DIR, 'model.onnx')

    if os.path.exists(onnx_model_file):
        try:
            import onnxruntime as ort
            from transformers import AutoTokenizer

            # CPU optimized session options
            sess_opts = ort.SessionOptions()
            sess_opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
            sess_opts.intra_op_num_threads = os.cpu_count()
            sess_opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL

            _ort_session = ort.InferenceSession(
                onnx_model_file,
                sess_options=sess_opts,
                providers=['CPUExecutionProvider']
            )
            _ort_tokenizer = AutoTokenizer.from_pretrained(ONNX_DIR)
            _USE_ONNX = True
            print('[BERT] βœ… ONNX Runtime loaded β€” FAST MODE')

        except Exception as e:
            print(f'[BERT] ONNX load failed ({e}), fallback to PyTorch')
            _USE_ONNX = False

    if not _USE_ONNX:
        from sentence_transformers import SentenceTransformer
        _embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        print('[BERT] ⚠️  PyTorch mode (install ONNX for 3-5x speedup)')

    _model_ready = True
    print('[BERT] βœ… Models ready!')


def _embed_onnx(texts: list[str]) -> np.ndarray:
    """ONNX Runtime se embeddings generate karo β€” FAST."""
    import torch

    inputs = _ort_tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors='np'  # NumPy directly (faster than PyTorch tensors)
    )

    # ONNX session run
    ort_inputs = {
        'input_ids':      inputs['input_ids'].astype(np.int64),
        'attention_mask': inputs['attention_mask'].astype(np.int64),
    }
    if 'token_type_ids' in [i.name for i in _ort_session.get_inputs()]:
        ort_inputs['token_type_ids'] = inputs.get(
            'token_type_ids', np.zeros_like(inputs['input_ids'])
        ).astype(np.int64)

    outputs = _ort_session.run(None, ort_inputs)
    hidden  = outputs[0]  # (batch, seq_len, hidden)

    # Mean pooling (attention mask weighted)
    mask    = inputs['attention_mask'][:, :, None].astype(np.float32)
    summed  = (hidden * mask).sum(axis=1)
    counts  = mask.sum(axis=1)
    embeddings = summed / counts

    # L2 normalize
    norms  = np.linalg.norm(embeddings, axis=1, keepdims=True)
    return  embeddings / (norms + 1e-8)


def _embed_pytorch(texts: list[str]) -> np.ndarray:
    """PyTorch fallback."""
    return _embedding_model.encode(
        texts,
        batch_size=DEFAULT_BATCH,
        convert_to_numpy=True,
        normalize_embeddings=True,
        show_progress_bar=False
    )


# ── PUBLIC API ──────────────────────────────────────────────

def classify_with_bert(log_message: str) -> tuple[str, float]:
    """
    Single log classify karo.
    Returns: (label, confidence)
    """
    _load_models()
    results = classify_batch([log_message])
    return results[0]


def classify_batch(log_messages: list[str]) -> list[tuple[str, float]]:
    """
    Multiple logs ek saath classify karo β€” MUCH FASTER!
    Returns: list of (label, confidence) tuples
    
    Example:
        results = classify_batch(['log1', 'log2', 'log3'])
        for label, conf in results:
            print(f'{label}: {conf:.1%}')
    """
    _load_models()

    if not log_messages:
        return []

    results = []

    # Process in batches
    for i in range(0, len(log_messages), DEFAULT_BATCH):
        batch = log_messages[i:i + DEFAULT_BATCH]

        # Generate embeddings
        if _USE_ONNX:
            embeddings = _embed_onnx(batch)
        else:
            embeddings = _embed_pytorch(batch)

        # Classify
        probs   = _classifier.predict_proba(embeddings)
        max_probs = probs.max(axis=1)
        labels    = _classifier.predict(embeddings)

        for label, conf in zip(labels, max_probs):
            if conf < CONFIDENCE_THRESHOLD:
                results.append(('Unclassified', float(conf)))
            else:
                results.append((str(label), float(conf)))

    return results


def get_classes() -> list[str]:
    """Classifier ke classes return karo."""
    _load_models()
    return list(_classifier.classes_)


def is_onnx_mode() -> bool:
    """Check karo ONNX use ho raha hai ya nahi."""
    _load_models()
    return _USE_ONNX


# ── TEST ────────────────────────────────────────────────────
if __name__ == '__main__':
    import time

    test_logs = [
        'GET /v2/servers/detail HTTP/1.1 status: 404 len: 1583 time: 0.19',
        'System crashed due to driver errors when restarting the server',
        'Multiple login failures occurred on user 6454 account',
        'Admin access escalation detected for user 9429',
        'CPU usage at 98% for the last 10 minutes on node-7',
        'Backup completed successfully.',
        'User User123 logged in.',
        'Data replication task for shard 14 did not complete',
        'Hey bro chill ya!',     # should be Unclassified
    ]

    print('Single log test:')
    for log in test_logs:
        label, conf = classify_with_bert(log)
        print(f'  [{conf:.0%}] {label:25s} | {log[:60]}')

    print(f'\nMode: {"ONNX πŸš€" if is_onnx_mode() else "PyTorch"}')

    # Speed test
    big_batch = test_logs * 100
    t0 = time.perf_counter()
    classify_batch(big_batch)
    elapsed = time.perf_counter() - t0
    print(f'\nSpeed: {len(big_batch)/elapsed:.0f} logs/s  ({elapsed*1000/len(big_batch):.1f}ms/log)')